From 9b616dd566a67734ce20bcf8da1e99a4b7ed92b2 Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Thu, 12 May 2022 14:03:12 -0700 Subject: [PATCH 1/3] remove unused imports --- .../azure/synapse/ml/cognitive/split1/OpenAISuite.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index aa770ad423..df3b3058a8 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -5,12 +5,9 @@ package com.microsoft.azure.synapse.ml.cognitive.split1 import com.microsoft.azure.synapse.ml.Secrets import com.microsoft.azure.synapse.ml.cognitive._ -import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} -import org.apache.spark.ml.NamespaceInjections.pipelineModel import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.scalactic.Equality From ad733ec5ec883d8e0a6a584b164401215154e99b Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Tue, 21 Jun 2022 16:34:50 -0700 Subject: [PATCH 2/3] batch prompts --- .../azure/synapse/ml/cognitive/OpenAI.scala | 63 +++++++++++++++-- .../ml/cognitive/split1/OpenAISuite.scala | 67 +++++++++++++++++-- .../spark/ml/param/UntypedArrayParam.scala | 5 ++ 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala index 719e607803..9f91750959 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala @@ -34,7 +34,7 @@ trait HasSetServiceName extends Wrappable with HasURL { trait HasPrompt extends HasServiceParams { val prompt: ServiceParam[String] = new ServiceParam[String]( - this, "prompt", "The text to complete", isRequired = true) + this, "prompt", "The text to complete", isRequired = false) def getPrompt: String = getScalarParam(prompt) @@ -45,6 +45,45 @@ trait HasPrompt extends HasServiceParams { def setPromptCol(v: String): this.type = setVectorParam(prompt, v) } +trait HasBatchPrompt extends HasServiceParams { + val batchPrompt: ServiceParam[Seq[String]] = new ServiceParam[Seq[String]]( + this, "batchPrompt", "Sequence of prompts to complete", isRequired = false) + + def getBatchPrompt: Seq[String] = getScalarParam(batchPrompt) + + def setBatchPrompt(v: Seq[String]): this.type = setScalarParam(batchPrompt, v) + + def getBatchPromptCol: String = getVectorParam(batchPrompt) + + def setBatchPromptCol(v: String): this.type = setVectorParam(batchPrompt, v) +} + +trait HasIndexPrompt extends HasServiceParams { + val indexPrompt: ServiceParam[Seq[Int]] = new ServiceParam[Seq[Int]]( + this, "indexPrompt", "Sequence of indexes to complete", isRequired = false) + + def getIndexPrompt: Seq[Int] = getScalarParam(indexPrompt) + + def setIndexPrompt(v: Seq[Int]): this.type = setScalarParam(indexPrompt, v) + + def getIndexPromptCol: String = getVectorParam(indexPrompt) + + def setIndexPromptCol(v: String): this.type = 
setVectorParam(indexPrompt, v) +} + +trait HasBatchIndexPrompt extends HasServiceParams { + val batchIndexPrompt: ServiceParam[Seq[Seq[Int]]] = new ServiceParam[Seq[Seq[Int]]]( + this, "batchIndexPrompt", "Sequence of index sequences to complete", isRequired = false) + + def getBatchIndexPrompt: Seq[Seq[Int]] = getScalarParam(batchIndexPrompt) + + def setBatchIndexPrompt(v: Seq[Seq[Int]]): this.type = setScalarParam(batchIndexPrompt, v) + + def getBatchIndexPromptCol: String = getVectorParam(batchIndexPrompt) + + def setBatchIndexPromptCol(v: String): this.type = setVectorParam(batchIndexPrompt, v) +} + trait HasAPIVersion extends HasServiceParams { val apiVersion: ServiceParam[String] = new ServiceParam[String]( this, "apiVersion", "version of the api", isRequired = true, isURLParam = true) { @@ -93,7 +132,8 @@ trait HasMaxTokens extends HasServiceParams { } trait HasOpenAIParams extends HasServiceParams - with HasSetServiceName with HasPrompt with HasAPIVersion with HasDeploymentName with HasMaxTokens { + with HasSetServiceName with HasPrompt with HasBatchPrompt with HasIndexPrompt with HasBatchIndexPrompt + with HasAPIVersion with HasDeploymentName with HasMaxTokens { val temperature: ServiceParam[Double] = new ServiceParam[Double]( this, "temperature", @@ -299,14 +339,25 @@ class OpenAICompletion(override val uid: String) extends CognitiveServicesBase(u getValueOpt(r, logProbs).map(v => ("logprobs", v)) ).flatten).toMap - getValueOpt(r, prompt).map { prompt => - val fullPayload = optionalParams.updated("prompt", prompt) - new StringEntity(fullPayload.toJson.compactPrint, ContentType.APPLICATION_JSON) - } + getValueOpt(r, prompt) + .map(prompt => getStringEntity(prompt, optionalParams)) + .orElse(getValueOpt(r, batchPrompt) + .map(batchPrompt => getStringEntity(batchPrompt, optionalParams))) + .orElse(getValueOpt(r, indexPrompt) + .map(indexPrompt => getStringEntity(indexPrompt, optionalParams))) + .orElse(getValueOpt(r, batchIndexPrompt) + .map(batchIndexPrompt => getStringEntity(batchIndexPrompt, optionalParams))) + .orElse(throw new IllegalArgumentException( + "Please set one of prompt, batchPrompt, indexPrompt or batchIndexPrompt.")) } override val subscriptionKeyHeaderName: String = "api-key" override def responseDataType: DataType = CompletionResponse.schema + private[this] def getStringEntity[A](prompt: A, optionalParams: Map[String, Any]): StringEntity = { + val fullPayload = optionalParams.updated("prompt", prompt) + new StringEntity(fullPayload.toJson.compactPrint, ContentType.APPLICATION_JSON) + } + } diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index df3b3058a8..88a0fc592d 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -5,15 +5,15 @@ package com.microsoft.azure.synapse.ml.cognitive.split1 import com.microsoft.azure.synapse.ml.Secrets import com.microsoft.azure.synapse.ml.cognitive._ -import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.base.Flaky import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality 
trait OpenAIAPIKey { lazy val openAIAPIKey: String = sys.env.getOrElse("OPENAI_API_KEY", Secrets.OpenAIApiKey) - lazy val openAIServiceName: String = "bugbashtest6" + lazy val openAIServiceName: String = "m3test11" } class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with OpenAIAPIKey with Flaky { @@ -29,17 +29,72 @@ class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with Op .setPromptCol("prompt") .setOutputCol("out") + lazy val promptCompletion: OpenAICompletion = newCompletion.setPromptCol("prompt") + lazy val batchPromptCompletion: OpenAICompletion = newCompletion.setBatchPromptCol("batchPrompt") + lazy val indexPromptCompletion: OpenAICompletion = newCompletion.setIndexPromptCol("indexPrompt") + lazy val batchIndexPromptCompletion: OpenAICompletion = newCompletion.setBatchIndexPromptCol("batchIndexPrompt") + lazy val df: DataFrame = Seq( "Once upon a time", "Best programming language award goes to", "SynapseML is " ).toDF("prompt") + lazy val promptDF: DataFrame = Seq( + "Once upon a time", + "Best programming language award goes to", + "SynapseML is " + ).toDF("prompt") + + lazy val batchPromptDF: DataFrame = Seq( + Seq( + "This is a test", + "Now is the time", + "Knock, knock") + ).toDF("batchPrompt") + + lazy val indexPromptDF: DataFrame = Seq( + Seq(3, 1, 5, 4) + ).toDF("indexPrompt") + + lazy val batchIndexPromptDF: DataFrame = Seq( + Seq( + Seq(1, 8, 4, 2), + Seq(7, 3, 8, 5, 9), + Seq(8, 0, 11, 3, 14, 1)) + ).toDF("batchIndexPrompt") + test("Basic Usage") { + testCompletion(promptCompletion, promptDF) + } + + test("Batch Prompt") { + testCompletion(batchPromptCompletion, batchPromptDF) + } + + test("Index Prompt") { + testCompletion(indexPromptCompletion, indexPromptDF) + } + + test("Batch Index Prompt") { + testCompletion(batchIndexPromptCompletion, batchIndexPromptDF) + } + + def testCompletion(completion: OpenAICompletion, df: DataFrame, requiredLength: Int = 10): Unit = { val fromRow = CompletionResponse.makeFromRowConverter -// completion.transform(df).collect().map(r => -// fromRow(r.getAs[Row]("out")).choices.head.text.length > 10) - completion.transform(df).show(truncate=false) + completion.transform(df).collect().map(r => + fromRow(r.getAs[Row]("out")).choices.map(c => + assert(c.text.length > requiredLength))) + } + + def newCompletion(): OpenAICompletion = { + new OpenAICompletion() + .setSubscriptionKey(openAIAPIKey) + .setDeploymentName("text-davinci-001") + .setServiceName(openAIServiceName) + .setMaxTokens(20) + .setLogProbs(5) + .setOutputCol("out") } override def assertDFEq(df1: DataFrame, df2: DataFrame)(implicit eq: Equality[DataFrame]): Unit = { diff --git a/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala index a08ffde5bd..d543e7027d 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala @@ -4,6 +4,7 @@ package org.apache.spark.ml.param import org.apache.spark.annotation.DeveloperApi +import spray.json.DefaultJsonProtocol.{listFormat, mapFormat, seqFormat} import spray.json.{DefaultJsonProtocol, JsValue, JsonFormat} import scala.collection.JavaConverters._ import spray.json._ @@ -17,6 +18,8 @@ object AnyJsonFormat extends DefaultJsonProtocol { case v: String => v.toJson case v: Boolean => v.toJson case v: Integer => v.toLong.toJson + case v: Seq[_] => seqFormat[Any].write(v) + case v: Map[String, _] => mapFormat[String, 
Any].write(v) case _ => throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}") } @@ -31,6 +34,8 @@ object AnyJsonFormat extends DefaultJsonProtocol { } case v: JsString => v.value case v: JsBoolean => v.value + case v: JsArray => listFormat[Any].read(value) + case v: JsObject => mapFormat[String, Any].read(value) case _ => throw new IllegalArgumentException(s"Cannot deserialize ${value}") } } From 54b001f6644d83a6912df18db7d0770b79275d77 Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Fri, 24 Jun 2022 15:08:42 -0700 Subject: [PATCH 3/3] delete debugging code --- .../azure/synapse/ml/cognitive/split1/OpenAISuite.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index ffd906a86d..034a96d438 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -82,13 +82,9 @@ class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with Op def testCompletion(completion: OpenAICompletion, df: DataFrame, requiredLength: Int = 10): Unit = { val fromRow = CompletionResponse.makeFromRowConverter - val t = completion.transform(df) - t.collect().map(r => + completion.transform(df).collect().map(r => fromRow(r.getAs[Row]("out")).choices.map(c => assert(c.text.length > requiredLength))) - /*completion.transform(df).collect().map(r => - fromRow(r.getAs[Row]("out")).choices.map(c => - assert(c.text.length > requiredLength)))*/ } def newCompletion(): OpenAICompletion = {
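
Usage sketch (illustrative, not part of the patch series): with the new HasBatchPrompt trait from PATCH 2/3, a single DataFrame row can carry a whole sequence of prompts that is serialized into the "prompt" field of one completion request. The resource name and API-key environment variable below are placeholders, and a SparkSession named `spark` is assumed; the setters are the ones added in this patch series.

    import com.microsoft.azure.synapse.ml.cognitive.OpenAICompletion
    import spark.implicits._

    // One row whose "batchPrompt" cell holds several prompts; OpenAICompletion
    // sends the whole sequence in a single request instead of one call per prompt.
    val batchDF = Seq(
      Seq("Once upon a time", "Knock, knock", "SynapseML is")
    ).toDF("batchPrompt")

    val completion = new OpenAICompletion()
      .setSubscriptionKey(sys.env("OPENAI_API_KEY"))  // placeholder key source
      .setServiceName("my-openai-resource")           // placeholder Azure OpenAI resource
      .setDeploymentName("text-davinci-001")
      .setMaxTokens(20)
      .setBatchPromptCol("batchPrompt")
      .setOutputCol("out")

    completion.transform(batchDF).show(truncate = false)

If none of prompt, batchPrompt, indexPrompt, or batchIndexPrompt is set, prepareEntity throws an IllegalArgumentException, so exactly one of the four columns (or scalar params) should be provided per transformer instance.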