From 9b616dd566a67734ce20bcf8da1e99a4b7ed92b2 Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Thu, 12 May 2022 14:03:12 -0700 Subject: [PATCH 1/3] remove unused imports --- .../azure/synapse/ml/cognitive/split1/OpenAISuite.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index aa770ad423..df3b3058a8 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -5,12 +5,9 @@ package com.microsoft.azure.synapse.ml.cognitive.split1 import com.microsoft.azure.synapse.ml.Secrets import com.microsoft.azure.synapse.ml.cognitive._ -import com.microsoft.azure.synapse.ml.core.spark.FluentAPI._ import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} -import org.apache.spark.ml.NamespaceInjections.pipelineModel import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.scalactic.Equality From ad733ec5ec883d8e0a6a584b164401215154e99b Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Tue, 21 Jun 2022 16:34:50 -0700 Subject: [PATCH 2/3] batch prompts --- .../azure/synapse/ml/cognitive/OpenAI.scala | 63 +++++++++++++++-- .../ml/cognitive/split1/OpenAISuite.scala | 67 +++++++++++++++++-- .../spark/ml/param/UntypedArrayParam.scala | 5 ++ 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala index 719e607803..9f91750959 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/cognitive/OpenAI.scala @@ -34,7 +34,7 @@ trait HasSetServiceName extends Wrappable with HasURL { trait HasPrompt extends HasServiceParams { val prompt: ServiceParam[String] = new ServiceParam[String]( - this, "prompt", "The text to complete", isRequired = true) + this, "prompt", "The text to complete", isRequired = false) def getPrompt: String = getScalarParam(prompt) @@ -45,6 +45,45 @@ trait HasPrompt extends HasServiceParams { def setPromptCol(v: String): this.type = setVectorParam(prompt, v) } +trait HasBatchPrompt extends HasServiceParams { + val batchPrompt: ServiceParam[Seq[String]] = new ServiceParam[Seq[String]]( + this, "batchPrompt", "Sequence of prompts to complete", isRequired = false) + + def getBatchPrompt: Seq[String] = getScalarParam(batchPrompt) + + def setBatchPrompt(v: Seq[String]): this.type = setScalarParam(batchPrompt, v) + + def getBatchPromptCol: String = getVectorParam(batchPrompt) + + def setBatchPromptCol(v: String): this.type = setVectorParam(batchPrompt, v) +} + +trait HasIndexPrompt extends HasServiceParams { + val indexPrompt: ServiceParam[Seq[Int]] = new ServiceParam[Seq[Int]]( + this, "indexPrompt", "Sequence of indexes to complete", isRequired = false) + + def getIndexPrompt: Seq[Int] = getScalarParam(indexPrompt) + + def setIndexPrompt(v: Seq[Int]): this.type = setScalarParam(indexPrompt, v) + + def getIndexPromptCol: String = getVectorParam(indexPrompt) + + def setIndexPromptCol(v: String): this.type = 
setVectorParam(indexPrompt, v) +} + +trait HasBatchIndexPrompt extends HasServiceParams { + val batchIndexPrompt: ServiceParam[Seq[Seq[Int]]] = new ServiceParam[Seq[Seq[Int]]]( + this, "batchIndexPrompt", "Sequence of index sequences to complete", isRequired = false) + + def getBatchIndexPrompt: Seq[Seq[Int]] = getScalarParam(batchIndexPrompt) + + def setBatchIndexPrompt(v: Seq[Seq[Int]]): this.type = setScalarParam(batchIndexPrompt, v) + + def getBatchIndexPromptCol: String = getVectorParam(batchIndexPrompt) + + def setBatchIndexPromptCol(v: String): this.type = setVectorParam(batchIndexPrompt, v) +} + trait HasAPIVersion extends HasServiceParams { val apiVersion: ServiceParam[String] = new ServiceParam[String]( this, "apiVersion", "version of the api", isRequired = true, isURLParam = true) { @@ -93,7 +132,8 @@ trait HasMaxTokens extends HasServiceParams { } trait HasOpenAIParams extends HasServiceParams - with HasSetServiceName with HasPrompt with HasAPIVersion with HasDeploymentName with HasMaxTokens { + with HasSetServiceName with HasPrompt with HasBatchPrompt with HasIndexPrompt with HasBatchIndexPrompt + with HasAPIVersion with HasDeploymentName with HasMaxTokens { val temperature: ServiceParam[Double] = new ServiceParam[Double]( this, "temperature", @@ -299,14 +339,25 @@ class OpenAICompletion(override val uid: String) extends CognitiveServicesBase(u getValueOpt(r, logProbs).map(v => ("logprobs", v)) ).flatten).toMap - getValueOpt(r, prompt).map { prompt => - val fullPayload = optionalParams.updated("prompt", prompt) - new StringEntity(fullPayload.toJson.compactPrint, ContentType.APPLICATION_JSON) - } + getValueOpt(r, prompt) + .map(prompt => getStringEntity(prompt, optionalParams)) + .orElse(getValueOpt(r, batchPrompt) + .map(batchPrompt => getStringEntity(batchPrompt, optionalParams))) + .orElse(getValueOpt(r, indexPrompt) + .map(indexPrompt => getStringEntity(indexPrompt, optionalParams))) + .orElse(getValueOpt(r, batchIndexPrompt) + .map(batchIndexPrompt => getStringEntity(batchIndexPrompt, optionalParams))) + .orElse(throw new IllegalArgumentException( + "Please set one of prompt, batchPrompt, indexPrompt or batchIndexPrompt.")) } override val subscriptionKeyHeaderName: String = "api-key" override def responseDataType: DataType = CompletionResponse.schema + private[this] def getStringEntity[A](prompt: A, optionalParams: Map[String, Any]): StringEntity = { + val fullPayload = optionalParams.updated("prompt", prompt) + new StringEntity(fullPayload.toJson.compactPrint, ContentType.APPLICATION_JSON) + } + } diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index df3b3058a8..88a0fc592d 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -5,15 +5,15 @@ package com.microsoft.azure.synapse.ml.cognitive.split1 import com.microsoft.azure.synapse.ml.Secrets import com.microsoft.azure.synapse.ml.cognitive._ -import com.microsoft.azure.synapse.ml.core.test.base.{Flaky, TestBase} +import com.microsoft.azure.synapse.ml.core.test.base.Flaky import com.microsoft.azure.synapse.ml.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{DataFrame, Row} import org.scalactic.Equality 
trait OpenAIAPIKey { lazy val openAIAPIKey: String = sys.env.getOrElse("OPENAI_API_KEY", Secrets.OpenAIApiKey) - lazy val openAIServiceName: String = "bugbashtest6" + lazy val openAIServiceName: String = "m3test11" } class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with OpenAIAPIKey with Flaky { @@ -29,17 +29,72 @@ class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with Op .setPromptCol("prompt") .setOutputCol("out") + lazy val promptCompletion: OpenAICompletion = newCompletion.setPromptCol("prompt") + lazy val batchPromptCompletion: OpenAICompletion = newCompletion.setBatchPromptCol("batchPrompt") + lazy val indexPromptCompletion: OpenAICompletion = newCompletion.setIndexPromptCol("indexPrompt") + lazy val batchIndexPromptCompletion: OpenAICompletion = newCompletion.setBatchIndexPromptCol("batchIndexPrompt") + lazy val df: DataFrame = Seq( "Once upon a time", "Best programming language award goes to", "SynapseML is " ).toDF("prompt") + lazy val promptDF: DataFrame = Seq( + "Once upon a time", + "Best programming language award goes to", + "SynapseML is " + ).toDF("prompt") + + lazy val batchPromptDF: DataFrame = Seq( + Seq( + "This is a test", + "Now is the time", + "Knock, knock") + ).toDF("batchPrompt") + + lazy val indexPromptDF: DataFrame = Seq( + Seq(3, 1, 5, 4) + ).toDF("indexPrompt") + + lazy val batchIndexPromptDF: DataFrame = Seq( + Seq( + Seq(1, 8, 4, 2), + Seq(7, 3, 8, 5, 9), + Seq(8, 0, 11, 3, 14, 1)) + ).toDF("batchIndexPrompt") + test("Basic Usage") { + testCompletion(promptCompletion, promptDF) + } + + test("Batch Prompt") { + testCompletion(batchPromptCompletion, batchPromptDF) + } + + test("Index Prompt") { + testCompletion(indexPromptCompletion, indexPromptDF) + } + + test("Batch Index Prompt") { + testCompletion(batchIndexPromptCompletion, batchIndexPromptDF) + } + + def testCompletion(completion: OpenAICompletion, df: DataFrame, requiredLength: Int = 10): Unit = { val fromRow = CompletionResponse.makeFromRowConverter -// completion.transform(df).collect().map(r => -// fromRow(r.getAs[Row]("out")).choices.head.text.length > 10) - completion.transform(df).show(truncate=false) + completion.transform(df).collect().map(r => + fromRow(r.getAs[Row]("out")).choices.map(c => + assert(c.text.length > requiredLength))) + } + + def newCompletion(): OpenAICompletion = { + new OpenAICompletion() + .setSubscriptionKey(openAIAPIKey) + .setDeploymentName("text-davinci-001") + .setServiceName(openAIServiceName) + .setMaxTokens(20) + .setLogProbs(5) + .setOutputCol("out") } override def assertDFEq(df1: DataFrame, df2: DataFrame)(implicit eq: Equality[DataFrame]): Unit = { diff --git a/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala index a08ffde5bd..d543e7027d 100644 --- a/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala +++ b/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala @@ -4,6 +4,7 @@ package org.apache.spark.ml.param import org.apache.spark.annotation.DeveloperApi +import spray.json.DefaultJsonProtocol.{listFormat, mapFormat, seqFormat} import spray.json.{DefaultJsonProtocol, JsValue, JsonFormat} import scala.collection.JavaConverters._ import spray.json._ @@ -17,6 +18,8 @@ object AnyJsonFormat extends DefaultJsonProtocol { case v: String => v.toJson case v: Boolean => v.toJson case v: Integer => v.toLong.toJson + case v: Seq[_] => seqFormat[Any].write(v) + case v: Map[String, _] => mapFormat[String, 
Any].write(v) case _ => throw new IllegalArgumentException(s"Cannot serialize ${any} of type ${any.getClass}") } @@ -31,6 +34,8 @@ object AnyJsonFormat extends DefaultJsonProtocol { } case v: JsString => v.value case v: JsBoolean => v.value + case v: JsArray => listFormat[Any].read(value) + case v: JsObject => mapFormat[String, Any].read(value) case _ => throw new IllegalArgumentException(s"Cannot deserialize ${value}") } } From 54b001f6644d83a6912df18db7d0770b79275d77 Mon Sep 17 00:00:00 2001 From: Mark Niehaus Date: Fri, 24 Jun 2022 15:08:42 -0700 Subject: [PATCH 3/3] delete debugging code --- .../azure/synapse/ml/cognitive/split1/OpenAISuite.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala index ffd906a86d..034a96d438 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/cognitive/split1/OpenAISuite.scala @@ -82,13 +82,9 @@ class OpenAICompletionSuite extends TransformerFuzzing[OpenAICompletion] with Op def testCompletion(completion: OpenAICompletion, df: DataFrame, requiredLength: Int = 10): Unit = { val fromRow = CompletionResponse.makeFromRowConverter - val t = completion.transform(df) - t.collect().map(r => + completion.transform(df).collect().map(r => fromRow(r.getAs[Row]("out")).choices.map(c => assert(c.text.length > requiredLength))) - /*completion.transform(df).collect().map(r => - fromRow(r.getAs[Row]("out")).choices.map(c => - assert(c.text.length > requiredLength)))*/ } def newCompletion(): OpenAICompletion = {
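
Usage sketch (illustrative, not part of the patch series): with the new HasBatchPrompt trait from PATCH 2/3, a single DataFrame row can carry a whole sequence of prompts that is serialized into the "prompt" field of one completion request. The resource name and API-key environment variable below are placeholders, and a SparkSession named `spark` is assumed; the setters are the ones added in this patch series.

    import com.microsoft.azure.synapse.ml.cognitive.OpenAICompletion
    import spark.implicits._

    // One row whose "batchPrompt" cell holds several prompts; OpenAICompletion
    // sends the whole sequence in a single request instead of one call per prompt.
    val batchDF = Seq(
      Seq("Once upon a time", "Knock, knock", "SynapseML is")
    ).toDF("batchPrompt")

    val completion = new OpenAICompletion()
      .setSubscriptionKey(sys.env("OPENAI_API_KEY"))  // placeholder key source
      .setServiceName("my-openai-resource")           // placeholder Azure OpenAI resource
      .setDeploymentName("text-davinci-001")
      .setMaxTokens(20)
      .setBatchPromptCol("batchPrompt")
      .setOutputCol("out")

    completion.transform(batchDF).show(truncate = false)

If none of prompt, batchPrompt, indexPrompt, or batchIndexPrompt is set, prepareEntity throws an IllegalArgumentException, so exactly one of the four columns (or scalar params) should be provided per transformer instance.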