WIP
mhamilton723 committed Mar 9, 2021
1 parent ac7c924 commit e047df3
Showing 85 changed files with 384 additions and 361 deletions.
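
Most of the hunks below apply a single mechanical rename: the test suites stop referring to the shared SparkSession as `session` and use the conventional name `spark` instead. The remaining hunks drop the `TestBase.Extended` tag from several tests and simplify error handling in the test generator. The `TestBase` trait that supplies the session handle is among the 85 changed files but is not rendered on this page; the sketch below shows what the renamed member presumably looks like. It is an assumption for illustration only: the trait name `SparkSessionProvider`, the builder settings, and the app name are invented, not the repository's code.

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SparkSession

    // Hypothetical sketch, not the repository's actual TestBase: the suites in
    // this diff reference `spark` (and `sc`), so the shared test base
    // presumably exposes the session under the conventional name.
    trait SparkSessionProvider {
      lazy val spark: SparkSession = SparkSession.builder()
        .master("local[*]")           // local session for tests (assumption)
        .appName("MMLSparkTestSuite") // invented app name
        .getOrCreate()

      lazy val sc: SparkContext = spark.sparkContext // e.g. for sc.emptyRDD[Row]
    }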
@@ -18,7 +18,7 @@ class VerifyFindBestModel extends EstimatorFuzzing[FindBestModel]{
   val mockLabelColumn = "Label"

   def createMockDataset: DataFrame = {
-    session.createDataFrame(Seq(
+    spark.createDataFrame(Seq(
       (0, 2, 0.50, 0.60, 0),
       (1, 3, 0.40, 0.50, 1),
       (0, 4, 0.78, 0.99, 2),
@@ -23,7 +23,7 @@ class VerifyTuneHyperparameters extends Benchmarks {

   val mockLabelColumn = "Label"
   def createMockDataset: DataFrame = {
-    session.createDataFrame(Seq(
+    spark.createDataFrame(Seq(
       (0, 2, 0.50, 0.60, 0),
       (1, 3, 0.40, 0.50, 1),
       (0, 4, 0.78, 0.99, 2),
@@ -65,15 +65,15 @@ class VerifyTuneHyperparameters extends Benchmarks {
   verifyBinaryCsv("bank.train.csv", "y", 2, false)
   verifyBinaryCsv("TelescopeData.csv", " Class", 2, false)

-  test("Compare benchmark results file to generated file", TestBase.Extended) {
+  test("Compare benchmark results file to generated file") {
     verifyBenchmarks()
   }

   def verifyBinaryCsv(fileName: String,
                       labelCol: String,
                       decimals: Int,
                       includeNaiveBayes: Boolean): Unit = {
-    test("Verify classifier can be trained and scored on " + fileName, TestBase.Extended) {
+    test("Verify classifier can be trained and scored on " + fileName) {
       val fileLocation = DatasetUtils.binaryTrainFile(fileName).toString
       val bestModel = tuneDataset(fileName, labelCol, fileLocation, true, includeNaiveBayes)
       val bestMetric = bestModel.bestMetric
@@ -85,7 +85,7 @@ class VerifyTuneHyperparameters extends Benchmarks {
                           labelCol: String,
                           decimals: Int,
                           includeNaiveBayes: Boolean): Unit = {
-    test("Verify classifier can be trained and scored on multiclass " + fileName, TestBase.Extended) {
+    test("Verify classifier can be trained and scored on multiclass " + fileName) {
       val fileLocation = DatasetUtils.multiclassTrainFile(fileName).toString
       val bestModel = tuneDataset(fileName, labelCol, fileLocation, false, includeNaiveBayes)
       val bestMetric = bestModel.bestMetric
@@ -100,7 +100,7 @@ class VerifyTuneHyperparameters extends Benchmarks {
                   includeNaiveBayes: Boolean): TuneHyperparametersModel = {
     // TODO: Add other file types for testing
     val dataset: DataFrame =
-      session.read.format("com.databricks.spark.csv")
+      spark.read.format("com.databricks.spark.csv")
         .option("header", "true").option("inferSchema", "true")
         .option("treatEmptyValuesAsNulls", "false")
         .option("delimiter", if (fileName.endsWith(".csv")) "," else "\t")
20 changes: 10 additions & 10 deletions src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
@@ -26,7 +26,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   // TODO: Move away from getTempDirectoryPath and have TestBase provide one

   def testModel(minibatchSize: Int = 10): CNTKModel = {
-    session // make sure session is loaded
+    spark // make sure session is loaded
     new CNTKModel()
       .setModelLocation(modelPath)
       .setInputCol(inputCol)
@@ -45,7 +45,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   }

   def testModelDouble(minibatchSize: Int = 10): CNTKModel = {
-    session // make sure session is loaded
+    spark // make sure session is loaded
     new CNTKModel()
       .setModelLocation(doubleModelFile.toString)
       .setInputCol(inputCol)
@@ -54,9 +54,9 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputNodeIndex(0)
   }

-  lazy val images = testImages(session)
+  lazy val images = testImages(spark)

-  import session.implicits._
+  import spark.implicits._

   private def checkParameters(minibatchSize: Int) = {
     val model = testModel(minibatchSize)
@@ -66,7 +66,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin

   test("A CNTK model should be able to support setting the input and output node") {
     val model = testModel().setInputNodeIndex(0)
-    val data = makeFakeData(session, 30, featureVectorLength)
+    val data = makeFakeData(spark, 30, featureVectorLength)
     val result = model.transform(data)
     assert(result.select(outputCol).count() == 30)
   }
@@ -78,7 +78,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputCol(outputCol)
       .setOutputNode("z")

-    val data = makeFakeData(session, 10, featureVectorLength).coalesce(1)
+    val data = makeFakeData(spark, 10, featureVectorLength).coalesce(1)
     val result = model.transform(data)
     assert(result.select(outputCol).collect()(0).getAs[DenseVector](0).size == 10)
     assert(result.select(outputCol).count() == 10)
@@ -91,14 +91,14 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputNode("nonexistant-node")
       .setModelLocation(modelPath)

-    val data = makeFakeData(session, 3, featureVectorLength)
+    val data = makeFakeData(spark, 3, featureVectorLength)
     intercept[IllegalArgumentException] {
       model.transform(data).collect()
     }
   }

   def testCNN(model: CNTKModel, doubleInput: Boolean, shape: Int = featureVectorLength): Unit = {
-    val data = makeFakeData(session, 3, shape, doubleInput)
+    val data = makeFakeData(spark, 3, shape, doubleInput)
     val result = model.transform(data)
     assert(result.select(outputCol).collect()(0).getAs[DenseVector](0).size == 10)
     assert(result.count() == 3)
@@ -138,7 +138,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin

   test("A CNTK model should output Vectors and interop with other estimators") {
     val model = testModel()
-    val data = makeFakeData(session, 3, featureVectorLength, outputDouble = true)
+    val data = makeFakeData(spark, 3, featureVectorLength, outputDouble = true)
     val result = model.transform(data)
     assert(result.select(outputCol).schema.fields(0).dataType == VectorType)

@@ -163,7 +163,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   }

   test("A CNTK model should work on an empty dataframe") {
-    val images = session.createDataFrame(
+    val images = spark.createDataFrame(
       sc.emptyRDD[Row], new StructType().add(inputCol, ArrayType(FloatType, false)))
     val model = testModel()
     val result = model.transform(images)
5 changes: 1 addition & 4 deletions src/test/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
@@ -78,10 +78,7 @@ object TestGen {
       ltc.genTestClass()
     } catch {
       case _: NotImplementedError =>
-        println(s"Could not generate test for ${ltc.testClassName} because of Complex Parameters")
-      case e: Throwable =>
-        println(s"Could not generate test for ${ltc.testClassName} because:")
-        e.printStackTrace()
+        println(s"ERROR: Could not generate test for ${ltc.testClassName} because of Complex Parameters")
     }
   }
 }
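
Note on the hunk above: with the `case e: Throwable` branch removed, only `NotImplementedError` is still caught, so any other exception thrown by `genTestClass()` now propagates and fails the generation run instead of being printed and swallowed. A minimal sketch of the resulting control flow; `tryGenTest` is a hypothetical wrapper, not a function in the repository:

    // Hypothetical wrapper illustrating the simplified error handling above.
    def tryGenTest(testClassName: String)(genTestClass: => Unit): Unit =
      try {
        genTestClass
      } catch {
        case _: NotImplementedError =>
          // Classes with complex parameters are skipped with a log line...
          println(s"ERROR: Could not generate test for $testClassName because of Complex Parameters")
        // ...while any other Throwable now escapes to the caller.
      }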
@@ -17,7 +17,7 @@ trait AnomalyKey {

 trait AnomalyDetectorSuiteBase extends TestBase with AnomalyKey{

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1972-01-01T00:00:00Z", 826.0),
@@ -144,7 +144,7 @@ class SimpleDetectAnomaliesSuite extends TransformerFuzzing[SimpleDetectAnomalie
     ("1973-03-01T00:00:00Z", 9000.0)
   )

-  import session.implicits._
+  import spark.implicits._

   lazy val sdf: DataFrame = baseSeq.map(p => (p._1,p._2,1.0))
     .++(baseSeq.map(p => (p._1,p._2,2.0)))
@@ -22,7 +22,7 @@ trait CognitiveKey {

 trait OCRUtils extends TestBase {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
@@ -91,7 +91,7 @@ class OCRSuite extends TransformerFuzzing[OCR] with CognitiveKey with Flaky with

 class AnalyzeImageSuite extends TransformerFuzzing[AnalyzeImage] with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
@@ -306,7 +306,7 @@ class ReadSuite extends TransformerFuzzing[Read]
 class RecognizeDomainSpecificContentSuite extends TransformerFuzzing[RecognizeDomainSpecificContent]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
@@ -362,7 +362,7 @@ class RecognizeDomainSpecificContentSuite extends TransformerFuzzing[RecognizeDo
 class GenerateThumbnailsSuite extends TransformerFuzzing[GenerateThumbnails]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -405,7 +405,7 @@ class GenerateThumbnailsSuite extends TransformerFuzzing[GenerateThumbnails]

 class TagImageSuite extends TransformerFuzzing[TagImage] with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -461,7 +461,7 @@ class TagImageSuite extends TransformerFuzzing[TagImage] with CognitiveKey with
 class DescribeImageSuite extends TransformerFuzzing[DescribeImage]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -15,7 +15,7 @@ import org.scalatest.Assertion

 class DetectFaceSuite extends TransformerFuzzing[DetectFace] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
@@ -56,7 +56,7 @@ class DetectFaceSuite extends TransformerFuzzing[DetectFace] with CognitiveKey {

 class FindSimilarFaceSuite extends TransformerFuzzing[FindSimilarFace] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -106,7 +106,7 @@ class FindSimilarFaceSuite extends TransformerFuzzing[FindSimilarFace] with Cogn

 class GroupFacesSuite extends TransformerFuzzing[GroupFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -155,7 +155,7 @@ class GroupFacesSuite extends TransformerFuzzing[GroupFaces] with CognitiveKey {

 class IdentifyFacesSuite extends TransformerFuzzing[IdentifyFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val satyaFaces = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -236,7 +236,7 @@ class IdentifyFacesSuite extends TransformerFuzzing[IdentifyFaces] with Cognitiv

 class VerifyFacesSuite extends TransformerFuzzing[VerifyFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -18,7 +18,7 @@ trait HasImageSearchKey {
 class ImageSearchSuite extends TransformerFuzzing[BingImageSearch]
   with HasImageSearchKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val offsets: Seq[Int] = (0 to 1).map(_ * 10)
   lazy val searchQueries = List("Elephant", "African Elephant",
@@ -19,7 +19,7 @@ trait TextKey {

 class LanguageDetectorSuite extends TransformerFuzzing[LanguageDetectorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "Hello World",
@@ -72,7 +72,7 @@ class LanguageDetectorSuite extends TransformerFuzzing[LanguageDetectorV2] with

 class LanguageDetectorV3Suite extends TransformerFuzzing[LanguageDetector] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "Hello World",
@@ -104,7 +104,7 @@ class LanguageDetectorV3Suite extends TransformerFuzzing[LanguageDetector] with

 class EntityDetectorSuite extends TransformerFuzzing[EntityDetectorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "Microsoft released Windows 10"),
@@ -134,7 +134,7 @@ class EntityDetectorSuite extends TransformerFuzzing[EntityDetectorV2] with Text

 class EntityDetectorSuiteV3 extends TransformerFuzzing[EntityDetector] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "Microsoft released Windows 10"),
@@ -164,7 +164,7 @@ class EntityDetectorSuiteV3 extends TransformerFuzzing[EntityDetector] with Text
 }

 trait TextSentimentBaseSuite extends TestBase with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -253,7 +253,7 @@ class TextSentimentSuite extends TransformerFuzzing[TextSentimentV2] with TextSe

 class KeyPhraseExtractorSuite extends TransformerFuzzing[KeyPhraseExtractorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -287,7 +287,7 @@ class KeyPhraseExtractorSuite extends TransformerFuzzing[KeyPhraseExtractorV2] w

 class KeyPhraseExtractorV3Suite extends TransformerFuzzing[KeyPhraseExtractor] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -320,7 +320,7 @@ class KeyPhraseExtractorV3Suite extends TransformerFuzzing[KeyPhraseExtractor] w
 }

 class NERSuite extends TransformerFuzzing[NERV2] with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "en", "Jeff bought three dozen eggs because there was a 50% discount."),
@@ -359,7 +359,7 @@ class NERSuite extends TransformerFuzzing[NERV2] with TextKey {
 }

 class NERSuiteV3 extends TransformerFuzzing[NER] with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "en", "I had a wonderful trip to Seattle last week."),
@@ -25,7 +25,7 @@ trait AzureSearchKey {
 class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister
   with TransformerFuzzing[AddDocuments] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   private val testServiceName = "mmlspark-azure-search"
@@ -16,7 +16,7 @@ import org.scalactic.Equality
 class SpeechToTextSuite extends TransformerFuzzing[SpeechToText]
   with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   val region = "eastus"
   val resourcesDir = System.getProperty("user.dir") + "/src/test/resources/"
[Diff truncated: the remaining changed files were not rendered on this page.]