WIP
mhamilton723 committed Mar 9, 2021
1 parent ac7c924 commit e047df3
Showing 85 changed files with 384 additions and 361 deletions.
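
Most of the hunks below apply a single mechanical rename: the test suites stop referring to the shared SparkSession as `session` and use the conventional name `spark` instead. The remaining hunks drop the `TestBase.Extended` tag from several tests and simplify error handling in the test generator. The `TestBase` trait that supplies the session handle is among the 85 changed files but is not rendered on this page; the sketch below shows what the renamed member presumably looks like. It is an assumption for illustration only: the trait name `SparkSessionProvider`, the builder settings, and the app name are invented, not the repository's code.

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SparkSession

    // Hypothetical sketch, not the repository's actual TestBase: the suites in
    // this diff reference `spark` (and `sc`), so the shared test base
    // presumably exposes the session under the conventional name.
    trait SparkSessionProvider {
      lazy val spark: SparkSession = SparkSession.builder()
        .master("local[*]")           // local session for tests (assumption)
        .appName("MMLSparkTestSuite") // invented app name
        .getOrCreate()

      lazy val sc: SparkContext = spark.sparkContext // e.g. for sc.emptyRDD[Row]
    }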
@@ -18,7 +18,7 @@ class VerifyFindBestModel extends EstimatorFuzzing[FindBestModel]{
   val mockLabelColumn = "Label"

   def createMockDataset: DataFrame = {
-    session.createDataFrame(Seq(
+    spark.createDataFrame(Seq(
       (0, 2, 0.50, 0.60, 0),
       (1, 3, 0.40, 0.50, 1),
       (0, 4, 0.78, 0.99, 2),
@@ -23,7 +23,7 @@ class VerifyTuneHyperparameters extends Benchmarks {

   val mockLabelColumn = "Label"
   def createMockDataset: DataFrame = {
-    session.createDataFrame(Seq(
+    spark.createDataFrame(Seq(
       (0, 2, 0.50, 0.60, 0),
       (1, 3, 0.40, 0.50, 1),
       (0, 4, 0.78, 0.99, 2),
@@ -65,15 +65,15 @@ class VerifyTuneHyperparameters extends Benchmarks {
   verifyBinaryCsv("bank.train.csv", "y", 2, false)
   verifyBinaryCsv("TelescopeData.csv", " Class", 2, false)

-  test("Compare benchmark results file to generated file", TestBase.Extended) {
+  test("Compare benchmark results file to generated file") {
     verifyBenchmarks()
   }

   def verifyBinaryCsv(fileName: String,
                       labelCol: String,
                       decimals: Int,
                       includeNaiveBayes: Boolean): Unit = {
-    test("Verify classifier can be trained and scored on " + fileName, TestBase.Extended) {
+    test("Verify classifier can be trained and scored on " + fileName) {
       val fileLocation = DatasetUtils.binaryTrainFile(fileName).toString
       val bestModel = tuneDataset(fileName, labelCol, fileLocation, true, includeNaiveBayes)
       val bestMetric = bestModel.bestMetric
@@ -85,7 +85,7 @@ class VerifyTuneHyperparameters extends Benchmarks {
                           labelCol: String,
                           decimals: Int,
                           includeNaiveBayes: Boolean): Unit = {
-    test("Verify classifier can be trained and scored on multiclass " + fileName, TestBase.Extended) {
+    test("Verify classifier can be trained and scored on multiclass " + fileName) {
       val fileLocation = DatasetUtils.multiclassTrainFile(fileName).toString
       val bestModel = tuneDataset(fileName, labelCol, fileLocation, false, includeNaiveBayes)
       val bestMetric = bestModel.bestMetric
@@ -100,7 +100,7 @@ class VerifyTuneHyperparameters extends Benchmarks {
                   includeNaiveBayes: Boolean): TuneHyperparametersModel = {
     // TODO: Add other file types for testing
     val dataset: DataFrame =
-      session.read.format("com.databricks.spark.csv")
+      spark.read.format("com.databricks.spark.csv")
         .option("header", "true").option("inferSchema", "true")
         .option("treatEmptyValuesAsNulls", "false")
         .option("delimiter", if (fileName.endsWith(".csv")) "," else "\t")
20 changes: 10 additions & 10 deletions src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
@@ -26,7 +26,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   // TODO: Move away from getTempDirectoryPath and have TestBase provide one

   def testModel(minibatchSize: Int = 10): CNTKModel = {
-    session // make sure session is loaded
+    spark // make sure session is loaded
     new CNTKModel()
       .setModelLocation(modelPath)
       .setInputCol(inputCol)
@@ -45,7 +45,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   }

   def testModelDouble(minibatchSize: Int = 10): CNTKModel = {
-    session // make sure session is loaded
+    spark // make sure session is loaded
     new CNTKModel()
       .setModelLocation(doubleModelFile.toString)
       .setInputCol(inputCol)
@@ -54,9 +54,9 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputNodeIndex(0)
   }

-  lazy val images = testImages(session)
+  lazy val images = testImages(spark)

-  import session.implicits._
+  import spark.implicits._

   private def checkParameters(minibatchSize: Int) = {
     val model = testModel(minibatchSize)
@@ -66,7 +66,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin

   test("A CNTK model should be able to support setting the input and output node") {
     val model = testModel().setInputNodeIndex(0)
-    val data = makeFakeData(session, 30, featureVectorLength)
+    val data = makeFakeData(spark, 30, featureVectorLength)
     val result = model.transform(data)
     assert(result.select(outputCol).count() == 30)
   }
@@ -78,7 +78,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputCol(outputCol)
       .setOutputNode("z")

-    val data = makeFakeData(session, 10, featureVectorLength).coalesce(1)
+    val data = makeFakeData(spark, 10, featureVectorLength).coalesce(1)
     val result = model.transform(data)
     assert(result.select(outputCol).collect()(0).getAs[DenseVector](0).size == 10)
     assert(result.select(outputCol).count() == 10)
@@ -91,14 +91,14 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
       .setOutputNode("nonexistant-node")
       .setModelLocation(modelPath)

-    val data = makeFakeData(session, 3, featureVectorLength)
+    val data = makeFakeData(spark, 3, featureVectorLength)
     intercept[IllegalArgumentException] {
       model.transform(data).collect()
     }
   }

   def testCNN(model: CNTKModel, doubleInput: Boolean, shape: Int = featureVectorLength): Unit = {
-    val data = makeFakeData(session, 3, shape, doubleInput)
+    val data = makeFakeData(spark, 3, shape, doubleInput)
     val result = model.transform(data)
     assert(result.select(outputCol).collect()(0).getAs[DenseVector](0).size == 10)
     assert(result.count() == 3)
@@ -138,7 +138,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin

   test("A CNTK model should output Vectors and interop with other estimators") {
     val model = testModel()
-    val data = makeFakeData(session, 3, featureVectorLength, outputDouble = true)
+    val data = makeFakeData(spark, 3, featureVectorLength, outputDouble = true)
     val result = model.transform(data)
     assert(result.select(outputCol).schema.fields(0).dataType == VectorType)

@@ -163,7 +163,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
   }

   test("A CNTK model should work on an empty dataframe") {
-    val images = session.createDataFrame(
+    val images = spark.createDataFrame(
       sc.emptyRDD[Row], new StructType().add(inputCol, ArrayType(FloatType, false)))
     val model = testModel()
     val result = model.transform(images)
5 changes: 1 addition & 4 deletions src/test/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
@@ -78,10 +78,7 @@ object TestGen {
       ltc.genTestClass()
     } catch {
       case _: NotImplementedError =>
-        println(s"Could not generate test for ${ltc.testClassName} because of Complex Parameters")
-      case e: Throwable =>
-        println(s"Could not generate test for ${ltc.testClassName} because:")
-        e.printStackTrace()
+        println(s"ERROR: Could not generate test for ${ltc.testClassName} because of Complex Parameters")
     }
   }
 }
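
Note on the hunk above: with the `case e: Throwable` branch removed, only `NotImplementedError` is still caught, so any other exception thrown by `genTestClass()` now propagates and fails the generation run instead of being printed and swallowed. A minimal sketch of the resulting control flow; `tryGenTest` is a hypothetical wrapper, not a function in the repository:

    // Hypothetical wrapper illustrating the simplified error handling above.
    def tryGenTest(testClassName: String)(genTestClass: => Unit): Unit =
      try {
        genTestClass
      } catch {
        case _: NotImplementedError =>
          // Classes with complex parameters are skipped with a log line...
          println(s"ERROR: Could not generate test for $testClassName because of Complex Parameters")
        // ...while any other Throwable now escapes to the caller.
      }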
@@ -17,7 +17,7 @@ trait AnomalyKey {

 trait AnomalyDetectorSuiteBase extends TestBase with AnomalyKey{

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1972-01-01T00:00:00Z", 826.0),
@@ -144,7 +144,7 @@ class SimpleDetectAnomaliesSuite extends TransformerFuzzing[SimpleDetectAnomalie
     ("1973-03-01T00:00:00Z", 9000.0)
   )

-  import session.implicits._
+  import spark.implicits._

   lazy val sdf: DataFrame = baseSeq.map(p => (p._1,p._2,1.0))
     .++(baseSeq.map(p => (p._1,p._2,2.0)))
@@ -22,7 +22,7 @@ trait CognitiveKey {

 trait OCRUtils extends TestBase {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
@@ -91,7 +91,7 @@ class OCRSuite extends TransformerFuzzing[OCR] with CognitiveKey with Flaky with

 class AnalyzeImageSuite extends TransformerFuzzing[AnalyzeImage] with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
@@ -306,7 +306,7 @@ class ReadSuite extends TransformerFuzzing[Read]
 class RecognizeDomainSpecificContentSuite extends TransformerFuzzing[RecognizeDomainSpecificContent]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
@@ -362,7 +362,7 @@ class RecognizeDomainSpecificContentSuite extends TransformerFuzzing[RecognizeDo
 class GenerateThumbnailsSuite extends TransformerFuzzing[GenerateThumbnails]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -405,7 +405,7 @@ class GenerateThumbnailsSuite extends TransformerFuzzing[GenerateThumbnails]

 class TagImageSuite extends TransformerFuzzing[TagImage] with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -461,7 +461,7 @@ class TagImageSuite extends TransformerFuzzing[TagImage] with CognitiveKey with
 class DescribeImageSuite extends TransformerFuzzing[DescribeImage]
   with CognitiveKey with Flaky {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -15,7 +15,7 @@ import org.scalatest.Assertion

 class DetectFaceSuite extends TransformerFuzzing[DetectFace] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
@@ -56,7 +56,7 @@ class DetectFaceSuite extends TransformerFuzzing[DetectFace] with CognitiveKey {

 class FindSimilarFaceSuite extends TransformerFuzzing[FindSimilarFace] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -106,7 +106,7 @@ class FindSimilarFaceSuite extends TransformerFuzzing[FindSimilarFace] with Cogn

 class GroupFacesSuite extends TransformerFuzzing[GroupFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -155,7 +155,7 @@ class GroupFacesSuite extends TransformerFuzzing[GroupFaces] with CognitiveKey {

 class IdentifyFacesSuite extends TransformerFuzzing[IdentifyFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val satyaFaces = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
@@ -236,7 +236,7 @@ class IdentifyFacesSuite extends TransformerFuzzing[IdentifyFaces] with Cognitiv

 class VerifyFacesSuite extends TransformerFuzzing[VerifyFaces] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
@@ -18,7 +18,7 @@ trait HasImageSearchKey {
 class ImageSearchSuite extends TransformerFuzzing[BingImageSearch]
   with HasImageSearchKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val offsets: Seq[Int] = (0 to 1).map(_ * 10)
   lazy val searchQueries = List("Elephant", "African Elephant",
@@ -19,7 +19,7 @@ trait TextKey {

 class LanguageDetectorSuite extends TransformerFuzzing[LanguageDetectorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "Hello World",
@@ -72,7 +72,7 @@ class LanguageDetectorSuite extends TransformerFuzzing[LanguageDetectorV2] with

 class LanguageDetectorV3Suite extends TransformerFuzzing[LanguageDetector] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     "Hello World",
@@ -104,7 +104,7 @@ class LanguageDetectorV3Suite extends TransformerFuzzing[LanguageDetector] with

 class EntityDetectorSuite extends TransformerFuzzing[EntityDetectorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "Microsoft released Windows 10"),
@@ -134,7 +134,7 @@ class EntityDetectorSuite extends TransformerFuzzing[EntityDetectorV2] with Text

 class EntityDetectorSuiteV3 extends TransformerFuzzing[EntityDetector] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "Microsoft released Windows 10"),
@@ -164,7 +164,7 @@ class EntityDetectorSuiteV3 extends TransformerFuzzing[EntityDetector] with Text
 }

 trait TextSentimentBaseSuite extends TestBase with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -253,7 +253,7 @@ class TextSentimentSuite extends TransformerFuzzing[TextSentimentV2] with TextSe

 class KeyPhraseExtractorSuite extends TransformerFuzzing[KeyPhraseExtractorV2] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -287,7 +287,7 @@ class KeyPhraseExtractorSuite extends TransformerFuzzing[KeyPhraseExtractorV2] w

 class KeyPhraseExtractorV3Suite extends TransformerFuzzing[KeyPhraseExtractor] with TextKey {

-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("en", "Hello world. This is some input text that I love."),
@@ -320,7 +320,7 @@ class KeyPhraseExtractorV3Suite extends TransformerFuzzing[KeyPhraseExtractor] w
 }

 class NERSuite extends TransformerFuzzing[NERV2] with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "en", "Jeff bought three dozen eggs because there was a 50% discount."),
@@ -359,7 +359,7 @@ class NERSuite extends TransformerFuzzing[NERV2] with TextKey {
 }

 class NERSuiteV3 extends TransformerFuzzing[NER] with TextKey {
-  import session.implicits._
+  import spark.implicits._

   lazy val df: DataFrame = Seq(
     ("1", "en", "I had a wonderful trip to Seattle last week."),
@@ -25,7 +25,7 @@ trait AzureSearchKey {
 class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister
   with TransformerFuzzing[AddDocuments] with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   private val testServiceName = "mmlspark-azure-search"
@@ -16,7 +16,7 @@ import org.scalactic.Equality
 class SpeechToTextSuite extends TransformerFuzzing[SpeechToText]
   with CognitiveKey {

-  import session.implicits._
+  import spark.implicits._

   val region = "eastus"
   val resourcesDir = System.getProperty("user.dir") + "/src/test/resources/"
[Diff truncated: the remaining changed files were not rendered on this page.]