Commit
fix: remove exclusion for ExplanationDashboard notebook (#1531)
* remove test exclusions: draft

* fix nits

* cleanup for PR

* nit

* Add required argument to pipeline's AzureCLI tasks

* fix bugs

* do interactive shell stuff only for Azure Spark Machine runtime

* fix style check

* fix isolation forest notebook

* fix style checks

* Update core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseTests.scala

Co-authored-by: Puneet Pruthi <[email protected]>
ppruthi and Puneet Pruthi authored Jun 16, 2022
1 parent 1c1b2e1 commit d38c824
Showing 5 changed files with 72 additions and 38 deletions.
@@ -46,7 +46,6 @@ class SynapseTests extends TestBase {

val resourcesDirectory = new File(getClass.getResource("/").toURI)
val notebooksDir = new File(resourcesDirectory, "generated-notebooks")
println(s"Notebooks dir: $notebooksDir")
FileUtils.deleteDirectory(notebooksDir)
assert(notebooksDir.mkdirs())

@@ -109,17 +108,17 @@ class SynapseTests extends TestBase {
.filterNot(_.getAbsolutePath.contains("CyberML"))
.filterNot(_.getAbsolutePath.contains("VowpalWabbitOverview"))
.filterNot(_.getAbsolutePath.contains("IsolationForest"))
.filterNot(_.getAbsolutePath.contains("ExplanationDashboard"))
.filterNot(_.getAbsolutePath.contains("DeepLearning"))
.filterNot(_.getAbsolutePath.contains("InterpretabilitySnowLeopardDetection"))
.sortBy(_.getAbsolutePath)

selectedPythonFiles.foreach(println)
assert(selectedPythonFiles.length > 1)

val expectedPoolCount: Int = selectedPythonFiles.length

println("SynapseTests E2E Test Suite starting...")
assert(expectedPoolCount >= 1)
println(s"SynapseTests E2E Test Suite starting on ${expectedPoolCount} notebook(s)...")
selectedPythonFiles.foreach(println)

// Cleanup old stray spark pools lying around due to ungraceful test shutdown
tryDeleteOldSparkPools()

println(s"Creating $expectedPoolCount Spark Pools...")
@@ -187,7 +187,7 @@ object SynapseUtilities {
val dest = s"$Folder/${notebook.getName}"
exec(s"az storage fs file upload " +
s" -s ${notebook.getAbsolutePath} -p $dest -f $StorageContainer " +
s" --overwrite true " +
" --overwrite true " +
s" --account-name $StorageAccount --account-key ${Secrets.SynapseStorageKey}")
val abfssPath = s"abfss://$StorageContainer@$StorageAccount.dfs.core.windows.net/$dest"

@@ -197,6 +197,7 @@
"org.scalactic:scalactic_2.12",
"org.scalatest:scalatest_2.12",
"org.slf4j:slf4j-api").mkString(",")
val packages: String = s"com.microsoft.azure:synapseml_2.12:${BuildInfo.version}"
val runName = abfssPath.split('/').last.replace(".py", "")
val livyPayload: String =
s"""
@@ -210,7 +211,7 @@
| "numExecutors" : 2,
| "conf" :
| {
| "spark.jars.packages" : "com.microsoft.azure:synapseml_2.12:${BuildInfo.version}",
| "spark.jars.packages" : "$packages",
| "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven",
| "spark.jars.excludes": "$excludes",
| "spark.driver.userClassPathFirst": "true",
@@ -238,13 +239,17 @@ object SynapseUtilities {
poolLocation: String,
poolNodeSize: String,
createdAtTime: String): String = {
val buildId: String = sys.env.getOrElse("AdoBuildId", "unknown")
val buildNumber: String = sys.env.getOrElse("AdoBuildNumber", "unknown")
s"""
|{
| "name": "$bigDataPoolName",
| "location": "$poolLocation",
| "tags": {
| "createdBy": "SynapseE2E Tests",
| "createdAt": "$createdAtTime"
| "createdAt": "$createdAtTime",
| "buildId": "$buildId",
| "buildNumber": "$buildNumber",
| },
| "properties": {
| "autoPause": {
@@ -289,6 +294,7 @@ object SynapseUtilities {
sparkPools.foreach(sparkPool => {
val name = sparkPool.name.stripPrefix(s"$WorkspaceName/")
if (sparkPool.tags.contains("createdAt") && sparkPool.tags.contains("createdBy")) {
assert(name.stripPrefix(ClusterPrefix).length == dayAgoTsInMillis.toString.length)
val creationTime = name.stripPrefix(ClusterPrefix).toLong
if (creationTime <= dayAgoTsInMillis) {
try {
@@ -45,6 +45,8 @@
"outputs": [],
"source": [
"import os\n",
"from IPython import get_ipython\n",
"from IPython.terminal.interactiveshell import TerminalInteractiveShell\n",
"import uuid\n",
"import mlflow\n",
"import matplotlib.pyplot as plt\n",
@@ -84,6 +86,8 @@
" from pyspark.sql import SparkSession\n",
"\n",
" spark = SparkSession.builder.getOrCreate()\n",
" shell = TerminalInteractiveShell.instance()\n",
" shell.define_macro(\"foo\", \"\"\"a,b=10,20\"\"\")\n",
" from notebookutils.visualization import display"
],
"metadata": {
@@ -353,11 +357,26 @@
}
},
"source": [
"Next, we create an ML pipeline to train the Isolation Forest model. We also demonstrate how to create an MLFlow experiement and register the trained model.\n",
"Next, we create an ML pipeline to train the Isolation Forest model. We also demonstrate how to create an MLFlow experiment and register the trained model.\n",
"\n",
"Note that MLFlow model registration is strictly only required if accessing the trained model at a later time. For training the model, and performing inferencing in the same notebook, the model object model is sufficient."
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"if os.environ.get(\"AZURE_SERVICE\", None) == \"Microsoft.ProjectArcadia\":\n",
" !pip install --upgrade sqlparse"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
@@ -637,16 +656,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade raiwidgets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade interpret-community"
"!pip install --upgrade raiwidgets interpret-community"
]
},
{
@@ -1004,4 +1014,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}
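
The markdown cell above describes creating an MLflow experiment and registering the trained model. A rough standalone sketch of that flow (not the notebook's exact code; the experiment and model names are illustrative, and fitted_model is assumed to be an already-trained Spark ML model):

import mlflow
import mlflow.spark

# Illustrative experiment name; the notebook's actual name may differ.
mlflow.set_experiment("/Shared/isolation-forest-demo")

with mlflow.start_run() as run:
    # fitted_model: an already-trained Spark ML model, e.g. the Isolation Forest pipeline model.
    mlflow.spark.log_model(
        fitted_model,
        artifact_path="model",
        # Registration matters only when the model will be loaded again later;
        # for inference in the same notebook the in-memory object is enough.
        registered_model_name="isolation-forest-demo",
    )
    print(f"Logged model under run {run.info.run_id}")

# A registered version can later be loaded back by name, for example:
# loaded_model = mlflow.spark.load_model("models:/isolation-forest-demo/1")
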
@@ -33,6 +33,8 @@
"outputs": [],
"source": [
"import pyspark\n",
"from IPython import get_ipython\n",
"from IPython.terminal.interactiveshell import TerminalInteractiveShell\n",
"from synapse.ml.explainers import *\n",
"from pyspark.ml import Pipeline\n",
"from pyspark.ml.classification import LogisticRegression\n",
@@ -46,6 +48,8 @@
" from pyspark.sql import SparkSession\n",
"\n",
" spark = SparkSession.builder.getOrCreate()\n",
" shell = TerminalInteractiveShell.instance()\n",
" shell.define_macro(\"foo\", \"\"\"a,b=10,20\"\"\")\n",
" from notebookutils.visualization import display\n",
"\n",
"\n",
@@ -474,4 +478,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
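
Both notebooks pick up the same runtime-specific setup, matching the commit note about doing interactive-shell work only on the Azure Synapse Spark runtime. Pieced together as one hedged sketch (the AZURE_SERVICE guard follows the pattern visible elsewhere in this diff, such as the sqlparse cell; the exact condition in these cells may differ slightly):

import os
from IPython.terminal.interactiveshell import TerminalInteractiveShell

# Only run the interactive-shell setup on the Azure Synapse ("Project Arcadia")
# runtime; on any other runtime this whole block is skipped.
if os.environ.get("AZURE_SERVICE", None) == "Microsoft.ProjectArcadia":
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # Create the singleton terminal shell and register a trivial placeholder macro,
    # mirroring the lines added to both notebooks.
    shell = TerminalInteractiveShell.instance()
    shell.define_macro("foo", """a,b=10,20""")
    from notebookutils.visualization import display  # Synapse-only display helper

Because everything sits behind the guard, the notebooks still run unchanged on other Spark environments.
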
47 changes: 31 additions & 16 deletions pipeline.yaml
@@ -35,11 +35,12 @@ jobs:
pool:
vmImage: ubuntu-18.04
steps:
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Scala Style Check'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt scalastyle test:scalastyle'
- task: UsePythonVersion@0
inputs:
@@ -111,11 +112,12 @@ jobs:
PGP-PRIVATE: $(pgp-private)
PGP-PUBLIC: $(pgp-public)
PGP-PW: $(pgp-pw)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'E2E'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.DatabricksTests"'
condition: and(succeeded(), eq(variables.runTests, 'True'))
- task: PublishTestResults@2
@@ -148,14 +150,15 @@ jobs:
PGP-PRIVATE: $(pgp-private)
PGP-PUBLIC: $(pgp-public)
PGP-PW: $(pgp-pw)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'E2E'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
AdoBuildId=$(Build.BuildId) AdoBuildNumber=$(Build.BuildNumber) sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
condition: and(succeeded(), eq(variables.runTests, 'True'))
- task: PublishTestResults@2
displayName: 'Publish Test Results'
@@ -169,11 +172,12 @@
pool:
vmImage: ubuntu-18.04
steps:
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Get Docker Tag + Version'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
echo '##vso[task.setvariable variable=version]'$VERSION
@@ -341,22 +345,24 @@ jobs:
- template: templates/update_cli.yml
- template: templates/conda.yml
- template: templates/kv.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Install Pip Package'
timeoutInMinutes: 10
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
sbt installPipPackage
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test Python Code'
timeoutInMinutes: 40
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython)
@@ -366,11 +372,12 @@
testResultsFiles: '**/python-test-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
Expand All @@ -389,12 +396,13 @@ jobs:
- template: templates/kv.yml
- bash: curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
displayName: Download spark
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test R Code'
timeoutInMinutes: 30
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
@@ -405,11 +413,12 @@
testResultsFiles: '**/r-test-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
@@ -425,12 +434,13 @@
- template: templates/update_cli.yml
- template: templates/conda.yml
- template: templates/kv.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test Website Samples'
timeoutInMinutes: 30
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
set -e
source activate synapseml
@@ -444,11 +454,12 @@
testResultsFiles: '**/website-test-result.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
@@ -469,11 +480,12 @@
inputs:
versionSpec: '16.x'
displayName: 'Install Node.js'
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Convert notebooks to markdowns'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
sbt convertNotebooks
@@ -579,20 +591,22 @@ jobs:
PACKAGE: "vw"
steps:
#- template: templates/ivy_cache.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Setup repo'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
(timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests)
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Unit Test'
timeoutInMinutes: 90
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
(${FFMPEG:-false} && sudo add-apt-repository ppa:jonathonf/ffmpeg-4 -y && \
sudo apt-get update && sudo apt-get install ffmpeg libgstreamer1.0-0 \
@@ -608,11 +622,12 @@
testResultsFiles: '**/test-reports/TEST-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/kv.yml
