fix: remove exclusion for ExplanationDashboard notebook #1531

Merged
@@ -46,7 +46,6 @@ class SynapseTests extends TestBase {

val resourcesDirectory = new File(getClass.getResource("/").toURI)
val notebooksDir = new File(resourcesDirectory, "generated-notebooks")
println(s"Notebooks dir: $notebooksDir")
FileUtils.deleteDirectory(notebooksDir)
assert(notebooksDir.mkdirs())

@@ -109,17 +108,17 @@ class SynapseTests extends TestBase {
.filterNot(_.getAbsolutePath.contains("CyberML"))
.filterNot(_.getAbsolutePath.contains("VowpalWabbitOverview"))
.filterNot(_.getAbsolutePath.contains("IsolationForest"))
.filterNot(_.getAbsolutePath.contains("ExplanationDashboard"))
.filterNot(_.getAbsolutePath.contains("DeepLearning"))
.filterNot(_.getAbsolutePath.contains("InterpretabilitySnowLeopardDetection"))
.sortBy(_.getAbsolutePath)

selectedPythonFiles.foreach(println)
assert(selectedPythonFiles.length > 1)

val expectedPoolCount: Int = selectedPythonFiles.length

println("SynapseTests E2E Test Suite starting...")
assert(expectedPoolCount >= 1)
println(s"SynapseTests E2E Test Suite starting on ${expectedPoolCount} notebook(s)...")
selectedPythonFiles.foreach(println)

// Cleanup old stray spark pools lying around due to ungraceful test shutdown
tryDeleteOldSparkPools()

println(s"Creating $expectedPoolCount Spark Pools...")
@@ -155,6 +154,7 @@ class SynapseTests extends TestBase {
failures.foreach(failure =>
println(failure.failed.get.getMessage))
}
FileUtils.deleteDirectory(notebooksDir)
super.afterAll()
}
}
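
A side note on the SynapseTests changes above: the generated-notebooks directory is now deleted both before a run (so `mkdirs` starts from a clean slate) and again in `afterAll`, so artifacts from an aborted run cannot leak into the next one. A minimal sketch of that teardown pattern — the suite name and directory path here are assumptions, not repo code:

```scala
// Illustrative sketch only; the suite name and path are assumptions, not from this PR.
import java.io.File
import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite

class GeneratedNotebookSuite extends AnyFunSuite with BeforeAndAfterAll {
  val notebooksDir = new File("target/generated-notebooks") // assumed location

  override def beforeAll(): Unit = {
    FileUtils.deleteDirectory(notebooksDir) // clear leftovers from prior runs
    assert(notebooksDir.mkdirs())           // fail fast if the directory can't be created
    super.beforeAll()
  }

  override def afterAll(): Unit = {
    try FileUtils.deleteDirectory(notebooksDir) // remove generated artifacts
    finally super.afterAll()                    // always run the parent teardown
  }
}
```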
@@ -187,7 +187,7 @@ object SynapseUtilities {
val dest = s"$Folder/${notebook.getName}"
exec(s"az storage fs file upload " +
s" -s ${notebook.getAbsolutePath} -p $dest -f $StorageContainer " +
s" --overwrite true " +
" --overwrite true " +
s" --account-name $StorageAccount --account-key ${Secrets.SynapseStorageKey}")
val abfssPath = s"abfss://$StorageContainer@$StorageAccount.dfs.core.windows.net/$dest"

@@ -197,6 +197,7 @@ object SynapseUtilities {
"org.scalactic:scalactic_2.12",
"org.scalatest:scalatest_2.12",
"org.slf4j:slf4j-api").mkString(",")
val packages: String = s"com.microsoft.azure:synapseml_2.12:${BuildInfo.version}"
val runName = abfssPath.split('/').last.replace(".py", "")
val livyPayload: String =
s"""
@@ -210,7 +211,7 @@ object SynapseUtilities {
| "numExecutors" : 2,
| "conf" :
| {
| "spark.jars.packages" : "com.microsoft.azure:synapseml_2.12:${BuildInfo.version}",
| "spark.jars.packages" : "$packages",
| "spark.jars.repositories" : "https://mmlspark.azureedge.net/maven",
| "spark.jars.excludes": "$excludes",
| "spark.driver.userClassPathFirst": "true",
@@ -238,13 +239,17 @@ object SynapseUtilities {
poolLocation: String,
poolNodeSize: String,
createdAtTime: String): String = {
val buildId: String = sys.env.getOrElse("AdoBuildId", "unknown")
val buildNumber: String = sys.env.getOrElse("AdoBuildNumber", "unknown")
s"""
|{
| "name": "$bigDataPoolName",
| "location": "$poolLocation",
| "tags": {
| "createdBy": "SynapseE2E Tests",
| "createdAt": "$createdAtTime"
| "createdAt": "$createdAtTime",
| "buildId": "$buildId",
| "buildNumber": "$buildNumber",
| },
| "properties": {
| "autoPause": {
@@ -289,6 +294,7 @@ object SynapseUtilities {
sparkPools.foreach(sparkPool => {
val name = sparkPool.name.stripPrefix(s"$WorkspaceName/")
if (sparkPool.tags.contains("createdAt") && sparkPool.tags.contains("createdBy")) {
assert(name.stripPrefix(ClusterPrefix).length == dayAgoTsInMillis.toString.length)
val creationTime = name.stripPrefix(ClusterPrefix).toLong
if (creationTime <= dayAgoTsInMillis) {
try {
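The new `assert` in the cleanup path above guards the `.toLong` parse: pool names are expected to be the cluster prefix followed by an epoch-milliseconds timestamp, so a suffix of the wrong width now fails loudly instead of yielding a garbage creation time. A minimal sketch of that invariant (the `ClusterPrefix` value here is an assumption):

```scala
// Sketch of the name-format invariant; ClusterPrefix is assumed for illustration.
val ClusterPrefix = "SparkPool"
val dayAgoTsInMillis = System.currentTimeMillis() - 24L * 60 * 60 * 1000

val poolName = s"$ClusterPrefix${System.currentTimeMillis()}" // how test pools are named
val suffix = poolName.stripPrefix(ClusterPrefix)

// Current epoch-millis values all have the same digit count, so a mismatched
// length means the pool was not created by this naming scheme.
assert(suffix.length == dayAgoTsInMillis.toString.length)

val creationTime = suffix.toLong
if (creationTime <= dayAgoTsInMillis) {
  println(s"$poolName is over a day old; treat it as a stray pool to delete")
}
```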
@@ -45,6 +45,8 @@
"outputs": [],
"source": [
"import os\n",
"from IPython import get_ipython\n",
"from IPython.terminal.interactiveshell import TerminalInteractiveShell\n",
"import uuid\n",
"import mlflow\n",
"import matplotlib.pyplot as plt\n",
@@ -84,6 +86,8 @@
" from pyspark.sql import SparkSession\n",
"\n",
" spark = SparkSession.builder.getOrCreate()\n",
" shell = TerminalInteractiveShell.instance()\n",
" shell.define_macro(\"foo\", \"\"\"a,b=10,20\"\"\")\n",
" from notebookutils.visualization import display"
],
"metadata": {
@@ -353,11 +357,26 @@
}
},
"source": [
"Next, we create an ML pipeline to train the Isolation Forest model. We also demonstrate how to create an MLFlow experiement and register the trained model.\n",
"Next, we create an ML pipeline to train the Isolation Forest model. We also demonstrate how to create an MLFlow experiment and register the trained model.\n",
"\n",
"Note that MLFlow model registration is strictly only required if accessing the trained model at a later time. For training the model, and performing inferencing in the same notebook, the model object model is sufficient."
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"if os.environ.get(\"AZURE_SERVICE\", None) == \"Microsoft.ProjectArcadia\":\n",
" !pip install --upgrade sqlparse"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
@@ -637,16 +656,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade raiwidgets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade interpret-community"
"!pip install --upgrade raiwidgets interpret-community"
]
},
{
@@ -1004,4 +1014,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}
@@ -33,6 +33,8 @@
"outputs": [],
"source": [
"import pyspark\n",
"from IPython import get_ipython\n",
"from IPython.terminal.interactiveshell import TerminalInteractiveShell\n",
"from synapse.ml.explainers import *\n",
"from pyspark.ml import Pipeline\n",
"from pyspark.ml.classification import LogisticRegression\n",
@@ -46,6 +48,8 @@
" from pyspark.sql import SparkSession\n",
"\n",
" spark = SparkSession.builder.getOrCreate()\n",
" shell = TerminalInteractiveShell.instance()\n",
" shell.define_macro(\"foo\", \"\"\"a,b=10,20\"\"\")\n",
" from notebookutils.visualization import display\n",
"\n",
"\n",
@@ -474,4 +478,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
47 changes: 31 additions & 16 deletions pipeline.yaml
@@ -35,11 +35,12 @@ jobs:
pool:
vmImage: ubuntu-18.04
steps:
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Scala Style Check'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt scalastyle test:scalastyle'
- task: UsePythonVersion@0
inputs:
@@ -111,11 +112,12 @@ jobs:
PGP-PRIVATE: $(pgp-private)
PGP-PUBLIC: $(pgp-public)
PGP-PW: $(pgp-pw)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'E2E'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.DatabricksTests"'
condition: and(succeeded(), eq(variables.runTests, 'True'))
- task: PublishTestResults@2
@@ -148,14 +150,15 @@ jobs:
PGP-PRIVATE: $(pgp-private)
PGP-PUBLIC: $(pgp-public)
PGP-PW: $(pgp-pw)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'E2E'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
AdoBuildId=$(Build.BuildId) AdoBuildNumber=$(Build.BuildNumber) sbt "testOnly com.microsoft.azure.synapse.ml.nbtest.SynapseTests"
condition: and(succeeded(), eq(variables.runTests, 'True'))
- task: PublishTestResults@2
displayName: 'Publish Test Results'
@@ -169,11 +172,12 @@
pool:
vmImage: ubuntu-18.04
steps:
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Get Docker Tag + Version'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
VERSION=$(sbt "core/version" | tail -1 | cut -d' ' -f2 | sed 's/\x1b\[[0-9;]*m//g')
echo '##vso[task.setvariable variable=version]'$VERSION
@@ -341,22 +345,24 @@ jobs:
- template: templates/update_cli.yml
- template: templates/conda.yml
- template: templates/kv.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Install Pip Package'
timeoutInMinutes: 10
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
sbt installPipPackage
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test Python Code'
timeoutInMinutes: 40
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython) || (sbt "project $(PACKAGE)" coverage testPython)
@@ -366,11 +372,12 @@
testResultsFiles: '**/python-test-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
@@ -389,12 +396,13 @@
- template: templates/kv.yml
- bash: curl https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz -o spark-3.2.0-bin-hadoop3.2.tgz
displayName: Download spark
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test R Code'
timeoutInMinutes: 30
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
@@ -405,11 +413,12 @@
testResultsFiles: '**/r-test-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
@@ -425,12 +434,13 @@
- template: templates/update_cli.yml
- template: templates/conda.yml
- template: templates/kv.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Test Website Samples'
timeoutInMinutes: 30
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
set -e
source activate synapseml
@@ -444,11 +454,12 @@
testResultsFiles: '**/website-test-result.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/codecov.yml
@@ -469,11 +480,12 @@
inputs:
versionSpec: '16.x'
displayName: 'Install Node.js'
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Convert notebooks to markdowns'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
source activate synapseml
sbt convertNotebooks
@@ -579,20 +591,22 @@ jobs:
PACKAGE: "vw"
steps:
#- template: templates/ivy_cache.yml
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Setup repo'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
(timeout 30s pip install requests) || (echo "retrying" && timeout 30s pip install requests)
(timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup)
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Unit Test'
timeoutInMinutes: 90
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: |
(${FFMPEG:-false} && sudo add-apt-repository ppa:jonathonf/ffmpeg-4 -y && \
sudo apt-get update && sudo apt-get install ffmpeg libgstreamer1.0-0 \
@@ -608,11 +622,12 @@
testResultsFiles: '**/test-reports/TEST-*.xml'
failTaskOnFailedTests: true
condition: succeededOrFailed()
- task: AzureCLI@1
- task: AzureCLI@2
displayName: 'Generate Codecov report'
inputs:
azureSubscription: 'MMLSpark Build'
scriptLocation: inlineScript
scriptType: bash
inlineScript: 'sbt coverageReport'
condition: succeededOrFailed()
- template: templates/kv.yml