From 93d7ccf7a782d89ac157d6e1c87ea3f55d11b886 Mon Sep 17 00:00:00 2001 From: Puneet Pruthi <33642858+ppruthi@users.noreply.github.com> Date: Tue, 9 Aug 2022 19:40:39 -0700 Subject: [PATCH] chore: pin binder version for users (#1607) * chore: pin binder compatible version * fix dbfs/databricks case escapes in notebooks Co-authored-by: Puneet Pruthi --- .../features/onnx/ONNX - Inference on Spark.ipynb | 1 + .../CyberML - Anomalous Access Detection.ipynb | 14 ++++++++++++-- ...arning - BiLSTM Medical Entity Extraction.ipynb | 7 +++++-- ...pLearning - CIFAR10 Convolutional Network.ipynb | 6 ++++-- ...eepLearning - Flower Image Classification.ipynb | 6 ++++-- .../other/DeepLearning - Transfer Learning.ipynb | 6 ++++-- .../regression/Regression - Flight Delays.ipynb | 6 ++++-- ...Interpretability - Snow Leopard Detection.ipynb | 13 ++++++++++--- start | 2 +- 9 files changed, 45 insertions(+), 16 deletions(-) diff --git a/notebooks/features/onnx/ONNX - Inference on Spark.ipynb b/notebooks/features/onnx/ONNX - Inference on Spark.ipynb index 59e6c31876..ea64c95011 100644 --- a/notebooks/features/onnx/ONNX - Inference on Spark.ipynb +++ b/notebooks/features/onnx/ONNX - Inference on Spark.ipynb @@ -119,6 +119,7 @@ "from synapse.ml.core.platform import running_on_binder\n", "\n", "if running_on_binder():\n", + " !pip install lightgbm==3.2.1\n", " from IPython import get_ipython\n", "import lightgbm as lgb\n", "from lightgbm import Booster, LGBMClassifier\n", diff --git a/notebooks/features/other/CyberML - Anomalous Access Detection.ipynb b/notebooks/features/other/CyberML - Anomalous Access Detection.ipynb index 977c7ee8f4..e2dad7d5e1 100644 --- a/notebooks/features/other/CyberML - Anomalous Access Detection.ipynb +++ b/notebooks/features/other/CyberML - Anomalous Access Detection.ipynb @@ -87,7 +87,12 @@ "metadata": {}, "outputs": [], "source": [ - "spark.sparkContext.setCheckpointDir(\"dbfs:/checkpoint_path/\")\n", + "from synapse.ml.core.platform import running_on_databricks\n", + "\n", + "if running_on_databricks():\n", + " spark.sparkContext.setCheckpointDir(\"dbfs:/checkpoint_path/\")\n", + "else:\n", + " spark.sparkContext.setCheckpointDir(\"/tmp/checkpoint_path/\")\n", "\n", "factory = DataFactory(\n", " num_hr_users=25,\n", @@ -445,7 +450,12 @@ "\n", "p = plot(fig, output_type=\"div\")\n", "\n", - "displayHTML(p)" + "if running_on_databricks():\n", + " displayHTML(p)\n", + "else:\n", + " import IPython\n", + "\n", + " IPython.display.HTML(p)" ] }, { diff --git a/notebooks/features/other/DeepLearning - BiLSTM Medical Entity Extraction.ipynb b/notebooks/features/other/DeepLearning - BiLSTM Medical Entity Extraction.ipynb index 0537d646ce..d053f7a64d 100644 --- a/notebooks/features/other/DeepLearning - BiLSTM Medical Entity Extraction.ipynb +++ b/notebooks/features/other/DeepLearning - BiLSTM Medical Entity Extraction.ipynb @@ -45,7 +45,7 @@ "# Bootstrap Spark Session\n", "spark = SparkSession.builder.getOrCreate()\n", "\n", - "from synapse.ml.core.platform import running_on_synapse\n", + "from synapse.ml.core.platform import running_on_synapse, running_on_databricks\n", "\n", "if running_on_synapse():\n", " from notebookutils.visualization import display" @@ -71,9 +71,12 @@ "if running_on_synapse():\n", " modelDir = \"abfss://synapse@mmlsparkeuap.dfs.core.windows.net/models/\"\n", " dataDir = \"./nltkdata\"\n", - "else:\n", + "elif running_on_databricks():\n", " modelDir = \"dbfs:/models/\"\n", " dataDir = \"/dbfs/nltkdata\"\n", + "else:\n", + " modelDir = \"/tmp/models/\"\n", + " dataDir = \"/tmp/nltkdata\"\n", "\n", "d = ModelDownloader(spark, modelDir)\n", "modelSchema = d.downloadByName(modelName)\n", diff --git a/notebooks/features/other/DeepLearning - CIFAR10 Convolutional Network.ipynb b/notebooks/features/other/DeepLearning - CIFAR10 Convolutional Network.ipynb index 916c3469e5..f30f862351 100644 --- a/notebooks/features/other/DeepLearning - CIFAR10 Convolutional Network.ipynb +++ b/notebooks/features/other/DeepLearning - CIFAR10 Convolutional Network.ipynb @@ -23,7 +23,7 @@ "# Bootstrap Spark Session\n", "spark = SparkSession.builder.getOrCreate()\n", "\n", - "from synapse.ml.core.platform import running_on_synapse\n", + "from synapse.ml.core.platform import running_on_synapse, running_on_databricks\n", "\n", "if running_on_synapse():\n", " from notebookutils.visualization import display" @@ -63,8 +63,10 @@ "modelName = \"ConvNet\"\n", "if running_on_synapse():\n", " modelDir = \"abfss://synapse@mmlsparkeuap.dfs.core.windows.net/models/\"\n", + "elif running_on_databricks():\n", + " modelDir = \"dbfs:/models/\"\n", "else:\n", - " modelDir = \"dbfs:/models/\"" + " modelDir = \"/tmp/models/\"" ] }, { diff --git a/notebooks/features/other/DeepLearning - Flower Image Classification.ipynb b/notebooks/features/other/DeepLearning - Flower Image Classification.ipynb index 9806aa68d7..e4a56ffc4c 100644 --- a/notebooks/features/other/DeepLearning - Flower Image Classification.ipynb +++ b/notebooks/features/other/DeepLearning - Flower Image Classification.ipynb @@ -22,7 +22,7 @@ "# Bootstrap Spark Session\n", "spark = SparkSession.builder.getOrCreate()\n", "\n", - "from synapse.ml.core.platform import running_on_synapse\n", + "from synapse.ml.core.platform import running_on_synapse, running_on_databricks\n", "\n", "if running_on_synapse():\n", " from notebookutils.visualization import display" @@ -38,8 +38,10 @@ "source": [ "if running_on_synapse():\n", " modelDir = \"abfss://synapse@mmlsparkeuap.dfs.core.windows.net/models/\"\n", - "else:\n", + "elif running_on_databricks():\n", " modelDir = \"dbfs:/models/\"\n", + "else:\n", + " modelDir = \"/tmp/models/\"\n", "\n", "model = ModelDownloader(spark, modelDir).downloadByName(\"ResNet50\")" ] diff --git a/notebooks/features/other/DeepLearning - Transfer Learning.ipynb b/notebooks/features/other/DeepLearning - Transfer Learning.ipynb index 366a1bffcd..72940b8b12 100644 --- a/notebooks/features/other/DeepLearning - Transfer Learning.ipynb +++ b/notebooks/features/other/DeepLearning - Transfer Learning.ipynb @@ -35,13 +35,15 @@ "# Bootstrap Spark Session\n", "spark = SparkSession.builder.getOrCreate()\n", "\n", - "from synapse.ml.core.platform import running_on_synapse\n", + "from synapse.ml.core.platform import running_on_synapse, running_on_databricks\n", "\n", "modelName = \"ConvNet\"\n", "if running_on_synapse():\n", " modelDir = \"abfss://synapse@mmlsparkeuap.dfs.core.windows.net/models/\"\n", - "else:\n", + "elif running_on_databricks():\n", " modelDir = \"dbfs:/models/\"\n", + "else:\n", + " modelDir = \"/tmp/models/\"\n", "\n", "d = ModelDownloader(spark, modelDir)\n", "model = d.downloadByName(modelName)\n", diff --git a/notebooks/features/regression/Regression - Flight Delays.ipynb b/notebooks/features/regression/Regression - Flight Delays.ipynb index f953e561c3..7865b9d149 100644 --- a/notebooks/features/regression/Regression - Flight Delays.ipynb +++ b/notebooks/features/regression/Regression - Flight Delays.ipynb @@ -125,12 +125,14 @@ "metadata": {}, "outputs": [], "source": [ - "from synapse.ml.core.platform import running_on_synapse\n", + "from synapse.ml.core.platform import *\n", "\n", "if running_on_synapse():\n", " model_name = \"/models/flightDelayModel.mml\"\n", - "else:\n", + "elif running_on_databricks():\n", " model_name = \"dbfs:/flightDelayModel.mml\"\n", + "else:\n", + " model_name = \"/tmp/flightDelayModel.mml\"\n", "\n", "model.write().overwrite().save(model_name)\n", "flightDelayModel = TrainedRegressorModel.load(model_name)\n", diff --git a/notebooks/features/responsible_ai/Interpretability - Snow Leopard Detection.ipynb b/notebooks/features/responsible_ai/Interpretability - Snow Leopard Detection.ipynb index d62efa0fa4..b98c675630 100644 --- a/notebooks/features/responsible_ai/Interpretability - Snow Leopard Detection.ipynb +++ b/notebooks/features/responsible_ai/Interpretability - Snow Leopard Detection.ipynb @@ -15,7 +15,7 @@ "source": [ "import os\n", "from pyspark.sql import SparkSession\n", - "from synapse.ml.core.platform import running_on_synapse, find_secret\n", + "from synapse.ml.core.platform import *\n", "\n", "# Bootstrap Spark Session\n", "spark = SparkSession.builder.getOrCreate()\n", @@ -133,7 +133,12 @@ " header, tableHTML\n", " )\n", " try:\n", - " displayHTML(style + body)\n", + " if running_on_databricks():\n", + " displayHTML(style + body)\n", + " else:\n", + " import IPython\n", + "\n", + " IPython.display.HTML(style + body)\n", " except:\n", " pass" ], @@ -232,8 +237,10 @@ " network = ModelDownloader(\n", " spark, \"abfss://synapse@mmlsparkeuap.dfs.core.windows.net/models/\"\n", " ).downloadByName(\"ResNet50\")\n", - "else:\n", + "elif running_on_databricks():\n", " network = ModelDownloader(spark, \"dbfs:/Models/\").downloadByName(\"ResNet50\")\n", + "else:\n", + " network = ModelDownloader(spark, \"/tmp/Models/\").downloadByName(\"ResNet50\")\n", "\n", "model = Pipeline(\n", " stages=[\n", diff --git a/start b/start index 22c89523b8..3a84dfe78a 100644 --- a/start +++ b/start @@ -3,7 +3,7 @@ export OPENMPI_VERSION="3.1.2" export SPARK_VERSION="3.2.2" export HADOOP_VERSION="2.7" -export MMLSPARK_VERSION="0.10.0" +export SYNAPSEML_VERSION="0.10.0-26-c7a61ecd-SNAPSHOT" # Binder compatibility version echo "Beginning Spark Session..." exec "$@"