From 890222989bee0363e615deddbde6b6e81923a3c1 Mon Sep 17 00:00:00 2001 From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com> Date: Thu, 18 Apr 2024 08:51:30 -0700 Subject: [PATCH 1/3] update isolation forest doc --- .../Quickstart - Isolation Forests.ipynb | 85 ++++++++++++++----- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb index a03e9e75de..b69913e1b4 100644 --- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb +++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb @@ -24,22 +24,42 @@ "metadata": {}, "source": [ "## Prerequisites\n", - " - If you are running it on Synapse, you'll need to [create an AML workspace and set up linked Service](../../../Use%20with%20MLFlow/Overview/).\n" + " - If running on Synapse, you'll need to [create an AML workspace and set up linked Service](../../Use%20with%20MLFlow/Overview.md) and add the following installation cell.\n", + " - If running on Fabric, you need to add the following installation cell and attach the notebook to a lakehouse. On the left side of your notebook, select Add to add an existing lakehouse or create a new one." 
] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4" - ], + "# %%configure -f\n", + "# {\n", + "# \"name\": \"synapseml\",\n", + "# \"conf\": {\n", + "# \"spark.jars.packages\": \"com.microsoft.azure:synapseml_2.12:<THE_SYNAPSEML_VERSION_YOU_WANT>\",\n", + "# \"spark.jars.repositories\": \"https://mmlspark.azureedge.net/maven\",\n", + "# \"spark.jars.excludes\": \"org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind\",\n", + "# \"spark.yarn.user.classpath.first\": \"true\",\n", + "# \"spark.sql.parquet.enableVectorizedReader\": \"false\"\n", + "# }\n", + "# }" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4" + ] }, { "cell_type": "markdown", @@ -155,8 +175,14 @@ "\n", "# MLFlow experiment\n", "artifact_path = \"isolationforest\"\n", - "experiment_name = f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\"\n", - "model_name = f\"isolation-forest-model\"" + "model_name = f\"isolation-forest-model\"\n", + "\n", + "platform = current_platform()\n", + "experiment_name = {\n", + " \"databricks\": f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\",\n", + " \"synapse\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\",\n", + " \"synapse_internal\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\", # Fabric\n", + "}.get(platform, f\"isolation_forest_experiment\")\n" ] }, { @@ -365,18 +391,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-synapse-internal" + ] + }, "outputs": [], "source": [ "if running_on_synapse():\n", " from synapse.ml.core.platform import find_secret\n", - "\n", " 
tracking_url = find_secret(\n", " secret_name=\"aml-mlflow-tracking-url\", keyvault=\"mmlspark-build-keys\"\n", " ) # check link in prerequisites for more information on mlflow tracking url\n", - " mlflow.set_tracking_uri(tracking_url)\n", - " experiment_name = f\"isolation_forest_experiment\"\n", - " model_name = \"isolation-forest\"" + " mlflow.set_tracking_uri(tracking_url)" ] }, { @@ -393,7 +420,7 @@ "outputs": [], "source": [ "mlflow.set_experiment(experiment_name)\n", - "with mlflow.start_run():\n", + "with mlflow.start_run() as run:\n", " va = VectorAssembler(inputCols=inputCols, outputCol=\"features\")\n", " pipeline = Pipeline(stages=[va, isolationForest])\n", " model = pipeline.fit(df_train)\n", @@ -424,7 +451,10 @@ "nuid": "57cda5af-b090-4b6d-ad07-530519e0300e", "showTitle": false, "title": "" - } + }, + "tags": [ + "hide-synapse-internal" + ] }, "source": [ "Load the trained Isolation Forest Model" @@ -439,13 +469,21 @@ "nuid": "f44b9a1f-c2fe-4b5b-a318-4d6d73370978", "showTitle": false, "title": "" - } + }, + "tags": [ + "hide-synapse-internal" + ] }, "outputs": [], "source": [ - "# model_version = 1\n", - "# model_uri = f\"models:/{model_name}/{model_version}\"\n", - "# model = mlflow.spark.load_model(model_uri)" + "if running_on_databricks():\n", + " model_version = 1\n", + " model_uri = f\"models:/{model_name}/{model_version}\"\n", + "elif running_on_synapse_internal():\n", + " model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n", + " run_id=run.info.run_id, artifact_path=artifact_path\n", + ")\n", + "model = mlflow.spark.load_model(model_uri)" ] }, { @@ -943,12 +981,16 @@ "source": [ "When you run the cell above, you will see the following global feature importance plot:\n", "\n", - "![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global_feature_importance.jpg)" + "![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global-feature-importance.jpg)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + 
"tags": [ + "hide-synapse-internal" + ] + }, "source": [ "Visualize the explanation in the ExplanationDashboard from https://github.com/microsoft/responsible-ai-widgets." ] @@ -962,7 +1004,10 @@ "nuid": "140602e6-908e-4b32-ab9c-49dd79705171", "showTitle": false, "title": "" - } + }, + "tags": [ + "hide-synapse-internal" + ] }, "outputs": [], "source": [ From 11f4f7c8e6174b2b3efaffbb1b26a965ed7b81c3 Mon Sep 17 00:00:00 2001 From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:04:07 -0700 Subject: [PATCH 2/3] formatting --- .../Quickstart - Isolation Forests.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb index b69913e1b4..045cba5dab 100644 --- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb +++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb @@ -181,8 +181,8 @@ "experiment_name = {\n", " \"databricks\": f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\",\n", " \"synapse\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\",\n", - " \"synapse_internal\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\", # Fabric\n", - "}.get(platform, f\"isolation_forest_experiment\")\n" + " \"synapse_internal\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\", # Fabric\n", + "}.get(platform, f\"isolation_forest_experiment\")" ] }, { @@ -400,6 +400,7 @@ "source": [ "if running_on_synapse():\n", " from synapse.ml.core.platform import find_secret\n", + "\n", " tracking_url = find_secret(\n", " secret_name=\"aml-mlflow-tracking-url\", keyvault=\"mmlspark-build-keys\"\n", " ) # check link in prerequisites for more information on mlflow tracking url\n", @@ -481,8 +482,8 @@ " model_uri = f\"models:/{model_name}/{model_version}\"\n", "elif 
running_on_synapse_internal():\n", " model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n", - " run_id=run.info.run_id, artifact_path=artifact_path\n", - ")\n", + " run_id=run.info.run_id, artifact_path=artifact_path\n", + " )\n", "model = mlflow.spark.load_model(model_uri)" ] }, From 9951b36bef5872eedae8a9bcea6b11852b3c6134 Mon Sep 17 00:00:00 2001 From: JessicaXYWang <108437381+JessicaXYWang@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:59:31 -0700 Subject: [PATCH 3/3] update isolation forest --- .../Quickstart - Isolation Forests.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb index 045cba5dab..0a4f0e38ea 100644 --- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb +++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb @@ -477,14 +477,14 @@ }, "outputs": [], "source": [ - "if running_on_databricks():\n", - " model_version = 1\n", - " model_uri = f\"models:/{model_name}/{model_version}\"\n", - "elif running_on_synapse_internal():\n", - " model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n", - " run_id=run.info.run_id, artifact_path=artifact_path\n", - " )\n", - "model = mlflow.spark.load_model(model_uri)" + "# if running_on_databricks():\n", + "# model_version = <your_model_version>\n", + "# model_uri = f\"models:/{model_name}/{model_version}\"\n", + "# elif running_on_synapse_internal():\n", + "# model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n", + "# run_id=run.info.run_id, artifact_path=artifact_path\n", + "# )\n", + "# model = mlflow.spark.load_model(model_uri)" ] }, {