diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4386b8dca0f..2556638be7f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/README.md b/README.md index ecb10916c4a..ae3062f8a2b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![SynapseML](https://mmlspark.azureedge.net/icons/mmlspark.svg) +![SynapseML](https://mmlspark.blob.core.windows.net/icons/mmlspark.svg) # Synapse Machine Learning @@ -97,7 +97,7 @@ In Microsoft Fabric notebooks SynapseML is already installed. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -120,7 +120,7 @@ In Azure Synapse notebooks please place the following in the first cell of your "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -136,7 +136,7 @@ In Azure Synapse notebooks please place the following in the first cell of your "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -156,7 +156,7 @@ coordinates](https://docs.databricks.com/user-guide/libraries.html#libraries-fro in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.8` -with the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +with the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. If you encounter Netty dependency issues please use DBR 10.1. diff --git a/core/src/main/python/synapse/ml/core/init_spark.py b/core/src/main/python/synapse/ml/core/init_spark.py index 0f218102a15..06bb1543c8f 100644 --- a/core/src/main/python/synapse/ml/core/init_spark.py +++ b/core/src/main/python/synapse/ml/core/init_spark.py @@ -16,7 +16,7 @@ def init_spark(): + __spark_package_version__ + ",org.apache.spark:spark-avro_2.12:3.4.1", ) - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") .config("spark.executor.heartbeatInterval", "60s") .config("spark.sql.shuffle.partitions", 10) .config("spark.sql.crossJoin.enabled", "true") diff --git a/core/src/main/python/synapse/ml/downloader/ModelDownloader.py b/core/src/main/python/synapse/ml/downloader/ModelDownloader.py index 250d47bf68f..7b4184512ef 100644 --- a/core/src/main/python/synapse/ml/downloader/ModelDownloader.py +++ b/core/src/main/python/synapse/ml/downloader/ModelDownloader.py @@ -9,7 +9,7 @@ from pyspark.ml.param.shared import * from synapse.ml.core.schema.Utils import * -DEFAULT_URL = "https://mmlspark.azureedge.net/datasets/CNTKModels/" +DEFAULT_URL = "https://mmlspark.blob.core.windows.net/datasets/CNTKModels/" class ModelSchema: diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala index 786b84825dd..edb8971d75d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala @@ -9,7 +9,7 @@ import com.microsoft.azure.synapse.ml.build.BuildInfo * Centralized values for package repositories and coordinates (mostly used by test pipeline frameworks) */ object PackageUtils { - private val SparkMLRepository = "https://mmlspark.azureedge.net/maven" + private val SparkMLRepository = "https://mmlspark.blob.core.windows.net/maven" private val SonatypeSnapshotsRepository = "https://oss.sonatype.org/content/repositories/snapshots" val ScalaVersionSuffix: String = BuildInfo.scalaVersion.split(".".toCharArray).dropRight(1).mkString(".") diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb index 6461ff518cd..4748a7a9c9e 100644 --- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb +++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb @@ -43,7 +43,7 @@ "# \"name\": \"synapseml\",\n", "# \"conf\": {\n", "# \"spark.jars.packages\": \"com.microsoft.azure:synapseml_2.12:\",\n", - "# \"spark.jars.repositories\": \"https://mmlspark.azureedge.net/maven\",\n", + "# \"spark.jars.repositories\": \"https://mmlspark.blob.core.windows.net/maven\",\n", "# \"spark.jars.excludes\": \"org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind\",\n", "# \"spark.yarn.user.classpath.first\": \"true\",\n", "# \"spark.sql.parquet.enableVectorizedReader\": \"false\"\n", diff --git a/docs/Explore Algorithms/Deep Learning/Getting Started.md b/docs/Explore Algorithms/Deep Learning/Getting Started.md index d23cc8330d2..ab1efa3214f 100644 --- a/docs/Explore Algorithms/Deep Learning/Getting Started.md +++ b/docs/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.8 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.8 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb b/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb index 1dc88b7d1fb..a3d5088b08f 100644 --- a/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb +++ b/docs/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.ipynb @@ -35,7 +35,7 @@ "\n", "1. In Cluster Libraries install from library source Maven:\n", "Coordinates: com.microsoft.azure:synapseml_2.12:1.0.8\n", - "Repository: https://mmlspark.azureedge.net/maven\n", + "Repository: https://mmlspark.blob.core.windows.net/maven\n", "\n", "2. In Cluster Libraries install from PyPI the library called plotly" ], diff --git a/docs/Get Started/Install SynapseML.md b/docs/Get Started/Install SynapseML.md index 394d45daca1..68617a1842d 100644 --- a/docs/Get Started/Install SynapseML.md +++ b/docs/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.8") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.8" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.8` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/docs/Reference/Contributor Guide.md b/docs/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/docs/Reference/Contributor Guide.md +++ b/docs/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/docs/Reference/R Setup.md b/docs/Reference/R Setup.md index d7588702db2..c158adb93e6 100644 --- a/docs/Reference/R Setup.md +++ b/docs/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.8.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.8.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/project/BlobMavenPlugin.scala b/project/BlobMavenPlugin.scala index 63941df2331..df92d7c4d17 100644 --- a/project/BlobMavenPlugin.scala +++ b/project/BlobMavenPlugin.scala @@ -39,7 +39,7 @@ object BlobMavenPlugin extends AutoPlugin { | `${organization.value}:${moduleName.value}_${scalaBinaryVersion.value}:${version.value}` | |### Maven Resolver - | `https://mmlspark.azureedge.net/maven` + | `https://mmlspark.blob.core.windows.net/maven` |""".stripMargin } ) diff --git a/tools/docker/demo/init_notebook.py b/tools/docker/demo/init_notebook.py index 4d28f684d10..519ff9985b9 100644 --- a/tools/docker/demo/init_notebook.py +++ b/tools/docker/demo/init_notebook.py @@ -32,7 +32,7 @@ ), ( "spark.jars.repositories", - "https://mmlspark.azureedge.net/maven,https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure,https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage", + "https://mmlspark.blob.core.windows.net/maven,https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure,https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage", ), ], ) diff --git a/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln b/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln index 745db798d09..64c1357994c 100644 --- a/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln +++ b/tools/helm/zeppelin/mmlsparkExamples/classification_mmlspark_2E3REACQR.zpln @@ -118,7 +118,7 @@ "code": "SUCCESS", "msg": [ { - "data": "Help on package mmlspark:\n\nNAME\n mmlspark\n\nFILE\n /zeppelin/local-repo/Azure/mmlspark/0.15/mmlspark-0.15.jar/mmlspark/__init__.py\n\nDESCRIPTION\n MicrosoftML is a library of Python classes to interface with the\n Microsoft scala APIs to utilize Apache Spark to create distibuted\n machine learning models.\n \n MicrosoftML simplifies training and scoring classifiers and\n regressors, as well as facilitating the creation of models using the\n CNTK library, images, and text.\n\nPACKAGE CONTENTS\n AnalyzeImage\n AssembleFeatures\n BinaryFileReader\n BingImageReader\n BingImageSearch\n CNTKLearner\n CNTKModel\n Cacher\n CheckpointData\n ClassBalancer\n CleanMissingData\n ComputeModelStatistics\n ComputePerInstanceStatistics\n CustomInputParser\n CustomOutputParser\n DataConversion\n DescribeImage\n DetectFace\n DropColumns\n DynamicMiniBatchTransformer\n EnsembleByKey\n EntityDetector\n Explode\n FastVectorAssembler\n Featurize\n FindBestModel\n FindSimilarFace\n FixedMiniBatchTransformer\n FlattenBatch\n FluentAPI\n GenerateThumbnails\n GroupFaces\n HTTPTransformer\n HyperparamBuilder\n IdentifyFaces\n ImageFeaturizer\n ImageLIME\n ImageReader\n ImageSetAugmenter\n ImageTransformer\n ImageWriter\n IndexToValue\n JSONInputParser\n JSONOutputParser\n KeyPhraseExtractor\n Lambda\n LanguageDetector\n LightGBMClassifier\n LightGBMRegressor\n ModelDownloader\n MultiColumnAdapter\n MultiNGram\n NER\n OCR\n PageSplitter\n PartitionConsolidator\n PartitionSample\n PowerBIWriter\n RankingAdapter\n RankingAdapterModel\n RankingEvaluator\n RecognizeDomainSpecificContent\n RecognizeText\n RenameColumn\n Repartition\n SelectColumns\n ServingFunctions\n ServingImplicits\n SimpleHTTPTransformer\n StringOutputParser\n SummarizeData\n SuperpixelTransformer\n TagImage\n TextFeaturizer\n TextPreprocessor\n TextSentiment\n TimeIntervalMiniBatchTransformer\n Timer\n TrainClassifier\n TrainRegressor\n TuneHyperparameters\n TypeConversionUtils\n UDFTransformer\n UnrollBinaryImage\n UnrollImage\n Utils\n ValueIndexer\n ValueIndexerModel\n VerifyFaces\n _BingImageSearch\n _CNTKLearner\n _CNTKModel\n _FindBestModel\n _ImageFeaturizer\n _ImageTransformer\n _JSONOutputParser\n _LightGBMClassifier\n _LightGBMRegressor\n _ResizeImageTransformer\n _SimpleHTTPTransformer\n _TrainClassifier\n _TrainRegressor\n _TuneHyperparameters\n _UDFTransformer\n java_params_patch\n plot\n\nDATA\n BinaryFileFields = ['path', 'bytes']\n BinaryFileSchema = StructType(List(StructField(path,StringType,true),S...\n DEFAULT_URL = 'https://mmlspark.azureedge.net/datasets/CNTKModels/'\n ImageFields = ['path', 'height', 'width', 'type', 'bytes']\n ImageSchema = StructType(List(StructField(path,StringType,true...erTyp...\n __loader__ =

\r\n\r\nIn this tutorial, we perform the same classification task in two different ways: once using plain **`pyspark`** and once using the **`mmlspark`** library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).\r\n\r\nThe task is simple: Predict whether a user's review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560907558_-1510106009","id":"paragraph_1549560907558_-1510106009","dateCreated":"2019-02-07T17:35:52+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7270","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

103 - Simplifying Machine Learning Pipelines with mmlspark

\n

1. Introduction

\n


\n

In this tutorial, we perform the same classification task in two different ways: once using plain pyspark and once using the mmlspark library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).

\n

The task is simple: Predict whether a user’s review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.

\n
"}]},"runtimeInfos":{}},{"text":"%md\r\n### 2. Read the data\r\n\r\nWe download and read in the data. We show a sample below:","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560950666_-2091601662","id":"paragraph_1549560950666_-2091601662","dateCreated":"2019-02-07T17:36:11+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7361","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

2. Read the data

\n

We download and read in the data. We show a sample below:

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\n# Zeppelin needs the path to be update manually to find mmlspark library\nimport sys\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\n\nimport pandas as pd\nimport mmlspark\nfrom pyspark.sql.types import IntegerType, StringType, StructType, StructField\n\ndataFilePath = \"BookReviewsFromAmazon10K.tsv\"\ntextSchema = StructType([StructField(\"rating\", IntegerType(), False),\n StructField(\"text\", StringType(), False)])\nimport os, urllib\nif not os.path.isfile(dataFilePath):\n urllib.urlretrieve(\"https://mmlspark.azureedge.net/datasets/\" + dataFilePath, dataFilePath)\nrawData = spark.createDataFrame(pd.read_csv(dataFilePath, sep=\"\\t\", header=None), textSchema)\nrawData.show(5)\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560971147_-312816441","id":"paragraph_1549560971147_-312816441","dateCreated":"2019-02-07T17:36:26+0000","status":"READY","focus":true,"$$hashKey":"object:7460","runtimeInfos":{}},{"text":"%md\n### 3. Extract more features and process data\n\nReal data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the **word count** of the review and the **mean word length**.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561045867_-2023351219","id":"paragraph_1549561045867_-2023351219","dateCreated":"2019-02-07T17:37:29+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7616","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

3. Extract more features and process data

\n

Real data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the word count of the review and the mean word length.

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import LongType, FloatType, DoubleType\ndef wordCount(s):\n return len(s.split())\ndef wordLength(s):\n import numpy as np\n ss = [len(w) for w in s.split()]\n return round(float(np.mean(ss)), 2)\nwordLengthUDF = udf(wordLength, DoubleType())\nwordCountUDF = udf(wordCount, IntegerType())","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560986476_-857211016","id":"paragraph_1549560986476_-857211016","dateCreated":"2019-02-07T17:37:10+0000","status":"READY","focus":true,"$$hashKey":"object:7544","runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import UDFTransformer\nwordLength = \"wordLength\"\nwordCount = \"wordCount\"\nwordLengthTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordLength, udf=wordLengthUDF)\nwordCountTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordCount, udf=wordCountUDF)","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561076902_257402397","id":"paragraph_1549561076902_257402397","dateCreated":"2019-02-07T17:38:04+0000","status":"READY","focus":true,"$$hashKey":"object:7706","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml import Pipeline\ndata = Pipeline(stages=[wordLengthTransformer, wordCountTransformer]) \\\n .fit(rawData).transform(rawData) \\\n .withColumn(\"label\", rawData[\"rating\"] > 3).drop(\"rating\")","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561091918_-2108761101","id":"paragraph_1549561091918_-2108761101","dateCreated":"2019-02-07T17:38:16+0000","status":"READY","focus":true,"$$hashKey":"object:7778","runtimeInfos":{}},{"text":"%md\n### 4a. Classify using pyspark\n\nTo choose the best LogisticRegression classifier using the `pyspark` library, need to *explictly* perform the following steps:\n\n1. Process the features:\n * Tokenize the text column\n * Hash the tokenized column into a vector using hashing\n * Merge the numeric features with the vector in the step above\n2. Process the label column: cast it into the proper type.\n3. Train multiple LogisticRegression algorithms on the `train` dataset with different hyperparameters\n4. Compute the area under the ROC curve for each of the trained models and select the model with the highest metric as computed on the `test` dataset\n5. Evaluate the best model on the `validation` set\n\nAs you can see below, there is a lot of work involved and a lot of steps where something can go wrong!","user":"anonymous","config":{"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561105519_758143693","id":"paragraph_1549561105519_758143693","dateCreated":"2019-02-07T17:38:35+0000","status":"READY","focus":true,"$$hashKey":"object:7850","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.feature import Tokenizer, HashingTF\nfrom pyspark.ml.feature import VectorAssembler\n\n# Featurize text column\ntokenizer = Tokenizer(inputCol=\"text\", outputCol=\"tokenizedText\")\nnumFeatures = 10000\nhashingScheme = HashingTF(inputCol=\"tokenizedText\",\n outputCol=\"TextFeatures\",\n numFeatures=numFeatures)\ntokenizedData = tokenizer.transform(data)\nfeaturizedData = hashingScheme.transform(tokenizedData)\n\n# Merge text and numeric features in one feature column\nfeatureColumnsArray = [\"TextFeatures\", \"wordCount\", \"wordLength\"]\nassembler = VectorAssembler(\n inputCols = featureColumnsArray,\n outputCol=\"features\")\nassembledData = assembler.transform(featurizedData)\n\n# Select only columns of interest\n# Convert rating column from boolean to int\nprocessedData = assembledData \\\n .select(\"label\", \"features\") \\\n .withColumn(\"label\", assembledData.label.cast(IntegerType()))\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561123327_369147431","id":"paragraph_1549561123327_369147431","dateCreated":"2019-02-07T17:38:50+0000","status":"READY","focus":true,"$$hashKey":"object:7922","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.classification import LogisticRegression\n\n# Prepare data for learning\ntrain, test, validation = processedData.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nevaluator = BinaryClassificationEvaluator(rawPredictionCol=\"rawPrediction\",\n metricName=\"areaUnderROC\")\nmetrics = []\nmodels = []\n\n# Select the best model\nfor learner in logisticRegressions:\n model = learner.fit(train)\n models.append(model)\n scoredData = model.transform(test)\n metrics.append(evaluator.evaluate(scoredData))\nbestMetric = max(metrics)\nbestModel = models[metrics.index(bestMetric)]\n\n# Save model\nbestModel.write().overwrite().save(\"SparkMLExperiment.mmls\")\n# Get AUC on the validation dataset\nscoredVal = bestModel.transform(validation)\nprint(evaluator.evaluate(scoredVal))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561136914_-1460942872","id":"paragraph_1549561136914_-1460942872","dateCreated":"2019-02-07T17:39:01+0000","status":"READY","focus":true,"$$hashKey":"object:7994","runtimeInfos":{}},{"text":"%md\n### 4b. Classify using mmlspark\n\nLife is a lot simpler when using `mmlspark`!\n\n1. The **`TrainClassifier`** Estimator featurizes the data internally,\n as long as the columns selected in the `train`, `test`, `validation`\n dataset represent the features\n\n2. The **`FindBestModel`** Estimator find the best model from a pool of\n trained models by find the model which performs best on the `test`\n dataset given the specified metric\n\n3. The **`CompueModelStatistics`** Transformer computes the different\n metrics on a scored dataset (in our case, the `validation` dataset)\n at the same time","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561151031_-669643989","id":"paragraph_1549561151031_-669643989","dateCreated":"2019-02-07T17:39:16+0000","status":"FINISHED","focus":true,"$$hashKey":"object:8066","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

4b. Classify using mmlspark

\n

Life is a lot simpler when using mmlspark!

\n
    \n
  1. \n

    The TrainClassifier Estimator featurizes the data internally,
    as long as the columns selected in the train, test, validation
    dataset represent the features

  2. \n
  3. \n

    The FindBestModel Estimator find the best model from a pool of
    trained models by find the model which performs best on the test
    dataset given the specified metric

  4. \n
  5. \n

    The CompueModelStatistics Transformer computes the different
    metrics on a scored dataset (in our case, the validation dataset)
    at the same time

  6. \n
\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import TrainClassifier, FindBestModel, ComputeModelStatistics\n\n# Prepare data for learning\ntrain, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nlrmodels = [TrainClassifier(model=lrm, labelCol=\"label\", numFeatures=10000).fit(train)\n for lrm in logisticRegressions]\n\n# Select the best model\nbestModel = FindBestModel(evaluationMetric=\"AUC\", models=lrmodels).fit(test)\n\n# Save model\nbestModel.write().overwrite().save(\"MMLSExperiment.mmls\")\n# Get AUC on the validation dataset\npredictions = bestModel.transform(validation)\nmetrics = ComputeModelStatistics().transform(predictions)\nprint(\"Best model's AUC on validation set = \"\n + \"{0:.2f}%\".format(metrics.first()[\"AUC\"] * 100))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561156327_905971663","id":"paragraph_1549561156327_905971663","dateCreated":"2019-02-07T17:39:36+0000","status":"READY","focus":true,"$$hashKey":"object:8144","runtimeInfos":{}}],"name":"simplification_mmlspark","id":"2E3XBY5JN","defaultInterpreterGroup":"spark","noteParams":{},"noteForms":{},"angularObjects":{},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} +{"paragraphs":[{"user":"anonymous","config":{"editorSetting":{"language":"scala","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/scala","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560881266_-1396707350","id":"paragraph_1549560881266_-1396707350","dateCreated":"2019-02-07T17:35:07+0000","status":"READY","focus":true,"$$hashKey":"object:7074","text":"%spark.dep\n// include the azure mmlspark dependency\nz.reset()\nz.load(\"Azure:mmlspark:0.15\")\nz.load(\"org.apache.hadoop:hadoop-azure:2.7.0\")\nz.load(\"com.microsoft.azure:azure-storage:8.0.0\")","runtimeInfos":{}},{"text":"%md\r\n## 103 - Simplifying Machine Learning Pipelines with `mmlspark`\r\n\r\n### 1. Introduction\r\n\r\n


\r\n\r\nIn this tutorial, we perform the same classification task in two different ways: once using plain **`pyspark`** and once using the **`mmlspark`** library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).\r\n\r\nThe task is simple: Predict whether a user's review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560907558_-1510106009","id":"paragraph_1549560907558_-1510106009","dateCreated":"2019-02-07T17:35:52+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7270","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

103 - Simplifying Machine Learning Pipelines with mmlspark

\n

1. Introduction

\n


\n

In this tutorial, we perform the same classification task in two different ways: once using plain pyspark and once using the mmlspark library. The two methods yield the same performance, but one of the two libraries is drastically simpler to use and iterate on (can you guess which one?).

\n

The task is simple: Predict whether a user’s review of a book sold on Amazon is good (rating > 3) or bad based on the text of the review. We accomplish this by training LogisticRegression learners with different hyperparameters and choosing the best model.

\n
"}]},"runtimeInfos":{}},{"text":"%md\r\n### 2. Read the data\r\n\r\nWe download and read in the data. We show a sample below:","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560950666_-2091601662","id":"paragraph_1549560950666_-2091601662","dateCreated":"2019-02-07T17:36:11+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7361","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

2. Read the data

\n

We download and read in the data. We show a sample below:

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\n# Zeppelin needs the path to be update manually to find mmlspark library\nimport sys\nsys.path.extend(sc.getConf().get(\"spark.jars\").split(\",\"))\n\nimport pandas as pd\nimport mmlspark\nfrom pyspark.sql.types import IntegerType, StringType, StructType, StructField\n\ndataFilePath = \"BookReviewsFromAmazon10K.tsv\"\ntextSchema = StructType([StructField(\"rating\", IntegerType(), False),\n StructField(\"text\", StringType(), False)])\nimport os, urllib\nif not os.path.isfile(dataFilePath):\n urllib.urlretrieve(\"https://mmlspark.blob.core.windows.net/datasets/\" + dataFilePath, dataFilePath)\nrawData = spark.createDataFrame(pd.read_csv(dataFilePath, sep=\"\\t\", header=None), textSchema)\nrawData.show(5)\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560971147_-312816441","id":"paragraph_1549560971147_-312816441","dateCreated":"2019-02-07T17:36:26+0000","status":"READY","focus":true,"$$hashKey":"object:7460","runtimeInfos":{}},{"text":"%md\n### 3. Extract more features and process data\n\nReal data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the **word count** of the review and the **mean word length**.","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561045867_-2023351219","id":"paragraph_1549561045867_-2023351219","dateCreated":"2019-02-07T17:37:29+0000","status":"FINISHED","focus":true,"$$hashKey":"object:7616","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

3. Extract more features and process data

\n

Real data however is more complex than the above dataset. It is common for a dataset to have features of multiple types: text, numeric, categorical. To illustrate how difficult it is to work with these datasets, we add two numerical features to the dataset: the word count of the review and the mean word length.

\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql.types import LongType, FloatType, DoubleType\ndef wordCount(s):\n return len(s.split())\ndef wordLength(s):\n import numpy as np\n ss = [len(w) for w in s.split()]\n return round(float(np.mean(ss)), 2)\nwordLengthUDF = udf(wordLength, DoubleType())\nwordCountUDF = udf(wordCount, IntegerType())","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549560986476_-857211016","id":"paragraph_1549560986476_-857211016","dateCreated":"2019-02-07T17:37:10+0000","status":"READY","focus":true,"$$hashKey":"object:7544","runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import UDFTransformer\nwordLength = \"wordLength\"\nwordCount = \"wordCount\"\nwordLengthTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordLength, udf=wordLengthUDF)\nwordCountTransformer = UDFTransformer(inputCol=\"text\", outputCol=wordCount, udf=wordCountUDF)","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561076902_257402397","id":"paragraph_1549561076902_257402397","dateCreated":"2019-02-07T17:38:04+0000","status":"READY","focus":true,"$$hashKey":"object:7706","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml import Pipeline\ndata = Pipeline(stages=[wordLengthTransformer, wordCountTransformer]) \\\n .fit(rawData).transform(rawData) \\\n .withColumn(\"label\", rawData[\"rating\"] > 3).drop(\"rating\")","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561091918_-2108761101","id":"paragraph_1549561091918_-2108761101","dateCreated":"2019-02-07T17:38:16+0000","status":"READY","focus":true,"$$hashKey":"object:7778","runtimeInfos":{}},{"text":"%md\n### 4a. Classify using pyspark\n\nTo choose the best LogisticRegression classifier using the `pyspark` library, need to *explictly* perform the following steps:\n\n1. Process the features:\n * Tokenize the text column\n * Hash the tokenized column into a vector using hashing\n * Merge the numeric features with the vector in the step above\n2. Process the label column: cast it into the proper type.\n3. Train multiple LogisticRegression algorithms on the `train` dataset with different hyperparameters\n4. Compute the area under the ROC curve for each of the trained models and select the model with the highest metric as computed on the `test` dataset\n5. Evaluate the best model on the `validation` set\n\nAs you can see below, there is a lot of work involved and a lot of steps where something can go wrong!","user":"anonymous","config":{"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561105519_758143693","id":"paragraph_1549561105519_758143693","dateCreated":"2019-02-07T17:38:35+0000","status":"READY","focus":true,"$$hashKey":"object:7850","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.feature import Tokenizer, HashingTF\nfrom pyspark.ml.feature import VectorAssembler\n\n# Featurize text column\ntokenizer = Tokenizer(inputCol=\"text\", outputCol=\"tokenizedText\")\nnumFeatures = 10000\nhashingScheme = HashingTF(inputCol=\"tokenizedText\",\n outputCol=\"TextFeatures\",\n numFeatures=numFeatures)\ntokenizedData = tokenizer.transform(data)\nfeaturizedData = hashingScheme.transform(tokenizedData)\n\n# Merge text and numeric features in one feature column\nfeatureColumnsArray = [\"TextFeatures\", \"wordCount\", \"wordLength\"]\nassembler = VectorAssembler(\n inputCols = featureColumnsArray,\n outputCol=\"features\")\nassembledData = assembler.transform(featurizedData)\n\n# Select only columns of interest\n# Convert rating column from boolean to int\nprocessedData = assembledData \\\n .select(\"label\", \"features\") \\\n .withColumn(\"label\", assembledData.label.cast(IntegerType()))\n","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561123327_369147431","id":"paragraph_1549561123327_369147431","dateCreated":"2019-02-07T17:38:50+0000","status":"READY","focus":true,"$$hashKey":"object:7922","runtimeInfos":{}},{"text":"%pyspark\nfrom pyspark.ml.evaluation import BinaryClassificationEvaluator\nfrom pyspark.ml.classification import LogisticRegression\n\n# Prepare data for learning\ntrain, test, validation = processedData.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nevaluator = BinaryClassificationEvaluator(rawPredictionCol=\"rawPrediction\",\n metricName=\"areaUnderROC\")\nmetrics = []\nmodels = []\n\n# Select the best model\nfor learner in logisticRegressions:\n model = learner.fit(train)\n models.append(model)\n scoredData = model.transform(test)\n metrics.append(evaluator.evaluate(scoredData))\nbestMetric = max(metrics)\nbestModel = models[metrics.index(bestMetric)]\n\n# Save model\nbestModel.write().overwrite().save(\"SparkMLExperiment.mmls\")\n# Get AUC on the validation dataset\nscoredVal = bestModel.transform(validation)\nprint(evaluator.evaluate(scoredVal))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561136914_-1460942872","id":"paragraph_1549561136914_-1460942872","dateCreated":"2019-02-07T17:39:01+0000","status":"READY","focus":true,"$$hashKey":"object:7994","runtimeInfos":{}},{"text":"%md\n### 4b. Classify using mmlspark\n\nLife is a lot simpler when using `mmlspark`!\n\n1. The **`TrainClassifier`** Estimator featurizes the data internally,\n as long as the columns selected in the `train`, `test`, `validation`\n dataset represent the features\n\n2. The **`FindBestModel`** Estimator find the best model from a pool of\n trained models by find the model which performs best on the `test`\n dataset given the specified metric\n\n3. The **`CompueModelStatistics`** Transformer computes the different\n metrics on a scored dataset (in our case, the `validation` dataset)\n at the same time","user":"anonymous","config":{"tableHide":false,"editorSetting":{"language":"markdown","editOnDblClick":true,"completionKey":"TAB","completionSupport":false},"colWidth":12,"editorMode":"ace/mode/markdown","fontSize":9,"editorHide":true,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561151031_-669643989","id":"paragraph_1549561151031_-669643989","dateCreated":"2019-02-07T17:39:16+0000","status":"FINISHED","focus":true,"$$hashKey":"object:8066","results":{"code":"SUCCESS","msg":[{"type":"HTML","data":"
\n

4b. Classify using mmlspark

\n

Life is a lot simpler when using mmlspark!

\n
    \n
  1. \n

    The TrainClassifier Estimator featurizes the data internally,
    as long as the columns selected in the train, test, validation
    dataset represent the features

  2. \n
  3. \n

    The FindBestModel Estimator find the best model from a pool of
    trained models by find the model which performs best on the test
    dataset given the specified metric

  4. \n
  5. \n

    The CompueModelStatistics Transformer computes the different
    metrics on a scored dataset (in our case, the validation dataset)
    at the same time

  6. \n
\n
"}]},"runtimeInfos":{}},{"text":"%pyspark\nfrom mmlspark import TrainClassifier, FindBestModel, ComputeModelStatistics\n\n# Prepare data for learning\ntrain, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)\n\n# Train the models on the 'train' data\nlrHyperParams = [0.05, 0.1, 0.2, 0.4]\nlogisticRegressions = [LogisticRegression(regParam = hyperParam)\n for hyperParam in lrHyperParams]\nlrmodels = [TrainClassifier(model=lrm, labelCol=\"label\", numFeatures=10000).fit(train)\n for lrm in logisticRegressions]\n\n# Select the best model\nbestModel = FindBestModel(evaluationMetric=\"AUC\", models=lrmodels).fit(test)\n\n# Save model\nbestModel.write().overwrite().save(\"MMLSExperiment.mmls\")\n# Get AUC on the validation dataset\npredictions = bestModel.transform(validation)\nmetrics = ComputeModelStatistics().transform(predictions)\nprint(\"Best model's AUC on validation set = \"\n + \"{0:.2f}%\".format(metrics.first()[\"AUC\"] * 100))","user":"anonymous","config":{"editorSetting":{"language":"python","editOnDblClick":false,"completionKey":"TAB","completionSupport":true},"colWidth":12,"editorMode":"ace/mode/python","fontSize":9,"results":{},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"progressUpdateIntervalMs":500,"jobName":"paragraph_1549561156327_905971663","id":"paragraph_1549561156327_905971663","dateCreated":"2019-02-07T17:39:36+0000","status":"READY","focus":true,"$$hashKey":"object:8144","runtimeInfos":{}}],"name":"simplification_mmlspark","id":"2E3XBY5JN","defaultInterpreterGroup":"spark","noteParams":{},"noteForms":{},"angularObjects":{},"config":{"isZeppelinNotebookCronEnable":false,"looknfeel":"default","personalizedMode":"false"},"info":{}} diff --git a/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln b/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln index ebaa53aadf4..9a00bfcad90 100644 --- a/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln +++ b/tools/helm/zeppelin/mmlsparkExamples/submitjob_2DZ7DHX6E.zpln @@ -1,7 +1,7 @@ { "paragraphs": [ { - "text": "%md\nContents of /zeppelin/notebook/mmlspark/serving.py\n```\nimport mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel(\"WARN\")\n\nprint(\"creating df\")\ndf = spark.readStream.continuousServer() \\\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\n .load() \\\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\n\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\n .makeReply(\"fooLength\")\n\nprint(\"creating server\")\nserver = replies\\\n .writeStream \\\n .continuousServer() \\\n .trigger(continuous=\"1 second\") \\\n .replyTo(\"my_api\") \\\n .queryName(\"my_query\") \\\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\n\nprint(\"starting server\")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven serving2.py\n\n# Test \n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/\n```", + "text": "%md\nContents of /zeppelin/notebook/mmlspark/serving.py\n```\nimport mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName(\"SimpleContServing\").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel(\"WARN\")\n\nprint(\"creating df\")\ndf = spark.readStream.continuousServer() \\\n .address(\"0.0.0.0\", 8888, \"my_api\") \\\n .load() \\\n .parseRequest(StructType().add(\"foo\", StringType()).add(\"bar\", IntegerType()))\n\nreplies = df.withColumn(\"fooLength\", length(col(\"foo\")))\\\n .makeReply(\"fooLength\")\n\nprint(\"creating server\")\nserver = replies\\\n .writeStream \\\n .continuousServer() \\\n .trigger(continuous=\"1 second\") \\\n .replyTo(\"my_api\") \\\n .queryName(\"my_query\") \\\n .option(\"checkpointLocation\", \"file:///tmp/checkpoints\")\n\nprint(\"starting server\")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.blob.core.windows.net/maven serving2.py\n\n# Test \n# curl -X POST -d '{\"foo\":\"foolen\", \"bar\":43}' -H \"ContentType: application/json\" http://[[ip address of load balancer]]:8888/\n```", "user": "anonymous", "config": { "tableHide": false, @@ -26,7 +26,7 @@ "msg": [ { "type": "HTML", - "data": "
\n

Contents of /zeppelin/notebook/mmlspark/serving.py

\n
import mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName("SimpleContServing").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel("WARN")\n\nprint("creating df")\ndf = spark.readStream.continuousServer() \\\n    .address("0.0.0.0", 8888, "my_api") \\\n    .load() \\\n    .parseRequest(StructType().add("foo", StringType()).add("bar", IntegerType()))\n\nreplies = df.withColumn("fooLength", length(col("foo")))\\\n    .makeReply("fooLength")\n\nprint("creating server")\nserver = replies\\\n    .writeStream \\\n    .continuousServer() \\\n    .trigger(continuous="1 second") \\\n    .replyTo("my_api") \\\n    .queryName("my_query") \\\n    .option("checkpointLocation", "file:///tmp/checkpoints")\n\nprint("starting server")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven  serving2.py\n\n# Test \n# curl -X POST -d '{"foo":"foolen", "bar":43}' -H "ContentType: application/json" http://[[ip address of load balancer]]:8888/\n
\n
" + "data": "
\n

Contents of /zeppelin/notebook/mmlspark/serving.py

\n
import mmlspark\nfrom pyspark.sql.types import *\nfrom pyspark.sql import SparkSession\n\nfrom pyspark.sql.functions import length, col\n\nspark = SparkSession.builder.appName("SimpleContServing").getOrCreate()\nsc = spark.sparkContext\nsc.setLogLevel("WARN")\n\nprint("creating df")\ndf = spark.readStream.continuousServer() \\\n    .address("0.0.0.0", 8888, "my_api") \\\n    .load() \\\n    .parseRequest(StructType().add("foo", StringType()).add("bar", IntegerType()))\n\nreplies = df.withColumn("fooLength", length(col("foo")))\\\n    .makeReply("fooLength")\n\nprint("creating server")\nserver = replies\\\n    .writeStream \\\n    .continuousServer() \\\n    .trigger(continuous="1 second") \\\n    .replyTo("my_api") \\\n    .queryName("my_query") \\\n    .option("checkpointLocation", "file:///tmp/checkpoints")\n\nprint("starting server")\nquery = server.start()\nquery.awaitTermination()\n\n# Submit the server\n# .\\bin\\spark-submit --packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.blob.core.windows.net/maven  serving2.py\n\n# Test \n# curl -X POST -d '{"foo":"foolen", "bar":43}' -H "ContentType: application/json" http://[[ip address of load balancer]]:8888/\n
\n
" } ] }, diff --git a/tools/helm/zeppelin/zeppelin-env.sh b/tools/helm/zeppelin/zeppelin-env.sh index 63f4a928a52..d455f23903d 100644 --- a/tools/helm/zeppelin/zeppelin-env.sh +++ b/tools/helm/zeppelin/zeppelin-env.sh @@ -71,7 +71,7 @@ export MASTER="${SPARK_MASTER:=local[*]}" ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit ## export SPARK_HOME=/opt/spark/ # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries -# export SPARK_SUBMIT_OPTIONS="--packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.azureedge.net/maven" # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". +# export SPARK_SUBMIT_OPTIONS="--packages com.microsoft.ml.spark:mmlspark_2.11:0.14.dev42 --repositories https://mmlspark.blob.core.windows.net/maven" # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". # export SPARK_APP_NAME # (optional) The name of spark application. ## Use embedded spark binaries ## diff --git a/website/doctest.py b/website/doctest.py index 6e2fcaeebfc..d3d29cc13b4 100644 --- a/website/doctest.py +++ b/website/doctest.py @@ -19,7 +19,7 @@ def add_python_helper_to_markdown(folder, md, version): spark = (pyspark.sql.SparkSession.builder.appName("MyApp") .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:{}") - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") .getOrCreate()) def getSecret(secretName): diff --git a/website/src/pages/index.js b/website/src/pages/index.js index 8ee4f908562..c454b10896e 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -269,7 +269,7 @@ function Home() { "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -284,7 +284,7 @@ function Home() { "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -301,7 +301,7 @@ function Home() { "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:[THE_SYNAPSEML_VERSION_YOU_WANT]", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -354,7 +354,7 @@ spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.8 MyApp.jar `} > with the resolver: Ensure this library is attached to your target cluster(s). @@ -406,7 +406,7 @@ spark-submit --packages com.microsoft.azure:synapseml_2.12:1.0.8 MyApp.jar `} snippet={`import pyspark spark = (pyspark.sql.SparkSession.builder.appName("MyApp") .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.8") # Please use 1.0.8 version for Spark3.4 and 0.11.4-spark3.3 version for Spark3.3 - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") .getOrCreate()) import synapse.ml`} lang="python" @@ -416,7 +416,7 @@ import synapse.ml`} If you are building a Spark application in Scala, add the following lines to your build.sbt: diff --git a/website/versioned_docs/version-0.11.3/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-0.11.3/Explore Algorithms/Deep Learning/Getting Started.md index 26c05bd0c2c..b05f39b4fbd 100644 --- a/website/versioned_docs/version-0.11.3/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-0.11.3/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==0.11.3 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:0.11.3 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-0.11.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-0.11.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index e5d7a443d73..68e15a7d8b3 100644 --- a/website/versioned_docs/version-0.11.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-0.11.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:0.11.3 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-0.11.3/Get Started/Install SynapseML.md b/website/versioned_docs/version-0.11.3/Get Started/Install SynapseML.md index 4eca5bb3ede..59deca21f65 100644 --- a/website/versioned_docs/version-0.11.3/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-0.11.3/Get Started/Install SynapseML.md @@ -15,7 +15,7 @@ For Spark3.2 pool: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.3,org.apache.spark:spark-avro_2.12:3.3.1", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false", @@ -31,7 +31,7 @@ For Spark3.3 pool: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.3-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -49,7 +49,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.3-spark3.3 version for Spark3.3 and 0.11.3 version for Spark3.2 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:0.11.3") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -60,7 +60,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.3 version for Spark3.2 and 0.11.3-spark3.3 for Spark3.3 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "0.11.3" ``` @@ -91,7 +91,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:0.11.3` for Spark3.2 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.3-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. @@ -112,7 +112,7 @@ In Microsoft Fabric notebooks please place the following in the first cell of yo "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.3,org.apache.spark:spark-avro_2.12:3.3.1", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false", @@ -129,7 +129,7 @@ In Microsoft Fabric notebooks please place the following in the first cell of yo "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.3-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-0.11.3/Reference/Contributor Guide.md b/website/versioned_docs/version-0.11.3/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-0.11.3/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-0.11.3/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-0.11.3/Reference/R Setup.md b/website/versioned_docs/version-0.11.3/Reference/R Setup.md index fb30d1df389..00d01c116b6 100644 --- a/website/versioned_docs/version-0.11.3/Reference/R Setup.md +++ b/website/versioned_docs/version-0.11.3/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-0.11.3.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-0.11.3.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-0.11.4/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-0.11.4/Explore Algorithms/Deep Learning/Getting Started.md index bb16e7e37de..6b55194a52c 100644 --- a/website/versioned_docs/version-0.11.4/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-0.11.4/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==0.11.4 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:0.11.4 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-0.11.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-0.11.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 026b7c2d2a8..cbed2ea16b7 100644 --- a/website/versioned_docs/version-0.11.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-0.11.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:0.11.4 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-0.11.4/Get Started/Install SynapseML.md b/website/versioned_docs/version-0.11.4/Get Started/Install SynapseML.md index ec85a065900..4f964d68ac7 100644 --- a/website/versioned_docs/version-0.11.4/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-0.11.4/Get Started/Install SynapseML.md @@ -15,7 +15,7 @@ For Spark3.2 pool: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4,org.apache.spark:spark-avro_2.12:3.3.1", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false", @@ -31,7 +31,7 @@ For Spark3.3 pool: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -49,7 +49,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 0.11.4 version for Spark3.2 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:0.11.4") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -60,7 +60,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4 version for Spark3.2 and 0.11.4-spark3.3 for Spark3.3 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "0.11.4" ``` @@ -91,7 +91,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:0.11.4` for Spark3.2 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. @@ -112,7 +112,7 @@ In Microsoft Fabric notebooks please place the following in the first cell of yo "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4,org.apache.spark:spark-avro_2.12:3.3.1", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false", @@ -129,7 +129,7 @@ In Microsoft Fabric notebooks please place the following in the first cell of yo "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-0.11.4/Reference/Contributor Guide.md b/website/versioned_docs/version-0.11.4/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-0.11.4/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-0.11.4/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-0.11.4/Reference/R Setup.md b/website/versioned_docs/version-0.11.4/Reference/R Setup.md index 8fefd2f6138..a0baefa489f 100644 --- a/website/versioned_docs/version-0.11.4/Reference/R Setup.md +++ b/website/versioned_docs/version-0.11.4/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-0.11.4.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-0.11.4.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.1/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.1/Explore Algorithms/Deep Learning/Getting Started.md index b0416662ae3..8f373b7cc0d 100644 --- a/website/versioned_docs/version-1.0.1/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.1/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.1 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.1 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.1/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.1/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index a811b956e0d..3126c2d9646 100644 --- a/website/versioned_docs/version-1.0.1/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.1/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.1 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.1/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.1/Get Started/Install SynapseML.md index d6202d8ad30..9a30d1b7bb7 100644 --- a/website/versioned_docs/version-1.0.1/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.1/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.1", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.1 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.1") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.1 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.1" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.1` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.1/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.1/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.1/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.1/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.1/Reference/R Setup.md b/website/versioned_docs/version-1.0.1/Reference/R Setup.md index d44604b0c62..114f37a3745 100644 --- a/website/versioned_docs/version-1.0.1/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.1/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.1.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.1.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.2/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.2/Explore Algorithms/Deep Learning/Getting Started.md index 723270d19e1..7345be333e0 100644 --- a/website/versioned_docs/version-1.0.2/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.2/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.2 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.2 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.2/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.2/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 503af9c558c..0b0402a40db 100644 --- a/website/versioned_docs/version-1.0.2/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.2/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.2 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.2/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.2/Get Started/Install SynapseML.md index 1af8bf1f10c..22288a3680b 100644 --- a/website/versioned_docs/version-1.0.2/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.2/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.2", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.2 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.2") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.2 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.2" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.2` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.2/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.2/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.2/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.2/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.2/Reference/R Setup.md b/website/versioned_docs/version-1.0.2/Reference/R Setup.md index 7272697f61f..d02ba5a2b6c 100644 --- a/website/versioned_docs/version-1.0.2/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.2/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.2.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.2.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.3/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.3/Explore Algorithms/Deep Learning/Getting Started.md index 694acec8f0c..3d26afb5fa6 100644 --- a/website/versioned_docs/version-1.0.3/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.3/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.3 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.3 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 32df597c019..3e6a24d5763 100644 --- a/website/versioned_docs/version-1.0.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.3/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.3 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.3/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.3/Get Started/Install SynapseML.md index 7a10850c83e..521afa411cd 100644 --- a/website/versioned_docs/version-1.0.3/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.3/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.3 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.3") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.3 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.3" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.3` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.3/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.3/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.3/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.3/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.3/Reference/R Setup.md b/website/versioned_docs/version-1.0.3/Reference/R Setup.md index d9ff17c50cf..d401171b632 100644 --- a/website/versioned_docs/version-1.0.3/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.3/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.3.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.3.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.4/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.4/Explore Algorithms/Deep Learning/Getting Started.md index f097fbd3a27..aa5a7726148 100644 --- a/website/versioned_docs/version-1.0.4/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.4/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.4 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.4 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 37d05610f48..082478bd777 100644 --- a/website/versioned_docs/version-1.0.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.4/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.4 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.4/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.4/Get Started/Install SynapseML.md index e54849771fa..1c375743693 100644 --- a/website/versioned_docs/version-1.0.4/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.4/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.4", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.4 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.4") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.4 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.4" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.4` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.4/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.4/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.4/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.4/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.4/Reference/R Setup.md b/website/versioned_docs/version-1.0.4/Reference/R Setup.md index 3eae8a94358..b435d20ebff 100644 --- a/website/versioned_docs/version-1.0.4/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.4/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.4.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.4.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.5/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md b/website/versioned_docs/version-1.0.5/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md index b4c241b3160..40ce8d5d732 100644 --- a/website/versioned_docs/version-1.0.5/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md +++ b/website/versioned_docs/version-1.0.5/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md @@ -19,7 +19,7 @@ To learn more about the Isolation Forest model please refer to the original pape # "name": "synapseml", # "conf": { # "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", -# "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", +# "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", # "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", # "spark.yarn.user.classpath.first": "true", # "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-1.0.5/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.5/Explore Algorithms/Deep Learning/Getting Started.md index 23cdf72be95..3141f3d119e 100644 --- a/website/versioned_docs/version-1.0.5/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.5/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.5 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.5 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.5/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.5/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index d6fbd3dccc1..30c2365c047 100644 --- a/website/versioned_docs/version-1.0.5/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.5/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.5 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.5/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.5/Get Started/Install SynapseML.md index a15f60590c0..5dd9dec609d 100644 --- a/website/versioned_docs/version-1.0.5/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.5/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.5", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.5 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.5") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.5 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.5" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.5` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.5/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.5/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.5/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.5/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.5/Reference/R Setup.md b/website/versioned_docs/version-1.0.5/Reference/R Setup.md index d59f3d3b77e..118712fa226 100644 --- a/website/versioned_docs/version-1.0.5/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.5/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.5.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.5.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.6/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md b/website/versioned_docs/version-1.0.6/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md index b4c241b3160..40ce8d5d732 100644 --- a/website/versioned_docs/version-1.0.6/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md +++ b/website/versioned_docs/version-1.0.6/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md @@ -19,7 +19,7 @@ To learn more about the Isolation Forest model please refer to the original pape # "name": "synapseml", # "conf": { # "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", -# "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", +# "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", # "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", # "spark.yarn.user.classpath.first": "true", # "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-1.0.6/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.6/Explore Algorithms/Deep Learning/Getting Started.md index 559c37928af..ff06a5d7085 100644 --- a/website/versioned_docs/version-1.0.6/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.6/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.6 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.6 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.6/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.6/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index e34820ccf20..72f57ee0173 100644 --- a/website/versioned_docs/version-1.0.6/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.6/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.6 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.6/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.6/Get Started/Install SynapseML.md index 114cbcf8a39..3513a9dc8d1 100644 --- a/website/versioned_docs/version-1.0.6/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.6/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.6", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.6 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.6") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.6 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.6" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.6` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.6/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.6/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.6/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.6/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.6/Reference/R Setup.md b/website/versioned_docs/version-1.0.6/Reference/R Setup.md index 6d13496fd26..f4f18df7ab5 100644 --- a/website/versioned_docs/version-1.0.6/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.6/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.6.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.6.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.7/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md b/website/versioned_docs/version-1.0.7/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md index b4c241b3160..40ce8d5d732 100644 --- a/website/versioned_docs/version-1.0.7/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md +++ b/website/versioned_docs/version-1.0.7/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md @@ -19,7 +19,7 @@ To learn more about the Isolation Forest model please refer to the original pape # "name": "synapseml", # "conf": { # "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", -# "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", +# "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", # "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", # "spark.yarn.user.classpath.first": "true", # "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-1.0.7/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.7/Explore Algorithms/Deep Learning/Getting Started.md index 3ae4371d327..289097b8d0c 100644 --- a/website/versioned_docs/version-1.0.7/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.7/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.7 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.7 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.7/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.7/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 8fa5224bdb0..46a0bf9b235 100644 --- a/website/versioned_docs/version-1.0.7/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.7/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.7 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.7/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.7/Get Started/Install SynapseML.md index deb7dc7d75c..e6f7512c21f 100644 --- a/website/versioned_docs/version-1.0.7/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.7/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.7", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.7") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.7 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.7" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.7` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.7/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.7/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.7/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.7/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.7/Reference/R Setup.md b/website/versioned_docs/version-1.0.7/Reference/R Setup.md index 142b371591b..5a08953237b 100644 --- a/website/versioned_docs/version-1.0.7/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.7/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.7.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.7.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks") diff --git a/website/versioned_docs/version-1.0.8/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md b/website/versioned_docs/version-1.0.8/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md index b4c241b3160..40ce8d5d732 100644 --- a/website/versioned_docs/version-1.0.8/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md +++ b/website/versioned_docs/version-1.0.8/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.md @@ -19,7 +19,7 @@ To learn more about the Isolation Forest model please refer to the original pape # "name": "synapseml", # "conf": { # "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", -# "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", +# "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", # "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", # "spark.yarn.user.classpath.first": "true", # "spark.sql.parquet.enableVectorizedReader": "false" diff --git a/website/versioned_docs/version-1.0.8/Explore Algorithms/Deep Learning/Getting Started.md b/website/versioned_docs/version-1.0.8/Explore Algorithms/Deep Learning/Getting Started.md index d23cc8330d2..ab1efa3214f 100644 --- a/website/versioned_docs/version-1.0.8/Explore Algorithms/Deep Learning/Getting Started.md +++ b/website/versioned_docs/version-1.0.8/Explore Algorithms/Deep Learning/Getting Started.md @@ -27,7 +27,7 @@ pip install synapseml==1.0.8 An alternative is installing the SynapseML jar package in library management section, by adding: ``` Coordinate: com.microsoft.azure:synapseml_2.12:1.0.8 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven ``` :::note If you install the jar package, follow the first two cells of this [sample](../Quickstart%20-%20Fine-tune%20a%20Vision%20Classifier#environment-setup----reinstall-horovod-based-on-new-version-of-pytorch) diff --git a/website/versioned_docs/version-1.0.8/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md b/website/versioned_docs/version-1.0.8/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md index 20c2cdf152c..403c46a2d07 100644 --- a/website/versioned_docs/version-1.0.8/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md +++ b/website/versioned_docs/version-1.0.8/Explore Algorithms/Other Algorithms/Quickstart - Anomalous Access Detection.md @@ -29,7 +29,7 @@ Note: the data does NOT contain information about departments, this information 1. In Cluster Libraries install from library source Maven: Coordinates: com.microsoft.azure:synapseml_2.12:1.0.8 -Repository: https://mmlspark.azureedge.net/maven +Repository: https://mmlspark.blob.core.windows.net/maven 2. In Cluster Libraries install from PyPI the library called plotly diff --git a/website/versioned_docs/version-1.0.8/Get Started/Install SynapseML.md b/website/versioned_docs/version-1.0.8/Get Started/Install SynapseML.md index 394d45daca1..68617a1842d 100644 --- a/website/versioned_docs/version-1.0.8/Get Started/Install SynapseML.md +++ b/website/versioned_docs/version-1.0.8/Get Started/Install SynapseML.md @@ -13,7 +13,7 @@ SynapseML is already installed in Microsoft Fabric notebooks. To change the vers "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -33,7 +33,7 @@ For Spark3.4 pools "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:1.0.8", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -48,7 +48,7 @@ For Spark3.3 pools: "name": "synapseml", "conf": { "spark.jars.packages": "com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3", - "spark.jars.repositories": "https://mmlspark.azureedge.net/maven", + "spark.jars.repositories": "https://mmlspark.blob.core.windows.net/maven", "spark.jars.excludes": "org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind", "spark.yarn.user.classpath.first": "true", "spark.sql.parquet.enableVectorizedReader": "false" @@ -66,7 +66,7 @@ import pyspark spark = pyspark.sql.SparkSession.builder.appName("MyApp") \ # Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:1.0.8") \ - .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven") \ + .config("spark.jars.repositories", "https://mmlspark.blob.core.windows.net/maven") \ .getOrCreate() import synapse.ml ``` @@ -77,7 +77,7 @@ If you're building a Spark application in Scala, add the following lines to your `build.sbt`: ```scala -resolvers += "SynapseML" at "https://mmlspark.azureedge.net/maven" +resolvers += "SynapseML" at "https://mmlspark.blob.core.windows.net/maven" // Use 0.11.4-spark3.3 version for Spark3.3 and 1.0.8 version for Spark3.4 libraryDependencies += "com.microsoft.azure" % "synapseml_2.12" % "1.0.8" ``` @@ -108,7 +108,7 @@ in your workspace. For the coordinates use: `com.microsoft.azure:synapseml_2.12:1.0.8` for Spark3.4 Cluster and `com.microsoft.azure:synapseml_2.12:0.11.4-spark3.3` for Spark3.3 Cluster; -Add the resolver: `https://mmlspark.azureedge.net/maven`. Ensure this library is +Add the resolver: `https://mmlspark.blob.core.windows.net/maven`. Ensure this library is attached to your target cluster(s). Finally, ensure that your Spark cluster has at least Spark 3.2 and Scala 2.12. diff --git a/website/versioned_docs/version-1.0.8/Reference/Contributor Guide.md b/website/versioned_docs/version-1.0.8/Reference/Contributor Guide.md index e8413400828..e44cc20f222 100644 --- a/website/versioned_docs/version-1.0.8/Reference/Contributor Guide.md +++ b/website/versioned_docs/version-1.0.8/Reference/Contributor Guide.md @@ -65,7 +65,7 @@ this process: case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API - reference documentation](https://mmlspark.azureedge.net/docs/pyspark/) + reference documentation](https://mmlspark.blob.core.windows.net/docs/pyspark/) #### Open a pull request diff --git a/website/versioned_docs/version-1.0.8/Reference/R Setup.md b/website/versioned_docs/version-1.0.8/Reference/R Setup.md index d7588702db2..c158adb93e6 100644 --- a/website/versioned_docs/version-1.0.8/Reference/R Setup.md +++ b/website/versioned_docs/version-1.0.8/Reference/R Setup.md @@ -22,7 +22,7 @@ To install the current SynapseML package for R, first install synapseml-core: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-core-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-core-0.11.0.zip") ... ``` @@ -38,11 +38,11 @@ In other words: ```R ... -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-cognitive-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-deep-learning-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-lightgbm-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-opencv-0.11.0.zip") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-vw-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-cognitive-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-deep-learning-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-lightgbm-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-opencv-0.11.0.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-vw-0.11.0.zip") ... ``` @@ -120,7 +120,7 @@ and then use spark_connect with method = "databricks": ```R install.packages("devtools") -devtools::install_url("https://mmlspark.azureedge.net/rrr/synapseml-1.0.8.zip") +devtools::install_url("https://mmlspark.blob.core.windows.net/rrr/synapseml-1.0.8.zip") library(sparklyr) library(dplyr) sc <- spark_connect(method = "databricks")