Unit tests

microsoft · Aug 2, 2021 · b56aefa
1 parent 01bc243
commit b56aefa
Show file tree

Hide file tree

Showing 5 changed files with 130 additions and 123 deletions.
diff --git a/build.sbt b/build.sbt
@@ -213,7 +213,7 @@ lazy val deepLearning = (project in file("deep-learning"))
   .settings(settings ++ Seq(
     libraryDependencies ++= Seq(
       "com.microsoft.cntk" % "cntk" % "2.4",
-      "com.microsoft.onnxruntime" % "onnxruntime" % "1.8.0"
+      "com.microsoft.onnxruntime" % "onnxruntime" % "1.8.1"
     ),
     name := "mmlspark-deep-learning",
   ): _*)

diff --git a/deep-learning/src/test/scala/com/microsoft/ml/spark/onnx/ONNXModelSuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/onnx/ONNXModelSuite.scala
@@ -4,29 +4,37 @@ import breeze.linalg.{argmax, argtopk}
 import com.microsoft.ml.spark.build.BuildInfo
 import com.microsoft.ml.spark.core.env.FileUtilities
 import com.microsoft.ml.spark.core.test.base.TestBase
-import com.microsoft.ml.spark.core.utils.BreezeUtils._
 import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import com.microsoft.ml.spark.core.utils.BreezeUtils._
 import com.microsoft.ml.spark.io.IOImplicits._
 import com.microsoft.ml.spark.opencv.ImageTransformer
 import org.apache.commons.io.FileUtils
 import org.apache.spark.injections.UDFUtils
 import org.apache.spark.ml.image.ImageSchema
 import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors}
 import org.apache.spark.ml.util.MLReadable
-import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.expressions.UserDefinedFunction
-import org.apache.spark.sql.types.{FloatType, IntegerType}
 import org.apache.spark.sql.functions._
+import org.apache.spark.sql.types.{FloatType, IntegerType}
+import org.apache.spark.sql.{DataFrame, Row}
 import org.scalactic.{Equality, TolerantNumerics}
 
 import java.io.File
 import java.net.URL
 
-class ONNXModelSuite extends TestBase {
-  // with TransformerFuzzing[ONNXModel] {
-  //  override def testObjects(): Seq[TestObject[ONNXModel]] = ???
-  //
-  //  override def reader: MLReadable[_] = ???
+class ONNXModelSuite extends TestBase
+  with TransformerFuzzing[ONNXModel] {
+
+  override def testObjects(): Seq[TestObject[ONNXModel]] = Seq(
+    new TestObject(onnxIris, testDfIrisFloat),
+    new TestObject(onnxIris, testDfIrisDouble),
+    new TestObject(onnxIris, testDfIrisVector),
+    new TestObject(onnxMNIST, testDfMNIST),
+    new TestObject(onnxAdultsIncome, testDfAdultsIncome),
+    new TestObject(onnxResNet50, testDfResNet50)
+  )
+
+  override def reader: MLReadable[_] = ONNXModel
 
   private val baseUrl = "https://mmlspark.blob.core.windows.net/publicwasb/ONNXModels/"
   private implicit val eqFloat: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1E-5f)
@@ -42,23 +50,36 @@ class ONNXModelSuite extends TestBase {
     f
   }
 
-  test("ONNXModel can infer observations of matching input types") {
+  private lazy val onnxIris: ONNXModel = {
     // Making sure spark context is initialized
     spark
-
     val model = downloadModel("iris.onnx", baseUrl)
-    val onnx = new ONNXModel()
+    new ONNXModel()
       .setModelLocation(model.getPath)
       .setFeedDict(Map("float_input" -> "features"))
       .setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))
+  }
 
-    val testDf = Seq(
-      Array(6.7f, 3.1f, 4.7f, 1.5f),
-      Array(4.9f, 3.0f, 1.4f, 0.2f),
-      Array(5.8f, 2.7f, 5.1f, 1.9f)
-    ) toDF "features"
+  private lazy val testDfIrisFloat: DataFrame = Seq(
+    Array(6.7f, 3.1f, 4.7f, 1.5f),
+    Array(4.9f, 3.0f, 1.4f, 0.2f),
+    Array(5.8f, 2.7f, 5.1f, 1.9f)
+  ) toDF "features"
 
-    val predicted = onnx.transform(testDf).as[(Seq[Float], Long, Map[Long, Float])].collect()
+  private lazy val testDfIrisDouble: DataFrame = Seq(
+    Array(6.7d, 3.1d, 4.7d, 1.5d),
+    Array(4.9d, 3.0d, 1.4d, 0.2d),
+    Array(5.8d, 2.7d, 5.1d, 1.9d)
+  ) toDF "features"
+
+  private lazy val testDfIrisVector: DataFrame = Seq(
+    Tuple1(Vectors.dense(6.7d, 3.1d, 4.7d, 1.5d)),
+    Tuple1(Vectors.dense(4.9d, 3.0d, 1.4d, 0.2d)),
+    Tuple1(Vectors.dense(5.8d, 2.7d, 5.1d, 1.9d))
+  ) toDF "features"
+
+  test("ONNXModel can infer observations of matching input types") {
+    val predicted = onnxIris.transform(testDfIrisFloat).as[(Seq[Float], Long, Map[Long, Float])].collect()
 
     assert(predicted(0)._2 == 1L)
     assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
@@ -71,22 +92,7 @@ class ONNXModelSuite extends TestBase {
   }
 
   test("ONNXModel can infer observations of compatible input types") {
-    // Making sure spark context is initialized
-    spark
-
-    val model = downloadModel("iris.onnx", baseUrl)
-    val onnx = new ONNXModel()
-      .setModelLocation(model.getPath)
-      .setFeedDict(Map("float_input" -> "features"))
-      .setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))
-
-    val testDf = Seq(
-      Array(6.7d, 3.1d, 4.7d, 1.5d),
-      Array(4.9d, 3.0d, 1.4d, 0.2d),
-      Array(5.8d, 2.7d, 5.1d, 1.9d)
-    ) toDF "features"
-
-    val predicted = onnx.transform(testDf).as[(Seq[Double], Long, Map[Long, Float])].collect()
+    val predicted = onnxIris.transform(testDfIrisDouble).as[(Seq[Double], Long, Map[Long, Float])].collect()
 
     assert(predicted(0)._2 == 1L)
     assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
@@ -99,22 +105,7 @@ class ONNXModelSuite extends TestBase {
   }
 
   test("ONNXModel can infer observations of vector input types") {
-    // Making sure spark context is initialized
-    spark
-
-    val model = downloadModel("iris.onnx", baseUrl)
-    val onnx = new ONNXModel()
-      .setModelLocation(model.getPath)
-      .setFeedDict(Map("float_input" -> "features"))
-      .setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))
-
-    val testDf = Seq(
-      Tuple1(Vectors.dense(6.7d, 3.1d, 4.7d, 1.5d)),
-      Tuple1(Vectors.dense(4.9d, 3.0d, 1.4d, 0.2d)),
-      Tuple1(Vectors.dense(5.8d, 2.7d, 5.1d, 1.9d))
-    ) toDF "features"
-
-    val predicted = onnx.transform(testDf).as[(DenseVector, Long, Map[Long, Float])].collect()
+    val predicted = onnxIris.transform(testDfIrisVector).as[(DenseVector, Long, Map[Long, Float])].collect()
 
     assert(predicted(0)._2 == 1L)
     assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
@@ -126,6 +117,19 @@ class ONNXModelSuite extends TestBase {
     assert(predicted(2)._3 === Map(0L -> 5.4029905E-4f, 1L -> 0.24569187f, 2L -> 0.75376785f))
   }
 
+  private lazy val onnxMNIST: ONNXModel = {
+    // Making sure spark context is initialized
+    spark
+    val model = downloadModel("mnist-8.onnx", baseUrl)
+    new ONNXModel()
+      .setModelLocation(model.getPath)
+      .setFeedDict(Map("Input3" -> "features"))
+      .setFetchDict(Map("rawPrediction" -> "Plus214_Output_0"))
+      .setSoftMaxDict(Map("rawPrediction" -> "probability"))
+      .setArgMaxDict(Map("rawPrediction" -> "prediction"))
+      .setMiniBatchSize(1)
+  }
+
   def getLibSVM2ImageUdf(origin: String, height: Int,
                          width: Int, nChannels: Int, mode: Int): UserDefinedFunction = {
     UDFUtils.oldUdf(
@@ -139,7 +143,7 @@ class ONNXModelSuite extends TestBase {
     )
   }
 
-  test("ONNXModel can infer for MNIST model") {
+  private lazy val testDfMNIST: DataFrame = {
     val mnistDataLocation: String = {
       val loc = "/tmp/mnist.t"
       val f = new File(loc)
@@ -151,8 +155,6 @@ class ONNXModelSuite extends TestBase {
       loc
     }
 
-    val model = downloadModel("mnist-8.onnx", baseUrl)
-
     val libSVM2ImageFunc = getLibSVM2ImageUdf(
       origin = "mnist.t",
       height = 28,
@@ -174,20 +176,14 @@ class ONNXModelSuite extends TestBase {
       .setOutputCol("features")
       .resize(28, 28)
       .centerCrop(28, 28)
-      .normalize(Array(0d), Array(1d), 255)
+      .normalize(mean = Array(0d), std = Array(1d), colorScaleFactor = 1d / 255d)
       .setTensorElementType(FloatType)
 
-    val testDf = imageTransformer.transform(imageDf).cache()
-
-    val mnistModel = new ONNXModel()
-      .setModelLocation(model.getPath)
-      .setFeedDict(Map("Input3" -> "features"))
-      .setFetchDict(Map("rawPrediction"-> "Plus214_Output_0"))
-      .setSoftMaxDict(Map("rawPrediction" -> "probability"))
-      .setArgMaxDict(Map("rawPrediction" -> "prediction"))
-      .setMiniBatchSize(1)
+    imageTransformer.transform(imageDf).cache()
+  }
 
-    val prediction = mnistModel.transform(testDf)
+  test("ONNXModel can infer for MNIST model") {
+    val prediction = onnxMNIST.transform(testDfMNIST)
       .select("label", "rawPrediction", "probability", "prediction")
 
     val rows = prediction.as[(Int, Array[Float], Vector, Double)].head(10)
@@ -204,30 +200,36 @@ class ONNXModelSuite extends TestBase {
     }
   }
 
-  test("ONNXModel can translate zipmap output properly") {
-    val features = Array("Age", "WorkClass", "fnlwgt", "Education", "EducationNum", "MaritalStatus", "Occupation",
-      "Relationship", "Race", "Gender", "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry")
+  private lazy val featuresAdultsIncome = Array("Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
+    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender", "CapitalGain", "CapitalLoss", "HoursPerWeek",
+    "NativeCountry")
 
+  private lazy val onnxAdultsIncome = {
+    spark
+    val model = downloadModel("adults_income.onnx", baseUrl)
+    new ONNXModel()
+      .setModelLocation(model.getPath)
+      .setFeedDict(featuresAdultsIncome.map(v => (v, v)).toMap)
+      .setFetchDict(Map("probability" -> "output_probability"))
+      .setArgMaxDict(Map("probability" -> "prediction"))
+  }
+
+  private lazy val testDfAdultsIncome = {
     val testDf = Seq(
       (39L, " State-gov", 77516L, " Bachelors", 13L, " Never-married", " Adm-clerical",
         " Not-in-family", " White", " Male", 2174L, 0L, 40L, " United-States"),
       (52L, " Self-emp-not-inc", 209642L, " Doctorate", 16L, " Married-civ-spouse", " Exec-managerial",
         " Husband", " White", " Male", 0L, 0L, 45L, " United-States")
-    ).toDF(features: _*)
+    ).toDF(featuresAdultsIncome: _*)
 
-    val converted = features.foldLeft(testDf) {
+    featuresAdultsIncome.foldLeft(testDf) {
       case (acc, feature) =>
         acc.withColumn(feature, array(col(feature)))
     }.repartition(1)
+  }
 
-    val model = downloadModel("adults_income.onnx", baseUrl)
-    val onnx = new ONNXModel()
-      .setModelLocation(model.getPath)
-      .setFeedDict(features.map(v => (v, v)).toMap)
-      .setFetchDict(Map("probability"-> "output_probability"))
-      .setArgMaxDict(Map("probability" -> "prediction"))
-
-    val Array(row1, row2) = onnx.transform(converted)
+  test("ONNXModel can translate zipmap output properly") {
+    val Array(row1, row2) = onnxAdultsIncome.transform(testDfAdultsIncome)
       .select("probability", "prediction")
       .orderBy(col("prediction"))
       .as[(Map[Long, Float], Double)]
@@ -240,7 +242,19 @@ class ONNXModelSuite extends TestBase {
     assert(row2._2 == 1.0)
   }
 
-  test("ONNXModel can infer for resnet50 model") {
+  private lazy val onnxResNet50 = {
+    spark
+    val model = downloadModel("resnet50-v2-7.onnx", baseUrl)
+    new ONNXModel()
+      .setModelLocation(model.getPath)
+      .setFeedDict(Map("data" -> "features"))
+      .setFetchDict(Map("rawPrediction" -> "resnetv24_dense0_fwd"))
+      .setSoftMaxDict(Map("rawPrediction" -> "probability"))
+      .setArgMaxDict(Map("rawPrediction" -> "prediction"))
+      .setMiniBatchSize(1)
+  }
+
+  private lazy val testDfResNet50: DataFrame = {
     val greyhoundImageLocation: String = {
       val loc = "/tmp/greyhound.jpg"
       val f = new File(loc)
@@ -252,27 +266,19 @@ class ONNXModelSuite extends TestBase {
     }
 
     val imageDf = spark.read.image.load(greyhoundImageLocation)
-
     val imageTransformer = new ImageTransformer()
       .setInputCol("image")
       .setOutputCol("features")
       .resize(224, 224)
       .centerCrop(224, 224)
-      .normalize(Array(0.485, 0.456, 0.406), Array(0.229, 0.224, 0.225), 255)
+      .normalize(mean = Array(0.485, 0.456, 0.406), std = Array(0.229, 0.224, 0.225), colorScaleFactor = 1d / 255d)
       .setTensorElementType(FloatType)
 
-    val testDf = imageTransformer.transform(imageDf).cache()
-
-    val model = downloadModel("resnet50-v2-7.onnx", baseUrl)
-    val onnx = new ONNXModel()
-      .setModelLocation(model.getPath)
-      .setFeedDict(Map("data" -> "features"))
-      .setFetchDict(Map("rawPrediction" -> "resnetv24_dense0_fwd"))
-      .setSoftMaxDict(Map("rawPrediction" -> "probability"))
-      .setArgMaxDict(Map("rawPrediction" -> "prediction"))
-      .setMiniBatchSize(1)
+    imageTransformer.transform(imageDf).cache()
+  }
 
-    val (probability, prediction) = onnx.transform(testDf)
+  test("ONNXModel can infer for resnet50 model") {
+    val (probability, prediction) = onnxResNet50.transform(testDfResNet50)
       .select("probability", "prediction")
       .as[(Vector, Double)]
       .head