Skip to content

Commit

Permalink
Unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
memoryz committed Aug 2, 2021
1 parent 01bc243 commit b56aefa
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 123 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ lazy val deepLearning = (project in file("deep-learning"))
.settings(settings ++ Seq(
libraryDependencies ++= Seq(
"com.microsoft.cntk" % "cntk" % "2.4",
"com.microsoft.onnxruntime" % "onnxruntime" % "1.8.0"
"com.microsoft.onnxruntime" % "onnxruntime" % "1.8.1"
),
name := "mmlspark-deep-learning",
): _*)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,37 @@ import breeze.linalg.{argmax, argtopk}
import com.microsoft.ml.spark.build.BuildInfo
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.core.utils.BreezeUtils._
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import com.microsoft.ml.spark.core.utils.BreezeUtils._
import com.microsoft.ml.spark.io.IOImplicits._
import com.microsoft.ml.spark.opencv.ImageTransformer
import org.apache.commons.io.FileUtils
import org.apache.spark.injections.UDFUtils
import org.apache.spark.ml.image.ImageSchema
import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors}
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.types.{FloatType, IntegerType}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{FloatType, IntegerType}
import org.apache.spark.sql.{DataFrame, Row}
import org.scalactic.{Equality, TolerantNumerics}

import java.io.File
import java.net.URL

class ONNXModelSuite extends TestBase {
// with TransformerFuzzing[ONNXModel] {
// override def testObjects(): Seq[TestObject[ONNXModel]] = ???
//
// override def reader: MLReadable[_] = ???
class ONNXModelSuite extends TestBase
with TransformerFuzzing[ONNXModel] {

/**
 * Model/input pairs offered to the `TransformerFuzzing` mix-in.
 *
 * Each pair couples one of the lazily built ONNX models in this suite with a
 * DataFrame it is expected to transform; the iris model appears once per
 * supported input representation (float array, double array, vector).
 */
override def testObjects(): Seq[TestObject[ONNXModel]] = {
  val fixtures: Seq[(ONNXModel, DataFrame)] = Seq(
    onnxIris -> testDfIrisFloat,
    onnxIris -> testDfIrisDouble,
    onnxIris -> testDfIrisVector,
    onnxMNIST -> testDfMNIST,
    onnxAdultsIncome -> testDfAdultsIncome,
    onnxResNet50 -> testDfResNet50
  )

  fixtures.map { case (model, frame) => new TestObject(model, frame) }
}

/** Supplies the `ONNXModel` object as the `MLReadable` for this suite. */
override def reader: MLReadable[_] = {
  ONNXModel
}

private val baseUrl = "https://mmlspark.blob.core.windows.net/publicwasb/ONNXModels/"
private implicit val eqFloat: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1E-5f)
Expand All @@ -42,23 +50,36 @@ class ONNXModelSuite extends TestBase {
f
}

test("ONNXModel can infer observations of matching input types") {
/**
 * Iris classifier fixture shared by the iris tests and by `testObjects`.
 *
 * Downloads `iris.onnx` from `baseUrl` and configures a single `ONNXModel`
 * with the feed and fetch dictionaries below. Being a `lazy val`, the download
 * happens on first use only.
 */
private lazy val onnxIris: ONNXModel = {
  // Making sure spark context is initialized
  spark

  val model = downloadModel("iris.onnx", baseUrl)
  // Exactly one model instance is constructed, and it is the one returned.
  new ONNXModel()
    .setModelLocation(model.getPath)
    .setFeedDict(Map("float_input" -> "features"))
    .setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))
}

val testDf = Seq(
Array(6.7f, 3.1f, 4.7f, 1.5f),
Array(4.9f, 3.0f, 1.4f, 0.2f),
Array(5.8f, 2.7f, 5.1f, 1.9f)
) toDF "features"
/** Three iris observations as `Float` arrays, in a single column named `features`. */
private lazy val testDfIrisFloat: DataFrame = {
  val observations = Seq(
    Array(6.7f, 3.1f, 4.7f, 1.5f),
    Array(4.9f, 3.0f, 1.4f, 0.2f),
    Array(5.8f, 2.7f, 5.1f, 1.9f)
  )
  observations.toDF("features")
}

val predicted = onnx.transform(testDf).as[(Seq[Float], Long, Map[Long, Float])].collect()
/** The same three iris observations as `Double` arrays, in a single column named `features`. */
private lazy val testDfIrisDouble: DataFrame = {
  val observations = Seq(
    Array(6.7d, 3.1d, 4.7d, 1.5d),
    Array(4.9d, 3.0d, 1.4d, 0.2d),
    Array(5.8d, 2.7d, 5.1d, 1.9d)
  )
  observations.toDF("features")
}

/** The same three iris observations as dense ML vectors, in a single column named `features`. */
private lazy val testDfIrisVector: DataFrame = {
  val observations = Seq(
    Tuple1(Vectors.dense(6.7d, 3.1d, 4.7d, 1.5d)),
    Tuple1(Vectors.dense(4.9d, 3.0d, 1.4d, 0.2d)),
    Tuple1(Vectors.dense(5.8d, 2.7d, 5.1d, 1.9d))
  )
  observations.toDF("features")
}

test("ONNXModel can infer observations of matching input types") {
val predicted = onnxIris.transform(testDfIrisFloat).as[(Seq[Float], Long, Map[Long, Float])].collect()

assert(predicted(0)._2 == 1L)
assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
Expand All @@ -71,22 +92,7 @@ class ONNXModelSuite extends TestBase {
}

test("ONNXModel can infer observations of compatible input types") {
// Making sure spark context is initialized
spark

val model = downloadModel("iris.onnx", baseUrl)
val onnx = new ONNXModel()
.setModelLocation(model.getPath)
.setFeedDict(Map("float_input" -> "features"))
.setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))

val testDf = Seq(
Array(6.7d, 3.1d, 4.7d, 1.5d),
Array(4.9d, 3.0d, 1.4d, 0.2d),
Array(5.8d, 2.7d, 5.1d, 1.9d)
) toDF "features"

val predicted = onnx.transform(testDf).as[(Seq[Double], Long, Map[Long, Float])].collect()
val predicted = onnxIris.transform(testDfIrisDouble).as[(Seq[Double], Long, Map[Long, Float])].collect()

assert(predicted(0)._2 == 1L)
assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
Expand All @@ -99,22 +105,7 @@ class ONNXModelSuite extends TestBase {
}

test("ONNXModel can infer observations of vector input types") {
// Making sure spark context is initialized
spark

val model = downloadModel("iris.onnx", baseUrl)
val onnx = new ONNXModel()
.setModelLocation(model.getPath)
.setFeedDict(Map("float_input" -> "features"))
.setFetchDict(Map("prediction" -> "output_label", "rawProbability" -> "output_probability"))

val testDf = Seq(
Tuple1(Vectors.dense(6.7d, 3.1d, 4.7d, 1.5d)),
Tuple1(Vectors.dense(4.9d, 3.0d, 1.4d, 0.2d)),
Tuple1(Vectors.dense(5.8d, 2.7d, 5.1d, 1.9d))
) toDF "features"

val predicted = onnx.transform(testDf).as[(DenseVector, Long, Map[Long, Float])].collect()
val predicted = onnxIris.transform(testDfIrisVector).as[(DenseVector, Long, Map[Long, Float])].collect()

assert(predicted(0)._2 == 1L)
assert(predicted(0)._3 === Map(0L -> 0.0032624616f, 1L -> 0.78214455f, 2L -> 0.214593f))
Expand All @@ -126,6 +117,19 @@ class ONNXModelSuite extends TestBase {
assert(predicted(2)._3 === Map(0L -> 5.4029905E-4f, 1L -> 0.24569187f, 2L -> 0.75376785f))
}

/**
 * MNIST fixture: downloads `mnist-8.onnx` from `baseUrl` and configures an
 * `ONNXModel` with feed, fetch, soft-max and arg-max dictionaries and a
 * mini-batch size of 1.
 */
private lazy val onnxMNIST: ONNXModel = {
  // Making sure spark context is initialized
  spark
  val modelFile = downloadModel("mnist-8.onnx", baseUrl)

  val feeds = Map("Input3" -> "features")
  val fetches = Map("rawPrediction" -> "Plus214_Output_0")
  val softMaxes = Map("rawPrediction" -> "probability")
  val argMaxes = Map("rawPrediction" -> "prediction")

  new ONNXModel()
    .setModelLocation(modelFile.getPath)
    .setFeedDict(feeds)
    .setFetchDict(fetches)
    .setSoftMaxDict(softMaxes)
    .setArgMaxDict(argMaxes)
    .setMiniBatchSize(1)
}

def getLibSVM2ImageUdf(origin: String, height: Int,
width: Int, nChannels: Int, mode: Int): UserDefinedFunction = {
UDFUtils.oldUdf(
Expand All @@ -139,7 +143,7 @@ class ONNXModelSuite extends TestBase {
)
}

test("ONNXModel can infer for MNIST model") {
private lazy val testDfMNIST: DataFrame = {
val mnistDataLocation: String = {
val loc = "/tmp/mnist.t"
val f = new File(loc)
Expand All @@ -151,8 +155,6 @@ class ONNXModelSuite extends TestBase {
loc
}

val model = downloadModel("mnist-8.onnx", baseUrl)

val libSVM2ImageFunc = getLibSVM2ImageUdf(
origin = "mnist.t",
height = 28,
Expand All @@ -174,20 +176,14 @@ class ONNXModelSuite extends TestBase {
.setOutputCol("features")
.resize(28, 28)
.centerCrop(28, 28)
.normalize(Array(0d), Array(1d), 255)
.normalize(mean = Array(0d), std = Array(1d), colorScaleFactor = 1d / 255d)
.setTensorElementType(FloatType)

val testDf = imageTransformer.transform(imageDf).cache()

val mnistModel = new ONNXModel()
.setModelLocation(model.getPath)
.setFeedDict(Map("Input3" -> "features"))
.setFetchDict(Map("rawPrediction"-> "Plus214_Output_0"))
.setSoftMaxDict(Map("rawPrediction" -> "probability"))
.setArgMaxDict(Map("rawPrediction" -> "prediction"))
.setMiniBatchSize(1)
imageTransformer.transform(imageDf).cache()
}

val prediction = mnistModel.transform(testDf)
test("ONNXModel can infer for MNIST model") {
val prediction = onnxMNIST.transform(testDfMNIST)
.select("label", "rawPrediction", "probability", "prediction")

val rows = prediction.as[(Int, Array[Float], Vector, Double)].head(10)
Expand All @@ -204,30 +200,36 @@ class ONNXModelSuite extends TestBase {
}
}

test("ONNXModel can translate zipmap output properly") {
val features = Array("Age", "WorkClass", "fnlwgt", "Education", "EducationNum", "MaritalStatus", "Occupation",
"Relationship", "Race", "Gender", "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry")
/**
 * Column names of the adults-income test data, in positional order.
 * Used both as DataFrame column names and as the model's feed dictionary entries.
 */
private lazy val featuresAdultsIncome = Array(
  "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
  "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
  "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry"
)

/**
 * Adults-income fixture: downloads `adults_income.onnx` from `baseUrl` and
 * configures an `ONNXModel` whose feed dictionary maps every entry of
 * `featuresAdultsIncome` to itself.
 */
private lazy val onnxAdultsIncome = {
  // Making sure spark context is initialized
  spark
  val modelFile = downloadModel("adults_income.onnx", baseUrl)

  // Identity mapping: each feature name is paired with the same name.
  val feeds = featuresAdultsIncome.map(name => name -> name).toMap
  val fetches = Map("probability" -> "output_probability")
  val argMaxes = Map("probability" -> "prediction")

  new ONNXModel()
    .setModelLocation(modelFile.getPath)
    .setFeedDict(feeds)
    .setFetchDict(fetches)
    .setArgMaxDict(argMaxes)
}

/**
 * Two adults-income rows with columns named after `featuresAdultsIncome`.
 *
 * Every feature column is then wrapped with `array(...)`, and the result is
 * repartitioned into a single partition.
 */
private lazy val testDfAdultsIncome = {
  val testDf = Seq(
    (39L, " State-gov", 77516L, " Bachelors", 13L, " Never-married", " Adm-clerical",
      " Not-in-family", " White", " Male", 2174L, 0L, 40L, " United-States"),
    (52L, " Self-emp-not-inc", 209642L, " Doctorate", 16L, " Married-civ-spouse", " Exec-managerial",
      " Husband", " White", " Male", 0L, 0L, 45L, " United-States")
  ).toDF(featuresAdultsIncome: _*)

  // Replace each feature column, one at a time, with a one-element array of its value.
  featuresAdultsIncome.foldLeft(testDf) {
    case (acc, feature) =>
      acc.withColumn(feature, array(col(feature)))
  }.repartition(1)
}

val model = downloadModel("adults_income.onnx", baseUrl)
val onnx = new ONNXModel()
.setModelLocation(model.getPath)
.setFeedDict(features.map(v => (v, v)).toMap)
.setFetchDict(Map("probability"-> "output_probability"))
.setArgMaxDict(Map("probability" -> "prediction"))

val Array(row1, row2) = onnx.transform(converted)
test("ONNXModel can translate zipmap output properly") {
val Array(row1, row2) = onnxAdultsIncome.transform(testDfAdultsIncome)
.select("probability", "prediction")
.orderBy(col("prediction"))
.as[(Map[Long, Float], Double)]
Expand All @@ -240,7 +242,19 @@ class ONNXModelSuite extends TestBase {
assert(row2._2 == 1.0)
}

test("ONNXModel can infer for resnet50 model") {
/**
 * ResNet-50 fixture: downloads `resnet50-v2-7.onnx` from `baseUrl` and
 * configures an `ONNXModel` with feed, fetch, soft-max and arg-max
 * dictionaries and a mini-batch size of 1.
 */
private lazy val onnxResNet50 = {
  // Making sure spark context is initialized
  spark
  val modelFile = downloadModel("resnet50-v2-7.onnx", baseUrl)

  val feeds = Map("data" -> "features")
  val fetches = Map("rawPrediction" -> "resnetv24_dense0_fwd")
  val softMaxes = Map("rawPrediction" -> "probability")
  val argMaxes = Map("rawPrediction" -> "prediction")

  new ONNXModel()
    .setModelLocation(modelFile.getPath)
    .setFeedDict(feeds)
    .setFetchDict(fetches)
    .setSoftMaxDict(softMaxes)
    .setArgMaxDict(argMaxes)
    .setMiniBatchSize(1)
}

private lazy val testDfResNet50: DataFrame = {
val greyhoundImageLocation: String = {
val loc = "/tmp/greyhound.jpg"
val f = new File(loc)
Expand All @@ -252,27 +266,19 @@ class ONNXModelSuite extends TestBase {
}

val imageDf = spark.read.image.load(greyhoundImageLocation)

val imageTransformer = new ImageTransformer()
.setInputCol("image")
.setOutputCol("features")
.resize(224, 224)
.centerCrop(224, 224)
.normalize(Array(0.485, 0.456, 0.406), Array(0.229, 0.224, 0.225), 255)
.normalize(mean = Array(0.485, 0.456, 0.406), std = Array(0.229, 0.224, 0.225), colorScaleFactor = 1d / 255d)
.setTensorElementType(FloatType)

val testDf = imageTransformer.transform(imageDf).cache()

val model = downloadModel("resnet50-v2-7.onnx", baseUrl)
val onnx = new ONNXModel()
.setModelLocation(model.getPath)
.setFeedDict(Map("data" -> "features"))
.setFetchDict(Map("rawPrediction" -> "resnetv24_dense0_fwd"))
.setSoftMaxDict(Map("rawPrediction" -> "probability"))
.setArgMaxDict(Map("rawPrediction" -> "prediction"))
.setMiniBatchSize(1)
imageTransformer.transform(imageDf).cache()
}

val (probability, prediction) = onnx.transform(testDf)
test("ONNXModel can infer for resnet50 model") {
val (probability, prediction) = onnxResNet50.transform(testDfResNet50)
.select("probability", "prediction")
.as[(Vector, Double)]
.head
Expand Down
Loading

0 comments on commit b56aefa

Please sign in to comment.