Skip to content

Commit

Permalink
Input output swap (#2239)
Browse files Browse the repository at this point in the history
* All the code changes in, and most of the tests updated.

* all the tests pass

* 1 - Changing the "source" parameter name and field in the ColumnInfo classes, to be "sourceColumnName", as suggested.
Changing the "name" parameter to "outputColumnName" in the:
- estimator extension APIs
- estimator ctors
- column pairs expressed through tuples, because in context it reads better than name.

Note: In the ColumnInfo classes I left it as "name" because "outputColumnName" makes no sense.

2 - Nit on standardizing the XML comments.
3 - Arranging the order of the parameters to be: outputColumnName, required parameters, nullable sourceColumnName.
  • Loading branch information
sfilipi authored Jan 29, 2019
1 parent 22ea6d1 commit e383091
Show file tree
Hide file tree
Showing 171 changed files with 2,216 additions and 2,138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ public static void FeatureSelectionTransform()
// In this example we define a CountFeatureSelectingEstimator, that selects slots in a feature vector that have more non-default
// values than the specified count. This transformation can be used to remove slots with too many missing values.
var countSelectEst = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
inputColumn: "Features", outputColumn: "FeaturesCountSelect", count: 695);
outputColumnName: "FeaturesCountSelect", inputColumnName: "Features", count: 695);

// We also define a MutualInformationFeatureSelectingEstimator that selects the top k slots in a feature
// vector based on highest mutual information between that slot and a specified label. Notice that it is possible to
// specify the parameter `numBins', which controls the number of bins used in the approximation of the mutual information
// between features and label.
var mutualInfoEst = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(
inputColumn: "FeaturesCountSelect", outputColumn: "FeaturesMISelect", labelColumn: "Label", slotsInOutput: 5);
outputColumnName: "FeaturesMISelect", inputColumnName: "FeaturesCountSelect", labelColumn: "Label", slotsInOutput: 5);

// Now, we can put the previous two transformations together in a pipeline.
var pipeline = countSelectEst.Append(mutualInfoEst);
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ public static void KeyToValue_Term()
string defaultColumnName = "DefaultKeys";
// REVIEW create through the catalog extension
var default_pipeline = new WordTokenizingEstimator(ml, "Review")
.Append(new ValueToKeyMappingEstimator(ml, "Review", defaultColumnName));
.Append(new ValueToKeyMappingEstimator(ml, defaultColumnName, "Review"));

// Another pipeline, that customizes the advanced settings of the TermEstimator.
// We can change the maxNumTerm to limit how many keys will get generated out of the set of words,
// and condition the order in which they get evaluated by changing sort from the default Occurence (order in which they get encountered)
// to value/alphabetically.
string customizedColumnName = "CustomizedKeys";
var customized_pipeline = new WordTokenizingEstimator(ml, "Review")
.Append(new ValueToKeyMappingEstimator(ml, "Review", customizedColumnName, maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value));
.Append(new ValueToKeyMappingEstimator(ml,customizedColumnName, "Review", maxNumTerms: 10, sort: ValueToKeyMappingTransformer.SortOrder.Value));

// The transformed data.
var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData);
Expand Down
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ public static void NgramTransform()
// A pipeline to tokenize text as characters and then combine them together into ngrams
// The pipeline uses the default settings to featurize.

var charsPipeline = ml.Transforms.Text.TokenizeCharacters("SentimentText", "Chars", useMarkerCharacters:false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("Chars", "CharsUnigrams", ngramLength:1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("Chars", "CharsTwograms");
var charsPipeline = ml.Transforms.Text.TokenizeCharacters("Chars", "SentimentText", useMarkerCharacters:false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength:1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline);

Expand Down
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public static void Normalizer()
var transformer = pipeline.Fit(trainData);

var modelParams = transformer.Columns
.First(x => x.Output == "Induced")
.First(x => x.Name == "Induced")
.ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters<float>;

Console.WriteLine($"The normalization parameters are: Scale = {modelParams.Scale} and Offset = {modelParams.Offset}");
Expand Down Expand Up @@ -66,7 +66,7 @@ public static void Normalizer()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using log scale as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new[] { ("Induced", "LogInduced"), ("Spontaneous", "LogSpontaneous") });
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Expand Down Expand Up @@ -97,7 +97,7 @@ public static void Normalizer()

// Inspect the weights of normalizing the columns
var multiColModelParams = multiColtransformer.Columns
.First(x=> x.Output == "LogInduced")
.First(x=> x.Name == "LogInduced")
.ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters<float>;

Console.WriteLine($"The normalization parameters are: Mean = {multiColModelParams.Mean} and Stddev = {multiColModelParams.Stddev}");
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public static void OnnxTransformSample()
var mlContext = new MLContext();
var data = GetTensorData();
var idv = mlContext.Data.ReadFromEnumerable(data);
var pipeline = new OnnxScoringEstimator(mlContext, modelPath, new[] { inputInfo.Key }, new[] { outputInfo.Key });
var pipeline = new OnnxScoringEstimator(mlContext, new[] { outputInfo.Key }, new[] { inputInfo.Key }, modelPath);

// Run the pipeline and get the transformed values
var transformedValues = pipeline.Fit(idv).Transform(idv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ public static void TensorFlowScoringSample()
// Create a ML pipeline.
var pipeline = mlContext.Transforms.ScoreTensorFlowModel(
modelLocation,
new[] { nameof(TensorData.input) },
new[] { nameof(OutputScores.output) });
new[] { nameof(OutputScores.output) },
new[] { nameof(TensorData.input) });

// Run the pipeline and get the transformed values.
var estimator = pipeline.Fit(idv);
Expand Down
4 changes: 2 additions & 2 deletions docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ public static void TextTransform()
// A pipeline for featurization of the "SentimentText" column, and placing the output in a new column named "DefaultTextFeatures"
// The pipeline uses the default settings to featurize.
string defaultColumnName = "DefaultTextFeatures";
var default_pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", defaultColumnName);
var default_pipeline = ml.Transforms.Text.FeaturizeText(defaultColumnName , "SentimentText");

// Another pipeline, that customizes the advanced settings of the FeaturizeText transformer.
string customizedColumnName = "CustomizedTextFeatures";
var customized_pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", customizedColumnName, s =>
var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, "SentimentText", s =>
{
s.KeepPunctuations = false;
s.KeepNumbers = false;
Expand Down
18 changes: 9 additions & 9 deletions src/Microsoft.ML.Core/Data/IEstimator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,29 @@ internal Column(string name, VectorKind vecKind, ColumnType itemType, bool isKey
}

/// <summary>
/// Returns whether <paramref name="inputColumn"/> is a valid input, if this object represents a
/// Returns whether <paramref name="source"/> is a valid input, if this object represents a
/// requirement.
///
/// Namely, it returns true iff:
/// - The <see cref="Name"/>, <see cref="Kind"/>, <see cref="ItemType"/>, <see cref="IsKey"/> fields match.
/// - The columns of <see cref="Metadata"/> of <paramref name="inputColumn"/> is a superset of our <see cref="Metadata"/> columns.
/// - The columns of <see cref="Metadata"/> of <paramref name="source"/> is a superset of our <see cref="Metadata"/> columns.
/// - Each such metadata column is itself compatible with the input metadata column.
/// </summary>
[BestFriend]
internal bool IsCompatibleWith(Column inputColumn)
internal bool IsCompatibleWith(Column source)
{
Contracts.Check(inputColumn.IsValid, nameof(inputColumn));
if (Name != inputColumn.Name)
Contracts.Check(source.IsValid, nameof(source));
if (Name != source.Name)
return false;
if (Kind != inputColumn.Kind)
if (Kind != source.Kind)
return false;
if (!ItemType.Equals(inputColumn.ItemType))
if (!ItemType.Equals(source.ItemType))
return false;
if (IsKey != inputColumn.IsKey)
if (IsKey != source.IsKey)
return false;
foreach (var metaCol in Metadata)
{
if (!inputColumn.Metadata.TryFindColumn(metaCol.Name, out var inputMetaCol))
if (!source.Metadata.TryFindColumn(metaCol.Name, out var inputMetaCol))
return false;
if (!metaCol.IsCompatibleWith(inputMetaCol))
return false;
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/Evaluators/AnomalyDetectionEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -708,11 +708,11 @@ private protected override void PrintFoldResultsCore(IChannel ch, Dictionary<str
var pFormatName = string.Format(FoldDrAtPFormat, _p);
var numAnomName = string.Format(FoldDrAtNumAnomaliesFormat, numAnomalies);

(string Source, string Name)[] cols =
(string name, string source)[] cols =
{
(AnomalyDetectionEvaluator.OverallMetrics.DrAtK, kFormatName),
(AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr, pFormatName),
(AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos, numAnomName)
(kFormatName, AnomalyDetectionEvaluator.OverallMetrics.DrAtK),
(pFormatName, AnomalyDetectionEvaluator.OverallMetrics.DrAtPFpr),
(numAnomName, AnomalyDetectionEvaluator.OverallMetrics.DrAtNumPos)
};

// List of columns to keep, note that the order specified determines the order of the output
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/Evaluators/BinaryClassifierEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1198,11 +1198,11 @@ private protected override void PrintFoldResultsCore(IChannel ch, Dictionary<str
if (!metrics.TryGetValue(MetricKinds.ConfusionMatrix, out conf))
throw ch.Except("No overall metrics found");

(string Source, string Name)[] cols =
(string name, string source)[] cols =
{
(BinaryClassifierEvaluator.Accuracy, FoldAccuracy),
(BinaryClassifierEvaluator.LogLoss, FoldLogLoss),
(BinaryClassifierEvaluator.LogLossReduction, FoldLogLosRed)
(FoldAccuracy, BinaryClassifierEvaluator.Accuracy),
(FoldLogLoss, BinaryClassifierEvaluator.LogLoss),
(FoldLogLosRed, BinaryClassifierEvaluator.LogLossReduction)
};

var colsToKeep = new List<string>();
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)

// Make a list of column names that Maml outputs as part of the per-instance data view, and then wrap
// the per-instance data computed by the evaluator in a SelectColumnsTransform.
var cols = new List<(string Source, string Name)>();
var cols = new List<(string name, string source)>();
var colsToKeep = new List<string>();

// If perInst is the result of cross-validation and contains a fold Id column, include it.
Expand All @@ -241,7 +241,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)
// Maml always outputs a name column, if it doesn't exist add a GenerateNumberTransform.
if (perInst.Schema.Name?.Name is string nameName)
{
cols.Add((nameName, "Instance"));
cols.Add(("Instance", nameName));
colsToKeep.Add("Instance");
}
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ private protected override IDataView GetOverallResultsCore(IDataView overall)

private IDataView ChangeTopKAccColumnName(IDataView input)
{
input = new ColumnCopyingTransformer(Host, (MultiClassClassifierEvaluator.TopKAccuracy, string.Format(TopKAccuracyFormat, _outputTopKAcc))).Transform(input);
input = new ColumnCopyingTransformer(Host, (string.Format(TopKAccuracyFormat, _outputTopKAcc), MultiClassClassifierEvaluator.TopKAccuracy)).Transform(input);
return ColumnSelectingTransformer.CreateDrop(Host, input, MultiClassClassifierEvaluator.TopKAccuracy);
}

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/TrainCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ private void EnsureStratificationColumn(ref IDataView data, ref string stratific
stratificationColumn = string.Format("{0}_{1:000}", origStratCol, ++inc);
HashingTransformer.ColumnInfo columnInfo;
if (seed.HasValue)
columnInfo = new HashingTransformer.ColumnInfo(origStratCol, stratificationColumn, 30, seed.Value);
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30, seed.Value);
else
columnInfo = new HashingTransformer.ColumnInfo(origStratCol, stratificationColumn, 30);
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30);
data = new HashingEstimator(Host, columnInfo).Fit(data).Transform(data);
}
}
Expand Down
16 changes: 8 additions & 8 deletions src/Microsoft.ML.Data/Transforms/ColumnConcatenatingEstimator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@ public sealed class ColumnConcatenatingEstimator : IEstimator<ITransformer>
/// Initializes a new instance of <see cref="ColumnConcatenatingEstimator"/>
/// </summary>
/// <param name="env">The local instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="outputColumn">The name of the resulting column.</param>
/// <param name="inputColumns">The columns to concatenate together.</param>
public ColumnConcatenatingEstimator (IHostEnvironment env, string outputColumn, params string[] inputColumns)
/// <param name="outputColumnName">The name of the resulting column.</param>
/// <param name="inputColumnNames">The columns to concatenate together.</param>
public ColumnConcatenatingEstimator(IHostEnvironment env, string outputColumnName, params string[] inputColumnNames)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register("ColumnConcatenatingEstimator ");

_host.CheckNonEmpty(outputColumn, nameof(outputColumn));
_host.CheckValue(inputColumns, nameof(inputColumns));
_host.CheckParam(!inputColumns.Any(r => string.IsNullOrEmpty(r)), nameof(inputColumns),
_host.CheckNonEmpty(outputColumnName, nameof(outputColumnName));
_host.CheckValue(inputColumnNames, nameof(inputColumnNames));
_host.CheckParam(!inputColumnNames.Any(r => string.IsNullOrEmpty(r)), nameof(inputColumnNames),
"Contained some null or empty items");

_name = outputColumn;
_source = inputColumns;
_name = outputColumnName;
_source = inputColumnNames;
}

public ITransformer Fit(IDataView input)
Expand Down
Loading

0 comments on commit e383091

Please sign in to comment.