Skip to content

Commit 356b3a4

Browse files
authored
Cumulative bug bash fixes (#3539)
1 parent 29e7dda commit 356b3a4

File tree

8 files changed

+178
-152
lines changed

8 files changed

+178
-152
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System;
2-
using System.Collections.Generic;
32
using Microsoft.ML;
43

54
namespace Samples.Dynamic
@@ -12,24 +11,17 @@ public static void Example()
1211
// as a catalog of available operations and as the source of randomness.
1312
var mlContext = new MLContext();
1413

15-
// Get a small dataset as an IEnumerable and them read it as ML.NET's data type.
16-
IEnumerable<Microsoft.ML.SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorFloatWeightSample> enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(5);
17-
var data = mlContext.Data.LoadFromEnumerable(enumerableOfData);
14+
// Get a small dataset as an IEnumerable.
15+
var rawData = new[] {
16+
new DataPoint() { Label = true, Feature = 1.017325f},
17+
new DataPoint() { Label = false, Feature = 0.6326591f},
18+
new DataPoint() { Label = false, Feature = 0.0326252f},
19+
new DataPoint() { Label = false, Feature = 0.8426974f},
20+
new DataPoint() { Label = true, Feature = 0.9947656f},
21+
new DataPoint() { Label = true, Feature = 1.017325f},
22+
};
1823

19-
// Look at the original dataset
20-
Console.WriteLine($"Label\tFeatures[0]");
21-
foreach (var row in enumerableOfData)
22-
{
23-
Console.WriteLine($"{row.Label}\t{row.Features[0]}");
24-
}
25-
Console.WriteLine();
26-
// Expected output:
27-
// Label Features[0]
28-
// True 1.017325
29-
// False 0.6326591
30-
// False 0.0326252
31-
// True 0.8426974
32-
// True 0.9947656
24+
var data = mlContext.Data.LoadFromEnumerable(rawData);
3325

3426
// Now take a bootstrap sample of this dataset to create a new dataset. The bootstrap is a resampling technique that
3527
// creates a training set of the same size by picking with replacement from the original dataset. With the bootstrap,
@@ -80,5 +72,12 @@ public static void Example()
8072
// False 0.8426974
8173
// False 0.8426974
8274
}
75+
76+
private class DataPoint
77+
{
78+
public bool Label { get; set; }
79+
80+
public float Feature { get; set; }
81+
}
8382
}
8483
}

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs

Lines changed: 59 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -13,83 +13,65 @@ public static void Example()
1313
{
1414
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
1515
// as well as the source of randomness.
16-
var ml = new MLContext();
17-
18-
// Get a small dataset as an IEnumerable and load it into ML.NET data set.
19-
IEnumerable<DatasetUtils.SampleTopicsData> data = DatasetUtils.GetTopicsData();
20-
var trainData = ml.Data.LoadFromEnumerable(data);
16+
var mlContext = new MLContext();
17+
18+
// Get a small dataset as an IEnumerable.
19+
var rawData = new[] {
20+
new DataPoint() { Review = "animals birds cats dogs fish horse"},
21+
new DataPoint() { Review = "horse birds house fish duck cats"},
22+
new DataPoint() { Review = "car truck driver bus pickup"},
23+
new DataPoint() { Review = "car truck driver bus pickup horse"},
24+
};
2125

22-
// Preview of one of the columns of the the topics data.
23-
// The Review column contains the keys associated with a particular body of text.
24-
//
25-
// Review
26-
// "animals birds cats dogs fish horse"
27-
// "horse birds house fish duck cats"
28-
// "car truck driver bus pickup"
29-
// "car truck driver bus pickup horse"
26+
var trainData = mlContext.Data.LoadFromEnumerable(rawData);
3027

3128
// A pipeline to convert the terms of the 'Review' column into keys,
3229
// making use of default settings.
33-
string defaultColumnName = "DefaultKeys";
34-
// REVIEW create through the catalog extension
35-
var default_pipeline = ml.Transforms.Text.TokenizeIntoWords("Review")
36-
.Append(ml.Transforms.Conversion.MapValueToKey(defaultColumnName, "Review"));
30+
var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
31+
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText"));
3732

3833
// Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator.
3934
// We can change the maximumNumberOfKeys to limit how many keys will get generated out of the set of words,
4035
// and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurrence (order in which they get encountered)
4136
// to value/alphabetically.
42-
string customizedColumnName = "CustomizedKeys";
43-
var customized_pipeline = ml.Transforms.Text.TokenizeIntoWords("Review")
44-
.Append(ml.Transforms.Conversion.MapValueToKey(customizedColumnName, "Review", maximumNumberOfKeys: 10, keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue));
37+
var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
38+
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText", maximumNumberOfKeys: 10,
39+
keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue));
4540

4641
// The transformed data.
47-
var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData);
48-
var transformedData_customized = customized_pipeline.Fit(trainData).Transform(trainData);
49-
50-
// Small helper to print the text inside the columns, in the console.
51-
Action<string, IEnumerable<VBuffer<uint>>> printHelper = (columnName, column) =>
52-
{
53-
Console.WriteLine($"{columnName} column obtained post-transformation.");
54-
foreach (var row in column)
55-
{
56-
foreach (var value in row.GetValues())
57-
Console.Write($"{value} ");
58-
Console.WriteLine("");
59-
}
60-
61-
Console.WriteLine("===================================================");
62-
};
63-
64-
// Preview of the DefaultKeys column obtained after processing the input.
65-
var defaultColumn = transformedData_default.GetColumn<VBuffer<uint>>(transformedData_default.Schema[defaultColumnName]);
66-
printHelper(defaultColumnName, defaultColumn);
67-
68-
// DefaultKeys column obtained post-transformation.
69-
//
70-
// 1 2 3 4 5 6
71-
// 6 2 7 5 8 3
72-
// 9 10 11 12 13 3
73-
// 9 10 11 12 13 6
74-
75-
// Previewing the CustomizedKeys column obtained after processing the input.
76-
var customizedColumn = transformedData_customized.GetColumn<VBuffer<uint>>(transformedData_customized.Schema[customizedColumnName]);
77-
printHelper(customizedColumnName, customizedColumn);
78-
79-
// CustomizedKeys column obtained post-transformation.
80-
//
81-
// 1 2 4 5 7 8
82-
// 8 2 9 7 6 4
83-
// 3 10 0 0 0 4
84-
// 3 10 0 0 0 8
85-
42+
var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(trainData);
43+
var transformedDataCustomized = customizedPipeline.Fit(trainData).Transform(trainData);
44+
45+
// Getting the resulting data as an IEnumerable.
46+
// This will contain the newly created columns.
47+
IEnumerable<TransformedData> defaultData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataDefault, reuseRowObject: false);
48+
IEnumerable<TransformedData> customizedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataCustomized, reuseRowObject: false);
49+
Console.WriteLine($"Keys");
50+
foreach (var dataRow in defaultData)
51+
Console.WriteLine($"{string.Join(',', dataRow.Keys)}");
52+
// Expected output:
53+
// Keys
54+
// 1,2,3,4,5,6
55+
// 6,2,7,5,8,3
56+
// 9,10,11,12,13
57+
// 9,10,11,12,13,6
58+
59+
Console.WriteLine($"Keys");
60+
foreach (var dataRow in customizedData)
61+
Console.WriteLine($"{string.Join(',', dataRow.Keys)}");
62+
// Expected output:
63+
// Keys
64+
// 1,2,4,5,7,8
65+
// 8,2,9,7,6,4
66+
// 3,10,0,0,0
67+
// 3,10,0,0,0,8
8668
// Retrieve the original values, by appending the KeyToValue estimator to the existing pipelines
8769
// to convert the keys back to the strings.
88-
var pipeline = default_pipeline.Append(ml.Transforms.Conversion.MapKeyToValue(defaultColumnName));
89-
transformedData_default = pipeline.Fit(trainData).Transform(trainData);
70+
var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue(nameof(TransformedData.Keys)));
71+
transformedDataDefault = pipeline.Fit(trainData).Transform(trainData);
9072

9173
// Preview of the Keys column obtained after mapping the keys back to their original values.
92-
var originalColumnBack = transformedData_default.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedData_default.Schema[defaultColumnName]);
74+
var originalColumnBack = transformedDataDefault.GetColumn<VBuffer<string>>(transformedDataDefault.Schema[nameof(TransformedData.Keys)]);
9375

9476
foreach (var row in originalColumnBack)
9577
{
@@ -98,12 +80,21 @@ public static void Example()
9880
Console.WriteLine("");
9981
}
10082

101-
// DefaultKeys column obtained post-transformation.
102-
//
103-
// animals birds cats dogs fish horse
104-
// horse birds house fish duck cats
105-
// car truck driver bus pickup cats
106-
// car truck driver bus pickup horse
83+
// Expected output:
84+
// animals birds cats dogs fish horse
85+
// horse birds house fish duck cats
86+
// car truck driver bus pickup
87+
// car truck driver bus pickup horse
88+
}
89+
90+
private class DataPoint
91+
{
92+
public string Review { get; set; }
93+
}
94+
95+
private class TransformedData : DataPoint
96+
{
97+
public uint[] Keys { get; set; }
10798
}
10899
}
109100
}

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
using System;
2+
using System.Collections.Generic;
23
using Microsoft.ML;
3-
using Microsoft.ML.SamplesUtils;
4+
using Microsoft.ML.Data;
45

56
namespace Samples.Dynamic
67
{
78
/// This example demonstrates the use of the KeyToValueMappingEstimator, by mapping KeyType values to the original strings.
89
/// For more on ML.NET KeyTypes see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types
9-
/// It is possible to have multiple values map to the same category.
10-
1110
public class MapKeyToValueMultiColumn
1211
{
1312
public static void Example()
@@ -16,20 +15,14 @@ public static void Example()
1615
// as a catalog of available operations and as the source of randomness.
1716
// Setting the seed to a fixed number in this example to make outputs deterministic.
1817
var mlContext = new MLContext(seed: 0);
18+
// Get a small dataset as an IEnumerable.
1919

2020
// Create a list of data examples.
21-
var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);
21+
var examples = GenerateRandomDataPoints(1000, 10);
2222

2323
// Convert the examples list to an IDataView object, which is consumable by ML.NET API.
2424
var dataView = mlContext.Data.LoadFromEnumerable(examples);
2525

26-
//////////////////// Data Preview ////////////////////
27-
// Label Features
28-
// AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
29-
// BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
30-
// CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
31-
// DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455
32-
3326
// Create a pipeline.
3427
var pipeline =
3528
// Convert the string labels into key types.
@@ -41,33 +34,71 @@ public static void Example()
4134
// Typically predictions would be in a different, validation set.
4235
var dataWithPredictions = pipeline.Fit(dataView).Transform(dataView);
4336

44-
// at this point, the Label colum is tranformed from strings, to DataViewKeyType and
45-
// the transformation has added the PredictedLabel column, with
46-
var newPipeline = mlContext.Transforms.Conversion.MapKeyToValue(new[]
37+
// At this point, the Label column is transformed from strings to DataViewKeyType and
38+
// the transformation has added the PredictedLabel column, with same DataViewKeyType as
39+
// transformed Label column.
40+
// MapKeyToValue would take columns with DataViewKeyType and convert them back to their original values.
41+
var newPipeline = mlContext.Transforms.Conversion.MapKeyToValue(new[]
4742
{
4843
new InputOutputColumnPair("LabelOriginalValue","Label"),
4944
new InputOutputColumnPair("PredictedLabelOriginalValue","PredictedLabel")
5045
});
5146

5247
var transformedData = newPipeline.Fit(dataWithPredictions).Transform(dataWithPredictions);
53-
48+
// Let's iterate over the first 5 items.
49+
transformedData = mlContext.Data.TakeRows(transformedData, 5);
5450
var values = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
5551

56-
// Printing the columns of the transformed data.
57-
Console.WriteLine($" Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue");
52+
// Printing the column names of the transformed data.
53+
Console.WriteLine($"Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue");
5854
foreach (var row in values)
5955
Console.WriteLine($"{row.Label}\t\t{row.LabelOriginalValue}\t\t\t{row.PredictedLabel}\t\t\t{row.PredictedLabelOriginalValue}");
6056

61-
// Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue
62-
// 1 AA 2 BB
63-
// 1 AA 1 AA
64-
// 4 DD 4 DD
65-
// 2 BB 2 BB
66-
// 1 AA 1 AA
67-
// 1 AA 1 AA
68-
// 1 AA 1 AA
69-
// 2 BB 2 BB
57+
// Expected output:
58+
// Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue
59+
// 1 AA 1 AA
60+
// 2 BB 2 BB
61+
// 3 CC 4 DD
62+
// 4 DD 4 DD
63+
// 1 AA 1 AA
64+
65+
}
66+
67+
private class DataPoint
68+
{
69+
public string Label { get; set; }
70+
[VectorType(10)]
71+
public float[] Features { get; set; }
72+
}
73+
74+
private static List<DataPoint> GenerateRandomDataPoints(int count, int featureVectorLenght)
75+
{
76+
var examples = new List<DataPoint>();
77+
var rnd = new Random(0);
78+
for (int i = 0; i < count; ++i)
79+
{
80+
var example = new DataPoint();
81+
example.Features = new float[featureVectorLenght];
82+
var res = i % 4;
83+
// Generate random float feature values.
84+
for (int j = 0; j < featureVectorLenght; ++j)
85+
{
86+
var value = (float)rnd.NextDouble() + res * 0.2f;
87+
example.Features[j] = value;
88+
}
7089

90+
// Assign the label from the class index (i % 4), the same value used above to offset the features.
91+
if (res == 0)
92+
example.Label = "AA";
93+
else if (res == 1)
94+
example.Label = "BB";
95+
else if (res == 2)
96+
example.Label = "CC";
97+
else
98+
example.Label = "DD";
99+
examples.Add(example);
100+
}
101+
return examples;
71102
}
72103
private class TransformedData
73104
{

0 commit comments

Comments
 (0)