@@ -13,83 +13,65 @@ public static void Example()
13
13
{
14
14
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
15
15
// as well as the source of randomness.
16
- var ml = new MLContext ( ) ;
17
-
18
- // Get a small dataset as an IEnumerable and load it into ML.NET data set.
19
- IEnumerable < DatasetUtils . SampleTopicsData > data = DatasetUtils . GetTopicsData ( ) ;
20
- var trainData = ml . Data . LoadFromEnumerable ( data ) ;
16
+ var mlContext = new MLContext ( ) ;
17
+
18
+ // Get a small dataset as an IEnumerable.
19
+ var rawData = new [ ] {
20
+ new DataPoint ( ) { Review = "animals birds cats dogs fish horse" } ,
21
+ new DataPoint ( ) { Review = "horse birds house fish duck cats" } ,
22
+ new DataPoint ( ) { Review = "car truck driver bus pickup" } ,
23
+ new DataPoint ( ) { Review = "car truck driver bus pickup horse" } ,
24
+ } ;
21
25
22
- // Preview of one of the columns of the the topics data.
23
- // The Review column contains the keys associated with a particular body of text.
24
- //
25
- // Review
26
- // "animals birds cats dogs fish horse"
27
- // "horse birds house fish duck cats"
28
- // "car truck driver bus pickup"
29
- // "car truck driver bus pickup horse"
26
+ var trainData = mlContext . Data . LoadFromEnumerable ( rawData ) ;
30
27
31
28
// A pipeline to convert the terms of the 'Review' column into keys,
32
29
// making use of default settings.
33
- string defaultColumnName = "DefaultKeys" ;
34
- // REVIEW create through the catalog extension
35
- var default_pipeline = ml . Transforms . Text . TokenizeIntoWords ( "Review" )
36
- . Append ( ml . Transforms . Conversion . MapValueToKey ( defaultColumnName , "Review" ) ) ;
30
+ var defaultPipeline = mlContext . Transforms . Text . TokenizeIntoWords ( "TokenizedText" , nameof ( DataPoint . Review ) )
31
+ . Append ( mlContext . Transforms . Conversion . MapValueToKey ( nameof ( TransformedData . Keys ) , "TokenizedText" ) ) ;
37
32
38
33
// Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator.
39
34
// We can change the maximumNumberOfKeys to limit how many keys will get generated out of the set of words,
40
35
// and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurrence (order in which they get encountered)
41
36
// to value/alphabetically.
42
- string customizedColumnName = "CustomizedKeys" ;
43
- var customized_pipeline = ml . Transforms . Text . TokenizeIntoWords ( "Review" )
44
- . Append ( ml . Transforms . Conversion . MapValueToKey ( customizedColumnName , "Review" , maximumNumberOfKeys : 10 , keyOrdinality : ValueToKeyMappingEstimator . KeyOrdinality . ByValue ) ) ;
37
+ var customizedPipeline = mlContext . Transforms . Text . TokenizeIntoWords ( "TokenizedText" , nameof ( DataPoint . Review ) )
38
+ . Append ( mlContext . Transforms . Conversion . MapValueToKey ( nameof ( TransformedData . Keys ) , "TokenizedText" , maximumNumberOfKeys : 10 ,
39
+ keyOrdinality : ValueToKeyMappingEstimator . KeyOrdinality . ByValue ) ) ;
45
40
46
41
// The transformed data.
47
- var transformedData_default = default_pipeline . Fit ( trainData ) . Transform ( trainData ) ;
48
- var transformedData_customized = customized_pipeline . Fit ( trainData ) . Transform ( trainData ) ;
49
-
50
- // Small helper to print the text inside the columns, in the console.
51
- Action < string , IEnumerable < VBuffer < uint > > > printHelper = ( columnName , column ) =>
52
- {
53
- Console . WriteLine ( $ "{ columnName } column obtained post-transformation.") ;
54
- foreach ( var row in column )
55
- {
56
- foreach ( var value in row . GetValues ( ) )
57
- Console . Write ( $ "{ value } ") ;
58
- Console . WriteLine ( "" ) ;
59
- }
60
-
61
- Console . WriteLine ( "===================================================" ) ;
62
- } ;
63
-
64
- // Preview of the DefaultKeys column obtained after processing the input.
65
- var defaultColumn = transformedData_default . GetColumn < VBuffer < uint > > ( transformedData_default . Schema [ defaultColumnName ] ) ;
66
- printHelper ( defaultColumnName , defaultColumn ) ;
67
-
68
- // DefaultKeys column obtained post-transformation.
69
- //
70
- // 1 2 3 4 5 6
71
- // 6 2 7 5 8 3
72
- // 9 10 11 12 13 3
73
- // 9 10 11 12 13 6
74
-
75
- // Previewing the CustomizedKeys column obtained after processing the input.
76
- var customizedColumn = transformedData_customized . GetColumn < VBuffer < uint > > ( transformedData_customized . Schema [ customizedColumnName ] ) ;
77
- printHelper ( customizedColumnName , customizedColumn ) ;
78
-
79
- // CustomizedKeys column obtained post-transformation.
80
- //
81
- // 1 2 4 5 7 8
82
- // 8 2 9 7 6 4
83
- // 3 10 0 0 0 4
84
- // 3 10 0 0 0 8
85
-
42
+ var transformedDataDefault = defaultPipeline . Fit ( trainData ) . Transform ( trainData ) ;
43
+ var transformedDataCustomized = customizedPipeline . Fit ( trainData ) . Transform ( trainData ) ;
44
+
45
+ // Getting the resulting data as an IEnumerable.
46
+ // This will contain the newly created columns.
47
+ IEnumerable < TransformedData > defaultData = mlContext . Data . CreateEnumerable < TransformedData > ( transformedDataDefault , reuseRowObject : false ) ;
48
+ IEnumerable < TransformedData > customizedData = mlContext . Data . CreateEnumerable < TransformedData > ( transformedDataCustomized , reuseRowObject : false ) ;
49
+ Console . WriteLine ( $ "Keys") ;
50
+ foreach ( var dataRow in defaultData )
51
+ Console . WriteLine ( $ "{ string . Join ( ',' , dataRow . Keys ) } ") ;
52
+ // Expected output:
53
+ // Keys
54
+ // 1,2,3,4,5,6
55
+ // 6,2,7,5,8,3
56
+ // 9,10,11,12,13
57
+ // 9,10,11,12,13,6
58
+
59
+ Console . WriteLine ( $ "Keys") ;
60
+ foreach ( var dataRow in customizedData )
61
+ Console . WriteLine ( $ "{ string . Join ( ',' , dataRow . Keys ) } ") ;
62
+ // Expected output:
63
+ // Keys
64
+ // 1,2,4,5,7,8
65
+ // 8,2,9,7,6,4
66
+ // 3,10,0,0,0
67
+ // 3,10,0,0,0,8
86
68
// Retrieve the original values, by appending the KeyToValue estimator to the existing pipelines
87
69
// to convert the keys back to the strings.
88
- var pipeline = default_pipeline . Append ( ml . Transforms . Conversion . MapKeyToValue ( defaultColumnName ) ) ;
89
- transformedData_default = pipeline . Fit ( trainData ) . Transform ( trainData ) ;
70
+ var pipeline = defaultPipeline . Append ( mlContext . Transforms . Conversion . MapKeyToValue ( nameof ( TransformedData . Keys ) ) ) ;
71
+ transformedDataDefault = pipeline . Fit ( trainData ) . Transform ( trainData ) ;
90
72
91
73
// Preview of the Keys column obtained after mapping the keys back to their original values.
92
- var originalColumnBack = transformedData_default . GetColumn < VBuffer < ReadOnlyMemory < char > > > ( transformedData_default . Schema [ defaultColumnName ] ) ;
74
+ var originalColumnBack = transformedDataDefault . GetColumn < VBuffer < string > > ( transformedDataDefault . Schema [ nameof ( TransformedData . Keys ) ] ) ;
93
75
94
76
foreach ( var row in originalColumnBack )
95
77
{
@@ -98,12 +80,21 @@ public static void Example()
98
80
Console . WriteLine ( "" ) ;
99
81
}
100
82
101
- // DefaultKeys column obtained post-transformation.
102
- //
103
- // animals birds cats dogs fish horse
104
- // horse birds house fish duck cats
105
- // car truck driver bus pickup cats
106
- // car truck driver bus pickup horse
83
+ // Expected output:
84
+ // animals birds cats dogs fish horse
85
+ // horse birds house fish duck cats
86
+ // car truck driver bus pickup
87
+ // car truck driver bus pickup horse
88
+ }
89
+
90
+ private class DataPoint
91
+ {
92
+ public string Review { get ; set ; }
93
+ }
94
+
95
+ private class TransformedData : DataPoint
96
+ {
97
+ public uint [ ] Keys { get ; set ; }
107
98
}
108
99
}
109
100
}
0 commit comments