-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathExperiment.m
222 lines (189 loc) · 9.34 KB
/
Experiment.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
classdef Experiment < handle
    %EXPERIMENT runs one ORCA experiment, which consists of optimising and
    %   running a method on a fold (a pair of train-test dataset partitions).
    %   The experiment is described by a configuration file.
    %   This class is used by Utilities to launch a set of experiments.
    %
    %   EXPERIMENT properties:
    %      data                 - DataSet object to store the train/test data
    %      method               - Method to learn and classify data
    %      cvCriteria           - Metric to guide the grid search for parameters optimisation
    %      resultsDir           - Directory to store performance reports and learned models
    %      seed                 - Seed to be used for random number generation
    %      crossvalide          - Activate cross-validation
    %
    %   EXPERIMENT methods:
    %      launch               - Launch experiment described in file
    %
    %   This file is part of ORCA: https://github.com/ayrna/orca
    %   Original authors: Pedro Antonio Gutiérrez, María Pérez Ortiz, Javier Sánchez Monedero
    %   Citation: If you use this code, please cite the associated paper http://www.uco.es/grupos/ayrna/orreview
    %   Copyright:
    %       This software is released under the The GNU General Public License v3.0 licence
    %       available at http://www.gnu.org/licenses/gpl-3.0.html
    %

    properties
        data = DataSet;
        method = Algorithm;
        cvCriteria = MAE;
        crossvalide = 0;
        resultsDir = '';
        % calculate metrics with the sum of matrices (only suitable for
        % k-fold experimental design)
        report_sum = 0;
        seed = 1;
        parameters; % parameters to optimize
    end

    properties (SetAccess = private)
        logsDir
    end

    methods
        function obj = launch(obj,expFile)
            % LAUNCH Launch experiment described in file.
            %   EXPOBJ = LAUNCH(EXPFILE) parses EXPFILE and runs the experiment
            %   described in it. It performs the following steps:
            %   # Preprocess data: cleaning and standardization (option needs
            %     to be activated in the configuration file)
            %   # Optimize parameters by performing a grid search (if selected
            %     in configuration file)
            %   # Run algorithm with optimal parameters (if crossvalidation was
            %     selected)
            %   # Save experiment results for the fold
            obj.process(expFile);
            obj.run();
        end
    end

    methods(Access = private)
        function obj = run(obj)
            % RUN do experiment steps: data cleaning and standardization,
            %   parameters optimization and save results.
            %   When obj.crossvalide is set, the time spent in
            %   crossValideParams is stored in the results as 'crossvaltime'.
            [train,test] = obj.data.preProcessData();

            if obj.crossvalide
                % tic/toc measures elapsed time directly; clock/etime depends
                % on the wall clock and is affected by system time changes.
                cvTimer = tic;
                Optimals = obj.crossValideParams(train);
                crossvaltime = toc(cvTimer);

                totalResults = obj.method.fitpredict(train, test, Optimals);
                totalResults.crossvaltime = crossvaltime;
            else
                totalResults = obj.method.fitpredict(train, test);
            end

            obj.saveResults(totalResults);
        end

        function obj = process(obj,fname)
            % PROCESS parses experiment described in FNAME and copies the
            %   configuration values into the object properties.
            %   Raises an error if any of the fields 'directory', 'train',
            %   'test' or 'results' cannot be read from the general section.
            cObj = Config(fname);
            expObj = cObj.exps{:};

            % Copy ini values to corresponding object properties

            % General experiment properties (all optional).
            % NOTE(review): str2num evaluates the text it receives, so these
            % values must come from a trusted configuration file. Presumably
            % kept so that non-numeric literals are accepted - confirm before
            % replacing with str2double.
            if expObj.general.isKey('num_folds')
                obj.data.nOfFolds = str2num(expObj.general('num_folds'));
            end
            if expObj.general.isKey('standarize')
                obj.data.standarize = str2num(expObj.general('standarize'));
            end
            if expObj.general.isKey('cvmetric')
                met = upper(expObj.general('cvmetric'));
                % NOTE(review): eval executes text taken from the
                % configuration file; only use trusted configuration files.
                eval(['obj.cvCriteria = ' met ';']);
            end
            if expObj.general.isKey('seed')
                obj.seed = str2num(expObj.general('seed'));
            end
            if expObj.general.isKey('report_sum')
                obj.report_sum = str2num(expObj.general('report_sum'));
            end

            % Mandatory fields
            try
                obj.data.directory = expObj.general('directory');
                obj.data.train = expObj.general('train');
                obj.data.test = expObj.general('test');
                obj.resultsDir = expObj.general('results');
            catch ME
                error('Configuration file %s does not have mininum fields. Exception %s', fname, ME.identifier)
            end

            % Algorithm properties are transformed to varargs ('key',value)
            varargs = obj.mapsToCell(expObj.algorithm);
            alg = expObj.algorithm('algorithm');
            % The key/value cell is passed as one single argument.
            obj.method = feval(alg, varargs);

            % Parameters to be optimized: any parameter present activates
            % cross-validation.
            if ~isempty(expObj.params)
                pkeys = expObj.params.keys;
                for p = 1:numel(pkeys)
                    % The value is an expression that is evaluated inside
                    % brackets to build the list of candidate values.
                    % NOTE(review): eval executes text taken from the
                    % configuration file; only use trusted configuration files.
                    eval(['obj.parameters.' pkeys{p} ' = [' expObj.params(pkeys{p}) '];']);
                    obj.crossvalide = 1;
                end
            end
        end

        function obj = saveResults(obj,TotalResults)
            % SAVERESULTS saves the results of the experiment and
            %   the best hyperparameters.
            %   Files are written below obj.resultsDir in the subfolders
            %   'OptHyperparams', 'Times', 'Predictions', 'Models' and
            %   'Guess'. The subfolders are not created here.
            %   Raises 'Experiment:saveResults:cannotOpen' when a text
            %   report cannot be opened for writing.

            % Hyperparameters: one 'name,value' line per parameter
            par = obj.method.getParameterNames();
            if ~isempty(par)
                outputFile = [obj.resultsDir filesep 'OptHyperparams' filesep obj.data.dataname ];
                fid = Experiment.openForWriting(outputFile);
                try
                    for i = 1:numel(par)
                        value = TotalResults.model.parameters.(par{i});
                        fprintf(fid,'%s,%f\n', par{i},value);
                    end
                catch ME
                    % Do not leak the file handle if writing fails
                    fclose(fid);
                    rethrow(ME);
                end
                fclose(fid);
            end

            % Times: train, test and cross-validation (0 if not performed)
            if obj.crossvalide
                cvTime = TotalResults.crossvaltime;
            else
                cvTime = 0;
            end
            outputFile = [obj.resultsDir filesep 'Times' filesep obj.data.dataname ];
            fid = Experiment.openForWriting(outputFile);
            try
                fprintf(fid, '%f\n%f\n%f', TotalResults.trainTime, TotalResults.testTime, cvTime);
            catch ME
                fclose(fid);
                rethrow(ME);
            end
            fclose(fid);

            % Predicted labels for train and test partitions
            outputFile = [obj.resultsDir filesep 'Predictions' filesep obj.data.train ];
            dlmwrite(outputFile, TotalResults.predictedTrain);
            outputFile = [obj.resultsDir filesep 'Predictions' filesep obj.data.test ];
            dlmwrite(outputFile, TotalResults.predictedTest);

            % Write complete model
            model = TotalResults.model;
            outputFile = [obj.resultsDir filesep 'Models' filesep obj.data.dataname '.mat'];
            save(outputFile, 'model');

            % Projections for train and test partitions, 15 decimal digits
            outputFile = [obj.resultsDir filesep 'Guess' filesep obj.data.train ];
            dlmwrite(outputFile, TotalResults.projectedTrain, 'precision', '%.15f');
            outputFile = [obj.resultsDir filesep 'Guess' filesep obj.data.test ];
            dlmwrite(outputFile, TotalResults.projectedTest, 'precision', '%.15f');
        end

        function optimals = crossValideParams(obj,train)
            % CROSSVALIDEPARAMS Function for performing the crossvalidation in a specific train partition.
            %
            %   OPTIMALS = CROSSVALIDEPARAMS(TRAIN) Divides the data in k-folds
            %   (k defined by 'num_folds' in configuration file). Returns
            %   structure OPTIMALS with optimal parameter(s)
            optimals = paramopt(obj.method,obj.parameters,train, 'metric', obj.cvCriteria,...
                'nfolds', obj.data.nOfFolds, 'seed', obj.seed);
        end
    end

    methods (Static = true)
        function varargs = mapsToCell(aObj)
            %varargs = mapsToCell(mapObj) returns the key/value pairs of the
            %   map as a 1x(2*N) cell {'key1', value1, 'key2', value2, ...}.
            %   Example: {'kernel', 'rbf', 'c', 0.1}
            %   The key 'algorithm' is not included in the output. Char
            %   values that str2double can convert are returned as numbers;
            %   other char values are returned as text; non-char values are
            %   returned unchanged.
            %   If there are no parameters a 1x1 cell holding [] is returned.
            %   The input map is not modified.
            if aObj.Count == 0
                varargs = cell(1,1);
                return
            end

            % Work on a copy: containers.Map is a handle class and the
            % caller still needs the 'algorithm' entry.
            mapObj = containers.Map(aObj.keys,aObj.values);
            if mapObj.isKey('algorithm')
                mapObj.remove('algorithm');
            end

            pkeys = mapObj.keys;
            nParams = numel(pkeys);
            if nParams == 0
                varargs = cell(1,1);
                return
            end

            varargs = cell(1, 2*nParams);
            for k = 1:nParams
                key = pkeys{k};
                value = mapObj(key);
                varargs{1, 2*k-1} = key;

                if ischar(value)
                    % Check numerical values
                    valuenum = str2double(value);
                    if isnan(valuenum) % we have a string
                        varargs{1, 2*k} = value;
                    else % we have a number
                        varargs{1, 2*k} = valuenum;
                    end
                else
                    % Already typed (e.g. numeric): keep it untouched
                    varargs{1, 2*k} = value;
                end
            end
        end
    end

    methods (Static = true, Access = private)
        function fid = openForWriting(fileName)
            % OPENFORWRITING opens FILENAME in 'w' mode and returns the file
            %   identifier. Raises 'Experiment:saveResults:cannotOpen' with
            %   the message reported by fopen when the file cannot be opened.
            [fid, msg] = fopen(fileName, 'w');
            if fid < 0
                error('Experiment:saveResults:cannotOpen', ...
                    'Could not open file %s for writing: %s', fileName, msg);
            end
        end
    end
end