
Commit 7441357

wdevazelhes authored and perimosocordiae committed
[WIP] Add verbose to NCA and MLKR (#105)
* ENH: Add verbose to NCA
* ENH: add verbose to MLKR
* ENH: Add test for convergence warning, and fix datasets (use classification for classification and regression for regression)
* STY: update code according to review #105 (review)
* FIX: return the real training time
* FIX: fix forgotten function call in test_no_verbose
* MAINT: Add L-BFGS-B to MLKR, and improve the tests with less features for regression
* FIX: remove MLKR previous arguments for conjugate gradient and put arguments for L-BFGS-B
* FIX: fix test string representation for mlkr
* FIX: convert y to numeric in MLKR (since it is a regression algorithm).
* FIX: fix MLKR test using the method mlkr._loss
1 parent e8eb1a6 commit 7441357

File tree: 4 files changed (+187 additions, -43 deletions)

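In user-facing terms, the commit adds a verbose flag to the NCA and MLKR constructors; when it is set, fit() prints an iteration table and the total training time. A minimal usage sketch on toy data (assuming the metric-learn version from this commit):

    from sklearn.datasets import make_classification, make_regression
    from metric_learn import NCA, MLKR

    X_c, y_c = make_classification(random_state=0)
    NCA(verbose=True).fit(X_c, y_c)    # prints an iteration table and the total time

    X_r, y_r = make_regression(n_features=5, random_state=0)
    MLKR(verbose=True).fit(X_r, y_r)   # same output format for the regression learner

By default verbose=False and fit() stays silent, as before.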

metric_learn/mlkr.py

Lines changed: 72 additions & 31 deletions
@@ -7,12 +7,16 @@
 for dimensionality reduction and high dimensional data visualization.
 """
 from __future__ import division, print_function
+import time
+import sys
+import warnings
 import numpy as np
 from sklearn.utils.fixes import logsumexp
 from scipy.optimize import minimize
 from sklearn.decomposition import PCA
 from sklearn.metrics import pairwise_distances
 from sklearn.utils.validation import check_X_y
+from sklearn.exceptions import ConvergenceWarning
 
 from .base_metric import BaseMetricLearner
 
@@ -21,8 +25,8 @@
 
 class MLKR(BaseMetricLearner):
   """Metric Learning for Kernel Regression (MLKR)"""
-  def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001,
-               max_iter=1000):
+  def __init__(self, num_dims=None, A0=None, tol=None, max_iter=1000,
+               verbose=False):
     """
     Initialize MLKR.
 
@@ -34,23 +38,23 @@ def __init__(self, num_dims=None, A0=None, epsilon=0.01, alpha=0.0001,
     A0: array-like, optional
       Initialization of transformation matrix. Defaults to PCA loadings.
 
-    epsilon: float, optional
-      Step size for congujate gradient descent.
-
-    alpha: float, optional
-      Stopping criterion for congujate gradient descent.
+    tol: float, optional (default=None)
+      Convergence tolerance for the optimization.
 
     max_iter: int, optional
      Cap on number of congugate gradient iterations.
+
+    verbose : bool, optional (default=False)
+      Whether to print progress messages or not.
     """
     self.num_dims = num_dims
     self.A0 = A0
-    self.epsilon = epsilon
-    self.alpha = alpha
+    self.tol = tol
     self.max_iter = max_iter
+    self.verbose = verbose
 
   def _process_inputs(self, X, y):
-    self.X_, y = check_X_y(X, y)
+    self.X_, y = check_X_y(X, y, y_numeric=True)
     n, d = self.X_.shape
     if y.shape[0] != n:
       raise ValueError('Data and label lengths mismatch: %d != %d'
@@ -80,30 +84,67 @@ def fit(self, X, y):
     """
     X, y, A = self._process_inputs(X, y)
 
-    res = minimize(_loss, A.ravel(), (X, y), method='CG', jac=True,
-                   tol=self.alpha,
-                   options=dict(maxiter=self.max_iter, eps=self.epsilon))
+    # Measure the total training time
+    train_time = time.time()
+
+    self.n_iter_ = 0
+    res = minimize(self._loss, A.ravel(), (X, y), method='L-BFGS-B',
+                   jac=True, tol=self.tol,
+                   options=dict(maxiter=self.max_iter))
     self.transformer_ = res.x.reshape(A.shape)
-    self.n_iter_ = res.nit
+
+    # Stop timer
+    train_time = time.time() - train_time
+    if self.verbose:
+      cls_name = self.__class__.__name__
+      # Warn the user if the algorithm did not converge
+      if not res.success:
+        warnings.warn('[{}] MLKR did not converge: {}'
+                      .format(cls_name, res.message), ConvergenceWarning)
+      print('[{}] Training took {:8.2f}s.'.format(cls_name, train_time))
+
     return self
 
   def transformer(self):
     return self.transformer_
 
-
-def _loss(flatA, X, y):
-  A = flatA.reshape((-1, X.shape[1]))
-  X_embedded = np.dot(X, A.T)
-  dist = pairwise_distances(X_embedded, squared=True)
-  np.fill_diagonal(dist, np.inf)
-  softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis])
-  yhat = softmax.dot(y)
-  ydiff = yhat - y
-  cost = (ydiff**2).sum()
-
-  # also compute the gradient
-  W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis])
-  W_sym = W + W.T
-  np.fill_diagonal(W_sym, - W.sum(axis=0))
-  grad = 4 * (X_embedded.T.dot(W_sym)).dot(X)
-  return cost, grad.ravel()
+  def _loss(self, flatA, X, y):
+
+    if self.n_iter_ == 0 and self.verbose:
+      header_fields = ['Iteration', 'Objective Value', 'Time(s)']
+      header_fmt = '{:>10} {:>20} {:>10}'
+      header = header_fmt.format(*header_fields)
+      cls_name = self.__class__.__name__
+      print('[{cls}]'.format(cls=cls_name))
+      print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name,
+                                                     header=header,
+                                                     sep='-' * len(header)))
+
+    start_time = time.time()
+
+    A = flatA.reshape((-1, X.shape[1]))
+    X_embedded = np.dot(X, A.T)
+    dist = pairwise_distances(X_embedded, squared=True)
+    np.fill_diagonal(dist, np.inf)
+    softmax = np.exp(- dist - logsumexp(- dist, axis=1)[:, np.newaxis])
+    yhat = softmax.dot(y)
+    ydiff = yhat - y
+    cost = (ydiff ** 2).sum()
+
+    # also compute the gradient
+    W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis])
+    W_sym = W + W.T
+    np.fill_diagonal(W_sym, - W.sum(axis=0))
+    grad = 4 * (X_embedded.T.dot(W_sym)).dot(X)
+
+    if self.verbose:
+      start_time = time.time() - start_time
+      values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}'
+      print(values_fmt.format(cls=self.__class__.__name__,
+                              n_iter=self.n_iter_, loss=cost,
+                              start_time=start_time))
+      sys.stdout.flush()
+
+    self.n_iter_ += 1
+
+    return cost, grad.ravel()

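MLKR now delegates optimization to scipy's L-BFGS-B instead of conjugate gradient, which is why the step-size and stopping arguments (epsilon, alpha) give way to tol and max_iter. With jac=True, scipy.optimize.minimize expects the objective callable to return the pair (value, gradient), which is the contract the reworked _loss method satisfies. A minimal, self-contained sketch of that contract on a toy quadratic (not MLKR's actual objective):

    import numpy as np
    from scipy.optimize import minimize

    def loss_and_grad(w):
        # toy objective ||w - 1||^2, returned together with its analytic gradient
        diff = w - 1.0
        return np.dot(diff, diff), 2 * diff

    res = minimize(loss_and_grad, x0=np.zeros(3), method='L-BFGS-B',
                   jac=True, tol=1e-6, options=dict(maxiter=1000))
    print(res.success, res.x)  # converges to [1. 1. 1.]

Note that MLKR's n_iter_ is now incremented inside _loss itself (res.nit is no longer used), so it counts objective evaluations rather than optimizer iterations.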
metric_learn/nca.py

Lines changed: 49 additions & 4 deletions
@@ -6,11 +6,13 @@
 from __future__ import absolute_import
 
 import warnings
+import time
+import sys
 import numpy as np
 from scipy.optimize import minimize
 from sklearn.metrics import pairwise_distances
 from sklearn.utils.validation import check_X_y
-from numpy.linalg import multi_dot
+from sklearn.exceptions import ConvergenceWarning
 
 try:  # scipy.misc.logsumexp is deprecated in scipy 1.0.0
   from scipy.special import logsumexp
@@ -24,7 +26,7 @@
 
 class NCA(BaseMetricLearner):
   def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated',
-               tol=None):
+               tol=None, verbose=False):
     """Neighborhood Components Analysis
 
     Parameters
@@ -45,11 +47,15 @@ def __init__(self, num_dims=None, max_iter=100, learning_rate='deprecated',
 
     tol : float, optional (default=None)
       Convergence tolerance for the optimization.
+
+    verbose : bool, optional (default=False)
+      Whether to print progress messages or not.
     """
     self.num_dims = num_dims
     self.max_iter = max_iter
     self.learning_rate = learning_rate  # TODO: remove in v.0.5.0
     self.tol = tol
+    self.verbose = verbose
 
   def transformer(self):
     return self.A_
@@ -70,6 +76,9 @@ def fit(self, X, y):
     if num_dims is None:
       num_dims = d
 
+    # Measure the total training time
+    train_time = time.time()
+
     # Initialize A to a scaling matrix
     A = np.zeros((num_dims, d))
     np.fill_diagonal(A, 1./(np.maximum(X.max(axis=0)-X.min(axis=0), EPS)))
@@ -86,15 +95,41 @@ def fit(self, X, y):
     }
 
     # Call the optimizer
+    self.n_iter_ = 0
     opt_result = minimize(**optimizer_params)
 
     self.X_ = X
     self.A_ = opt_result.x.reshape(-1, X.shape[1])
     self.n_iter_ = opt_result.nit
+
+    # Stop timer
+    train_time = time.time() - train_time
+    if self.verbose:
+      cls_name = self.__class__.__name__
+
+      # Warn the user if the algorithm did not converge
+      if not opt_result.success:
+        warnings.warn('[{}] NCA did not converge: {}'.format(
+            cls_name, opt_result.message), ConvergenceWarning)
+
+      print('[{}] Training took {:8.2f}s.'.format(cls_name, train_time))
+
     return self
 
-  @staticmethod
-  def _loss_grad_lbfgs(A, X, mask, sign=1.0):
+  def _loss_grad_lbfgs(self, A, X, mask, sign=1.0):
+
+    if self.n_iter_ == 0 and self.verbose:
+      header_fields = ['Iteration', 'Objective Value', 'Time(s)']
+      header_fmt = '{:>10} {:>20} {:>10}'
+      header = header_fmt.format(*header_fields)
+      cls_name = self.__class__.__name__
+      print('[{cls}]'.format(cls=cls_name))
+      print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name,
+                                                     header=header,
+                                                     sep='-' * len(header)))
+
+    start_time = time.time()
+
     A = A.reshape(-1, X.shape[1])
     X_embedded = np.dot(X, A.T)  # (n_samples, num_dims)
     # Compute softmax distances
@@ -113,4 +148,14 @@ def _loss_grad_lbfgs(A, X, mask, sign=1.0):
     weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T
     np.fill_diagonal(weighted_p_ij_sym, - weighted_p_ij.sum(axis=0))
     gradient = 2 * (X_embedded.T.dot(weighted_p_ij_sym)).dot(X)
+
+    if self.verbose:
+      start_time = time.time() - start_time
+      values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}'
+      print(values_fmt.format(cls=self.__class__.__name__,
+                              n_iter=self.n_iter_, loss=loss,
+                              start_time=start_time))
+      sys.stdout.flush()
+
+    self.n_iter_ += 1
     return sign * loss, sign * gradient.ravel()

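As in MLKR, fit() now times training and, in this WIP state only when verbose=True, emits a ConvergenceWarning if the optimizer stops before converging (NCA still takes n_iter_ from opt_result.nit after the minimize call). A hedged sketch of how a caller could promote that warning to a hard error, on toy data:

    import warnings
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.datasets import make_classification
    from metric_learn import NCA

    X, y = make_classification(random_state=0)
    with warnings.catch_warnings():
        warnings.simplefilter('error', ConvergenceWarning)
        try:
            # two iterations are typically far too few to converge
            NCA(max_iter=2, verbose=True).fit(X, y)
        except ConvergenceWarning as w:
            print('stopped early:', w)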
test/metric_learn_test.py

Lines changed: 63 additions & 5 deletions
@@ -1,11 +1,14 @@
+import re
 import unittest
+import pytest
 import numpy as np
 from scipy.optimize import check_grad
 from six.moves import xrange
 from sklearn.metrics import pairwise_distances
 from sklearn.datasets import load_iris, make_classification, make_regression
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.testing import assert_warns_message
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
 
 from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC,
@@ -111,12 +114,14 @@ def test_finite_differences(self):
     X, y = make_classification()
     M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1])
     mask = y[:, np.newaxis] == y[np.newaxis, :]
+    nca = NCA()
+    nca.n_iter_ = 0
 
     def fun(M):
-      return NCA._loss_grad_lbfgs(M, X, mask)[0]
+      return nca._loss_grad_lbfgs(M, X, mask)[0]
 
     def grad(M):
-      return NCA._loss_grad_lbfgs(M, X, mask)[1].ravel()
+      return nca._loss_grad_lbfgs(M, X, mask)[1].ravel()
 
     # compute relative error
     rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
@@ -257,13 +262,14 @@ def test_finite_differences(self):
     X, y = make_regression(n_features=4, random_state=1, n_samples=20)
     X, y = check_X_y(X, y)
     M = np.random.randn(2, X.shape[1])
-    from metric_learn.mlkr import _loss
+    mlkr = MLKR()
+    mlkr.n_iter_ = 0
 
     def fun(M):
-      return _loss(M, X, y)[0]
+      return mlkr._loss(M, X, y)[0]
 
     def grad_fn(M):
-      return _loss(M, X, y)[1].ravel()
+      return mlkr._loss(M, X, y)[1].ravel()
 
     # compute relative error
     rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M))
@@ -307,5 +313,57 @@ def test_iris(self):
     self.assertLess(csep, 0.2)
 
 
+@pytest.mark.parametrize(('algo_class', 'dataset'),
+                         [(NCA, make_classification()),
+                          (MLKR, make_regression())])
+def test_verbose(algo_class, dataset, capsys):
+  # assert there is proper output when verbose = True
+  X, y = dataset
+  model = algo_class(verbose=True)
+  model.fit(X, y)
+  out, _ = capsys.readouterr()
+
+  # check output
+  lines = re.split('\n+', out)
+  header = '{:>10} {:>20} {:>10}'.format('Iteration', 'Objective Value',
+                                         'Time(s)')
+  assert lines[0] == '[{}]'.format(algo_class.__name__)
+  assert lines[1] == '[{}] {}'.format(algo_class.__name__, header)
+  assert lines[2] == '[{}] {}'.format(algo_class.__name__, '-' * len(header))
+  for line in lines[3:-2]:
+    # The following regex will match for instance:
+    # '[NCA] 0 6.988936e+01 0.01'
+    assert re.match("\[" + algo_class.__name__ + "\]\ *\d+\ *\d\.\d{6}e[+|-]"
+                    "\d+\ *\d+\.\d{2}", line)
+  assert re.match("\[" + algo_class.__name__ + "\] Training took\ *"
+                  "\d+\.\d{2}s\.", lines[-2])
+  assert lines[-1] == ''
+
+
+@pytest.mark.parametrize(('algo_class', 'dataset'),
+                         [(NCA, make_classification()),
+                          (MLKR, make_regression(n_features=10))])
+def test_no_verbose(dataset, algo_class, capsys):
+  # assert by default there is no output (verbose=False)
+  X, y = dataset
+  model = algo_class()
+  model.fit(X, y)
+  out, _ = capsys.readouterr()
+  # check output
+  assert (out == '')
+
+
+@pytest.mark.parametrize(('algo_class', 'dataset'),
+                         [(NCA, make_classification()),
+                          (MLKR, make_regression(n_features=10))])
+def test_convergence_warning(dataset, algo_class):
+  X, y = dataset
+  model = algo_class(max_iter=2, verbose=True)
+  cls_name = model.__class__.__name__
+  assert_warns_message(ConvergenceWarning,
+                       '[{}] {} did not converge'.format(cls_name, cls_name),
+                       model.fit, X, y)
+
+
 if __name__ == '__main__':
   unittest.main()

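Because _loss_grad_lbfgs and _loss are instance methods now, the finite-difference tests build an estimator and pre-set n_iter_ before calling them. The same gradient check can be reproduced standalone; a condensed sketch mirroring the updated NCA test (no new functionality, names as in the test):

    import numpy as np
    from scipy.optimize import check_grad
    from sklearn.datasets import make_classification
    from metric_learn import NCA

    X, y = make_classification(random_state=0)
    mask = y[:, np.newaxis] == y[np.newaxis, :]
    M = np.random.randn(2, X.shape[1])

    nca = NCA()
    nca.n_iter_ = 0  # the loss reads this counter, so it must exist before the call

    def fun(m):
        return nca._loss_grad_lbfgs(m, X, mask)[0]

    def grad(m):
        return nca._loss_grad_lbfgs(m, X, mask)[1].ravel()

    # relative error between the analytic gradient and finite differences
    rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
    print(rel_diff)  # should be close to zero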
test/test_base_metric.py

Lines changed: 3 additions & 3 deletions
@@ -17,7 +17,7 @@ def test_lmnn(self):
   def test_nca(self):
     self.assertEqual(str(metric_learn.NCA()),
                      ("NCA(learning_rate='deprecated', max_iter=100, "
-                      "num_dims=None, tol=None)"))
+                      "num_dims=None, tol=None,\n verbose=False)"))
 
   def test_lfda(self):
     self.assertEqual(str(metric_learn.LFDA()),
@@ -61,8 +61,8 @@ def test_rca(self):
 
   def test_mlkr(self):
     self.assertEqual(str(metric_learn.MLKR()),
-                     "MLKR(A0=None, alpha=0.0001, epsilon=0.01, "
-                     "max_iter=1000, num_dims=None)")
+                     "MLKR(A0=None, max_iter=1000, num_dims=None, tol=None, "
+                     "verbose=False)")
 
   def test_mmc(self):
     self.assertEqual(str(metric_learn.MMC()), """

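These assertions simply pin the new constructor signatures in the estimators' repr; printing a default instance after this change should give the strings asserted above, e.g.:

    import metric_learn

    print(metric_learn.MLKR())
    # MLKR(A0=None, max_iter=1000, num_dims=None, tol=None, verbose=False)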