|
3 | 3 | from sklearn.datasets import make_classification
|
4 | 4 | from sklearn.datasets import make_regression
|
5 | 5 | from sklearn.metrics import roc_auc_score
|
6 |
| -from sklearn.model_selection import train_test_split |
| 6 | +try: |
| 7 | + from sklearn.model_selection import train_test_split |
| 8 | +except ImportError: |
| 9 | + from sklearn.cross_validation import train_test_split |
7 | 10 |
|
8 |
| -from mla.ensemble.gbm import * |
| 11 | +from mla.ensemble.gbm import GradientBoostingClassifier, GradientBoostingRegressor |
9 | 12 | from mla.metrics.metrics import mean_squared_error
|
10 | 13 |
|
11 | 14 | logging.basicConfig(level=logging.DEBUG)
|
12 | 15 |
|
13 | 16 |
|
def classification():
    """Demo: fit a gradient boosting classifier on synthetic data.

    Generates a random binary classification problem, trains the mla
    GradientBoostingClassifier on it, and prints the predictions along
    with the held-out ROC AUC score.
    """
    # Build a random binary classification dataset.
    features, labels = make_classification(
        n_samples=350, n_features=15, n_informative=10,
        random_state=1111, n_classes=2, class_sep=1., n_redundant=0)

    # Hold out 15% of the samples for evaluation.
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.15, random_state=1111)

    clf = GradientBoostingClassifier(n_estimators=50, max_depth=4,
                                     max_features=8, learning_rate=0.1)
    clf.fit(train_x, train_y)

    preds = clf.predict(test_x)
    print(preds)
    print(preds.min())
    print(preds.max())
    # NOTE(review): preds appear to be continuous scores (min/max printed
    # above), which is what roc_auc_score expects — confirm against the
    # mla GradientBoostingClassifier.predict contract.
    print('classification, roc auc score: %s'
          % roc_auc_score(test_y, preds))
28 | 34 |
|
29 | 35 |
|
def regression():
    """Demo: fit a gradient boosting regressor on synthetic data.

    Generates a random regression problem, trains the mla
    GradientBoostingRegressor on it, and prints the held-out
    mean squared error.
    """
    # Build a random regression dataset with a small amount of noise.
    features, targets = make_regression(
        n_samples=500, n_features=5, n_informative=5,
        n_targets=1, noise=0.05, random_state=1111, bias=0.5)

    # Hold out 10% of the samples for evaluation.
    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, test_size=0.1, random_state=1111)

    reg = GradientBoostingRegressor(n_estimators=25, max_depth=5,
                                    max_features=3)
    reg.fit(train_x, train_y)

    preds = reg.predict(test_x)
    print('regression, mse: %s'
          % mean_squared_error(test_y.flatten(), preds.flatten()))
40 | 50 |
|
41 | 51 |
|
42 | 52 | if __name__ == '__main__':
|
|
0 commit comments