Make examples more clear

rushter · rushter · commit 823569da234b · 2016-11-14T06:18:27.000+03:00
diff --git a/examples/gbm.py b/examples/gbm.py
@@ -12,9 +12,9 @@
 
 
 def classification():
+    # Generate a random binary classification problem.
     X, y = make_classification(n_samples=350, n_features=15, n_informative=10, random_state=1111, n_classes=2,
                                class_sep=1., n_redundant=0)
-    # y = y.flatten()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)
 
     model = GradientBoostingClassifier(n_estimators=50,
@@ -28,9 +28,11 @@ def classification():
 
 
 def regression():
+    # Generate a random regression problem.
     X, y = make_regression(n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111,
                            bias=0.5)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
+
     model = GradientBoostingRegressor(n_estimators=25, max_depth=5, max_features=3, )
     model.fit(X_train, y_train)
     predictions = model.predict(X_test)
diff --git a/examples/linear_models.py b/examples/linear_models.py
@@ -1,4 +1,3 @@
-
 import logging
 
 from sklearn.model_selection import train_test_split
@@ -13,20 +12,23 @@
 
 
 def regression():
+    # Generate a random regression problem.
     X, y = make_regression(n_samples=10000, n_features=100, n_informative=75, n_targets=1,
                            noise=0.05, random_state=1111, bias=0.5)
-
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111)
+
     model = LinearRegression(lr=0.01, max_iters=2000, penalty='l2', C=0.03)
     model.fit(X_train, y_train)
     predictions = model.predict(X_test)
     print('regression mse', mean_squared_error(y_test, predictions))
 
 
 def classification():
+    # Generate a random binary classification problem.
     X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
                                class_sep=2.5, )
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
+
     model = LogisticRegression(lr=0.01, max_iters=500, penalty='l1', C=0.01)
     model.fit(X_train, y_train)
     predictions = model.predict(X_test)
diff --git a/examples/nnet_convnet_mnist.py b/examples/nnet_convnet_mnist.py
@@ -10,9 +10,11 @@
 
 logging.basicConfig(level=logging.DEBUG)
 
+
+# Load MNIST dataset
 X_train, X_test, y_train, y_test = load_mnist()
 
-# Normalization
+# Normalize data
 X_train /= 255.
 X_test /= 255.
 
diff --git a/examples/nnet_mlp.py b/examples/nnet_mlp.py
@@ -5,7 +5,6 @@
 from sklearn.datasets import make_regression
 from sklearn.metrics import roc_auc_score
 
-from mla.datasets import *
 from mla.metrics.metrics import root_mean_squared_log_error, mean_squared_error
 from mla.neuralnet import NeuralNet
 from mla.neuralnet.constraints import MaxNorm, UnitNorm
@@ -19,6 +18,7 @@
 
 
 def classification():
+    # Generate a random binary classification problem.
     X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
                                class_sep=2.5, )
     y = one_hot(y)
@@ -47,6 +47,7 @@ def classification():
 
 
 def regression():
+    # Generate a random regression problem.
     X, y = make_regression(n_samples=5000, n_features=25, n_informative=25, n_targets=1, random_state=100, noise=0.05)
     y *= 0.01
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
diff --git a/examples/nnet_rnn_binary_add.py b/examples/nnet_rnn_binary_add.py
@@ -6,27 +6,37 @@
 
 from mla.metrics import accuracy
 from mla.neuralnet import NeuralNet
-from mla.neuralnet.constraints import SmallNorm
-from mla.neuralnet.layers import Activation, TimeDistributedDense, Parameters
-from mla.neuralnet.layers.recurrent import RNN, LSTM
+from mla.neuralnet.layers import Activation, TimeDistributedDense
+from mla.neuralnet.layers.recurrent import LSTM
 from mla.neuralnet.optimizers import Adam
 
 logging.basicConfig(level=logging.DEBUG)
 
 
 def addition_dataset(dim=10, n_samples=10000, batch_size=64):
-    combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples))
+    """Generate binary addition dataset.
+    http://devankuleindiren.com/Projects/rnn_arithmetic.php
+    """
     binary_format = '{:0' + str(dim) + 'b}'
+
+    # Take up to n_samples pairs of distinct numbers below 2 ** (dim - 1)
+    combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples))
+
+    # Allocate zero-filled bit arrays: two input channels (a, b) and one target channel
     X = np.zeros((len(combs), dim, 2), dtype=np.uint8)
     y = np.zeros((len(combs), dim, 1), dtype=np.uint8)
 
     for i, (a, b) in enumerate(combs):
+        # Encode both operands as dim-bit strings, least significant bit first
         X[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a)]))
         X[i, :, 1] = list(reversed([int(x) for x in binary_format.format(b)]))
+
+        # Target: the sum a + b as dim bits, least significant bit first
         y[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a + b)]))
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111)
 
+    # Round sample counts down to a multiple of batch_size
     train_b = (X_train.shape[0] // batch_size) * batch_size
     test_b = (X_test.shape[0] // batch_size) * batch_size
     X_train = X_train[0:train_b]
@@ -37,7 +47,7 @@ def addition_dataset(dim=10, n_samples=10000, batch_size=64):
     return X_train, X_test, y_train, y_test
 
 
-def addition_nlp(ReccurentLayer):
+def addition_problem(ReccurentLayer):
     X_train, X_test, y_train, y_test = addition_dataset(8, 5000)
 
     print(X_train.shape, X_test.shape)
@@ -60,8 +70,7 @@ def addition_nlp(ReccurentLayer):
     print(accuracy(y_test, predictions))
 
 
-
 # RNN
-# addition_nlp(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()})))
+# addition_problem(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()})))
 # LSTM
-addition_nlp(LSTM(16))
+addition_problem(LSTM(16))
diff --git a/examples/pca.py b/examples/pca.py
@@ -7,6 +7,7 @@
 
 # logging.basicConfig(level=logging.DEBUG)
 
+# Generate a random binary classification problem.
 X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
                            class_sep=2.5, )
 
diff --git a/examples/random_forest.py b/examples/random_forest.py
@@ -1,18 +1,18 @@
 import logging
 
-from sklearn.datasets import make_regression
-from sklearn.model_selection import train_test_split
 from sklearn.datasets import make_classification
+from sklearn.datasets import make_regression
 from sklearn.metrics import roc_auc_score
+from sklearn.model_selection import train_test_split
 
-from mla.datasets import load_boston
 from mla.ensemble.random_forest import RandomForestClassifier, RandomForestRegressor
 from mla.metrics.metrics import mean_squared_error
 
 logging.basicConfig(level=logging.DEBUG)
 
 
 def classification():
+    # Generate a random binary classification problem.
     X, y = make_classification(n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2,
                                class_sep=2.5, n_redundant=0)
 
@@ -26,9 +26,11 @@ def classification():
 
 
 def regression():
+    # Generate a random regression problem.
     X, y = make_regression(n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111,
                            bias=0.5)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
+
     model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3, )
     model.fit(X_train, y_train)
     predictions = model.predict(X_test)
diff --git a/examples/svm.py b/examples/svm.py
@@ -11,6 +11,7 @@
 
 
 def classification():
+    # Generate a random binary classification problem.
     X, y = make_classification(n_samples=1200, n_features=10, n_informative=5, random_state=1111, n_classes=2,
                                class_sep=1.75, )
     # Convert y to {-1, 1}