Skip to content

Commit 823569d

Browse files
committed
Make examples more clear
1 parent ab4e0b3 commit 823569d

8 files changed

+36 -16 lines changed

examples/gbm.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212

1313

1414
def classification():
15+
# Generate a random binary classification problem.
1516
X, y = make_classification(n_samples=350, n_features=15, n_informative=10, random_state=1111, n_classes=2,
1617
class_sep=1., n_redundant=0)
17-
# y = y.flatten()
1818
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)
1919

2020
model = GradientBoostingClassifier(n_estimators=50,
@@ -28,9 +28,11 @@ def classification():
2828

2929

3030
def regression():
31+
# Generate a random regression problem
3132
X, y = make_regression(n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111,
3233
bias=0.5)
3334
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
35+
3436
model = GradientBoostingRegressor(n_estimators=25, max_depth=5, max_features=3, )
3537
model.fit(X_train, y_train)
3638
predictions = model.predict(X_test)

examples/linear_models.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
21
import logging
32

43
from sklearn.model_selection import train_test_split
@@ -13,20 +12,23 @@
1312

1413

1514
def regression():
15+
# Generate a random regression problem
1616
X, y = make_regression(n_samples=10000, n_features=100, n_informative=75, n_targets=1,
1717
noise=0.05, random_state=1111, bias=0.5)
18-
1918
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111)
19+
2020
model = LinearRegression(lr=0.01, max_iters=2000, penalty='l2', C=0.03)
2121
model.fit(X_train, y_train)
2222
predictions = model.predict(X_test)
2323
print('regression mse', mean_squared_error(y_test, predictions))
2424

2525

2626
def classification():
27+
# Generate a random binary classification problem.
2728
X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
2829
class_sep=2.5, )
2930
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
31+
3032
model = LogisticRegression(lr=0.01, max_iters=500, penalty='l1', C=0.01)
3133
model.fit(X_train, y_train)
3234
predictions = model.predict(X_test)

examples/nnet_convnet_mnist.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,11 @@
1010

1111
logging.basicConfig(level=logging.DEBUG)
1212

13+
14+
# Load MNIST dataset
1315
X_train, X_test, y_train, y_test = load_mnist()
1416

15-
# Normalization
17+
# Normalize data
1618
X_train /= 255.
1719
X_test /= 255.
1820

examples/nnet_mlp.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from sklearn.datasets import make_regression
66
from sklearn.metrics import roc_auc_score
77

8-
from mla.datasets import *
98
from mla.metrics.metrics import root_mean_squared_log_error, mean_squared_error
109
from mla.neuralnet import NeuralNet
1110
from mla.neuralnet.constraints import MaxNorm, UnitNorm
@@ -19,6 +18,7 @@
1918

2019

2120
def classification():
21+
# Generate a random binary classification problem.
2222
X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
2323
class_sep=2.5, )
2424
y = one_hot(y)
@@ -47,6 +47,7 @@ def classification():
4747

4848

4949
def regression():
50+
# Generate a random regression problem
5051
X, y = make_regression(n_samples=5000, n_features=25, n_informative=25, n_targets=1, random_state=100, noise=0.05)
5152
y *= 0.01
5253
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)

examples/nnet_rnn_binary_add.py

+17-8
Original file line number | Diff line number | Diff line change
@@ -6,27 +6,37 @@
66

77
from mla.metrics import accuracy
88
from mla.neuralnet import NeuralNet
9-
from mla.neuralnet.constraints import SmallNorm
10-
from mla.neuralnet.layers import Activation, TimeDistributedDense, Parameters
11-
from mla.neuralnet.layers.recurrent import RNN, LSTM
9+
from mla.neuralnet.layers import Activation, TimeDistributedDense
10+
from mla.neuralnet.layers.recurrent import LSTM
1211
from mla.neuralnet.optimizers import Adam
1312

1413
logging.basicConfig(level=logging.DEBUG)
1514

1615

1716
def addition_dataset(dim=10, n_samples=10000, batch_size=64):
18-
combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples))
17+
"""Generate binary addition dataset.
18+
http://devankuleindiren.com/Projects/rnn_arithmetic.php
19+
"""
1920
binary_format = '{:0' + str(dim) + 'b}'
21+
22+
# Generate all possible number combinations
23+
combs = list(islice(combinations(range(2 ** (dim - 1)), 2), n_samples))
24+
25+
# Initialize empty arrays
2026
X = np.zeros((len(combs), dim, 2), dtype=np.uint8)
2127
y = np.zeros((len(combs), dim, 1), dtype=np.uint8)
2228

2329
for i, (a, b) in enumerate(combs):
30+
# Convert numbers to binary format
2431
X[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a)]))
2532
X[i, :, 1] = list(reversed([int(x) for x in binary_format.format(b)]))
33+
34+
# Generate target variable (a+b)
2635
y[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a + b)]))
2736

2837
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111)
2938

39+
# Round number of examples for batch processing
3040
train_b = (X_train.shape[0] // batch_size) * batch_size
3141
test_b = (X_test.shape[0] // batch_size) * batch_size
3242
X_train = X_train[0:train_b]
@@ -37,7 +47,7 @@ def addition_dataset(dim=10, n_samples=10000, batch_size=64):
3747
return X_train, X_test, y_train, y_test
3848

3949

40-
def addition_nlp(ReccurentLayer):
50+
def addition_problem(ReccurentLayer):
4151
X_train, X_test, y_train, y_test = addition_dataset(8, 5000)
4252

4353
print(X_train.shape, X_test.shape)
@@ -60,8 +70,7 @@ def addition_nlp(ReccurentLayer):
6070
print(accuracy(y_test, predictions))
6171

6272

63-
6473
# RNN
65-
# addition_nlp(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()})))
74+
# addition_problem(RNN(16, parameters=Parameters(constraints={'W': SmallNorm(), 'U': SmallNorm()})))
6675
# LSTM
67-
addition_nlp(LSTM(16))
76+
addition_problem(LSTM(16))

examples/pca.py

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
# logging.basicConfig(level=logging.DEBUG)
99

10+
# Generate a random binary classification problem.
1011
X, y = make_classification(n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2,
1112
class_sep=2.5, )
1213

examples/random_forest.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
11
import logging
22

3-
from sklearn.datasets import make_regression
4-
from sklearn.model_selection import train_test_split
53
from sklearn.datasets import make_classification
4+
from sklearn.datasets import make_regression
65
from sklearn.metrics import roc_auc_score
6+
from sklearn.model_selection import train_test_split
77

8-
from mla.datasets import load_boston
98
from mla.ensemble.random_forest import RandomForestClassifier, RandomForestRegressor
109
from mla.metrics.metrics import mean_squared_error
1110

1211
logging.basicConfig(level=logging.DEBUG)
1312

1413

1514
def classification():
15+
# Generate a random binary classification problem.
1616
X, y = make_classification(n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2,
1717
class_sep=2.5, n_redundant=0)
1818

@@ -26,9 +26,11 @@ def classification():
2626

2727

2828
def regression():
29+
# Generate a random regression problem
2930
X, y = make_regression(n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111,
3031
bias=0.5)
3132
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111)
33+
3234
model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3, )
3335
model.fit(X_train, y_train)
3436
predictions = model.predict(X_test)

examples/svm.py

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212

1313
def classification():
14+
# Generate a random binary classification problem.
1415
X, y = make_classification(n_samples=1200, n_features=10, n_informative=5, random_state=1111, n_classes=2,
1516
class_sep=1.75, )
1617
# Convert y to {-1, 1}

0 commit comments

Comments
 (0)