fixed test/validation nomenclature #18

Open · wants to merge 5 commits into master
44 changes: 22 additions & 22 deletions glia/machine_learning.py
@@ -14,15 +14,15 @@
 logger = logging.getLogger('glia')


-TVT = namedtuple("TVT", ['training', "validation", "test"])
+TVT = namedtuple("TVT", ['training', "test", "validation"])

-def tvt_by_percentage(n, training=60, validation=20,testing=20):
+def tvt_by_percentage(n, training=60, testing=20, validation=20):
     summed = training+validation+testing
     assert summed==100
     train = int(np.floor(n*training/100))
     valid = int(np.ceil(n*validation/100))
     test = n - valid - train
-    return TVT(train, valid, test)
+    return TVT(train, test, valid)
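Because `TVT` is constructed positionally at every call site, the namedtuple field order and the `tvt_by_percentage` parameter order must stay in sync after this rename. A quick sanity check (a sketch, assuming the definitions above):

```python
# Renamed order: training, test, validation.
print(tvt_by_percentage(100))            # TVT(training=60, test=20, validation=20)
print(tvt_by_percentage(50, 60, 40, 0))  # TVT(training=30, test=20, validation=0)
```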

 def f_split_dict(tvt):
     """Subset dict into training, validation, & test."""
@@ -34,14 +34,14 @@ def anonymous(dictionary):
             v = dictionary[k]
             if i < tvt.training:
                 split.training[k] = v
-            elif i < tvt.validation + tvt.training:
-                split.validation[k] = v
-            elif i < tvt.test + tvt.validation + tvt.training:
+            elif i < tvt.test + tvt.training:
                 split.test[k] = v
+            elif i < tvt.validation + tvt.test + tvt.training:
+                split.validation[k] = v
             else:
-                raise(ValueError, 'bad training, validation & test split.')
+                raise(ValueError, 'bad training, test & validation split.')
             i += 1
-        assert i == tvt.training+tvt.validation+tvt.test
+        assert i == tvt.training+tvt.test+tvt.validation
         return split

     return anonymous
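Usage sketch for the renamed split (this assumes the folded lines initialize `split = TVT({}, {}, {})` and a counter `i = 0`, as the visible loop implies):

```python
counts = tvt_by_percentage(10, 60, 20, 20)  # TVT(training=6, test=2, validation=2)
split = f_split_dict(counts)({f"unit-{j}": j for j in range(10)})
assert len(split.training) == 6
assert len(split.test) == 2
assert len(split.validation) == 2
```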
@@ -65,7 +65,7 @@ def anonymous(dictionary):
 )

 def tvt_map(tvt, f):
-    return TVT(f(tvt.training), f(tvt.validation), f(tvt.test))
+    return TVT(f(tvt.training), f(tvt.test), f(tvt.validation))
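`tvt_map` rebuilds the tuple positionally, so it stays correct only because the construction order here matches the new field order. For example (sketch):

```python
split = TVT(list(range(6)), list(range(2)), list(range(2)))
assert tvt_map(split, len) == TVT(training=6, test=2, validation=2)
```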

 def f_split_list(tvt, get_list=lambda x: x):
     """Subset list into training, validation, & test."""
@@ -75,16 +75,16 @@ def anonymous(x):
         for i,v in enumerate(my_list):
             if i < tvt.training:
                 split.training.append(v)
-            elif i < tvt.validation + tvt.training:
-                split.validation.append(v)
-            elif i < tvt.test + tvt.validation + tvt.training:
+            elif i < tvt.test + tvt.training:
                 split.test.append(v)
+            elif i < tvt.validation + tvt.test + tvt.training:
+                split.validation.append(v)
             else:
-                raise(ValueError, 'bad training, validation & test split.')
+                raise(ValueError, 'bad training, test & validation split.')
         try:
-            assert len(my_list) == tvt.training+tvt.validation+tvt.test
+            assert len(my_list) == tvt.training+tvt.test+tvt.validation
         except Exception as e:
-            print(len(my_list), tvt.training+tvt.validation+tvt.test)
+            print(len(my_list), tvt.training+tvt.test+tvt.validation)
             raise e
         return split
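`f_split_list` splits by position, so the first `tvt.training` items always land in training; callers wanting a random split should shuffle first (usage sketch, with `get_list` left at its identity default):

```python
import random

items = list(range(10))
random.shuffle(items)  # shuffle first for a random split
split = f_split_list(tvt_by_percentage(10))(items)
assert [len(s) for s in split] == [6, 2, 2]  # training, test, validation
```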

@@ -202,13 +202,13 @@ def bin_sum(data):
                     sorted(list(letter_map.items()),
                            key=lambda x: x[1])))

-def classifier_helper(classifier, training, validation, classes=letter_classes):
+def classifier_helper(classifier, training, test, classes=letter_classes):
     training_data, training_target = training
-    validation_data, validation_target = validation
+    test_data, test_target = test

     classifier.fit(training_data, training_target)
-    predicted = classifier.predict(validation_data)
-    expected = validation_target
+    predicted = classifier.predict(test_data)
+    expected = test_target

     report = metrics.classification_report(expected, predicted)
     confusion = confusion_matrix(expected, predicted, classes)
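A toy run of the renamed helper (a sketch: it assumes the folded lines return `(report, confusion)`, and uses sklearn's digits set in place of the letter data):

```python
from sklearn import datasets, svm

digits = datasets.load_digits()
X, y = digits.data, digits.target
report, confusion = classifier_helper(
    svm.SVC(gamma=0.001),
    (X[:1000], y[:1000]),    # training
    (X[1000:], y[1000:]),    # test (formerly "validation")
    classes=list(range(10)))
print(report)
```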
@@ -259,12 +259,12 @@ def get_checkerboard_contrasts(stimulus_list):
     assert len(contrasts)>0
     return contrasts

-def svm_helper(training_data, training_target, validation_data, validation_target):
+def svm_helper(training_data, training_target, test_data, test_target):
     # Create a classifier: a support vector classifier
     classifier = svm.SVC()
     classifier.fit(training_data, training_target)

-    predicted = classifier.predict(validation_data)
-    expected = validation_target
+    predicted = classifier.predict(test_data)
+    expected = test_target

     return metrics.accuracy_score(expected, predicted)
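Minimal usage (sketch; with random features, accuracy should hover near chance, ~0.5):

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(120, 8)        # 120 samples, 8 features
y = rng.randint(0, 2, 120)  # binary labels
acc = svm_helper(X[:80], y[:80], X[80:], y[80:])
print(f"test accuracy: {acc:.2f}")
```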
10 changes: 5 additions & 5 deletions glia/plot.py
@@ -498,19 +498,19 @@ def raster_group(fig, axis_gen, data):
 def error_bars(data, target, ndraws=20):
     n = data.shape[0]
     accuracy = np.full((ndraws,), 0)
-    (ntrain, nvalid, _) = tvt_by_percentage(n,60,40,0)
+    (ntrain, ntest, _) = tvt_by_percentage(n,60,40,0)
     indices = np.arange(n)
     for i in range(ndraws):
         np.random.shuffle(indices)
         training_ind = indices[0:ntrain]
-        validation_ind = indices[ntrain:]
+        test_ind = indices[ntrain:]

         training_data = data[training_ind]
         training_target = target[training_ind]
-        validation_data = data[validation_ind]
-        validation_target = target[validation_ind]
+        test_data = data[test_ind]
+        test_target = target[test_ind]

         accuracy[i] = svm_helper(training_data, training_target,
-                                 validation_data, validation_target)
+                                 test_data, test_target)
     std = np.std(accuracy)
     return (np.mean(accuracy),std,std)
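Note the positional call: under the new signature, `tvt_by_percentage(n, 60, 40, 0)` reads training=60%, testing=40%, validation=0%, which matches the renamed unpacking above. A quick check (sketch; separately, and unrelated to this PR, `np.full((ndraws,), 0)` allocates an integer array, so fractional accuracies would truncate — `np.full((ndraws,), 0.0)` avoids that):

```python
ntrain, ntest, nvalid = tvt_by_percentage(50, 60, 40, 0)
assert (ntrain, ntest, nvalid) == (30, 20, 0)  # 60/40/0 split of 50 samples
```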
92 changes: 46 additions & 46 deletions glia_scripts/classify/svc.py
@@ -13,10 +13,10 @@
 from scipy.stats import binom

 def plot_acuity(logmar, accuracy, yerror,
-        n_validation, name, conditions, condition_name, plot_directory):
+        n_test, name, conditions, condition_name, plot_directory):
     print(f"plotting {name} {condition_name} classification accuracy.")
-    sig5 = np.repeat(binom.ppf(0.95, n_validation, 0.5)/n_validation, len(logmar))
-    sig1 = np.repeat(binom.ppf(0.99, n_validation, 0.5)/n_validation, len(logmar))
+    sig5 = np.repeat(binom.ppf(0.95, n_test, 0.5)/n_test, len(logmar))
+    sig1 = np.repeat(binom.ppf(0.99, n_test, 0.5)/n_test, len(logmar))

     fig, ax = plt.subplots()
     nconditions = len(conditions)
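For intuition: `sig5` and `sig1` are the accuracies a chance-level (p=0.5) classifier would exceed only 5% and 1% of the time across `n_test` trials — binomial critical values scaled to proportions (sketch):

```python
from scipy.stats import binom

n_test = 40
sig5 = binom.ppf(0.95, n_test, 0.5) / n_test  # ≈ 0.625 for n_test=40
sig1 = binom.ppf(0.99, n_test, 0.5) / n_test  # ≈ 0.675 for n_test=40
```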
@@ -51,37 +51,37 @@ def plot_acuity(logmar, accuracy, yerror,
     ax.set_title(f'{name} classification by {condition_name}')
     fig.savefig(os.path.join(plot_directory, f"{name}-{condition_name}_acuity.png"))

-def acuity(training_data, training_target, validation_data, validation_target,
+def acuity(training_data, training_target, test_data, test_target,
         stimulus_list, plot_directory, name,
         sizes, conditions, condition_name):
     print(f"training classifiers.")
     # polymorphic over ndarray or list for conditions
     nconditions = len(training_data)
     assert nconditions==len(training_target)
-    assert nconditions==len(validation_data)
-    assert nconditions==len(validation_target)
+    assert nconditions==len(test_data)
+    assert nconditions==len(test_target)
     nsizes = training_data[0].shape[0]
     assert nsizes==training_target[0].shape[0]
-    assert nsizes==validation_data[0].shape[0]
-    assert nsizes==validation_target[0].shape[0]
-    n_validation = validation_data[0].shape[1]
+    assert nsizes==test_data[0].shape[0]
+    assert nsizes==test_target[0].shape[0]
+    n_test = test_data[0].shape[1]

     nclasses = 2
     accuracy_100 = np.full((nconditions, nsizes), 0, dtype=np.float)
     yerror = np.full((nconditions,2,nsizes),0, dtype=np.float)
     for condition in range(nconditions):
         for size in range(nsizes):
             data = np.concatenate(
-                [training_data[condition][size],validation_data[condition][size]])
+                [training_data[condition][size],test_data[condition][size]])
             target = np.concatenate(
-                [training_target[condition][size],validation_target[condition][size]])
+                [training_target[condition][size],test_target[condition][size]])
             (mean,below,above) = glia.error_bars(data,target)
             accuracy_100[condition, size] = mean
             yerror[condition, :, size] = [below,above]

     logmar = list(map(glia.px_to_logmar,sizes))

-    plot_acuity(logmar, accuracy_100, yerror, n_validation,
+    plot_acuity(logmar, accuracy_100, yerror, n_test,
         name, conditions, condition_name, plot_directory)


@@ -91,32 +91,32 @@ def checkerboard_svc(data, metadata, stimulus_list, lab_notebook, plot_directory
     name = metadata["name"]
     if name=='checkerboard-contrast':
         training_data = glia.bin_100ms(data["training_data"])
-        validation_data = glia.bin_100ms(data["validation_data"])
+        test_data = glia.bin_100ms(data["test_data"])
         training_target = data["training_target"]
-        validation_target = data["validation_target"]
+        test_target = data["test_target"]

         conditions = glia.get_checkerboard_contrasts(stimulus_list)
         condition_name = "contrast"
     elif name=="checkerboard-durations":
         training_data = glia.bin_100ms(data["training_data"])
-        validation_data = glia.bin_100ms(data["validation_data"])
+        test_data = glia.bin_100ms(data["test_data"])
         training_target = data["training_target"]
-        validation_target = data["validation_target"]
+        test_target = data["test_target"]

         conditions = glia.get_stimulus_parameters(stimulus_list,
             "CHECKERBOARD", 'lifespan')
         condition_name = "durations"
     elif name=="checkerboard":
         training_100ms = glia.bin_100ms(data["training_data"])[0]
         training_sum = glia.bin_sum(data["training_data"])[0]
         training_data = [training_100ms, training_sum]
-        validation_100ms = glia.bin_100ms(data["validation_data"])[0]
-        validation_sum = glia.bin_sum(data["validation_data"])[0]
-        validation_data = [validation_100ms, validation_sum]
+        test_100ms = glia.bin_100ms(data["test_data"])[0]
+        test_sum = glia.bin_sum(data["test_data"])[0]
+        test_data = [test_100ms, test_sum]
         tt = data["training_target"][0]
         training_target = [tt,tt]
-        vt = data["validation_target"][0]
-        validation_target = [vt,vt]
+        vt = data["test_target"][0]
+        test_target = [vt,vt]

         conditions = ['100ms bins', 'spike count']
         condition_name = None
@@ -126,7 +126,7 @@ def checkerboard_svc(data, metadata, stimulus_list, lab_notebook, plot_directory
         plot_diff_nsamples(data, stimulus_list, plot_directory,
             "checkerboard", sizes, conditions, condition_name)
     else:
-        acuity(training_data, training_target, validation_data, validation_target,
+        acuity(training_data, training_target, test_data, test_target,
             stimulus_list, plot_directory, "checkerboard",
             sizes, conditions, condition_name)
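The rename also changes the keys these `*_svc` entry points expect in `data` (a sketch of the assumed schema after this PR, with shapes inferred from the asserts in `acuity`):

```python
data = {
    "training_data":   ...,  # per condition: array of shape (nsizes, nsamples, ...)
    "training_target": ...,
    "test_data":       ...,  # formerly "validation_data"
    "test_target":     ...,  # formerly "validation_target"
}
```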

@@ -136,39 +136,39 @@ def grating_svc(data, metadata, stimulus_list, lab_notebook, plot_directory,
     sizes = glia.get_stimulus_parameters(stimulus_list, "GRATING", "width")
     if metadata["name"]=='grating-contrast':
         training_data = glia.bin_100ms(data["training_data"])
-        validation_data = glia.bin_100ms(data["validation_data"])
+        test_data = glia.bin_100ms(data["test_data"])
         training_target = data["training_target"]
-        validation_target = data["validation_target"]
+        test_target = data["test_target"]

         conditions = get_grating_contrasts(stimulus_list)
         condition_name = "contrast"
     elif metadata["name"]=="grating-durations":
         training_data = glia.bin_100ms(data["training_data"])
-        validation_data = glia.bin_100ms(data["validation_data"])
+        test_data = glia.bin_100ms(data["test_data"])
         training_target = data["training_target"]
-        validation_target = data["validation_target"]
+        test_target = data["test_target"]

         conditions = glia.get_stimulus_parameters(stimulus_list, "GRATING", 'lifespan')
         condition_name = "durations"
     elif metadata["name"]=="grating-speeds":
         training_data = glia.bin_100ms(data["training_data"])
-        validation_data = glia.bin_100ms(data["validation_data"])
+        test_data = glia.bin_100ms(data["test_data"])
         training_target = data["training_target"]
-        validation_target = data["validation_target"]
+        test_target = data["test_target"]

         conditions = glia.get_stimulus_parameters(stimulus_list, "GRATING", 'speed')
         condition_name = "speeds"
     elif metadata["name"]=="grating":
         training_100ms = glia.bin_100ms(data["training_data"])[0]
         training_sum = glia.bin_sum(data["training_data"])[0]
         training_data = [training_100ms, training_sum]
-        validation_100ms = glia.bin_100ms(data["validation_data"])[0]
-        validation_sum = glia.bin_sum(data["validation_data"])[0]
-        validation_data = [validation_100ms, validation_sum]
+        test_100ms = glia.bin_100ms(data["test_data"])[0]
+        test_sum = glia.bin_sum(data["test_data"])[0]
+        test_data = [test_100ms, test_sum]
         tt = data["training_target"][0]
         training_target = [tt,tt]
-        vt = data["validation_target"][0]
-        validation_target = [vt,vt]
+        vt = data["test_target"][0]
+        test_target = [vt,vt]

         conditions = ['100ms bins', 'spike count']
         condition_name = None
@@ -178,7 +178,7 @@ def grating_svc(data, metadata, stimulus_list, lab_notebook, plot_directory,
         plot_diff_nsamples(data, stimulus_list, plot_directory,
             "grating", sizes, conditions, condition_name)
     else:
-        acuity(training_data, training_target, validation_data, validation_target,
+        acuity(training_data, training_target, test_data, test_target,
             stimulus_list, plot_directory, "grating",
             sizes, conditions, condition_name)

@@ -193,22 +193,22 @@ def letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory,
     logger.debug(data["training_data"].shape)
     # add nconditions dim
     training_100ms = glia.bin_100ms(np.expand_dims(data["training_data"],0))
-    validation_100ms = glia.bin_100ms(np.expand_dims(data["validation_data"],0))
+    test_100ms = glia.bin_100ms(np.expand_dims(data["test_data"],0))
     logger.debug(f'training_100ms shape {training_100ms.shape}')
     logger.debug(f'sizes {sizes}')
     for i, size in enumerate(sizes):
         print(f'SVC for size {size}')
         # note: no expand dims, hardcoded 1 ncondition
         training_target = data["training_target"][i]
-        validation_target = data["validation_target"][i]
+        test_target = data["test_target"][i]
         logger.debug(np.size(training_target))
         svr = svm.SVC()
         parameters = {'C': [1, 10, 100, 1000],
                       'gamma': [0.001, 0.0001]},
         clf = GridSearchCV(svr, parameters, n_jobs=12)
         report, confusion = glia.classifier_helper(clf,
             (training_100ms[0,i], training_target),
-            (validation_100ms[0,i], validation_target))
+            (test_100ms[0,i], test_target))
         with open(f"{plot_directory}/letter-{size}.txt", "w") as f:
             f.write(report+'\n')
             f.write(str(confusion))
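One pre-existing quirk, unrelated to the rename: the trailing comma after the `parameters` dict makes it a one-element tuple. `GridSearchCV` tolerates that, since a sequence of dicts is treated as a list of parameter grids, but dropping the comma is clearer (sketch):

```python
from sklearn import svm
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001]}  # no trailing comma
clf = GridSearchCV(svm.SVC(), param_grid, n_jobs=12)
```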
@@ -224,23 +224,23 @@ def tiled_letter_svc(data, metadata, stimulus_list, lab_notebook, plot_directory
     # n_sizes, n_training, n_steps, n_x, n_y, n_units = data["training_data"].shape
     logger.debug(data["training_data"].shape)
     # add nconditions dim
     training_100ms = glia.bin_100ms(np.expand_dims(data["training_data"],0))
-    validation_100ms = glia.bin_100ms(np.expand_dims(data["validation_data"],0))
+    test_100ms = glia.bin_100ms(np.expand_dims(data["test_data"],0))
     logger.debug(f'training_100ms shape {training_100ms.shape}')
     logger.debug(f'sizes {sizes}')
     for i, size in enumerate(sizes):
         print(f'SVC for size {size}')
         # note: no expand dims, hardcoded 1 ncondition
         training_target = data["training_target"][i]
-        validation_target = data["validation_target"][i]
+        test_target = data["test_target"][i]
         logger.debug(np.size(training_target))
         svr = svm.SVC()
         parameters = {'C': [1, 10, 100, 1000],
                       'gamma': [0.001, 0.0001]},
         clf = GridSearchCV(svr, parameters, n_jobs=12)
         report, confusion = glia.classifier_helper(clf,
             (training_100ms[0,i], training_target),
-            (validation_100ms[0,i], validation_target))
+            (test_100ms[0,i], test_target))
         with open(f"{plot_directory}/letter-{size}.txt", "w") as f:
             f.write(report+'\n')
             f.write(str(confusion))
@@ -254,22 +254,22 @@ def image_svc(data, metadata, stimulus_list, lab_notebook, plot_directory,
     logger.debug(data["training_data"].shape)
     # add nconditions dim
     training_100ms = glia.bin_100ms(np.expand_dims(data["training_data"],0))
-    validation_100ms = glia.bin_100ms(np.expand_dims(data["validation_data"],0))
+    test_100ms = glia.bin_100ms(np.expand_dims(data["test_data"],0))
    logger.debug(f'training_100ms shape {training_100ms.shape}')
     logger.debug(f'sizes {sizes}')
     for i, size in enumerate(sizes):
         print(f'SVC for size {size}')
         # note: no expand dims, hardcoded 1 ncondition
         training_target = data["training_target"][i]
-        validation_target = data["validation_target"][i]
+        test_target = data["test_target"][i]
         logger.debug(np.size(training_target))
         svr = svm.SVC()
         parameters = {'C': [1, 10, 100, 1000],
                       'gamma': [0.001, 0.0001]},
         clf = GridSearchCV(svr, parameters, n_jobs=12)
         report, confusion = glia.classifier_helper(clf,
             (training_100ms[0,i], training_target),
-            (validation_100ms[0,i], validation_target))
+            (test_100ms[0,i], test_target))
         with open(f"{plot_directory}/letter-{size}.txt", "w") as f:
             f.write(report+'\n')
             f.write(str(confusion))