
code updated to use python3, common defs file added #2

Open · wants to merge 2 commits into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
/dataset
/output
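Both entries are generated locally: dataset/ holds the training images referenced by train_path, and output/ receives the HDF5 files the scripts write, so neither directory belongs in version control.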
41 changes: 41 additions & 0 deletions common_defs.py
@@ -0,0 +1,41 @@
import mahotas
import cv2

# fixed size for input images
fixed_size = (500, 500)

# path to training data
train_path = "dataset/train"

# bins for histogram
bins = 8


# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    # convert to grayscale before computing image moments
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # flatten the seven Hu moment invariants into a 1-D feature vector
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
return feature


# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the haralick texture feature vector
haralick = mahotas.features.haralick(gray).mean(axis=0)
# return the result
return haralick


# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
# convert the image to HSV color-space
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# compute the color histogram
hist = cv2.calcHist(
[image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
# normalize the histogram
cv2.normalize(hist, hist)
# return the histogram
return hist.flatten()
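Together the three descriptors produce a fixed-length global feature vector: cv2.HuMoments yields 7 shape invariants, the direction-averaged Haralick vector has 13 texture values, and with bins = 8 the flattened HSV histogram has 8 ** 3 = 512 entries, i.e. 7 + 13 + 512 = 532 dimensions in total. A minimal usage sketch of the new module (the sample image path and the np.hstack concatenation are illustrative assumptions, not code from this PR):

import cv2
import numpy as np

from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size

# hypothetical sample image; any BGR image OpenCV can read will do
image = cv2.imread("dataset/train/sunflower/image_0001.jpg")
image = cv2.resize(image, fixed_size)

# stack the three descriptors into one 532-dimensional feature vector
global_feature = np.hstack([fd_histogram(image), fd_haralick(image), fd_hu_moments(image)])
print(global_feature.shape)  # (532,)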
63 changes: 10 additions & 53 deletions global.py
@@ -6,54 +6,11 @@
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py

# fixed-sizes for image
fixed_size = tuple((500, 500))

# path to training data
train_path = "dataset/train"

# no.of.trees for Random Forests
num_trees = 100

# bins for histogram
bins = 8

# train_test_split size
test_size = 0.10

# seed for reproducing same results
seed = 9

# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
feature = cv2.HuMoments(cv2.moments(image)).flatten()
return feature

# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the haralick texture feature vector
haralick = mahotas.features.haralick(gray).mean(axis=0)
# return the result
return haralick

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
# convert the image to HSV color-space
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# compute the color histogram
hist = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
# normalize the histogram
cv2.normalize(hist, hist)
# return the histogram
return hist.flatten()
from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path

# get the training labels
train_labels = os.listdir(train_path)
@@ -108,30 +65,30 @@ def fd_histogram(image, mask=None):

i += 1
k += 1
print "[STATUS] processed folder: {}".format(current_label)
print("[STATUS] processed folder: {}".format(current_label))
j += 1

print "[STATUS] completed Global Feature Extraction..."
print("[STATUS] completed Global Feature Extraction...")

# get the overall feature vector size
print "[STATUS] feature vector size {}".format(np.array(global_features).shape)
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

# get the overall training label size
print "[STATUS] training Labels {}".format(np.array(labels).shape)
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
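# e.g. folder labels like ['daisy', 'rose', 'daisy'] become [0, 1, 0],
# and le.classes_ keeps the mapping back to the original names (illustrative values, not from the PR)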
print "[STATUS] training labels encoded..."
print("[STATUS] training labels encoded...")

# normalize the feature vector in the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print "[STATUS] feature vector normalized..."
print("[STATUS] feature vector normalized...")

print "[STATUS] target labels: {}".format(target)
print "[STATUS] target labels shape: {}".format(target.shape)
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# save the feature vector using HDF5
h5f_data = h5py.File('output/data.h5', 'w')
@@ -143,4 +100,4 @@ def fd_histogram(image, mask=None):
h5f_data.close()
h5f_label.close()

print "[STATUS] end of training.."
print("[STATUS] end of training..")
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
scikit-learn
h5py
opencv-python
mahotas
scipy
matplotlib
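With this file in place, the dependencies install in one step via pip install -r requirements.txt.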
39 changes: 27 additions & 12 deletions train_test.py
@@ -6,20 +6,35 @@
import h5py
import numpy as np
import os
import glob
import cv2
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.externals import joblib

from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path

# number of trees for Random Forests
num_trees = 100

# train_test_split size
test_size = 0.10

# seed for reproducible results
seed = 9

# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(train_labels)

# create all the machine learning models
models = []
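The appends that populate models are collapsed below. A minimal sketch of the usual (name, estimator) convention, built only from the classifiers imported above; the PR's actual entries and hyperparameters are hidden, so these are assumptions:

# assumed entries; the real ones are in the collapsed lines below
models.append(('LR', LogisticRegression(random_state=seed)))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier(random_state=seed)))
models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))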
@@ -50,22 +65,22 @@
h5f_label.close()

# verify the shape of the feature vector and labels
print "[STATUS] features shape: {}".format(global_features.shape)
print "[STATUS] labels shape: {}".format(global_labels.shape)
print("[STATUS] features shape: {}".format(global_features.shape))
print("[STATUS] labels shape: {}".format(global_labels.shape))

print "[STATUS] training started..."
print("[STATUS] training started...")

# split the training and testing data
(trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
np.array(global_labels),
test_size=test_size,
random_state=seed)

print "[STATUS] splitted train and test data..."
print "Train data : {}".format(trainDataGlobal.shape)
print "Test data : {}".format(testDataGlobal.shape)
print "Train labels: {}".format(trainLabelsGlobal.shape)
print "Test labels : {}".format(testLabelsGlobal.shape)
print("[STATUS] splitted train and test data...")
print("Train data : {}".format(trainDataGlobal.shape))
print("Test data : {}".format(testDataGlobal.shape))
print("Train labels: {}".format(trainLabelsGlobal.shape))
print("Test labels : {}".format(testLabelsGlobal.shape))

# filter all the warnings
import warnings
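The rest of the file is collapsed, but given the cross_val_score and KFold imports at the top, the hidden section most likely scores each model with k-fold cross-validation on the training split. A minimal sketch of that pattern (the fold count and accuracy scoring are assumptions):

# assumed evaluation loop; fold count and scoring are not visible in the diff
for name, model in models:
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring='accuracy')
    print("{}: {:.6f} ({:.6f})".format(name, cv_results.mean(), cv_results.std()))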