
code updated to use python3, common defs file added #2

Open · wants to merge 2 commits into base: master
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
/dataset
/output
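Both entries are generated locally: dataset/ holds the training images referenced by train_path, and output/ receives the HDF5 files the scripts write, so neither directory belongs in version control.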
41 changes: 41 additions & 0 deletions common_defs.py
@@ -0,0 +1,41 @@
import mahotas
import cv2

# fixed size for input images
fixed_size = (500, 500)

# path to training data
train_path = "dataset/train"

# bins for histogram
bins = 8


# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    # convert to grayscale before computing image moments
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # flatten the seven Hu moment invariants into a 1-D feature vector
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
return feature


# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the haralick texture feature vector
haralick = mahotas.features.haralick(gray).mean(axis=0)
# return the result
return haralick


# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
# convert the image to HSV color-space
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# compute the color histogram
hist = cv2.calcHist(
[image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
# normalize the histogram
cv2.normalize(hist, hist)
# return the histogram
return hist.flatten()
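Together the three descriptors produce a fixed-length global feature vector: cv2.HuMoments yields 7 shape invariants, the direction-averaged Haralick vector has 13 texture values, and with bins = 8 the flattened HSV histogram has 8 ** 3 = 512 entries, i.e. 7 + 13 + 512 = 532 dimensions in total. A minimal usage sketch of the new module (the sample image path and the np.hstack concatenation are illustrative assumptions, not code from this PR):

import cv2
import numpy as np

from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size

# hypothetical sample image; any BGR image OpenCV can read will do
image = cv2.imread("dataset/train/sunflower/image_0001.jpg")
image = cv2.resize(image, fixed_size)

# stack the three descriptors into one 532-dimensional feature vector
global_feature = np.hstack([fd_histogram(image), fd_haralick(image), fd_hu_moments(image)])
print(global_feature.shape)  # (532,)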
63 changes: 10 additions & 53 deletions global.py
@@ -6,54 +6,11 @@
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py

# fixed-sizes for image
fixed_size = tuple((500, 500))

# path to training data
train_path = "dataset/train"

# no.of.trees for Random Forests
num_trees = 100

# bins for histogram
bins = 8

# train_test_split size
test_size = 0.10

# seed for reproducing same results
seed = 9

# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
feature = cv2.HuMoments(cv2.moments(image)).flatten()
return feature

# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
# convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# compute the haralick texture feature vector
haralick = mahotas.features.haralick(gray).mean(axis=0)
# return the result
return haralick

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
# convert the image to HSV color-space
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# compute the color histogram
hist = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
# normalize the histogram
cv2.normalize(hist, hist)
# return the histogram
return hist.flatten()
from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path

# get the training labels
train_labels = os.listdir(train_path)
@@ -108,30 +65,30 @@ def fd_histogram(image, mask=None):

i += 1
k += 1
print "[STATUS] processed folder: {}".format(current_label)
print("[STATUS] processed folder: {}".format(current_label))
j += 1

print "[STATUS] completed Global Feature Extraction..."
print("[STATUS] completed Global Feature Extraction...")

# get the overall feature vector size
print "[STATUS] feature vector size {}".format(np.array(global_features).shape)
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

# get the overall training label size
print "[STATUS] training Labels {}".format(np.array(labels).shape)
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the target labels
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
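# e.g. folder labels like ['daisy', 'rose', 'daisy'] become [0, 1, 0],
# and le.classes_ keeps the mapping back to the original names (illustrative values, not from the PR)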
print "[STATUS] training labels encoded..."
print("[STATUS] training labels encoded...")

# normalize the feature vector in the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print "[STATUS] feature vector normalized..."
print("[STATUS] feature vector normalized...")

print "[STATUS] target labels: {}".format(target)
print "[STATUS] target labels shape: {}".format(target.shape)
print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# save the feature vector using HDF5
h5f_data = h5py.File('output/data.h5', 'w')
@@ -143,4 +100,4 @@ def fd_histogram(image, mask=None):
h5f_data.close()
h5f_label.close()

print "[STATUS] end of training.."
print("[STATUS] end of training..")
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
scikit-learn
h5py
opencv-python
mahotas
scipy
matplotlib
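With this file in place, the dependencies install in one step via pip install -r requirements.txt.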
39 changes: 27 additions & 12 deletions train_test.py
@@ -6,20 +6,35 @@
import h5py
import numpy as np
import os
import glob
import cv2
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.externals import joblib

from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path

# number of trees for Random Forests
num_trees = 100

# train_test_split size
test_size = 0.10

# seed for reproducible results
seed = 9

# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(train_labels)

# create all the machine learning models
models = []
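The appends that populate models are collapsed below. A minimal sketch of the usual (name, estimator) convention, built only from the classifiers imported above; the PR's actual entries and hyperparameters are hidden, so these are assumptions:

# assumed entries; the real ones are in the collapsed lines below
models.append(('LR', LogisticRegression(random_state=seed)))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier(random_state=seed)))
models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))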
@@ -50,22 +65,22 @@
h5f_label.close()

# verify the shape of the feature vector and labels
print "[STATUS] features shape: {}".format(global_features.shape)
print "[STATUS] labels shape: {}".format(global_labels.shape)
print("[STATUS] features shape: {}".format(global_features.shape))
print("[STATUS] labels shape: {}".format(global_labels.shape))

print "[STATUS] training started..."
print("[STATUS] training started...")

# split the training and testing data
(trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
np.array(global_labels),
test_size=test_size,
random_state=seed)

print "[STATUS] splitted train and test data..."
print "Train data : {}".format(trainDataGlobal.shape)
print "Test data : {}".format(testDataGlobal.shape)
print "Train labels: {}".format(trainLabelsGlobal.shape)
print "Test labels : {}".format(testLabelsGlobal.shape)
print("[STATUS] splitted train and test data...")
print("Train data : {}".format(trainDataGlobal.shape))
print("Test data : {}".format(testDataGlobal.shape))
print("Train labels: {}".format(trainLabelsGlobal.shape))
print("Test labels : {}".format(testLabelsGlobal.shape))

# filter all the warnings
import warnings
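The rest of the file is collapsed, but given the cross_val_score and KFold imports at the top, the hidden section most likely scores each model with k-fold cross-validation on the training split. A minimal sketch of that pattern (the fold count and accuracy scoring are assumptions):

# assumed evaluation loop; fold count and scoring are not visible in the diff
for name, model in models:
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring='accuracy')
    print("{}: {:.6f} ({:.6f})".format(name, cv_results.mean(), cv_results.std()))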