Clean up and document code
FrankWhoee committed Feb 12, 2019
1 parent 08544c3 commit f074990
Showing 10 changed files with 260 additions and 128 deletions.
51 changes: 11 additions & 40 deletions CNN-evaluator.py
@@ -1,47 +1,18 @@
from __future__ import print_function
import numpy as np
import keras
from keras.models import load_model
from aura.extractor_util import reshape
from aura.extractor_util import parseAuraDimensions as pAD
from aura.aura_loader import read_file
from aura.aura_loader import get_data

root = "../Aura_Data/";
cancerPath = root + "ChunkedCancerTestset/"
healthyPath = root + "ChunkedHealthyTestset/"
cancerSize = "{256x256x270}"
healthySize = "{136x136x181}"
model = load_model("Model-11.hf")

cl,cw,cn = pAD(cancerSize)
hl,hw,hn = pAD(healthySize)
fl, fw = max(cl, cw, hl, hw), max(cl, cw, hl, hw)
fn = cn + hn
num_classes = 2

model = load_model("Model-v4.hf")

cancerous_test_data = read_file(path=cancerPath + cancerSize + "Chunk9.aura").T
healthy_test_data = read_file(path=healthyPath + healthySize + "Chunk9.aura")
healthy_test_data = reshape(healthy_test_data, (fl,fw, hn)).T
test_data = np.zeros((fn, fl, fw))
for i in range(cn):
    test_data[i] = cancerous_test_data[i]
for i in range(hn):
    test_data[i + cn] = healthy_test_data[i]

labels = np.zeros(fn)
for i in range(cn):
    labels[i] = 1

x_test = test_data
y_test = labels

x_test = test_data.reshape(fn, fl, fw, 1)

model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
# Prepare paths for GCP training
root = "../Aura_Data/"
test_paths = [root + "{136x136x22118}HealthyTestset.aura", root + "{256x256x7021}RIDERTestset.aura",
              root + "{256x256x879}BTPTestset.aura"]
test_data, test_label = get_data(test_paths)
test_n, test_l, test_w = test_data.shape
x_test = test_data.reshape(test_n, test_l, test_w, 1)
y_test = test_label.copy()

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
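For reference, a minimal sketch (with toy shapes) of the shape handling the new evaluator relies on: get_data returns images as (n, l, w) and integer labels as (n,), Conv2D inputs need a trailing channel axis, and sparse_categorical_crossentropy consumes the integer labels directly.

import numpy as np

# Toy stand-ins for get_data's outputs: (n, l, w) images and (n,) integer labels.
test_data = np.zeros((4, 256, 256), dtype=np.float16)
test_label = np.array([0, 1, 1, 0])

# Keras convolutions expect a channel axis: (n, l, w) -> (n, l, w, 1).
test_n, test_l, test_w = test_data.shape
x_test = test_data.reshape(test_n, test_l, test_w, 1)

# sparse_categorical_crossentropy takes integer class indices, so no one-hot encoding is needed.
y_test = test_label.copy()
print(x_test.shape, y_test.shape)  # (4, 256, 256, 1) (4,)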
64 changes: 53 additions & 11 deletions CNN-single-demo.py
@@ -4,32 +4,74 @@
from aura.decode import decode
from aura.decode import preprocess
from aura.decode import view_image as view
from aura.aura_loader import parse_aura_dimensions
from sys import stderr
from time import sleep

print("Loading model...")
model = load_model("Model-11.hf")
print("Model loaded.")

root = "../Aura_Data/Dataset/";
# Prepare paths
root = "../Aura_Data/Dataset/"
cancer_path = root + "{256x256x7021}RIDERTestset.aura"
healthy_path = root + "{136x136x22118}HealthyTestset.aura"
btp_path = root + "{256x256x879}BTPTestset.aura"

model = load_model("Model-11-1.hf")
# image = read_file(root + "ChunkedHealthyTestset/{136x136x181}Chunk1.aura").T[50]
imageCancer = read_file(root + "{256x256x7021}RIDERTestset.aura").T[int(input("Choose image from cancerous test set (0-7020)"))]
imageHealthy = read_file(root + "{136x136x22118}HealthyTestset.aura").T[int(input("Choose image from healthy test set (0-22117)"))]
imageBTP = read_file(root + "{256x256x879}BTPTestset.aura").T[int(input("Choose image from a cancerous test set from another database (0-879)"))]
# image = dcm.read_file(root + "Unextracted/CPTAC-GBM/C3L-00016/11-15-1999-MR BRAIN WOW CONTRAST-47088/8-AX 3D SPGR-43615/000199.dcm").pixel_array
cl, cw, cn = parse_aura_dimensions(cancer_path)
hl, hw, hn = parse_aura_dimensions(healthy_path)
bl, bw, bn = parse_aura_dimensions(btp_path)

print("")

def query_user(question, n, min=0):
    """
    Asks the user a question on the console and returns their answer as an integer.
    :param question: String question to show the user.
    :param n: Integer upper bound (inclusive).
    :param min: Integer lower bound (inclusive); defaults to 0.
    :return: The user's answer as an integer.
    """
    user_question = question + " (" + str(min) + "-" + str(n) + ")"
    image_index = input(user_question)
    while not image_index.isdigit() or int(image_index) > n or int(image_index) < min:
        stderr.write("\nPlease enter a number between " + str(min) + " and " + str(n) + "\n")
        sleep(0.01)
        image_index = input(user_question)
    return int(image_index)


# Query users for input
cancer_image_index = query_user("Choose image from cancerous test set", cn - 1)
healthy_image_index = query_user("Choose image from healthy test set", hn - 1)
btp_image_index = query_user("Choose image from another cancerous test set", bn - 1)

imageCancer = read_file(cancer_path).T[cancer_image_index]
imageHealthy = read_file(healthy_path).T[healthy_image_index]
imageBTP = read_file(btp_path).T[btp_image_index]

print("Processing images...")
# Compile images into one array
all_images = [imageHealthy, imageCancer, imageBTP]
all_predictions = []

# Preprocess all images and plot them.
for index, image in enumerate(all_images):
    view(image)
    all_images[index] = preprocess(image)
print("Images processed.")

print("Analysing images...")
# Use model to predict all images, and compile into all_predictions
for index, image in enumerate(all_images):
    all_predictions.append(decode(model.predict(image)))

print("Images analysed. Processing results...")
print("\n---------------------RESULTS---------------------")
# Print out results.
for i, prediction in enumerate(all_predictions):
    if prediction[0][1] > 0.5:
        print("Patient " + str(i) + " is healthy.")
        print("Confidence: " + str(prediction[0][1] * 100)[0:4] + "%\n")
    elif prediction[1][1] > 0.5:
        print("Patient " + str(i) + " has GBM.")
18 changes: 13 additions & 5 deletions aura/aura_breaker.py
@@ -3,9 +3,14 @@
import os
import time
import random
from matplotlib import pyplot as plt

def break_aura(path, pieces):
    """
    Breaks an aura file into smaller chunks and saves the chunks to local folders.
    :param path: String path to the aura file being chunked.
    :param pieces: Integer number of chunks to produce.
    """
    array = aura_loader.read_file(path)
    filepath = "../ChunkedAura" + str(time.time())[5:10]
    print("Saving to " + filepath)
@@ -25,7 +30,14 @@ def break_aura(path,pieces):
        chunk.tofile(f)
    print("----------------- CHUNKING COMPLETE -----------------")


def percentise_aura(path, percent):
    """
    Breaks an aura file into two pieces whose sizes are set by percent.
    :param path: String path to the aura file being chunked.
    :param percent: Float fraction of the images to put in the first chunk.
    """
    array = aura_loader.read_file(path).T
    random.shuffle(array)
    filepath = "../ChunkedAura" + str(time.time())[5:10]
@@ -58,7 +70,3 @@ def percentise_aura(path,percent):

print("----------------- CHUNKING COMPLETE -----------------")


percentise_aura("{256x256x8798}btp.aura", 0.90)
# percentise_aura("../../Aura_Data/{136x136x221182}Healthy.aura", 0.90)

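A usage sketch for the two functions above (the paths here are illustrative): break_aura splits a dataset into a fixed number of chunks, while percentise_aura makes a shuffled two-way split, e.g. 90/10 for training and testing.

from aura import aura_breaker

# Split one large aura file into 10 chunks that can be loaded piecewise.
aura_breaker.break_aura("../Aura_Data/{256x256x7021}RIDERTestset.aura", 10)

# Put 90% of the shuffled images in the first output file and 10% in the second.
aura_breaker.percentise_aura("../Aura_Data/{256x256x8798}btp.aura", 0.90)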
106 changes: 97 additions & 9 deletions aura/aura_loader.py
@@ -1,25 +1,113 @@
import time
import numpy
import os
from aura.extractor_util import parse_aura_dimensions
import numpy as np
from aura.extractor_util import reshape
from aura.extractor_util import parse_aura_dimensions as pAD
import random


def read_file(path):
    """
    Reads an aura file, converting it to a numpy array.
    :param path: Path to the aura file.
    :return: A numpy array of shape (l, w, n).
    """
    filename = path.split("/")
    filename = filename[len(filename) - 1]
    l, w, n = filename[filename.find("{") + 1: filename.rfind("}")].split("x")
    l, w, n = int(l), int(w), int(n)
    l, w, n = parse_aura_dimensions(filename)
    print("Loading " + filename + "...")
    initial = time.time()

    # Load unshaped array into numpy
    unshaped_array = numpy.fromfile(path, dtype=numpy.float16)

    # Determine the number of images by dividing the length of the unshaped array by the area of each image.
    num_of_images = int(len(unshaped_array) / (l * w))
    if num_of_images != n:
        unshaped_array = numpy.fromfile(path)
        num_of_images = int(len(unshaped_array) / (l * w))
    final = time.time()
    difference = final - initial
    print(num_of_images, "images loaded in", str(difference)[0:5], "seconds.")

    # Reshape the array to a 3D matrix.
    return unshaped_array.reshape(l, w, num_of_images)

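As read_file treats it, the .aura format is just a raw float16 buffer whose dimensions are encoded in the {LxWxN} filename prefix. A minimal roundtrip sketch (the filename and shapes here are illustrative):

import numpy as np

l, w, n = 136, 136, 2
images = np.random.rand(l, w, n).astype(np.float16)

# Write the raw buffer; the dimensions live only in the filename,
# which is all read_file needs to reshape the data later.
filename = "{%dx%dx%d}demo.aura" % (l, w, n)
images.tofile(filename)

# Read it back the way read_file does.
restored = np.fromfile(filename, dtype=np.float16).reshape(l, w, n)
assert np.array_equal(images, restored)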

# This function takes in a list of paths to extract data and converts it to a numpy array.
def get_data(training_data_paths, shuffle=True):
    """
    :param training_data_paths: A list of paths from which to extract data; shapes must be (l, w, n).
    :param shuffle: Whether to shuffle the images before returning them.
    :return: Two numpy arrays: shuffled data of shape (n, l, w) and dtype numpy.float16, and labels of shape (n,).
        n: number of images
        l: length of each image
        w: width of each image
    """
    init_time = time.time()
    print("Retrieving data from " + str(len(training_data_paths)) + " paths.")
    sizes = []
    l, w = pAD(training_data_paths[0][training_data_paths[0].find("{"):training_data_paths[0].find("}") + 1])[0:2]
    for filename in training_data_paths:
        print("Recording dimensions of " + filename)
        # fl: file length, fw: file width, fn: file number of images
        fl, fw, fn = pAD(filename[filename.find("{"):filename.find("}") + 1])
        if fl > l:
            l = fl
        if fw > w:
            w = fw
        sizes.append(fn)
    n = sum(sizes)
    print(str(n) + " images found.")
    # train_data is a numpy array of shape (n, l, w) with data type numpy.float16
    train_data = np.zeros((n, l, w), dtype=np.float16)

    # Load in all data
    print("Loading data.")
    data = []
    for size, path in enumerate(training_data_paths):
        raw_data = read_file(path=path)
        raw_data = reshape(raw_data, (l, w, sizes[size])).T
        data.append(raw_data)

    # Compile data[] into output
    print("Compiling data into one array.")
    index_of_train_data = 0
    for index, package in enumerate(data):
        for image in package:
            train_data[index_of_train_data] = image
            index_of_train_data += 1

    # Label training data
    print("Labelling data.")
    data = []
    index_of_train_data = 0
    for size_index in range(len(sizes)):
        for index in range(sizes[size_index]):
            data.append((train_data[index_of_train_data], size_index))
            index_of_train_data += 1

    if shuffle:
        print("Shuffling data.")
        random.shuffle(data)

    print("Separating labels.")
    # Separate training images and labels
    labels = np.zeros(n)
    train_data = np.zeros((n, l, w))
    for i, (image, label) in enumerate(data):
        train_data[i] = image
        labels[i] = label

    final_time = time.time()
    duration = final_time - init_time
    print("Data retrieval complete. Process took " + str(duration) + " seconds.")
    return train_data, labels
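A hedged sketch of how a caller might feed get_data's output into Keras. The tiny architecture is illustrative only, not the project's Model-11; the paths are the test sets named elsewhere in this commit.

import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from aura.aura_loader import get_data

paths = ["../Aura_Data/{136x136x22118}HealthyTestset.aura",
         "../Aura_Data/{256x256x7021}RIDERTestset.aura"]
train_data, labels = get_data(paths)  # labels are the indices of the source paths

# Add the channel axis Keras convolutions expect.
n, l, w = train_data.shape
x_train = train_data.reshape(n, l, w, 1)

model = Sequential([
    Conv2D(8, (3, 3), activation="relu", input_shape=(l, w, 1)),
    Flatten(),
    Dense(2, activation="softmax"),  # one output per source path / class
])
model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=["accuracy"])
model.fit(x_train, labels, epochs=1, batch_size=32)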
4 changes: 2 additions & 2 deletions aura/automate_organisation.py
@@ -1,5 +1,5 @@
import os

# Deprecated script
# This python script was used to extract all the nii files from a download from humanconnectome
# To use it:
# 1. Dump all of your data into one folder named Aura_Data/Healthy
@@ -52,4 +52,4 @@
root_path + "/Healthy/NIFTI/" + foldername[0:6] + "/" + f)

else:
    print(foldername + " is a file")
41 changes: 16 additions & 25 deletions aura/dcm_extractor.py
@@ -1,15 +1,14 @@
import pydicom as dicom
import os, numpy, sys, time
from matplotlib import pyplot as plt
from aura import extractor_util as eu
import scipy.misc
sys.stderr.write("WARNING: All .dcm files must have the same image dimensions.\n")
time.sleep(0.01)
# path_data = input("Path to folder containing all .dcm files: ")
newFilename = input("Filename to dump information into: ")
path_data = "../../Aura_Data/Unextracted/Brain-Tumor-Progression"
if ".aura" not in newFilename:
newFilename += ".aura"

path_data = input("Path to folder containing all .dcm files: ")
new_filename = input("Filename to dump information into: ")
resize_l = input("Length to resize images to:")
resize_w = input("Width to resize images to:")

if ".aura" not in new_filename:
new_filename += ".aura"

lstFilesDCM = []
print("Reading path...")
@@ -30,27 +29,21 @@
x = numpy.arange(0.0, (ConstPixelDims[0]+1)*ConstPixelSpacing[0], ConstPixelSpacing[0])
y = numpy.arange(0.0, (ConstPixelDims[1]+1)*ConstPixelSpacing[1], ConstPixelSpacing[1])
z = numpy.arange(0.0, (ConstPixelDims[2]+1)*ConstPixelSpacing[2], ConstPixelSpacing[2])
ConstPixelDims = (256,256,len(lstFilesDCM))
ArrayDicom = numpy.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)
ConstPixelDims = (resize_l, resize_w, len(lstFilesDCM))
array_dicom = numpy.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)

new_filename = "{" + str(array_dicom.shape[0]) + "x" + str(array_dicom.shape[1]) + "x" + str(array_dicom.shape[2]) + "}" + new_filename
print("Saving to " + new_filename)

newFilename = "{" + str(ArrayDicom.shape[0]) + "x" + str(ArrayDicom.shape[1]) + "x" + str(ArrayDicom.shape[2]) + "}" + newFilename
print("Saving to " + newFilename)
# loop through all the DICOM files
print("Loading images into numpy array...")
image_num = 0
progress_bar_length = 50

for filenameDCM in lstFilesDCM:
    # print("Extracting " + filenameDCM)
    # read the file
    ds = dicom.dcmread(filenameDCM)
    # print(ds.tags)
    # if image_num > 400:
    #     plt.imshow(ds.pixel_array)
    #     plt.show()
    # store the raw image data
    try:
        array_dicom[:, :, lstFilesDCM.index(filenameDCM)] = scipy.misc.imresize(ds.pixel_array, (resize_l, resize_w))
    except:
        print(ds.pixel_array.shape)
    sys.stdout.write('\r')
Expand All @@ -64,7 +57,5 @@
    sys.stdout.flush()
    image_num += 1

ArrayDicom.tofile(newFilename)
array_dicom.tofile(new_filename)
print("\n\n----------------------- DATA EXTRACTION COMPLETE. -----------------------")
print("Your matrix dimensions are (length, width, number of images): ", ConstPixelDims)
