Clean up and document code
FrankWhoee committed Feb 12, 2019
1 parent 08544c3 commit f074990
Showing 10 changed files with 260 additions and 128 deletions.
51 changes: 11 additions & 40 deletions CNN-evaluator.py
@@ -1,47 +1,18 @@
from __future__ import print_function
import numpy as np
import keras
from keras.models import load_model
from aura.extractor_util import reshape
from aura.extractor_util import parseAuraDimensions as pAD
from aura.aura_loader import read_file
from aura.aura_loader import get_data

root = "../Aura_Data/";
cancerPath = root + "ChunkedCancerTestset/"
healthyPath = root + "ChunkedHealthyTestset/"
cancerSize = "{256x256x270}"
healthySize = "{136x136x181}"
model = load_model("Model-11.hf")

cl,cw,cn = pAD(cancerSize)
hl,hw,hn = pAD(healthySize)
fl, fw = max(cl, cw, hl, hw), max(cl, cw, hl, hw)
fn = cn + hn
num_classes = 2

model = load_model("Model-v4.hf")

cancerous_test_data = read_file(path=cancerPath + cancerSize + "Chunk9.aura").T
healthy_test_data = read_file(path=healthyPath + healthySize + "Chunk9.aura")
healthy_test_data = reshape(healthy_test_data, (fl,fw, hn)).T
test_data = np.zeros((fn, fl, fw))
for i in range(cn):
    test_data[i] = cancerous_test_data[i]
for i in range(hn):
    test_data[i + cn] = healthy_test_data[i]

labels = np.zeros(fn)
for i in range(cn):
    labels[i] = 1

x_test = test_data
y_test = labels

x_test = test_data.reshape(fn, fl, fw, 1)

model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
# Prepare paths for GCP training
root = "../Aura_Data/"
test_paths = [root + "{136x136x22118}HealthyTestset.aura", root + "{256x256x7021}RIDERTestset.aura",
              root + "{256x256x879}BTPTestset.aura"]
test_data, test_label = get_data(test_paths)
test_n, test_l, test_w = test_data.shape
x_test = test_data.reshape(test_n, test_l, test_w, 1)
y_test = test_label.copy()

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
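For reference, a minimal sketch (with toy shapes) of the shape handling the new evaluator relies on: get_data returns images as (n, l, w) and integer labels as (n,), Conv2D inputs need a trailing channel axis, and sparse_categorical_crossentropy consumes the integer labels directly.

import numpy as np

# Toy stand-ins for get_data's outputs: (n, l, w) images and (n,) integer labels.
test_data = np.zeros((4, 256, 256), dtype=np.float16)
test_label = np.array([0, 1, 1, 0])

# Keras convolutions expect a channel axis: (n, l, w) -> (n, l, w, 1).
test_n, test_l, test_w = test_data.shape
x_test = test_data.reshape(test_n, test_l, test_w, 1)

# sparse_categorical_crossentropy takes integer class indices, so no one-hot encoding is needed.
y_test = test_label.copy()
print(x_test.shape, y_test.shape)  # (4, 256, 256, 1) (4,)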
64 changes: 53 additions & 11 deletions CNN-single-demo.py
@@ -4,32 +4,74 @@
from aura.decode import decode
from aura.decode import preprocess
from aura.decode import view_image as view
from aura.aura_loader import parse_aura_dimensions
from sys import stderr
from time import sleep

print("Loading model...")
model = load_model("Model-11.hf")
print("Model loaded.")

root = "../Aura_Data/Dataset/";
# Prepare paths
root = "../Aura_Data/Dataset/"
cancer_path = root + "{256x256x7021}RIDERTestset.aura"
healthy_path = root + "{136x136x22118}HealthyTestset.aura"
btp_path = root + "{256x256x879}BTPTestset.aura"

model = load_model("Model-11-1.hf")
# image = read_file(root + "ChunkedHealthyTestset/{136x136x181}Chunk1.aura").T[50]
imageCancer = read_file(root + "{256x256x7021}RIDERTestset.aura").T[int(input("Choose image from cancerous test set (0-7020)"))]
imageHealthy = read_file(root + "{136x136x22118}HealthyTestset.aura").T[int(input("Choose image from healthy test set (0-22117)"))]
imageBTP = read_file(root + "{256x256x879}BTPTestset.aura").T[int(input("Choose image from a cancerous test set from another database (0-879)"))]
# image = dcm.read_file(root + "Unextracted/CPTAC-GBM/C3L-00016/11-15-1999-MR BRAIN WOW CONTRAST-47088/8-AX 3D SPGR-43615/000199.dcm").pixel_array
cl, cw, cn = parse_aura_dimensions(cancer_path)
hl, hw, hn = parse_aura_dimensions(healthy_path)
bl, bw, bn = parse_aura_dimensions(btp_path)

print("")

def query_user(question, n, min=0):
    """
    Asks the user a question on the console and returns their answer as an integer.
    :param question: String question to show the user.
    :param n: Integer upper bound (inclusive).
    :param min: Integer lower bound (inclusive); defaults to 0.
    :return: The user's answer as an integer.
    """
    user_question = question + " (" + str(min) + "-" + str(n) + ")"
    image_index = input(user_question)
    while not image_index.isdigit() or int(image_index) > n or int(image_index) < min:
        stderr.write("\nPlease enter a number between " + str(min) + " and " + str(n) + "\n")
        sleep(0.01)
        image_index = input(user_question)
    return int(image_index)


# Query users for input
cancer_image_index = query_user("Choose image from cancerous test set", cn - 1)
healthy_image_index = query_user("Choose image from healthy test set", hn - 1)
btp_image_index = query_user("Choose image from another cancerous test set", bn - 1)

imageCancer = read_file(cancer_path).T[cancer_image_index]
imageHealthy = read_file(healthy_path).T[healthy_image_index]
imageBTP = read_file(btp_path).T[btp_image_index]

print("Processing images...")
# Compile images into one array
all_images = [imageHealthy, imageCancer, imageBTP]
all_predictions = []

# Preprocess all images and plot them.
for index, image in enumerate(all_images):
    view(image)
    all_images[index] = preprocess(image)
print("Images processed.")

print("Analysing images...")
# Use model to predict all images, and compile into all_predictions
for index, image in enumerate(all_images):
    all_predictions.append(decode(model.predict(image)))

print("Images analysed. Processing results...")
print("\n---------------------RESULTS---------------------")
# Print out results.
for i, prediction in enumerate(all_predictions):
    if prediction[0][1] > 0.5:
        print("Patient " + str(i) + " is healthy.")
        print("Confidence: " + str(prediction[0][1] * 100)[0:4] + "%\n")
    elif prediction[1][1] > 0.5:
        print("Patient " + str(i) + " has GBM.")
18 changes: 13 additions & 5 deletions aura/aura_breaker.py
@@ -3,9 +3,14 @@
import os
import time
import random
from matplotlib import pyplot as plt

def break_aura(path, pieces):
    """
    Breaks an aura file into smaller chunks and saves the chunks to local folders.
    :param path: String path to the aura file being chunked.
    :param pieces: Integer number of chunks to produce.
    """
    array = aura_loader.read_file(path)
    filepath = "../ChunkedAura" + str(time.time())[5:10]
    print("Saving to " + filepath)
@@ -25,7 +30,14 @@ def break_aura(path,pieces):
        chunk.tofile(f)
    print("----------------- CHUNKING COMPLETE -----------------")


def percentise_aura(path, percent):
    """
    Breaks an aura file into two pieces whose sizes are set by percent.
    :param path: String path to the aura file being chunked.
    :param percent: Float fraction of the images to put in the first chunk.
    """
    array = aura_loader.read_file(path).T
    random.shuffle(array)
    filepath = "../ChunkedAura" + str(time.time())[5:10]
@@ -58,7 +70,3 @@ def percentise_aura(path,percent):

print("----------------- CHUNKING COMPLETE -----------------")


percentise_aura("{256x256x8798}btp.aura", 0.90)
# percentise_aura("../../Aura_Data/{136x136x221182}Healthy.aura", 0.90)

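A usage sketch for the two functions above (the paths here are illustrative): break_aura splits a dataset into a fixed number of chunks, while percentise_aura makes a shuffled two-way split, e.g. 90/10 for training and testing.

from aura import aura_breaker

# Split one large aura file into 10 chunks that can be loaded piecewise.
aura_breaker.break_aura("../Aura_Data/{256x256x7021}RIDERTestset.aura", 10)

# Put 90% of the shuffled images in the first output file and 10% in the second.
aura_breaker.percentise_aura("../Aura_Data/{256x256x8798}btp.aura", 0.90)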
106 changes: 97 additions & 9 deletions aura/aura_loader.py
@@ -1,25 +1,113 @@
import time
import numpy
import os
from aura.extractor_util import parse_aura_dimensions
import numpy as np
from aura.extractor_util import reshape
from aura.extractor_util import parse_aura_dimensions as pAD
import random


def read_file(path):
    """
    Reads an aura file, converting it to a numpy array.
    :param path: Path to the aura file.
    :return: A numpy array of shape (l, w, n).
    """
    filename = path.split("/")
    filename = filename[len(filename) - 1]
    l, w, n = filename[filename.find("{") + 1: filename.rfind("}")].split("x")
    l, w, n = int(l), int(w), int(n)
    l, w, n = parse_aura_dimensions(filename)
    print("Loading " + filename + "...")
    initial = time.time()

    # Load unshaped array into numpy
    unshaped_array = numpy.fromfile(path, dtype=numpy.float16)

    # Determine the number of images by dividing the length of the unshaped array by the area of each image.
    num_of_images = int(len(unshaped_array) / (l * w))
    if num_of_images != n:
        unshaped_array = numpy.fromfile(path)
        num_of_images = int(len(unshaped_array) / (l * w))
    final = time.time()
    difference = final - initial
    print(num_of_images, "images loaded in", str(difference)[0:5], "seconds.")

    # Reshape the array to a 3D matrix.
    return unshaped_array.reshape(l, w, num_of_images)

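As read_file treats it, the .aura format is just a raw float16 buffer whose dimensions are encoded in the {LxWxN} filename prefix. A minimal roundtrip sketch (the filename and shapes here are illustrative):

import numpy as np

l, w, n = 136, 136, 2
images = np.random.rand(l, w, n).astype(np.float16)

# Write the raw buffer; the dimensions live only in the filename,
# which is all read_file needs to reshape the data later.
filename = "{%dx%dx%d}demo.aura" % (l, w, n)
images.tofile(filename)

# Read it back the way read_file does.
restored = np.fromfile(filename, dtype=np.float16).reshape(l, w, n)
assert np.array_equal(images, restored)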

# This function takes in a list of paths to extract data and converts it to a numpy array.
def get_data(training_data_paths, shuffle=True):
    """
    :param training_data_paths: A list of paths from which to extract data; shapes must be (l, w, n).
    :param shuffle: Whether to shuffle the images before returning them.
    :return: Two numpy arrays: shuffled data of shape (n, l, w) and dtype numpy.float16, and labels of shape (n,).
        n: number of images
        l: length of each image
        w: width of each image
    """
    init_time = time.time()
    print("Retrieving data from " + str(len(training_data_paths)) + " paths.")
    sizes = []
    l, w = pAD(training_data_paths[0][training_data_paths[0].find("{"):training_data_paths[0].find("}") + 1])[0:2]
    for filename in training_data_paths:
        print("Recording dimensions of " + filename)
        # fl: file length, fw: file width, fn: file number of images
        fl, fw, fn = pAD(filename[filename.find("{"):filename.find("}") + 1])
        if fl > l:
            l = fl
        if fw > w:
            w = fw
        sizes.append(fn)
    n = sum(sizes)
    print(str(n) + " images found.")
    # train_data is a numpy array of shape (n, l, w) with data type numpy.float16
    train_data = np.zeros((n, l, w), dtype=np.float16)

    # Load in all data
    print("Loading data.")
    data = []
    for size, path in enumerate(training_data_paths):
        raw_data = read_file(path=path)
        raw_data = reshape(raw_data, (l, w, sizes[size])).T
        data.append(raw_data)

    # Compile data[] into output
    print("Compiling data into one array.")
    index_of_train_data = 0
    for index, package in enumerate(data):
        for image in package:
            train_data[index_of_train_data] = image
            index_of_train_data += 1

    # Label training data
    print("Labelling data.")
    data = []
    index_of_train_data = 0
    for size_index in range(len(sizes)):
        for index in range(sizes[size_index]):
            data.append((train_data[index_of_train_data], size_index))
            index_of_train_data += 1

    if shuffle:
        print("Shuffling data.")
        random.shuffle(data)

    print("Separating labels.")
    # Separate training images and labels
    labels = np.zeros(n)
    train_data = np.zeros((n, l, w))
    for i, (image, label) in enumerate(data):
        train_data[i] = image
        labels[i] = label

    final_time = time.time()
    duration = final_time - init_time
    print("Data retrieval complete. Process took " + str(duration) + " seconds.")
    return train_data, labels
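A hedged sketch of how a caller might feed get_data's output into Keras. The tiny architecture is illustrative only, not the project's Model-11; the paths are the test sets named elsewhere in this commit.

import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from aura.aura_loader import get_data

paths = ["../Aura_Data/{136x136x22118}HealthyTestset.aura",
         "../Aura_Data/{256x256x7021}RIDERTestset.aura"]
train_data, labels = get_data(paths)  # labels are the indices of the source paths

# Add the channel axis Keras convolutions expect.
n, l, w = train_data.shape
x_train = train_data.reshape(n, l, w, 1)

model = Sequential([
    Conv2D(8, (3, 3), activation="relu", input_shape=(l, w, 1)),
    Flatten(),
    Dense(2, activation="softmax"),  # one output per source path / class
])
model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=["accuracy"])
model.fit(x_train, labels, epochs=1, batch_size=32)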
4 changes: 2 additions & 2 deletions aura/automate_organisation.py
@@ -1,5 +1,5 @@
import os

# Deprecated script
# This python script was used to extract all the nii files from a download from humanconnectome
# To use it:
# 1. Dump all of your data into one folder named Aura_Data/Healthy
@@ -52,4 +52,4 @@
root_path + "/Healthy/NIFTI/" + foldername[0:6] + "/" + f)

else:
    print(foldername + " is a file")
41 changes: 16 additions & 25 deletions aura/dcm_extractor.py
@@ -1,15 +1,14 @@
import pydicom as dicom
import os, numpy, sys, time
from matplotlib import pyplot as plt
from aura import extractor_util as eu
import scipy.misc
sys.stderr.write("WARNING: All .dcm files must have the same image dimensions.\n")
time.sleep(0.01)
# path_data = input("Path to folder containing all .dcm files: ")
newFilename = input("Filename to dump information into: ")
path_data = "../../Aura_Data/Unextracted/Brain-Tumor-Progression"
if ".aura" not in newFilename:
newFilename += ".aura"

path_data = input("Path to folder containing all .dcm files: ")
new_filename = input("Filename to dump information into: ")
resize_l = input("Length to resize images to:")
resize_w = input("Width to resize images to:")

if ".aura" not in new_filename:
new_filename += ".aura"

lstFilesDCM = []
print("Reading path...")
@@ -30,27 +29,21 @@
x = numpy.arange(0.0, (ConstPixelDims[0]+1)*ConstPixelSpacing[0], ConstPixelSpacing[0])
y = numpy.arange(0.0, (ConstPixelDims[1]+1)*ConstPixelSpacing[1], ConstPixelSpacing[1])
z = numpy.arange(0.0, (ConstPixelDims[2]+1)*ConstPixelSpacing[2], ConstPixelSpacing[2])
ConstPixelDims = (256,256,len(lstFilesDCM))
ArrayDicom = numpy.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)
ConstPixelDims = (resize_l, resize_w, len(lstFilesDCM))
array_dicom = numpy.zeros(ConstPixelDims, dtype=RefDs.pixel_array.dtype)

new_filename = "{" + str(array_dicom.shape[0]) + "x" + str(array_dicom.shape[1]) + "x" + str(array_dicom.shape[2]) + "}" + new_filename
print("Saving to " + new_filename)

newFilename = "{" + str(ArrayDicom.shape[0]) + "x" + str(ArrayDicom.shape[1]) + "x" + str(ArrayDicom.shape[2]) + "}" + newFilename
print("Saving to " + newFilename)
# loop through all the DICOM files
print("Loading images into numpy array...")
image_num = 0
progress_bar_length = 50

for filenameDCM in lstFilesDCM:
    # print("Extracting " + filenameDCM)
    # read the file
    ds = dicom.dcmread(filenameDCM)
    # print(ds.tags)
    # if image_num > 400:
    #     plt.imshow(ds.pixel_array)
    #     plt.show()
    # store the raw image data
    try:
        array_dicom[:, :, lstFilesDCM.index(filenameDCM)] = scipy.misc.imresize(ds.pixel_array, (resize_l, resize_w))
    except:
        print(ds.pixel_array.shape)
    sys.stdout.write('\r')
Expand All @@ -64,7 +57,5 @@
    sys.stdout.flush()
    image_num += 1

ArrayDicom.tofile(newFilename)
array_dicom.tofile(new_filename)
print("\n\n----------------------- DATA EXTRACTION COMPLETE. -----------------------")
print("Your matrix dimensions are (length, width, number of images): ", ConstPixelDims)
