Skip to content

Commit 72e02c1

Browse files
committed
added additional comments and features to functions
1 parent d97cba1 commit 72e02c1

File tree

5 files changed

+279
-448
lines changed

5 files changed

+279
-448
lines changed
33.9 KB
Loading

Notebooks/02_Baseline Model.ipynb

+126
Large diffs are not rendered by default.

Notebooks/02_baseline_model.ipynb

-244
This file was deleted.

Notebooks/baseline 2.ipynb

-204
This file was deleted.

modules/model.py

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import os
2+
import cv2
3+
import numpy as np
4+
import pandas as pd
5+
from tensorflow.keras.preprocessing.image import img_to_array, load_img
6+
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
7+
import seaborn as sns
8+
import matplotlib.pyplot as plt
9+
10+
11+
'''
12+
This portion uses code from a previous project from this [notebook](https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/01_Potato_PlantVillageEDA.ipynb).
13+
The code was originally developed by chat GPT 4 with the prompt: "I have an image data set that I want to do EDA on. How can I average out the pixel values of all the images in a class. python keras."
14+
15+
This function takes two arguments: the sub-folder for the type of tumor, e.g. one of ['glioma', 'meningioma', 'notumor', 'pituitary'], and the dataset (training or testing).
16+
This function is used to find the average pixel values of each class
17+
The purpose is to find if there is a difference in each class
18+
19+
'''
20+
def avg_images(class_name, dataset='Training', target_size=(256, 256)):
    '''
    This function is used to find the average pixel value of each class

    Every regular file found in ../Images/{dataset}/{class_name}/ is loaded as
    a grayscale image, resized to target_size and averaged pixel by pixel.

    Users will need to assign the images to a variable.
    For example:
    meningioma_tumor = avg_images('meningioma')
    =============================================================================
    Keyword arguments:
    class_name -- The sub folder containing the classification for tumor type
                  ( 'glioma', 'meningioma', 'notumor', 'pituitary' )
    dataset -- The main folder, either the Train or Test folder (default = Training)
    target_size -- Size every image is resized to before averaging
                   (default = (256, 256))

    Returns:
    A float32 numpy array of shape (*target_size, 1). It is all zeros when the
    folder contains no files.
    '''
    path = f'../Images/{dataset}/{class_name}/'
    # Keep regular files only so that a nested folder never reaches load_img.
    candidate_paths = [os.path.join(path, entry) for entry in os.listdir(path)]
    image_paths = [entry for entry in candidate_paths if os.path.isfile(entry)]
    num_images = len(image_paths)
    average_image = np.zeros((*target_size, 1), dtype=np.float32)

    for image_path in image_paths:
        image = load_img(image_path, color_mode='grayscale', target_size=target_size)
        # Each image contributes 1/num_images of its pixel values to the running average.
        average_image += img_to_array(image) / num_images

    return average_image
45+
46+
'''
47+
The Code was originally developed by chat GPT 3 with the prompt: "I want to find the average pixel value of each class and then use the mean of an image to find which class it belongs to. The path to the class looks like '../Images/Training/glioma/' the classes are 'glioma', 'meningioma', 'notumor', 'pituitary'"
48+
49+
ChatGPT was later prompted to adjust the code so that a parameter could be passed to the classify_images function. This took a total of 8 prompts and manual adjustments.
50+
'''
51+
def find_closest_class(mean_pixel_value, class_averages):
    '''
    This function is used to find the class whose average is closest to a value

    =============================================================================
    Keyword arguments:
    mean_pixel_value -- The mean pixel value of the image being classified
                        (a number or a numpy array)
    class_averages -- A dictionary mapping each class name to its average
                      pixel value

    Returns:
    The name of the class with the smallest distance to mean_pixel_value.
    When several classes are equally close the first one in the dictionary
    wins, and None is returned when the dictionary is empty.
    '''
    #initialize the closest class variable
    closest_class = None
    #initialize the closest distance variable; infinity so any real distance is smaller
    closest_distance = float('inf')
    for class_name, average in class_averages.items():
        #finds the distance between the mean pixel value and the class average
        distance = np.linalg.norm(mean_pixel_value - average)
        #a strict comparison keeps the earliest class when distances tie
        if distance < closest_distance:
            closest_distance = distance
            closest_class = class_name

    return closest_class
67+
68+
'''
69+
The Code was originally developed by chat GPT 3 with the prompt: "How do I dynamically classify images using the folder they are in as a class. Please use the OS module"
70+
71+
ChatGPT was later prompted to adjust the code so that a parameter could be passed to the find_closest_class function. This took a total of 8 prompts and manual adjustments.
72+
'''
73+
def classify_images(test_folder_path, class_paths):
    '''
    This function is used to classify every test image by its mean pixel value

    The mean pixel value of each class is computed with avg_images (which
    defaults to the training data set). Each test image is then assigned to
    the class whose mean is closest to the mean pixel value of the image.
    =============================================================================
    Keyword arguments:
    test_folder_path -- The folder holding one sub folder per class with the
                        images to classify
    class_paths -- A dictionary whose keys are the class names. Only the keys
                   are used; the values are ignored by this function.

    Returns:
    Two lists of equal length: the actual class of every classified image and
    the class predicted for it. Files that Open CV cannot decode are skipped
    with a warning and appear in neither list.
    '''
    import warnings

    #store the mean pixel value for each class as a key value pair
    class_averages = {
        class_name: np.mean(avg_images(class_name))
        for class_name in class_paths
    }

    #create the lists for the actual classes and the predictions
    actual_classes = []
    predicted_classes = []

    for class_name in class_paths:
        #gets the path to the class folder under the testing data set
        class_folder_path = os.path.join(test_folder_path, class_name)

        for image_file in os.listdir(class_folder_path):
            #gets the path of an individual image
            image_path = os.path.join(class_folder_path, image_file)
            #reads the image in grayscale so it matches how avg_images loads the class images
            test_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            #imread returns None instead of raising when a file cannot be decoded
            if test_image is None:
                warnings.warn(f'Skipping unreadable image: {image_path}')
                continue
            #gets the mean pixel value of the image
            mean_pixel_value = np.mean(test_image)
            #uses the find_closest_class function to find what class it is closest to
            closest_class = find_closest_class(mean_pixel_value, class_averages)
            actual_classes.append(class_name)
            predicted_classes.append(closest_class)

    return actual_classes, predicted_classes
107+
'''
108+
This portion reuses code from prior projects. The confusion matrix used the project: https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/02_plant_village_potato_modeling.ipynb
109+
which prompted ChatGPT 4 to help grab the labels information from the validation dataset and get it into a numpy array, so that we can use that to make a confusion matrix.
110+
111+
The creation of the data frame was taken from this project: https://github.com/DerikVo/NN_hackathon/blob/main/Code/Training/pre-trained-models.ipynb
112+
113+
portions of the code have been adapted to work with file pathways.
114+
115+
Prompted ChatGPT 3 to incorporate the code into a function that uses the classify_images function to get the confusion matrix and classification metrics. It was then prompted to correct some syntax errors.
116+
'''
117+
118+
def calculate_metrics(actual_classes, predicted_classes, class_paths, model_name='baseline'):
    '''
    This function is used to build the confusion matrix and the weighted metrics

    =============================================================================
    Keyword arguments:
    actual_classes -- The true class of every image
    predicted_classes -- The predicted class of every image, in the same order
    class_paths -- A dictionary whose keys are the class names; the order of
                   the keys sets the row and column order of the confusion matrix
    model_name -- The row label used in the metrics data frame (default = baseline)

    Returns:
    The confusion matrix and a one row data frame with the columns
    'Precision', 'Recall' and 'F1 Score'.
    '''
    #gets the label of each class
    labels = list(class_paths.keys())
    #creates the confusion matrix
    cm = confusion_matrix(actual_classes, predicted_classes, labels=labels)
    #Finds the weighted scores for each metric
    #zero_division=0 scores a class that is never predicted as 0 without a warning
    precision = precision_score(actual_classes, predicted_classes,
                                average='weighted', zero_division=0)
    recall = recall_score(actual_classes, predicted_classes,
                          average='weighted', zero_division=0)
    f1 = f1_score(actual_classes, predicted_classes,
                  average='weighted', zero_division=0)
    #adds the scores into a data frame
    data = {'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}
    metrics_df = pd.DataFrame(data, index=[model_name])

    return cm, metrics_df
132+
133+
'''
134+
This portion reuses code from prior projects. The confusion matrix used the project: https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/02_plant_village_potato_modeling.ipynb
135+
which prompted ChatGPT 4 to help grab the labels information from the validation dataset and get it into a numpy array, so that we can use that to make a confusion matrix.
136+
137+
There were slight modifications to fit the purposes of this code such as assigning class paths and a title parameter.
138+
'''
139+
def plot_confusion_matrix(confusion_matrix, class_paths, title):
    '''
    This function is used to plot, save and display a confusion matrix

    The figure is saved as '../Created_images/{title} confusion matrix.png'.
    The output folder is created when it does not exist yet.
    =============================================================================
    Keyword arguments:
    confusion_matrix -- The confusion matrix to plot
    class_paths -- A dictionary whose keys are the class names used for the
                   axis tick labels
    title -- The text used as the prefix of the plot title and of the file name
    '''
    #gets the class names for the axis ticks
    class_labels = list(class_paths.keys())
    #sets the figure size
    plt.figure(figsize=(10, 10))
    #Plots the confusion matrix and assigns the class names on the axis ticks
    sns.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='g',
                xticklabels=class_labels, yticklabels=class_labels)
    #labels the axis
    plt.xlabel('Predicted Class')
    plt.ylabel('True Class')
    #sets the title
    plt.title(f'{title} Confusion Matrix')
    #savefig does not create missing folders, so make sure the output folder exists
    output_dir = '../Created_images'
    os.makedirs(output_dir, exist_ok=True)
    #saves the figure before it is displayed
    plt.savefig(f'{output_dir}/{title} confusion matrix.png')
    #displays the image
    plt.show()

0 commit comments

Comments
 (0)