Skip to content

Commit 65b65db

Browse files
authored
Merge pull request #3 from DerikVo/eda_create_module_avg_pixels
Eda create module avg pixels
2 parents d16b351 + ac1285d commit 65b65db

9 files changed

+398
-200
lines changed

Created_images/Pituitary contrast.png

378 KB
Loading

Created_images/glioma contrast.png

404 KB
Loading

Created_images/glioma tumor.png

229 KB
Loading
376 KB
Loading

Created_images/meningioma.png

241 KB
Loading

Created_images/no tumor.png

339 KB
Loading

Created_images/pituitary tumor.png

269 KB
Loading

Notebooks/01_EDA.ipynb

+242-151
Large diffs are not rendered by default.

modules/eda.py

+156-49
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,58 @@
22
from math import ceil
33
import os
44
import random
5+
import numpy as np
56
from tensorflow.keras.layers.experimental import preprocessing
67
from tensorflow.keras.preprocessing.image import img_to_array, load_img
78
from tensorflow.keras.preprocessing import image_dataset_from_directory
89
from keras.preprocessing.image import ImageDataGenerator
910
from sklearn.decomposition import PCA
1011
import matplotlib.pyplot as plt
11-
import numpy as np
12-
np.random.seed(42)
13-
from tensorflow.keras.utils import set_random_seed
14-
set_random_seed(42)
1512

16-
def folders(dataset):
13+
'''
14+
The concept behind this code was learned from the Excel lab exercise in General Assembly's Data Science Immersive.
15+
The concepts have been adapted to identify file paths and ways to work with the data.
16+
'''
17+
def folders(dataset='Training'):
    '''
    Finds the classification folders within either the 'Training' or the 'Testing' dataset.

    Only directories are returned, so stray files sitting next to the class
    folders (e.g. '.DS_Store' or a README) are never mistaken for a class.
    The names are sorted so the result is the same on every platform.
    =============================================================================
    Keyword arguments:
    dataset -- The main folder, either 'Training' or 'Testing' (default = Training)

    Returns:
    A sorted list with the names of the sub-folders found in '../Images/<dataset>'.

    Raises:
    FileNotFoundError (from os.listdir) when '../Images/<dataset>' does not exist.
    =================================================
    Example:
    train_folders = eda.folders('Training')
    test_folders = eda.folders('Testing')
    '''
    # the path is relative to the current working directory
    path = f'../Images/{dataset}'
    # keep only real directories; plain files are not classification folders
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
2134

22-
def image_len(dataset, folders):
35+
36+
'''
37+
38+
The concept behind this code was learned from the Excel lab exercise in General Assembly's Data Science Immersive during the 2023-03 to 2023-06 cohort.
39+
The concepts have been adapted to identify file paths and ways to work with the data.
40+
'''
41+
def image_len(folders, dataset ='Training'):
2342
'''
24-
This code takes in the list directory of the folder containing the classification folders. And the dataset.
25-
this code was heavily inspired by this project: https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/01_Potato_PlantVillageEDA.ipynb
26-
Has since been adapted to work with a jupyter notebook
27-
TODO:convert all image eda into a class/method script
43+
44+
Lists the subfolders within the main folder containing the classification folders for each image set, and shows a random image from each classification.
45+
=============================================================================
46+
Keyword arguments:
47+
folders -- The sub-folders containing the classification for each tumor type ( 'glioma', 'meningioma', 'notumor', 'pituitary' )
48+
dataset -- The main folder either Training and Testing folder (default = Training)
49+
50+
===============================================
51+
Example:
52+
eda.image_len(train_folders)
2853
'''
29-
#loop to each sub folder so we can get the class sizes
54+
#sets the path to the Testing and training folders
3055
path = f'../Images/{dataset}'
56+
#loop through each classification folder
3157
for i in folders:
3258
#get number of images of folder
3359
num = len(os.listdir(f'{path}/{i}'))
@@ -39,51 +65,132 @@ def image_len(dataset, folders):
3965
image_name = (os.listdir(f'{path}/{i}')[rand])
4066
#assigns the file path to the image
4167
image = load_img(f'{path}/{i}/{image_name}')
42-
#shows the image
68+
#shows the image for the classification for reference
4369
plt.title(f'{image_name}')
4470
plt.imshow(image)
4571
plt.axis('off')
4672
plt.show()
47-
class Image:
48-
def __init__(self, dataset, sub_folder):
49-
#learned i didnt need a comma because that creates a tuple: https://stackoverflow.com/questions/39192261/class-init-takes-parameters-but-turns-them-into-tuples-for-some-reason
50-
self.dataset = dataset
51-
self.sub_folder = sub_folder
52-
53-
def avg_images(self):
54-
'''
55-
This function takes two arguments the dataset: training or testing, and the sub_folder for the type of tumor e.g. ['glioma', 'meningioma', 'notumor', 'pituitary']
56-
This function is used to find the average pixel values of each class
57-
The purpose is to find if there is a difference in each class
58-
'''
59-
#assign the path in the function for readability and understanding
60-
#assign the sub folder (class name) that was passed to the function
61-
path = (f'../Images/{self.dataset}')
62-
class_name = self.sub_folder
63-
batch_size = 32 # Modify this to suit your needs
64-
#instantiate ImageDataGenerator
65-
datagen = ImageDataGenerator(rescale=1./255) # normalize pixel values to [0,1]
66-
#get the images from the directory
67-
generator = datagen.flow_from_directory(path,
68-
classes=[class_name],
69-
class_mode=None,
70-
color_mode='grayscale',
71-
target_size=(256, 256),
72-
batch_size=batch_size)
73-
n_samples = generator.samples
74-
average_image = np.zeros((256, 256, 1))
73+
'''
74+
This portion uses code from a previous project from this [notebook](https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/01_Potato_PlantVillageEDA.ipynb).
75+
The code was originally generated by ChatGPT (GPT-4) with the prompt: "I have an image data set that I want to do EDA on. How can I average out the pixel values of all the images in a class. python keras."
7576
76-
for i in range(n_samples // batch_size): # Integer division to avoid partial batches
77-
images = next(generator)
78-
average_image += np.sum(images, axis=0)
77+
This function takes two arguments: the dataset ('Training' or 'Testing') and the sub-folder for the type of tumor, e.g. one of ['glioma', 'meningioma', 'notumor', 'pituitary'].
78+
This function is used to find the average pixel values of each class
79+
The purpose is to find if there is a difference in each class
7980
80-
average_image /= n_samples
81-
return average_image
81+
'''
82+
def avg_images(folders, dataset='Training', batch_size=32, target_size=(256, 256)):
    '''
    Computes the average pixel value of every image in one class.

    The images are read as grayscale, resized to target_size and rescaled to
    the range [0, 1] before being averaged pixel by pixel.

    Users will need to assign the result to a variable.
    For example:
    meningioma_tumor = eda.avg_images('meningioma')
    =============================================================================
    Keyword arguments:
    folders -- The sub folder containing the classification for tumor type ( 'glioma', 'meningioma', 'notumor', 'pituitary' )
    dataset -- The main folder, either 'Training' or 'Testing' (default = Training)
    batch_size -- Number of images loaded per step (default = 32)
    target_size -- (height, width) every image is resized to (default = (256, 256))

    Returns:
    A numpy array of shape (height, width, 1) holding the mean image of the class.

    Raises:
    ValueError when no images are found for the class.
    '''
    # sets the path to the Testing and training folders
    path = f'../Images/{dataset}'
    # normalize pixel values to [0, 1]
    datagen = ImageDataGenerator(rescale=1./255)
    # restrict the generator to the single class we are averaging
    generator = datagen.flow_from_directory(path,
                                            classes=[folders],
                                            class_mode=None,
                                            color_mode='grayscale',
                                            target_size=target_size,
                                            batch_size=batch_size)
    n_samples = generator.samples
    if n_samples == 0:
        # dividing by zero below would silently return an all-NaN image
        raise ValueError(f'No images found for class {folders!r} in {path!r}')

    pixel_sum = np.zeros((*target_size, 1))
    # Round UP so the final, smaller batch is included. Rounding down drops
    # those images from the sum while still dividing by the full sample count,
    # which biases the average towards zero.
    for _ in range(ceil(n_samples / batch_size)):
        pixel_sum += np.sum(next(generator), axis=0)

    return pixel_sum / n_samples
118+
119+
'''
120+
This portion uses code from a previous project from this [notebook](https://github.com/DerikVo/DSI_project_4_plant_disease/blob/main/notebooks/01_Potato_PlantVillageEDA.ipynb). The concept was originally developed by [Yasser Siddiqui]([email protected]) and has been adapted to use with this notebook.
121+
122+
This function is used to find the differences of the average pixel value between each class 'glioma', 'meningioma', and 'pituitary' compared to 'notumor'.
123+
These different characteristics can help us understand how the classes are unique when compared to not having a tumor.
124+
If there are significant differences we can better interpret our model.
125+
'''
83126
def image_contrast(comparision, base_image):
    '''
    Finds the difference between the pixel averages of two classes, rescaled
    to the range [0, 1], to show where the classes differ.

    Users will need to have run the avg_images function for each class before
    running the image_contrast function.

    Users will need to assign the result to a variable.
    For example:
    meningioma_contrast = eda.image_contrast(meningioma_tumor, notumor)
    =============================================================================
    Keyword arguments:
    comparision -- The average image of the tumor class ( 'glioma', 'meningioma', 'pituitary' )
    base_image -- The average image you are comparing against ('notumor')

    Returns:
    A new float array (base_image - comparision) shifted so its minimum is 0
    and scaled so its maximum is 1. When both inputs are identical the result
    is all zeros. The inputs are not modified.
    '''
    # work in floating point so integer inputs can be divided in place
    image = np.asarray(base_image, dtype=float) - np.asarray(comparision, dtype=float)
    # shift so the smallest difference sits at zero
    image -= image.min()
    # scale by the new maximum; skip when the images are identical,
    # otherwise 0 / 0 would fill the result with NaN
    peak = image.max()
    if peak > 0:
        image /= peak

    return image
147+
'''
148+
Referenced resources for colorblindness accommodation:
149+
https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html#colorblindness
88150
89-
return image
151+
'''
152+
def display_image(image, title):
    '''
    Plots the same image three times side by side, each with a different
    colormap, to support readers with varying kinds of color blindness.
    This function can be used for the output of both the image_contrast and
    avg_images functions.

    For example:
    eda.display_image(meningioma_contrast, 'meningioma contrast')
    =============================================================================
    Keyword arguments:
    image -- The image you want to display
    title -- The title of the image. This is also used as the file name when
             saving the figure to '../Created_images/<title>.png'
    '''
    # (panel title, matplotlib colormap name) for each of the three panels
    colormaps = (('Viridis', 'viridis'), ('cividis', 'cividis'), ('magma', 'magma'))

    # sets up the figure for the subplots
    fig, axes = plt.subplots(1, 3, figsize=(10, 10))
    plt.suptitle(f'Pixel average: {title}', y=.75, fontsize=22)

    for axis, (label, cmap) in zip(axes, colormaps):
        # the colormap is passed explicitly so the panel always matches its
        # label, even if the default colormap was changed in rcParams
        axis.imshow(image, cmap=cmap)
        axis.set_title(label, fontsize=18)
        # turn off the axis because we only want the image
        axis.axis('off')

    plt.tight_layout()
    # create the output folder on first use so saving cannot fail on a fresh checkout
    output_dir = '../Created_images'
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(f'{output_dir}/{title}.png')
    # displays the image in the notebook
    plt.show()

0 commit comments

Comments
 (0)