Module Machine Learning: reduction NN (#21)
* New module smithers.plot: function for plotting complex numbers

* Rename file, remove an unused parameter, and add a working example to the docs

* Add file for the reduction of a neural network

* Add tutorial for constructing the reduced net

* Add scripts and a tutorial for object detection

* RandSVD and (a)HOSVD implementations, tutorial tweaks, cut-off index estimator


---------

Co-authored-by: Francesco Andreuzzi <[email protected]>
Co-authored-by: Laura Meneghetti (lmeneghe) <[email protected]>
Co-authored-by: Stefano Zanin <[email protected]>
4 people authored Jan 30, 2023
1 parent b545ba5 commit 4247066
Showing 44 changed files with 9,004 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/testing_pr.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Install Python dependencies
         run: |
           python3 -m pip install --upgrade pip
-          python3 -m pip install .[test,vtk]
+          python3 -m pip install .[test,vtk,ml]
       - name: Test with pytest
         run: |
1 change: 1 addition & 0 deletions setup.py
@@ -23,6 +23,7 @@
 EXTRAS = {
     'docs': ['Sphinx', 'sphinx_rtd_theme'],
     'vtk': ['vtk'],
+    'ml': ['torch', 'torchvision', 'scikit-learn', 'tqdm'],
     'test': ['pytest', 'pytest-cov'],
 }

11 changes: 11 additions & 0 deletions smithers/ml/dataset/__init__.py
@@ -0,0 +1,11 @@
'''
Dataset Preparation init
'''
__project__ = 'Object_Detector'
__title__ = 'object_detector'
__author__ = 'Laura Meneghetti, Nicola Demo'
__maintainer__ = __author__

#from smithers.ml.dataset.create_json import *
from smithers.ml.dataset.imagerec_dataset import Imagerec_Dataset
from smithers.ml.dataset.pascalvoc_dataset import PascalVOCDataset
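
For orientation, a minimal import sketch, assuming the package has been installed with the new 'ml' extra declared in setup.py above; both classes are re-exported at the package level, so downstream code does not need to know the module layout:

# A minimal sketch, assuming the package was installed with the 'ml'
# extra from setup.py (torch, torchvision, scikit-learn, tqdm).
from smithers.ml.dataset import Imagerec_Dataset, PascalVOCDataset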
105 changes: 105 additions & 0 deletions smithers/ml/dataset/change_xml.py
@@ -0,0 +1,105 @@
'''
Utilities to perform changes inside xml files.
'''

from __future__ import print_function
from os import listdir, path
import re


WIDTH_NEW = 800
HEIGHT_NEW = 600

DIMLINE_MASK = r'<(?P<type1>width|height)>(?P<size>\d+)</(?P<type2>width|height)>'
BBLINE_MASK = r'<(?P<type1>xmin|xmax|ymin|ymax)>(?P<size>\d+)</(?P<type2>xmin|xmax|ymin|ymax)>'
NAMELINE_MASK = r'<(?P<type1>filename)>(?P<size>\S+)</(?P<type2>filename)>'
PATHLINE_MASK = r'<(?P<type1>path)>(?P<size>.+)</(?P<type2>path)>'
# regular expressions matching the dimension, bounding-box, filename and path tags

def resize_file(file_lines):
    '''
    Function performing the requested changes on the xml file, like changing
    the x and y coordinates of the boxes and the height and width accordingly.
    Note that the width and height tags must occur before the box coordinates
    (as in standard PascalVOC annotations), since the old dimensions are
    needed to rescale the boxes.

    :param list file_lines: list containing the lines of the file under
        consideration.
    '''
    new_lines = []
    width_old = None
    height_old = None
    for line in file_lines:
        match = re.search(DIMLINE_MASK, line) or re.search(BBLINE_MASK, line)
        if match is not None:
            size = match.group('size')
            type1 = match.group('type1')
            type2 = match.group('type2')
            if type1 != type2:
                raise ValueError('Malformed line: {}'.format(line))

            if type1.startswith('x'):
                # rescale x coordinates with the width ratio
                new_size = int(round(int(size) * WIDTH_NEW / width_old))
                new_line = '\t\t\t<{}>{}</{}>\n'.format(type1, new_size, type1)
            elif type1.startswith('y'):
                # rescale y coordinates with the height ratio
                new_size = int(round(int(size) * HEIGHT_NEW / height_old))
                new_line = '\t\t\t<{}>{}</{}>\n'.format(type1, new_size, type1)
            elif type1.startswith('w'):
                width_old = int(size)
                new_line = '\t\t<{}>{}</{}>\n'.format(type1, WIDTH_NEW, type1)
            elif type1.startswith('h'):
                height_old = int(size)
                new_line = '\t\t<{}>{}</{}>\n'.format(type1, HEIGHT_NEW, type1)
            else:
                raise ValueError('Unknown type: {}'.format(type1))
            new_lines.append(new_line)
        else:
            new_lines.append(line)

    return ''.join(new_lines)


def change_xml(nome_file):
    '''
    Function that changes an xml file.

    :param str nome_file: path of the directory containing the xml files,
        or path of a single xml file.
    '''
    if len(nome_file) < 1:
        raise ValueError('No file submitted')

    if path.isdir(nome_file):
        # the argument is a directory: process every xml file inside it
        files = listdir(nome_file)
        for file in files:
            file_path = path.join(nome_file, file)
            _, file_ext = path.splitext(file)
            if file_ext.lower() == '.xml':
                with open(file_path, 'r') as f:
                    rows = f.readlines()

                new_file = resize_file(rows)
                with open(file_path, 'w') as f:
                    f.write(new_file)
    else:
        # otherwise the argument is assumed to be a single xml file
        with open(nome_file, 'r') as f:
            rows = f.readlines()

        new_file = resize_file(rows)
        with open(nome_file, 'w') as f:
            f.write(new_file)


# insert the name of the xml file, or of the directory that contains them
xml_file = 'voc_dir/VOC_cow/Annotations'
change_xml(xml_file)
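
To make the rescaling concrete, here is a minimal sketch of resize_file applied to a few hand-written annotation lines; the 1024x768 source dimensions and the sample lines are made up for illustration, not part of the repository:

# Hypothetical annotation lines; width/height appear before the box
# coordinates, as resize_file requires.
sample = [
    '\t\t<width>1024</width>\n',
    '\t\t<height>768</height>\n',
    '\t\t\t<xmin>512</xmin>\n',
    '\t\t\t<ymin>384</ymin>\n',
]
print(resize_file(sample))
# width/height are replaced by WIDTH_NEW=800 and HEIGHT_NEW=600, and the
# box coordinates are rescaled proportionally: xmin 512 -> 400, ymin 384 -> 300.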
151 changes: 151 additions & 0 deletions smithers/ml/dataset/create_json.py
@@ -0,0 +1,151 @@
'''
Utilities to perform the creation of JSON files starting from the xml files.
'''
import json
import xml.etree.ElementTree as ET
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("voc07_path", help="Path to VOC2007 folder", type=str)
parser.add_argument("voc12_path", help="Path to VOC2012 folder", type=str)
parser.add_argument("output_folder", help="Path to JSON output folder",
                    type=str)
args = parser.parse_args()

voc07_path = args.voc07_path
voc12_path = args.voc12_path
output_folder = args.output_folder

# Label map
# NOTE: the labels must be written in lower case, since parse_annotation
# lower-cases every label it reads; this avoids mismatches when a label
# was capitalized inconsistently during the labeling phase.
labels_list = ('aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')
#labels_list = ('cat', 'dog')
label_map = {k: v + 1 for v, k in enumerate(labels_list)}
label_map['background'] = 0
rev_label_map = {v: k for k, v in label_map.items()}  # Inverse mapping


def parse_annotation(annotation_path):
    '''
    :param string annotation_path: string for the path to Annotations
    :return dict: dictionary containing boxes, labels, difficulties for the
        different objects in a picture
    '''
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    boxes = list()
    labels = list()
    difficulties = list()
    for obj in root.iter('object'):

        difficult = int(obj.find('difficult').text == '1')
        label = obj.find('name').text.lower().strip()
        if label not in label_map:
            continue

        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)# - 1
        ymin = int(bbox.find('ymin').text)# - 1
        xmax = int(bbox.find('xmax').text)# - 1
        ymax = int(bbox.find('ymax').text)# - 1
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label_map[label])
        difficulties.append(difficult)
    return {'boxes': boxes, 'labels': labels, 'difficulties': difficulties}


def create_data_lists(voc07_path, voc12_path, out_folder):
    """
    Create lists of images, the bounding boxes and labels of the objects
    in these images, and save these to file.

    :param string voc07_path: path to the 'VOC2007' folder
    :param string voc12_path: path to the 'VOC2012' folder
    :param string out_folder: folder where the JSONs must be saved
    :output json files: saved json files obtained from our dataset
        (images + xml files) saved in the output folder chosen
    """
    voc07_path = os.path.abspath(voc07_path)
    voc12_path = os.path.abspath(voc12_path)

    train_images = list()
    train_objects = list()
    n_objects = 0

    # Training data
    for path in [voc07_path, voc12_path]:
        if not path.endswith('/None'):
            # Find IDs of images in training data
            with open(os.path.join(path, 'ImageSets/Main/trainval.txt')) as f:
                ids = f.read().splitlines()
            for ID in ids:
                # Parse annotation's XML file
                objects = parse_annotation(
                    os.path.join(path, 'Annotations', ID + '.xml'))
                if len(objects['boxes']) == 0:
                    continue
                n_objects += len(objects['boxes'])
                train_objects.append(objects)
                train_images.append(os.path.join(path, 'JPEGImages',
                                                 ID + '.jpg'))

    assert len(train_objects) == len(train_images)

    # Save to file
    with open(os.path.join(out_folder, 'TRAIN_images.json'), 'w') as j:
        json.dump(train_images, j)
    with open(os.path.join(out_folder, 'TRAIN_objects.json'), 'w') as j:
        json.dump(train_objects, j)
    with open(os.path.join(out_folder, 'label_map.json'), 'w') as j:
        json.dump(label_map, j)  # save label map too

    print(
        '\nThere are %d training images containing a total of %d '
        'objects. Files have been saved to %s.'
        % (len(train_images), n_objects, os.path.abspath(out_folder)))

    # Test data
    test_images = list()
    test_objects = list()
    n_objects = 0

    # Find IDs of images in the test data
    with open(os.path.join(voc07_path, 'ImageSets/Main/test.txt')) as f:
        ids = f.read().splitlines()

    for ID in ids:
        # Parse annotation's XML file (IDs are the first six characters)
        ID = ID[0:6]
        objects = parse_annotation(
            os.path.join(voc07_path, 'Annotations', ID + '.xml'))
        if len(objects['boxes']) == 0:
            continue
        test_objects.append(objects)
        n_objects += len(objects['boxes'])
        test_images.append(os.path.join(voc07_path, 'JPEGImages', ID + '.jpg'))

    assert len(test_objects) == len(test_images)

    # Save to file
    with open(os.path.join(out_folder, 'TEST_images.json'), 'w') as j:
        json.dump(test_images, j)
    with open(os.path.join(out_folder, 'TEST_objects.json'), 'w') as j:
        json.dump(test_objects, j)

    print(
        '\nThere are %d test images containing a total of %d '
        'objects. Files have been saved to %s.'
        % (len(test_images), n_objects, os.path.abspath(out_folder)))


create_data_lists(voc07_path, voc12_path, output_folder)
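
For reference, a short sketch of how the generated files might be consumed downstream; the 'jsons' output folder is a made-up example argument, while the file names follow those hard-coded above:

# A minimal usage sketch. The script would be run from the command line, e.g.
#   python create_json.py voc_dir/VOC2007 voc_dir/VOC2012 jsons
# (hypothetical paths), after which the results can be loaded as:
import json
import os

out_folder = 'jsons'  # hypothetical output folder
with open(os.path.join(out_folder, 'TRAIN_images.json')) as j:
    train_images = json.load(j)   # list of image paths
with open(os.path.join(out_folder, 'TRAIN_objects.json')) as j:
    train_objects = json.load(j)  # list of {'boxes', 'labels', 'difficulties'}
assert len(train_images) == len(train_objects)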
74 changes: 74 additions & 0 deletions smithers/ml/dataset/imagerec_dataset.py
@@ -0,0 +1,74 @@
'''
Module focused on the creation of a custom dataset class in order
to use our custom dataset for the problem of image recognition
and thus classification.
'''
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms


# CUSTOM DATASET CLASS
class Imagerec_Dataset(Dataset):
    '''
    Class that handles the creation of a custom dataset class to
    be used by the data loader.

    :param pandas.DataFrame img_data: table containing all the
        relations (image, label)
    :param str img_path: path to the directory containing all the
        images
    :param list resize_dim: list of integers corresponding to the
        size to which we want to resize the images
    :param transform_obj transform: list of transforms to apply to
        the images. Default value set to None.
    '''
    def __init__(self, img_data, img_path, resize_dim, transform=None):
        self.img_data = img_data
        self.img_path = img_path
        self.resize_dim = resize_dim
        self.transform = transform
        self.targets = self.img_data['encoded_labels']

    def __len__(self):
        '''
        Function that returns the number of images in the dataset

        :return int: integer number representing the number of
            images in the dataset
        '''
        return len(self.img_data)

    def __getitem__(self, index):
        '''
        Function that returns the data and labels

        :param int index: number representing a specific image in the
            dataset
        :return tensor image, label: image and label associated
            with the index given as input
        '''
        # images are stored under per-label subdirectories of img_path
        img_name = os.path.join(self.img_path,
                                self.img_data.loc[index, 'labels'],
                                self.img_data.loc[index, 'Images'])
        image = Image.open(img_name)
        image = image.resize((self.resize_dim[0], self.resize_dim[1]))
        label = torch.tensor(self.img_data.loc[index, 'encoded_labels'])
        if self.transform is not None:
            image = self.transform(image)
        else:
            image = transforms.ToTensor()(image)
        return image, label

    def getdata(self, index):
        '''
        Function that returns a subset of the dataset

        :param list index: list of indices representing specific images
            in the dataset
        :return: subset of the dataset composed by obs of type (img, label)
        :rtype: list
        '''
        output = []
        for idx in index:
            image, label = self.__getitem__(idx)
            output.append([image, label])
        return output
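
To show how the class plugs into a standard torch.utils.data.DataLoader, a minimal usage sketch; the DataFrame rows and the 'images/' folder are hypothetical, but the column names ('Images', 'labels', 'encoded_labels') are the ones the class reads above, with each image expected under the subdirectory named after its label:

# A minimal sketch with made-up data; e.g. 'images/cat/cat_001.jpg'
# would have to exist on disk for iteration to succeed.
import pandas as pd
from torch.utils.data import DataLoader

img_data = pd.DataFrame({
    'Images': ['cat_001.jpg', 'dog_001.jpg'],
    'labels': ['cat', 'dog'],
    'encoded_labels': [0, 1],
})
dataset = Imagerec_Dataset(img_data, 'images/', resize_dim=[224, 224])
loader = DataLoader(dataset, batch_size=2, shuffle=True)
for images, labels in loader:
    print(images.shape, labels)  # e.g. torch.Size([2, 3, 224, 224]) tensor([...])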