Skip to content

Commit 7235fb2

Browse files
committed
Add analysis
1 parent a4ae398 commit 7235fb2

15 files changed

+6267
-21
lines changed

NoiseFiltersPy/NeighborwiseInjector.py

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ class NeighborwiseInjector(Injector):
77
def generate(self, seed: int = None):
    """Pick the examples to turn into noise and apply the random step.

    The value returned by ``_define_noise_examples()`` is stored in
    ``_new_noise``; ``_gen_random`` is then called with ``seed``.

    Args:
        seed: forwarded to ``_gen_random`` only.

    Returns:
        ``self``.
    """
    # NOTE(review): ``seed`` is not passed to ``_define_noise_examples`` here;
    # confirm that the selection step is meant to be unseeded.
    self._new_noise = self._define_noise_examples()
    self._gen_random(seed=seed)
    return self
1011

1112
def _cal_dNN(self, distances, example_indx):
1213
equal_class = self._labels == self._labels.iloc[example_indx]

NoiseFiltersPy/NoiseHandler.py

+40-9
Original file line number | Diff line number | Diff line change
@@ -26,20 +26,51 @@ def filter(self, methods):
2626
times = []
2727
for filter_type in methods:
2828
if filter_type in _implemented_filters:
29-
filter = _implemented_filters()
30-
filter, time = timeit(
31-
filter,
32-
data = self._attrs,
33-
classes = self._labels
34-
)
29+
filter = _implemented_filters[filter_type]()
30+
if self._measure_time:
31+
filter, time = timeit(
32+
filter,
33+
data = self._attrs,
34+
classes = self._labels
35+
)
36+
times.append(time)
37+
else:
38+
filter = filter(
39+
filter,
40+
data = self._attrs,
41+
classes = self._labels
42+
)
3543
self._attrs = filter.clean_data
3644
self._classes = filter.clean_classes
3745
filters.append(filter)
38-
times.append(time)
39-
return filters, times
46+
if self._measure_time:
47+
return filters, times
48+
return filters
49+
4050

4151
def inject(self, methods):
    """Run every recognised injector named in ``methods``.

    Names that are not keys of ``_implemented_injectors`` are skipped.
    Each recognised injector class is instantiated with no arguments and
    then invoked with ``data=self._attrs`` and ``classes=self._labels``.

    Args:
        methods: iterable of injector names.

    Returns:
        ``(injectors, times)`` when ``self._measure_time`` is truthy,
        otherwise only ``injectors`` (the list of invocation results).
    """
    injectors = []
    times = []
    for injector_type in methods:
        if injector_type not in _implemented_injectors:
            continue
        injector = _implemented_injectors[injector_type]()
        if self._measure_time:
            injected, elapsed = timeit(
                injector,
                data=self._attrs,
                classes=self._labels,
            )
            times.append(elapsed)
        else:
            # Call the injector itself. The previous code called the local
            # name ``filter``, which is unbound on this path and raised
            # UnboundLocalError.
            # NOTE(review): assumes ``timeit(fn, **kw)`` wraps ``fn(**kw)`` so
            # both branches perform the same call; confirm against ``timeit``.
            injected = injector(
                data=self._attrs,
                classes=self._labels,
            )
        injectors.append(injected)
    if self._measure_time:
        return injectors, times
    return injectors
4374

4475
def _output_converter(self):
4576
raise NotImplementedError

NoiseFiltersPy/NonlinearwiseInjector.py

+1
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ class NonlinearwiseInjector(Injector):
88
def generate(self, seed: int = None):
    """Pick the examples to turn into noise and apply the random step.

    ``seed`` is forwarded to both ``_define_noise_examples`` (whose result
    is stored in ``_new_noise``) and ``_gen_random``.

    Args:
        seed: value passed as ``seed`` to both helper calls.

    Returns:
        ``self``.
    """
    self._new_noise = self._define_noise_examples(seed=seed)
    self._gen_random(seed=seed)
    return self
1112

1213
def _one_vs_one(self):
1314
labels_comb = itertools.combinations(self._label_types, 2)

NoiseFiltersPy/RandomInjector.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,5 @@ class RandomInjector(Injector):
66
def generate(self, seed: int = None):
    """Draw ``_num_noise`` distinct example indices and apply the random step.

    Indices are sampled without replacement from
    ``range(self._labels.shape[0])`` using a NumPy generator seeded with
    ``seed``; the draw is stored in ``_new_noise`` before ``_gen_random`` is
    called with the same ``seed``.

    Args:
        seed: seed for ``np.random.default_rng`` and for ``_gen_random``.

    Returns:
        ``self``.
    """
    rng = np.random.default_rng(seed)
    self._new_noise = rng.choice(
        self._labels.shape[0],
        size=self._num_noise,
        replace=False,
    )
    self._gen_random(seed=seed)
    return self

NoiseFiltersPy/_filters.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
from NoiseFiltersPy import AENN, CNN, DROP, ENN, TomekLinks
22

33
# Registry of filter name -> filter class. Each value is a class taken from
# the imported module (``DROP`` resolves to ``DROPv1``), so an entry can be
# instantiated directly.
_implemented_filters = {
    "AENN": AENN.AENN,
    "CNN": CNN.CNN,
    "DROP": DROP.DROPv1,
    "ENN": ENN.ENN,
    "TomekLinks": TomekLinks.TomekLinks,
}

NoiseFiltersPy/_injectors.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from NoiseFiltersPy import RandomInjector, NonlinearwiseInjector, NeighborwiseInjector
22

33
# Registry of injector name -> injector class. Each value is the class of the
# same name inside the imported module, so an entry can be instantiated
# directly.
_implemented_injectors = {
    "RandomInjector": RandomInjector.RandomInjector,
    "NonlinearwiseInjector": NonlinearwiseInjector.NonlinearwiseInjector,
    "NeighborwiseInjector": NeighborwiseInjector.NeighborwiseInjector,
}

analysis/compare_filters.py

+59
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
from os import listdir
2+
from os.path import isfile, join
3+
import numpy as np
4+
import pandas as pd
5+
from scipy.io import arff as arff_io
6+
from sklearn import preprocessing, metrics
7+
8+
from NoiseFiltersPy._filters import _implemented_filters
9+
from NoiseFiltersPy._injectors import _implemented_injectors
10+
11+
# Directory, relative to the working directory, that holds the datasets.
DATASETS_PATH = "analysis/datasets/"

# Only regular files whose name ends in "json" or "arff" are considered.
datasets = [
    f for f in listdir(DATASETS_PATH)
    if isfile(join(DATASETS_PATH, f)) and f.endswith(("json", "arff"))
]

# Module-level encoders; calculate_filter_f1 refits them for each dataset.
enc = preprocessing.OneHotEncoder(handle_unknown="ignore")
le = preprocessing.LabelEncoder()
19+
20+
def calculate_filter_f1(dataset, filter, injector, rate = 0.1):
21+
# Reading dataset
22+
if dataset.endswith("json"):
23+
data = pd.read_json(DATASETS_PATH + dataset)
24+
elif dataset.endswith("arff"):
25+
data = arff_io.loadarff(DATASETS_PATH + dataset)
26+
data = pd.DataFrame(data[0])
27+
target = data["class"].values
28+
# Data preprocessing (type transformation)
29+
if target.dtype == object:
30+
le.fit(target)
31+
target = le.transform(target)
32+
attrs = data.drop("class", axis = 1).values
33+
if not np.issubdtype(attrs.dtype, np.number):
34+
enc.fit(attrs)
35+
attrs = enc.transform(attrs).toarray()
36+
37+
injector = injector(attrs, target, rate)
38+
injector.generate()
39+
40+
filter = filter()
41+
filter = filter(attrs, np.ravel(injector.labels.values))
42+
real_values = [1 if indx in injector.noise_indx else 0 for indx in range(len(target))]
43+
pred_values = [1 if indx in filter.rem_indx else 0 for indx in range(len(target))]
44+
return metrics.f1_score(real_values, pred_values, average = "micro")
45+
46+
# Score every implemented filter against every implemented injector on each
# dataset: results[filter name][injector name][dataset file] -> F1 score.
results = {}
for filter_name, filter_cls in _implemented_filters.items():
    results[filter_name] = {}
    for injector_name, injector_cls in _implemented_injectors.items():
        results[filter_name][injector_name] = {}
        for dataset in datasets:
            results[filter_name][injector_name][dataset] = calculate_filter_f1(
                dataset,
                filter_cls,
                injector_cls,
            )

# One column per filter, one row per injector; written to the working directory.
results = pd.DataFrame(results)
results.to_csv("compare_filters.csv")

0 commit comments

Comments
 (0)