from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd
from scipy.io import arff as arff_io
from sklearn import preprocessing, metrics

from NoiseFiltersPy._filters import _implemented_filters
from NoiseFiltersPy._injectors import _implemented_injectors

DATASETS_PATH = "analysis/datasets/"

# Collect every JSON or ARFF dataset available in the datasets folder.
datasets = [f for f in listdir(DATASETS_PATH)
            if (isfile(join(DATASETS_PATH, f)) and
                (f.endswith("json") or f.endswith("arff")))]

# Encoders reused across datasets: one-hot encoding for categorical
# attributes, label encoding for the class column.
enc = preprocessing.OneHotEncoder(handle_unknown='ignore')
le = preprocessing.LabelEncoder()

def calculate_filter_f1(dataset, filter, injector, rate=0.1):
    """Inject label noise into `dataset` and return the F1 score of the
    filter's noise detection against the injected noise indices."""
    # Reading dataset
    if dataset.endswith("json"):
        data = pd.read_json(DATASETS_PATH + dataset)
    elif dataset.endswith("arff"):
        data = arff_io.loadarff(DATASETS_PATH + dataset)
        data = pd.DataFrame(data[0])
    target = data["class"].values
    # Data preprocessing (type transformation)
    if target.dtype == object:
        le.fit(target)
        target = le.transform(target)
    attrs = data.drop("class", axis=1).values
    if not np.issubdtype(attrs.dtype, np.number):
        # One-hot encode the attributes when any column is non-numeric.
        enc.fit(attrs)
        attrs = enc.transform(attrs).toarray()

    # Inject artificial label noise at the given rate.
    injector = injector(attrs, target, rate)
    injector.generate()

    # Run the filter on the data with the noisy labels.
    filter = filter()
    filter = filter(attrs, np.ravel(injector.labels.values))

    # Compare the indices flagged by the filter with the injected noise indices.
    real_values = [1 if indx in injector.noise_indx else 0 for indx in range(len(target))]
    pred_values = [1 if indx in filter.rem_indx else 0 for indx in range(len(target))]
    return metrics.f1_score(real_values, pred_values, average="micro")
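
# Example of calling the helper directly (a sketch; "iris.arff", "ENN" and
# "Uniform" are assumed names here -- the real keys come from the
# _implemented_filters and _implemented_injectors registries):
#
#   f1 = calculate_filter_f1("iris.arff",
#                            _implemented_filters["ENN"],
#                            _implemented_injectors["Uniform"],
#                            rate=0.1)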

# Evaluate every registered filter against every injector on every dataset.
results = {}
for filter in _implemented_filters.keys():
    results[filter] = {}
    for injector in _implemented_injectors.keys():
        results[filter][injector] = {}
        for dataset in datasets:
            results[filter][injector][dataset] = calculate_filter_f1(
                dataset,
                _implemented_filters[filter],
                _implemented_injectors[injector]
            )
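# At this point `results` is a nested dict {filter: {injector: {dataset: f1}}}.
# The DataFrame written below therefore keeps each per-dataset mapping as a
# dict inside its (injector, filter) cell. If a flat, one-row-per-dataset
# table is preferred, one possible sketch (not part of the original script):
#
#   flat = pd.DataFrame(
#       [(flt, inj, ds, score)
#        for flt, inj_res in results.items()
#        for inj, ds_res in inj_res.items()
#        for ds, score in ds_res.items()],
#       columns=["filter", "injector", "dataset", "f1"],
#   )
#   flat.to_csv("compare_filters_long.csv", index=False)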

results = pd.DataFrame(results)
results.to_csv("compare_filters.csv")