Skip to content

Commit 24bedc5

Browse files
committed
Add some injectors
1 parent 124b018 commit 24bedc5

18 files changed

+362
-18
lines changed

NoiseFiltersPy/AENN.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
from sklearn.model_selection import KFold
22
from sklearn.neighbors import KNeighborsClassifier
33
import numpy as np
4+
import typing as t
45

56
from NoiseFiltersPy.Filter import *
67

78

89
class AENN:
9-
def __init__(self, max_neighbours = 5):
10+
def __init__(self, max_neighbours: int = 5):
    """Store the neighbourhood bound and create the result container.

    Parameters
    ----------
    max_neighbours : int
        Upper bound of the neighbour counts tried when the filter is
        called (the call loops from 1 up to this value, inclusive).
    """
    hyperparameters = {"max_neighbours": max_neighbours}
    self.max_neighbours = max_neighbours
    self.filter = Filter(parameters=hyperparameters)
1213

13-
def __call__(self, data, classes):
14+
def __call__(self, data: t.Sequence, classes: t.Sequence) -> Filter:
1415
self.isNoise = np.array([False] * len(classes))
1516
for n_neigh in range(1, self.max_neighbours + 1):
1617
self.clf = KNeighborsClassifier(n_neighbors = n_neigh, algorithm = 'kd_tree', n_jobs = -1)

NoiseFiltersPy/CNN.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def __init__(self, max_neighbours: int = 5):
1111
self.filter = Filter(parameters = {})
1212
self.clf = KNeighborsClassifier(n_neighbors = 1, n_jobs = -1)
1313

14-
def __call__(self, data, classes):
14+
def __call__(self, data: t.Sequence, classes: t.Sequence):
1515
self.isNoise = np.array([False] * len(classes))
1616

1717
firstDifIndx = next(indx for indx, num in enumerate(classes) if num != classes[0])

NoiseFiltersPy/DROP.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77

88

99
class DROPv1:
10-
def __init__(self, num_neighbours = 1):
10+
def __init__(self, num_neighbours: int = 1):
    """Store the neighbour count and create the result container.

    Parameters
    ----------
    num_neighbours : int
        Number of neighbours, kept as ``self.n_neigh``.
        NOTE(review): how it is consumed lies outside the visible hunk.
    """
    self.n_neigh = num_neighbours
    # Key the hyperparameter by its *name*. The unquoted variable used the
    # integer value itself as the dictionary key (e.g. ``{1: 1}``).
    self.filter = Filter(parameters={"num_neighbours": self.n_neigh})
1313

14-
def __call__(self, data, classes):
14+
def __call__(self, data: t.Sequence, classes: t.Sequence) -> Filter:
1515
self.clf = KNeighborsClassifier()
1616
preds = []
1717
for indx in range(len(classes)):

NoiseFiltersPy/Filter.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,43 @@
11
import typing as t
22

3-
class Filter():
3+
class Filter:
    """Base class for all the implemented class noise filters.

    Attributes
    ----------
    rem_indx : :obj:`List`
        Removed indexes (rows) from the dataset after the filtering.
    parameters : :obj:`Dict`
        Parameters used to define the behaviour of the filter.
    clean_data : :obj:`Sequence`
        Filtered independent attributes(X) of the dataset. Only available
        after :meth:`set_cleanData` has been called.
    clean_classes : :obj:`Sequence`
        Filtered target attributes(y) of the dataset. Only available after
        :meth:`set_cleanData` has been called.

    """

    def __init__(self, parameters: t.Dict):
        """Create a filter result with no removed rows.

        Parameters
        ----------
        parameters : :obj:`Dict`
            Dictionary that provides hyperparameters for filters algorithms.
        """
        # Removed Indexes
        self.rem_indx: t.List = []
        self.parameters = parameters

    def set_cleanData(self, data: t.Sequence, classes: t.Sequence) -> None:
        """Helper function to set data and classes to Filter instance.

        Parameters
        ----------
        data : :obj:`Sequence`
            Filtered independent attributes(X) of the dataset.
        classes : :obj:`Sequence`
            Filtered target attributes(y) of the dataset.

        """
        # Annotated ``None`` rather than ``NoReturn``: the method returns
        # normally, whereas ``NoReturn`` tells type checkers it never does.
        self.clean_data = data
        self.clean_classes = classes

NoiseFiltersPy/Injector.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import pandas as pd
2+
3+
class Injector:
    """Base class for the injectors of artificial noise.

    Attributes
    ----------
    new_noise : :obj:`List`
        Starts empty; meant to be filled by subclasses when they generate
        noise.
    attrs : :obj:`pandas.DataFrame`
        Independent attributes(X) of the dataset.
    labels : :obj:`pandas.DataFrame`
        Target attribute(y) of the dataset, stored as a DataFrame.
    label_types : :obj:`Set`
        Distinct values found in the first column of ``labels``.
    rate : :obj:`float`
        Fraction of the examples that should receive noise.
    num_noise : :obj:`int`
        ``int(rate * number_of_rows)``.

    """

    def __init__(self, attributes, labels, rate: float = 0.1) -> None:
        """Normalise the inputs to DataFrames and validate them.

        Parameters
        ----------
        attributes : array-like or :obj:`pandas.DataFrame`
            Independent attributes(X) of the dataset.
        labels : array-like or :obj:`pandas.DataFrame`
            Target attribute(y) of the dataset.
        rate : :obj:`float`
            Fraction of examples to corrupt, between 0 and 1 inclusive.

        Raises
        ------
        ValueError
            If :meth:`verify` rejects the inputs.
        """
        self.new_noise = []
        self.attrs = self._as_frame(attributes)
        # Decide from the type of ``labels`` itself, not from ``attributes``.
        self.labels = self._as_frame(labels)

        # Positional access so frames with named columns work too; an empty
        # frame has no column to read and is rejected by verify() below.
        if self.labels.shape[1]:
            self.label_types = set(self.labels.iloc[:, 0].unique())
        else:
            self.label_types = set()
        self.rate = rate
        self.verify()
        self.num_noise = int(self.rate * self.attrs.shape[0])

    @staticmethod
    def _as_frame(values):
        """Return ``values`` unchanged if already a DataFrame, else wrap it."""
        if isinstance(values, pd.DataFrame):
            return values
        return pd.DataFrame(values)

    def verify(self) -> None:
        """Check that labels, attributes and rate are usable.

        Raises
        ------
        ValueError
            If the labels are empty, the rarest label occurs fewer than two
            times, attributes and labels differ in row count, or the rate
            is outside ``[0, 1]``.
        """
        if self.labels.empty:
            raise ValueError("Labels must not be empty.")

        if self.labels.iloc[:, 0].value_counts().min() < 2:
            raise ValueError("Number of examples in the minority class must be >= 2.")

        if self.attrs.shape[0] != self.labels.shape[0]:
            raise ValueError("Attributes and classes must have the same size.")

        # Written as a chained comparison so NaN is rejected as well.
        if not 0 <= self.rate <= 1:
            raise ValueError("Noise rate must be between 0 and 1.")
45+
46+

NoiseFiltersPy/NeighborwiseInjector.py

Whitespace-only changes.

NoiseFiltersPy/NoiseHandler.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
class NoiseHandler:
    """Skeleton entry point for fitting, filtering and injecting noise.

    Every method is currently a placeholder that performs no work.
    """

    def __init__(self):
        """Create a handler; no state is initialised yet."""
        return None

    def fit(self, data):
        """Accept ``data`` and do nothing (not implemented yet)."""
        return None

    def filter(self, filters, params):
        """Accept ``filters`` and ``params`` and do nothing (not implemented yet)."""
        return None

    def inject(self, method, params):
        """Accept ``method`` and ``params`` and do nothing (not implemented yet)."""
        return None

NoiseFiltersPy/RandomInjector.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from sklearn.utils.validation import check_random_state
2+
import numpy as np
3+
from NoiseFiltersPy.Injector import Injector
4+
5+
class RandomInjector(Injector):
    """Injector that relabels randomly chosen examples."""

    def gen(self, seed: int = None):
        """Replace the label of ``num_noise`` randomly chosen rows.

        The chosen row positions are stored in ``self.new_noise`` and
        ``self.labels`` is modified in place; nothing is returned.

        Parameters
        ----------
        seed : int, optional
            Seed for the NumPy random generator; ``None`` draws fresh
            entropy.

        Raises
        ------
        ValueError
            If a selected row has no alternative label to switch to.
        """
        rng = np.random.default_rng(seed)
        self.new_noise = rng.choice(self.labels.shape[0], size=self.num_noise, replace=False)
        for example in self.new_noise:
            current = set(self.labels.iloc[example])
            # Set iteration order is not stable between interpreter runs
            # (string hashing is randomised), so order the candidates to
            # make a seeded run repeatable.
            candidates = sorted(self.label_types - current, key=str)
            if not candidates:
                raise ValueError("At least two distinct labels are required to inject noise.")
            self.labels.iloc[example] = rng.choice(candidates)

docs/Makefile

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Minimal makefile for Sphinx documentation
2+
#
3+
4+
# You can set these variables from the command line, and also
5+
# from the environment for the first two.
6+
SPHINXOPTS ?=
7+
SPHINXBUILD ?= sphinx-build
8+
SOURCEDIR = source
9+
BUILDDIR = build
10+
11+
# Put it first so that "make" without argument is like "make help".
12+
help:
13+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14+
15+
.PHONY: help Makefile
16+
17+
# Catch-all target: route all unknown targets to Sphinx using the new
18+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19+
%: Makefile
20+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

docs/make.bat

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
@ECHO OFF
2+
3+
pushd %~dp0
4+
5+
REM Command file for Sphinx documentation
6+
7+
if "%SPHINXBUILD%" == "" (
8+
set SPHINXBUILD=sphinx-build
9+
)
10+
set SOURCEDIR=source
11+
set BUILDDIR=build
12+
13+
if "%1" == "" goto help
14+
15+
%SPHINXBUILD% >NUL 2>NUL
16+
if errorlevel 9009 (
17+
echo.
18+
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19+
echo.installed, then set the SPHINXBUILD environment variable to point
20+
echo.to the full path of the 'sphinx-build' executable. Alternatively you
21+
echo.may add the Sphinx directory to PATH.
22+
echo.
23+
echo.If you don't have Sphinx installed, grab it from
24+
echo.http://sphinx-doc.org/
25+
exit /b 1
26+
)
27+
28+
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29+
goto end
30+
31+
:help
32+
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33+
34+
:end
35+
popd

docs/source/NoiseFiltersPy.rst

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
NoiseFiltersPy package
2+
======================
3+
4+
Submodules
5+
----------
6+
7+
NoiseFiltersPy.AENN module
8+
--------------------------
9+
10+
.. automodule:: NoiseFiltersPy.AENN
11+
:members:
12+
:undoc-members:
13+
:show-inheritance:
14+
15+
NoiseFiltersPy.CNN module
16+
-------------------------
17+
18+
.. automodule:: NoiseFiltersPy.CNN
19+
:members:
20+
:undoc-members:
21+
:show-inheritance:
22+
23+
NoiseFiltersPy.DROP module
24+
--------------------------
25+
26+
.. automodule:: NoiseFiltersPy.DROP
27+
:members:
28+
:undoc-members:
29+
:show-inheritance:
30+
31+
NoiseFiltersPy.ENN module
32+
-------------------------
33+
34+
.. automodule:: NoiseFiltersPy.ENN
35+
:members:
36+
:undoc-members:
37+
:show-inheritance:
38+
39+
NoiseFiltersPy.Filter module
40+
----------------------------
41+
42+
.. automodule:: NoiseFiltersPy.Filter
43+
:members:
44+
:undoc-members:
45+
:show-inheritance:
46+
47+
NoiseFiltersPy.HARF module
48+
--------------------------
49+
50+
.. automodule:: NoiseFiltersPy.HARF
51+
:members:
52+
:undoc-members:
53+
:show-inheritance:
54+
55+
NoiseFiltersPy.Injector module
56+
------------------------------
57+
58+
.. automodule:: NoiseFiltersPy.Injector
59+
:members:
60+
:undoc-members:
61+
:show-inheritance:
62+
63+
NoiseFiltersPy.NoiseHandler module
64+
----------------------------------
65+
66+
.. automodule:: NoiseFiltersPy.NoiseHandler
67+
:members:
68+
:undoc-members:
69+
:show-inheritance:
70+
71+
NoiseFiltersPy.TomekLinks module
72+
--------------------------------
73+
74+
.. automodule:: NoiseFiltersPy.TomekLinks
75+
:members:
76+
:undoc-members:
77+
:show-inheritance:
78+
79+
Module contents
80+
---------------
81+
82+
.. automodule:: NoiseFiltersPy
83+
:members:
84+
:undoc-members:
85+
:show-inheritance:

docs/source/conf.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Configuration file for the Sphinx documentation builder.
2+
#
3+
# This file only contains a selection of the most common options. For a full
4+
# list see the documentation:
5+
# https://www.sphinx-doc.org/en/master/usage/configuration.html
6+
7+
# -- Path setup --------------------------------------------------------------
8+
9+
# If extensions (or modules to document with autodoc) are in another directory,
10+
# add these directories to sys.path here. If the directory is relative to the
11+
# documentation root, use os.path.abspath to make it absolute, like shown here.
12+
#
13+
import os
14+
import sys
15+
import divio_docs_theme
16+
17+
# Sphinx reads this file with the working directory set to docs/source, so the
# repository root that holds the NoiseFiltersPy package is two levels up.
sys.path.insert(0, os.path.abspath(os.path.join('..', '..')))
18+
19+
# import sphinx_gallery
20+
21+
# -- Project information -----------------------------------------------------
22+
23+
project = 'NoiseFiltersPy'
24+
copyright = '2021, Juliana Hosoume and Luís Paulo Faina Garcia'
25+
author = 'Juliana Hosoume and Luís Paulo Faina Garcia'
26+
27+
# The full version, including alpha/beta/rc tags
28+
release = '0.0.1'
29+
30+
31+
# -- General configuration ---------------------------------------------------
32+
33+
# Add any Sphinx extension module names here, as strings. They can be
34+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
35+
# ones.
36+
extensions = [
37+
'sphinx.ext.autodoc',
38+
'sphinx.ext.autosummary',
39+
'sphinx.ext.intersphinx',
40+
# 'sphinx_gallery.gen_gallery',
41+
'sphinx.ext.mathjax',
42+
'sphinx.ext.viewcode',
43+
'numpydoc',
44+
]
45+
46+
# Add any paths that contain templates here, relative to this directory.
47+
templates_path = ['_templates']
48+
49+
# List of patterns, relative to source directory, that match files and
50+
# directories to ignore when looking for source files.
51+
# This pattern also affects html_static_path and html_extra_path.
52+
exclude_patterns = []
53+
54+
55+
# -- Options for HTML output -------------------------------------------------
56+
57+
# The theme to use for HTML and HTML Help pages. See the documentation for
58+
# a list of builtin themes.
59+
#
60+
html_theme = 'divio_docs_theme'
61+
62+
# Add any paths that contain custom static files (such as style sheets) here,
63+
# relative to this directory. They are copied after the builtin static files,
64+
# so a file named "default.css" will overwrite the builtin "default.css".
65+
html_static_path = ['_static']

docs/source/index.rst

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
.. NoiseFiltersPy documentation master file, created by
2+
sphinx-quickstart on Sun Mar 28 20:00:21 2021.
3+
You can adapt this file completely to your liking, but it should at least
4+
contain the root `toctree` directive.
5+
6+
Welcome to NoiseFiltersPy's documentation!
7+
==========================================
8+
9+
.. toctree::
10+
:maxdepth: 2
11+
:caption: Contents:
12+
13+
modules
14+
15+
Indices and tables
16+
==================
17+
18+
* :ref:`genindex`
19+
* :ref:`modindex`
20+
* :ref:`search`

0 commit comments

Comments
 (0)