Skip to content

Commit 124b018

Browse files
committed
Basic structure
1 parent a723ead commit 124b018

15 files changed

+98
-13
lines changed

Makefile

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
PACKAGE := NoiseFiltersPy
2+
3+
all: clean install-complete code-check
4+
.PHONY: all clean code-check pypi install-complete install
5+
6+
clean:
7+
find . -name "*.so" -o -name "*.pyc" -o -name "*.md5" -o -name "*.pyd" -o -name "*~" | xargs rm -f
8+
rm -rf dist
9+
rm -rf build
10+
rm -rf .pytest_cache/
11+
rm -rf .mypy_cache/
12+
13+
code-check:
14+
flake8 $(PACKAGE)
15+
pylint $(PACKAGE) -j 0 -d 'C0103, R0913, R0902, R0914, C0302, R0904, R0801, E1101, C0330, E1136'
16+
mypy $(PACKAGE) --ignore-missing-imports
17+
18+
pypi: clean ## Send the package to pypi.
19+
pip install -U twine wheel
20+
python3 setup.py sdist bdist_wheel
21+
twine upload dist/*
22+
23+
install-complete:
24+
pip install -U -e .
25+
pip install -U -r requirements.txt
26+
pip install -U -r requirements-dev.txt
27+
pip install -U -r requirements-docs.txt
28+
29+
install:
30+
pip install .

NoiseFiltersPy/CNN.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
class CNN:
9-
def __init__(self, max_neighbours = 5):
9+
def __init__(self, max_neighbours: int = 5):
1010
self.max_neighbours = max_neighbours
1111
self.filter = Filter(parameters = {})
1212
self.clf = KNeighborsClassifier(n_neighbors = 1, n_jobs = -1)

NoiseFiltersPy/DROP.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from sklearn.model_selection import KFold
22
from sklearn.neighbors import KNeighborsClassifier
33
import numpy as np
4+
import typing as t
45

56
from NoiseFiltersPy.Filter import *
67

NoiseFiltersPy/ENN.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
from sklearn.model_selection import KFold
22
from sklearn.neighbors import KNeighborsClassifier
33
import numpy as np
4+
import typing as t
45

56
from NoiseFiltersPy.Filter import *
67

78
class ENN:
8-
def __init__(self, neighbours = 3):
9+
def __init__(self, neighbours: int = 3):
910
self.neighbours = neighbours
1011
self.filter = Filter(parameters = {"neighbours": self.neighbours})
1112

12-
def __call__(self, data, classes):
13+
def __call__(self, data: t.Sequence, classes: t.Sequence) -> Filter:
1314
self.isNoise = np.array([False] * len(classes))
1415
self.clf = KNeighborsClassifier(n_neighbors = self.neighbours, algorithm = 'kd_tree', n_jobs = -1)
1516
for indx in range(len(data)):

NoiseFiltersPy/Filter.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
import typing as t
2+
13
class Filter():
2-
def __init__(self, parameters):
4+
def __init__(self, parameters: t.Dict):
35
# Removed Indexes
4-
self.remIndx = []
5-
self.repIndx = []
6+
self.remIndx: t.List = []
67
self.parameters = parameters
7-
def set_cleanData(self, data, classes):
8+
9+
def set_cleanData(self, data: t.Sequence, classes: t.Sequence) -> None:
10+
# Helper function to set data without noise
811
self.cleanData = data
912
self.cleanClasses = classes

NoiseFiltersPy/HARF.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
from sklearn.model_selection import KFold
22
from sklearn.ensemble import RandomForestClassifier
33
import numpy as np
4+
import typing as t
45

56
from NoiseFiltersPy.Filter import *
67

78

89
class HARF:
9-
def __init__(self, nfolds = 10, agreementLevel = 0.7,
10-
ntrees = 500, seed = 0):
10+
def __init__(self, nfolds: int = 10, agreementLevel: float = 0.7,
11+
ntrees: int = 500, seed: int = 0):
1112
# Some data verification
1213
# Data can be a DataFrame or a Numpy Array
1314
if (agreementLevel < 0.5 or agreementLevel > 1):
@@ -22,7 +23,7 @@ def __init__(self, nfolds = 10, agreementLevel = 0.7,
2223
self.clf = RandomForestClassifier(n_estimators = ntrees, random_state = seed, n_jobs = -1)
2324
self.filter = Filter(parameters = {"nfolds": self.nfolds, "ntrees": self.ntrees, "agreementLevel": self.agreementLevel})
2425

25-
def __call__(self, data, classes):
26+
def __call__(self, data: t.Sequence, classes: t.Sequence) -> Filter:
2627
self.splits = self.k_fold.split(data)
2728
self.isNoise = np.array([False] * len(classes))
2829
for train_indx, test_indx in self.splits:

NoiseFiltersPy/TomekLinks.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import numpy as np
2+
import typing as t
23

34
from NoiseFiltersPy.Filter import *
45

5-
66
class TomekLinks:
77
def __init__(self):
88
self.filter = Filter(parameters = {})
99

10-
def __call__(self, data, classes):
10+
def __call__(self, data: t.Sequence, classes: t.Sequence) -> Filter:
1111
levels = list(set(classes))
1212
classes = np.array(classes)
1313
class1Indxes = np.argwhere(classes == levels[0])

examples/aenn_example.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
classes = dataset.target
77
aenn = AENN()
88
filter = aenn(data, classes)
9+
print(filter.cleanData)

examples/cnn_example.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from sklearn import datasets
2+
3+
import context
24
from NoiseFiltersPy.CNN import CNN
35

46
dataset = datasets.load_iris()

examples/context.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import os
2+
import sys
3+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

requirements-dev.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pytest
2+
pytest-cov
3+
pytest-xdist
4+
mypy
5+
flake8
6+
pylint

requirements-docs.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
sphinx
2+
sphinx_rtd_theme
3+
pillow
4+
seaborn
5+
sphinx-gallery
6+
numpydoc

requirements.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
numpy
2+
scipy
3+
scikit-learn

setup.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,31 @@
33
with open("README.md", "r") as fh:
44
long_description = fh.read()
55

6+
INSTALL_REQUIRES = []
7+
8+
EXTRAS_REQUIRE = {
9+
'code-check': [
10+
'pytest',
11+
'mypy',
12+
'flake8',
13+
'pylint'
14+
],
15+
'tests': [
16+
'pytest',
17+
'pytest-cov',
18+
'pytest-xdist',
19+
],
20+
'docs': [
21+
'sphinx',
22+
'sphinx-gallery',
23+
'sphinx_rtd_theme',
24+
'numpydoc',
25+
]
26+
}
27+
628
setuptools.setup(
729
name="NoiseFiltersPy",
8-
version="0.0.5",
30+
version="0.0.8",
931
author="Juliana Hosoume and Luis Faina",
1032
author_email="[email protected]",
1133
description="Python implementation of NoiseFiltersR",
@@ -18,5 +40,11 @@
1840
"License :: OSI Approved :: MIT License",
1941
"Operating System :: OS Independent",
2042
],
43+
install_requires=[
44+
'scikit-learn>=0.24',
45+
'numpy',
46+
'scipy',
47+
'pandas'
48+
],
2149
python_requires='>=3.5',
2250
)

0 commit comments

Comments
 (0)