Merge pull request #7 from joachimwolff/dev
Dev
joachimwolff authored Feb 24, 2020
2 parents 017a067 + 028ea19 commit 9ed5d0f
Showing 34 changed files with 381 additions and 217 deletions.
48 changes: 9 additions & 39 deletions setup.py
@@ -1,8 +1,10 @@
#! /usr/bin/python
# Copyright 2016 Joachim Wolff
# For license see file LICENSE
# Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
# PhD Thesis
#
# Copyright 2015, 2016 Joachim Wolff
# Master Thesis
# Tutors: Fabrizio Costa, Milad Miladi
# Tutor: Fabrizio Costa
# Winter semester 2015/2016
#
# Chair of Bioinformatics
@@ -26,11 +28,11 @@
import time
__author__ = "Joachim Wolff"
__contact__ = "[email protected]"
__copyright__ = "Copyright 2019, Joachim Wolff"
__copyright__ = "Copyright 2020, Joachim Wolff"
__credits__ = ["Milad Miladi", "Fabrizio Costa"]
__license__ = "MIT"
__date__ = time.strftime("%d/%m/%Y")
__version__ = "0.4"
__version__ = "0.5"

from setuptools import setup, find_packages
import platform
@@ -125,11 +127,6 @@ def locate_cuda():
CUDA = locate_cuda()


# Obtain the numpy include directory. This logic works across numpy versions.
# try:
# numpy_include = numpy.get_include()
# except AttributeError:
# numpy_include = numpy.get_numpy_include()
def customize_compiler_gcc(self):
"""inject deep into distutils to customize how the dispatch
to gcc/nvcc works.
@@ -151,13 +148,6 @@ def customize_compiler_gcc(self):
# object but distutils doesn't have the ability to change compilers
# based on source extension: we add it.
def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
# if os.path.splitext(src)[1] == '.cu':
# # use the cuda for .cu files
# self.set_executable('compiler_so', CUDA['nvcc'])
# # use only a subset of the extra_postargs, which are 1-1 translated
# # from the extra_compile_args in the Extension class
# postargs = extra_postargs['nvcc']
# else:
postargs = extra_postargs['gcc']

super(obj, src, ext, cc_args, postargs, pp_opts)
@@ -168,15 +158,6 @@ def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
self._compile = _compile


# run the customize_compiler
# class custom_build_ext_gcc(build_ext):
# def build_extensions(self):
# customize_compiler_for_gcc(self.compiler)
# build_ext.build_extensions(self)





def customize_compiler_for_nvcc(self):
"""inject deep into distutils to customize how the dispatch
@@ -245,11 +226,9 @@ def build_extensions(self):
"scipy >= 1.3.0",
"scikit-learn >= 0.21.0",],
ext_modules = [module1],
# cmdclass={'build_ext': custom_build_ext_gcc},
packages=['sparse_neighbors_search',
'sparse_neighbors_search.neighbors',
'sparse_neighbors_search.cluster',
# 'bioinf.computation',
],
platforms = "Linux",
version = __version__
@@ -258,9 +237,6 @@ def build_extensions(self):
print ("CUDA found on system. Installing MinHash with CUDA-Support.")
sources_list.extend(['sparse_neighbors_search/computation/kernel.cu', 'sparse_neighbors_search/computation/inverseIndexCuda.cu', 'sparse_neighbors_search/computation/nearestNeighborsCuda.cu'])
depends_list.extend(['sparse_neighbors_search/computation/typeDefinitionsCuda.h', 'sparse_neighbors_search/computation/kernel.h', 'sparse_neighbors_search/computation/inverseIndexCuda.h', 'sparse_neighbors_search/computation/nearestNeighborsCuda.h', ])
# Extension('_nearestNeighbors', sources = sources_list, depends = depends_list,
# define_macros=[('OPENMP', None)], extra_link_args = ["-lm", "-lrt","-lgomp"],
# extra_compile_args=["-fopenmp", "-O3", "-std=c++11"])
if openmp:
ext = Extension('_nearestNeighbors',
sources = sources_list, depends = depends_list,
@@ -272,12 +248,10 @@
# we're only going to use certain compiler args with nvcc and not with gcc
# the implementation of this trick is in customize_compiler() below
define_macros=[('OPENMP', None), ('CUDA', None)],
# extra_link_args={'gcc': ["-lm", "-lrt","-lgomp"],
# 'nvcc' :[] },
extra_link_args=["-lm", "-lrt","-lgomp"],
extra_compile_args={'gcc': ["-fopenmp", "-O3", "-std=c++11", "-funroll-loops", "-msse4.1"],
'nvcc': ['-arch=sm_60', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'", '-std=c++11' ]},
include_dirs = [CUDA['include'], 'src'],#, '/home/joachim/Software/cub-1.5.1'],
include_dirs = [CUDA['include'], 'src'],
platforms = "Linux, Mac OS X"
)
else:
@@ -291,17 +265,14 @@ def build_extensions(self):
# we're only going to use certain compiler args with nvcc and not with gcc
# the implementation of this trick is in customize_compiler() below
define_macros=[('CUDA', None)],
# extra_link_args={'gcc': ["-lm", "-lrt","-lgomp"],
# 'nvcc' :[] },
extra_link_args=["-lm", "-lrt","-lgomp"],
extra_compile_args={'gcc': ["-O3", "-std=c++11", "-msse4.1"],
'nvcc': ['-arch=sm_60', '--ptxas-options=-v', '-c', '--compiler-options', "'-fPIC'", '-std=c++11' ]},
include_dirs = [CUDA['include'], 'src'],#, '/home/joachim/Software/cub-1.5.1'],
include_dirs = [CUDA['include'], 'src'],
platforms = "Linux, Mac OS X"
)

setup(name='sparse_neighbors_search',
# random metadata. there's more you can supply
author='Joachim Wolff',
ext_modules = [ext],

@@ -319,7 +290,6 @@ def build_extensions(self):
"numpy >= 1.17.0",
"scipy >= 1.3.0",
"scikit-learn >= 0.21.0",],
# ext_modules = [module1],
packages=['sparse_neighbors_search',
'sparse_neighbors_search.neighbors',
'sparse_neighbors_search.cluster',
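The comments retained in the hunks above ("we're only going to use certain compiler args with nvcc and not with gcc ... the implementation of this trick is in customize_compiler() below") describe the nvcc dispatch trick: extra_compile_args is passed as a dict keyed by 'gcc' and 'nvcc', and a patched _compile picks the flag set per source extension. A minimal sketch of the pattern, with a simplified signature and body that are assumptions rather than the exact setup.py implementation:

import os

def customize_compiler_for_nvcc(compiler, nvcc_path):
    """Patch a distutils compiler object so .cu sources are built with nvcc."""
    compiler.src_extensions.append('.cu')
    default_compiler_so = compiler.compiler_so
    default_compile = compiler._compile

    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        if os.path.splitext(src)[1] == '.cu':
            # route .cu files through nvcc and use the 'nvcc' flag subset
            compiler.set_executable('compiler_so', nvcc_path)
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['gcc']
        default_compile(obj, src, ext, cc_args, postargs, pp_opts)
        # restore the default host compiler for the next source file
        compiler.compiler_so = default_compiler_so

    compiler._compile = _compile

The same mechanism explains the deleted customize_compiler_gcc variant above: with the CUDA branch commented out, only the 'gcc' entry of the dict was ever used.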
11 changes: 7 additions & 4 deletions sparse_neighbors_search/__init__.py
@@ -1,7 +1,10 @@
# Copyright 2015 Joachim Wolff
# Master Project
# Tutors: Milad Miladi, Fabrizio Costa
# Summer semester 2015
# Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
# PhD Thesis
#
# Copyright 2015, 2016 Joachim Wolff
# Master Thesis
# Tutor: Fabrizio Costa
# Winter semester 2015/2016
#
# Chair of Bioinformatics
# Department of Computer Science
38 changes: 28 additions & 10 deletions sparse_neighbors_search/cluster/minHashClustering.py
@@ -1,6 +1,9 @@
# Copyright 2015 Joachim Wolff
# Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
# PhD Thesis
#
# Copyright 2015, 2016 Joachim Wolff
# Master Thesis
# Tutors: Milad Miladi, Fabrizio Costa
# Tutor: Fabrizio Costa
# Winter semester 2015/2016
#
# Chair of Bioinformatics
@@ -10,20 +13,35 @@

from ..neighbors import MinHash
import numpy as np
from scipy.sparse import vstack

class MinHashClustering():
def __init__(self, minHashObject, clusteringObject):
self._minHashObject = minHashObject
self._clusteringObject = clusteringObject

def fit(self, X, y=None):
self._minHashObject.fit(X)
precomputed_graph = self._minHashObject.kneighbors_graph(mode='distance')
self._clusteringObject.fit(precomputed_graph)
self._precomputed_graph = None
def fit(self, X, y=None, pSaveMemory=None):
if pSaveMemory is not None and pSaveMemory > 0:
if pSaveMemory > 1:
pSaveMemory = 1
number_of_elements = X.shape[0]
batch_size = int(np.floor(number_of_elements * pSaveMemory))
if batch_size < 1:
batch_size = 1
self._minHashObject.fit(X[0:batch_size, :])
if batch_size < number_of_elements:
for i in range(batch_size, X.shape[0], batch_size):
self._minHashObject.partial_fit(X[i:i+batch_size, :])
else:
self._minHashObject.fit(X)
self._precomputed_graph = self._minHashObject.kneighbors_graph(mode='distance')
self._clusteringObject.fit(self._precomputed_graph)

def fit_predict(self, X, y=None):
self.fit(X, y)
return self.predict(X, y)
def fit_predict(self, X, y=None, pSaveMemory=None):
self.fit(X, y, pSaveMemory=pSaveMemory)
return self.predict(self._precomputed_graph, y)

def predict(self, X, y=None):
if hasattr(self._clusteringObject, 'labels_'):
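The pSaveMemory branch added to fit() above implements a simple memory-saving scheme: build the MinHash index on the first fraction of the rows, then stream the remaining rows through partial_fit in equally sized batches. A standalone sketch of the pattern (the helper name batched_fit is hypothetical; pSaveMemory is assumed to be in (0, 1]):

import numpy as np

def batched_fit(estimator, X, pSaveMemory):
    # Clamp the fraction to at most 1, as the diff does for pSaveMemory > 1.
    fraction = min(pSaveMemory, 1)
    # Guarantee at least one row per batch (the diff's batch_size < 1 guard).
    batch_size = max(int(np.floor(X.shape[0] * fraction)), 1)
    # Build the index on the first batch, then extend it incrementally.
    estimator.fit(X[0:batch_size, :])
    for i in range(batch_size, X.shape[0], batch_size):
        estimator.partial_fit(X[i:i + batch_size, :])

With a fraction of 1 the loop never runs and the scheme reduces to a single fit call, which is exactly what the else branch does.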
34 changes: 27 additions & 7 deletions sparse_neighbors_search/cluster/minHashDBSCAN.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Copyright 2015 Joachim Wolff
# Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
# PhD Thesis
#
# Copyright 2015, 2016 Joachim Wolff
# Master Thesis
# Tutor: Fabrizio Costa
# Winter semester 2015/2016
Expand Down Expand Up @@ -43,8 +46,9 @@ def __init__(self, eps=0.5, min_samples=5,
self._dbscan = DBSCAN(eps=self.eps, min_samples=min_samples, metric='precomputed',
algorithm=self.algorithm, leaf_size=self.leaf_size, p=self.p)
self.labels_ = None
self._precomputed_graph = None
# only for compatible issues
def fit(self, X, y=None):
def fit(self, X, y=None, pSaveMemory=None):
minHashNeighbors = MinHash(n_neighbors = self.n_neighbors,
radius = self.radius, fast = self.fast,
number_of_hash_functions = self.number_of_hash_functions,
@@ -54,10 +58,26 @@ def __init__(self, eps=0.5, min_samples=5,
excess_factor = self.excess_factor,
number_of_cores = self.number_of_cores,
chunk_size = self.chunk_size, similarity=False)
minHashNeighbors.fit(X, y)
graph_result = minHashNeighbors.kneighbors_graph(mode='distance')
self._dbscan.fit(graph_result)

if pSaveMemory is not None and pSaveMemory > 0:
if pSaveMemory > 1:
pSaveMemory = 1
number_of_elements = X.shape[0]
batch_size = int(np.floor(number_of_elements * pSaveMemory))
if batch_size < 1:
batch_size = 1
minHashNeighbors.fit(X[0:batch_size, :])
if batch_size < number_of_elements:
for i in range(batch_size, X.shape[0], batch_size):
minHashNeighbors.partial_fit(X[i:i+batch_size, :])
else:
minHashNeighbors.fit(X)


# minHashNeighbors.fit(X, y)
self._precomputed_graph = minHashNeighbors.kneighbors_graph(mode='distance')
self._dbscan.fit(self._precomputed_graph)
self.labels_ = self._dbscan.labels_
def fit_predict(self, X, y=None):
self.fit(X, y)
def fit_predict(self, X, y=None, pSaveMemory=None):
self.fit(X, y, pSaveMemory=pSaveMemory)
return self.labels_
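A hypothetical usage of the new parameter (the import path and class name are inferred from the module name and are assumptions; adjust to how the cluster package actually exposes the class), fitting the index in batches of roughly a quarter of the rows:

from scipy.sparse import random as sparse_random
from sparse_neighbors_search.cluster.minHashDBSCAN import MinHashDBSCAN  # path assumed

X = sparse_random(1000, 5000, density=0.01, format='csr')  # sparse example data
clusterer = MinHashDBSCAN(eps=0.5, min_samples=5)
clusterer.fit(X, pSaveMemory=0.25)  # one fit on 250 rows, then three partial_fit batches
print(clusterer.labels_)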
27 changes: 22 additions & 5 deletions sparse_neighbors_search/cluster/minHashSpectralClustering.py
@@ -1,6 +1,9 @@
# Copyright 2015 Joachim Wolff
# Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
# PhD Thesis
#
# Copyright 2015, 2016 Joachim Wolff
# Master Thesis
# Tutors: Milad Miladi, Fabrizio Costa
# Tutor: Fabrizio Costa
# Winter semester 2015/2016
#
# Chair of Bioinformatics
@@ -58,7 +61,7 @@ def __init__(self, n_clusters=8, eigen_solver=None,
kernel_params = self.kernel_params)
# kept only for compatibility
self.labels_ = None
def fit(self, X, y=None):
def fit(self, X, y=None, pSaveMemory=None):
minHashNeighbors = MinHash(n_neighbors = self.n_neighbors,
radius = self.radius, fast = self.fast,
number_of_hash_functions = self.number_of_hash_functions,
@@ -68,11 +71,25 @@ def fit(self, X, y=None):
excess_factor = self.excess_factor,
number_of_cores = self.number_of_cores,
chunk_size = self.chunk_size, similarity=True)
minHashNeighbors.fit(X, y)

if pSaveMemory is not None and pSaveMemory > 0:
if pSaveMemory > 1:
pSaveMemory = 1
number_of_elements = X.shape[0]
batch_size = int(np.floor(number_of_elements * pSaveMemory))
if batch_size < 1:
batch_size = 1
minHashNeighbors.fit(X[0:batch_size, :])
if batch_size < number_of_elements:
for i in range(batch_size, X.shape[0], batch_size):
minHashNeighbors.partial_fit(X[i:i+batch_size, :])
else:
minHashNeighbors.fit(X)

graph_result = minHashNeighbors.kneighbors_graph(mode='distance')
self._spectralClustering.fit(graph_result)
self.labels_ = self._spectralClustering.labels_
def fit_predict(self, X, y=None):
def fit_predict(self, X, y=None, pSaveMemory=None):
self.fit(X, y, pSaveMemory=pSaveMemory)

return self._spectralClustering.labels_
15 changes: 15 additions & 0 deletions sparse_neighbors_search/computation/hash.h
@@ -1,3 +1,18 @@
/**
Copyright 2016, 2017, 2018, 2019, 2020 Joachim Wolff
PhD Thesis
Copyright 2015, 2016 Joachim Wolff
Master Thesis
Tutor: Fabrizio Costa
Winter semester 2015/2016
Chair of Bioinformatics
Department of Computer Science
Faculty of Engineering
Albert-Ludwigs-University Freiburg im Breisgau
**/

#include "typeDefinitions.h"
#include "sseExtension.h"

