Skip to content

Commit cf5edc7

Browse files
authored
Merge pull request #9 from EvanYathon/master
Added `get_all_distances` tests and stub
2 parents 4ac85cb + 9c7a03e commit cf5edc7

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed

pydistrrr/get_all_distances.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""
2+
Created on 2019-02-08
3+
4+
@author: Evan Yathon
5+
6+
Implementation of get_all_distances function in the pydistrrr package.
7+
"""
8+
9+
def get_all_distances(point, data, dist="euclidean"):
    """
    Return a distance/similarity metric for each row in a dataframe.

    Compares an input reference vector to all rows of an input data frame,
    calculating the specified distance/similarity metric for each row.

    Parameters
    ----------
    point : list
        list of length k to compare to each row of the data frame

    data : pandas dataframe
        dataframe of size n by k to compare to point

    dist : string
        metric to calculate, one of "euclidean", "manhattan" or "cosine".
        "euclidean" and "manhattan" are distances; "cosine" is the cosine
        similarity between point and the row (not 1 - similarity).

    Returns
    -------
    list
        numeric vector of length n containing the metric for each row of
        data. For "cosine", a row (or point) with zero norm yields nan
        because the similarity is undefined.

    Raises
    ------
    TypeError
        if point is not a list, data is not a pandas dataframe, or dist is
        not a string
    ValueError
        if dist is not a supported metric, or the length of point differs
        from the number of columns of data
    """
    # Imported inside the function so the block does not rely on any
    # module-level imports being present.
    import math

    import pandas as pd

    if not isinstance(point, list):
        raise TypeError("point must be a list")
    if not isinstance(data, pd.DataFrame):
        raise TypeError("data must be a pandas dataframe")
    if not isinstance(dist, str):
        raise TypeError("dist must be a string")
    if dist not in ("euclidean", "manhattan", "cosine"):
        raise ValueError(
            "dist must be one of 'euclidean', 'manhattan' or 'cosine'"
        )
    if len(point) != data.shape[1]:
        raise ValueError(
            "point must have one entry per column of data"
        )

    # Plain Python rows keep the arithmetic below independent of numpy types.
    rows = data.values.tolist()

    if dist == "euclidean":
        return [
            math.sqrt(sum((p - x) ** 2 for p, x in zip(point, row)))
            for row in rows
        ]

    if dist == "manhattan":
        return [
            sum(abs(p - x) for p, x in zip(point, row))
            for row in rows
        ]

    # Cosine similarity: dot(point, row) / (|point| * |row|).
    point_norm = math.sqrt(sum(p * p for p in point))
    distances = []
    for row in rows:
        denominator = point_norm * math.sqrt(sum(x * x for x in row))
        dot_product = sum(p * x for p, x in zip(point, row))
        # A zero-length vector has no direction, so the similarity is undefined.
        distances.append(dot_product / denominator if denominator else math.nan)
    return distances
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""
2+
Created on 2019-02-08
3+
4+
@author: Evan Yathon
5+
6+
This script tests the get_all_distances function of the pydistrrr package.
7+
8+
get_all_distances compares an input reference vector to all rows of an input data
9+
frame, calculating the specified distance/similarity metric for each row.
10+
"""
11+
import pytest
12+
import pandas as pd
13+
import sys
14+
from pydistrrr.get_all_distances import get_all_distances
15+
16+
# Shared fixtures for the tests below: a reference vector of length 2 and a
# sample dataframe with 3 rows and 2 columns ("A" and "B").
ref_vec = [-2, 4]
df = pd.DataFrame(
    {
        "A": [1, 2, 3],
        "B": [8, 2, 4],
    }
)
19+
20+
def test_pandas_loaded():
    """
    Check that the pandas package has been imported
    """
    loaded_modules = sys.modules
    assert "pandas" in loaded_modules
25+
26+
def test_output_length():
    """
    The returned vector must have one entry per row of the
    input dataframe
    """
    result = get_all_distances(ref_vec, df)
    n_rows = df.shape[0]
    assert len(result) == n_rows
32+
33+
def test_output_type():
    """
    The returned object must be exactly of type list
    """
    result = get_all_distances(ref_vec, df)
    assert type(result) is list
38+
39+
def test_euclidean():
    """
    Test that the euclidean output works correctly
    """
    distances = get_all_distances(ref_vec, df, dist="euclidean")
    # round() does not accept a list, so each distance is rounded individually
    assert [round(d, 2) for d in distances] == [5, 4.47, 5]
44+
45+
def test_cosine():
    """
    Test that the cosine output works correctly
    """
    similarities = get_all_distances(ref_vec, df, dist="cosine")
    # round() does not accept a list, so each value is rounded individually
    assert [round(s, 2) for s in similarities] == [0.83, 0.32, 0.45]
50+
51+
def test_manhattan():
    """
    Test that the manhattan output works correctly
    """
    distances = get_all_distances(ref_vec, df, dist="manhattan")
    # round() does not accept a list, so each distance is rounded individually
    assert [round(d, 2) for d in distances] == [7, 6, 5]
56+
57+
def test_second_arg_df():
    """
    Test that if the second argument isn't a data frame, an exception should be thrown
    """
    # pytest.raises(Exception) accepts any ordinary error but, unlike a bare
    # except, does not treat KeyboardInterrupt/SystemExit as a pass
    with pytest.raises(Exception):
        get_all_distances(ref_vec, [1, 2, 3])
67+
68+
def test_second_arg_list():
    """
    Test that if the first argument (the point) isn't a list, an exception should be thrown

    Here a data frame is passed as the point.
    """
    # pytest.raises(Exception) accepts any ordinary error but, unlike a bare
    # except, does not treat KeyboardInterrupt/SystemExit as a pass
    with pytest.raises(Exception):
        get_all_distances(df, df)
78+
79+
def test_point_correct_length():
    """
    Point vector should be length k, the number of columns of the input dataframe

    A point of length 4 is compared against the 2-column dataframe and must be rejected.
    """
    # pytest.raises(Exception) accepts any ordinary error but, unlike a bare
    # except, does not treat KeyboardInterrupt/SystemExit as a pass
    with pytest.raises(Exception):
        get_all_distances([1, 2, 3, 4], df)
89+
90+
def test_metric_input():
    """
    dist should be a string and one of 'cosine', 'euclidean' or 'manhattan'
    """
    # The unsupported metric is passed through `dist`; an unknown keyword
    # argument would raise TypeError on its own and never reach the metric
    # validation this test is meant to exercise.
    with pytest.raises(Exception):
        get_all_distances(ref_vec, df, dist="cityblock")

0 commit comments

Comments
 (0)