1
CREATE OR REPLACE FUNCTION gds.vector.distance(list<double> list1, list<double> list2, string metric) RETURNS(float) {

  /*
    First Author: Jue Yuan
    First Commit Date: Nov 27, 2024

    Recent Author: Jue Yuan
    Recent Commit Date: Nov 27, 2024

    Maturity:
      alpha

    Description:
      Calculates the distance between two vectors represented as lists of double values,
      based on a specified distance metric. Several metrics are supported so the same
      function can serve both similarity and dissimilarity measurements.

    Parameters:
      list<double> list1:
        The first vector as a list of double values.
      list<double> list2:
        The second vector as a list of double values. Must have the same size as list1.
      string metric:
        The distance metric to use (matched case-insensitively). Supported metrics are:
          "cosine": Cosine distance
          "l2": Euclidean distance
          "ip": Inner product (dot product)

    Returns:
      float:
        The computed distance between the two input vectors based on the specified metric.

    Exceptions:
      list_size_mismatch (90000):
        Raised when the input vectors are not of equal size.
      zero_divisor (90001):
        Raised for the "cosine" metric when the magnitude of either vector is below 1e-7
        (for example, when all of its elements are zero), to avoid dividing by zero.
      invalid_metric_type (90002):
        Raised when an unsupported distance metric is provided.

    Logic Overview:
      Input Validation:
        Ensures both vectors have the same size.
      Metric Handling:
        Cosine Distance:
          Calculated as 1 - (inner product of vectors) / (product of magnitudes).
        L2 Distance:
          Computes the square root of the sum of squared differences between corresponding elements.
          The sum is accumulated in double precision and only narrowed to float on return.
        Inner Product:
          Directly computes the dot product of the two vectors.

      Error Handling:
        Raises an exception if the provided metric is invalid.

    Use Case:
      Distance or similarity calculations between vector representations (such as embeddings
      or feature vectors) in machine learning, data science, and information retrieval workloads.
  */

  EXCEPTION list_size_mismatch (90000);
  EXCEPTION zero_divisor (90001);
  EXCEPTION invalid_metric_type (90002);

  // Copy the list parameters into accumulators so size(), get() and inner_product() can be used on them.
  ListAccum<double> @@myList1 = list1;
  ListAccum<double> @@myList2 = list2;

  IF (@@myList1.size() != @@myList2.size()) THEN
    RAISE list_size_mismatch ("Two lists provided for gds.vector.distance have different sizes.");
  END;

  SumAccum<float> @@myResult;
  // Accumulate squared differences in double precision: the inputs are doubles, and a
  // single-precision running sum both loses accuracy over many dimensions and can overflow
  // even when the final square root still fits in the float return type.
  SumAccum<double> @@sqrSum;

  CASE lower(metric)
    WHEN "cosine" THEN
      double inner_p = inner_product(@@myList1, @@myList2);
      double v1_magn = sqrt(inner_product(@@myList1, @@myList1));
      double v2_magn = sqrt(inner_product(@@myList2, @@myList2));
      // Magnitudes are non-negative square roots; compare against a small positive
      // threshold instead of exact zero to avoid numeric comparison error.
      IF (v1_magn < 0.0000001) THEN
        RAISE zero_divisor ("The elements in the first list are all zero. It will introduce a zero divisor.");
      END;
      IF (v2_magn < 0.0000001) THEN
        RAISE zero_divisor ("The elements in the second list are all zero. It will introduce a zero divisor.");
      END;
      @@myResult = 1 - inner_p / (v1_magn * v2_magn);
    WHEN "l2" THEN
      // Skip the loop for empty vectors so the result (0) does not rely on how
      // RANGE behaves when its end bound (-1) is below its start bound.
      IF (@@myList1.size() > 0) THEN
        FOREACH i IN RANGE [0, @@myList1.size() - 1] DO
          @@sqrSum += (@@myList1.get(i) - @@myList2.get(i)) * (@@myList1.get(i) - @@myList2.get(i));
        END;
      END;
      @@myResult = sqrt(@@sqrSum);
    WHEN "ip" THEN
      @@myResult = inner_product(@@myList1, @@myList2);
    ELSE
      RAISE invalid_metric_type ("Invalid metric algorithm provided, currently supported: cosine, l2 and ip.");
  END;

  RETURN @@myResult;
}
0 commit comments