Skip to content

Commit d1a2f71

Browse files
committed
feat: add sortable keys for record linkage
1 parent 0d7acaa commit d1a2f71

File tree

7 files changed

+682
-0
lines changed

7 files changed

+682
-0
lines changed

go.mod

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/adamdecaf/merge v0.1.1
1111
github.com/antchfx/htmlquery v1.3.4
1212
github.com/bbalet/stopwords v1.0.0
13+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076
1314
github.com/dongri/phonenumber v0.1.12
1415
github.com/gorilla/mux v1.8.1
1516
github.com/hashicorp/go-retryablehttp v0.7.8
@@ -19,6 +20,7 @@ require (
1920
github.com/moov-io/iso3166 v0.3.0
2021
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
2122
github.com/pariz/gountries v0.1.6
23+
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72
2224
github.com/stretchr/testify v1.11.1
2325
github.com/urfave/cli/v3 v3.5.0
2426
github.com/vmihailenco/msgpack/v5 v5.4.1
@@ -28,6 +30,7 @@ require (
2830
go.uber.org/automaxprocs v1.6.0
2931
golang.org/x/sync v0.17.0
3032
golang.org/x/text v0.30.0
33+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65
3134
)
3235

3336
require (
@@ -55,6 +58,8 @@ require (
5558
github.com/cespare/xxhash/v2 v2.3.0 // indirect
5659
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
5760
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
61+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 // indirect
62+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2 // indirect
5863
github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
5964
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
6065
github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -64,6 +69,9 @@ require (
6469
github.com/fyne-io/glfw-js v0.3.0 // indirect
6570
github.com/fyne-io/image v0.1.1 // indirect
6671
github.com/fyne-io/oksvg v0.2.0 // indirect
72+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5 // indirect
73+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c // indirect
74+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7 // indirect
6775
github.com/go-gl/gl v0.0.0-20231021071112-07e5d0ea2e71 // indirect
6876
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20240506104042-037f3cc74f2a // indirect
6977
github.com/go-jose/go-jose/v4 v4.1.2 // indirect

go.sum

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -710,6 +710,12 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
710710
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
711711
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
712712
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
713+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM=
714+
github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
715+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076 h1:EB7M2v8Svo3kvIDy+P1YDE22XskDQP+TEYGzeDwPAN4=
716+
github.com/dgryski/go-minhash v0.0.0-20190315135803-ad340ca03076/go.mod h1:VBi0XHpFy0xiMySf6YpVbRqrupW4RprJ5QTyN+XvGSM=
717+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2 h1:lx1ZQgST/imDhmLpYDma1O3Cx9L+4Ie4E8S2RjFPQ30=
718+
github.com/dgryski/go-spooky v0.0.0-20170606183049-ed3d087f40e2/go.mod h1:hgHYKsoIw7S/hlWtP7wD1wZ7SX1jPTtKko5X9jrOgPQ=
713719
github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
714720
github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
715721
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
@@ -770,6 +776,12 @@ github.com/fyne-io/image v0.1.1/go.mod h1:xrfYBh6yspc+KjkgdZU/ifUC9sPA5Iv7WYUBzQ
770776
github.com/fyne-io/oksvg v0.2.0 h1:mxcGU2dx6nwjJsSA9PCYZDuoAcsZ/OuJlvg/Q9Njfo8=
771777
github.com/fyne-io/oksvg v0.2.0/go.mod h1:dJ9oEkPiWhnTFNCmRgEze+YNprJF7YRbpjgpWS4kzoI=
772778
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
779+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5 h1:4U+x+EB1P66zwYgTjxWXSOT8vF+651Ksr1lojiCZnT8=
780+
github.com/go-dedup/megophone v0.0.0-20170830025436-f01be21026f5/go.mod h1:poR/Cp00iqtqu9ltFwl6C00sKC0HY13u/Gh05ZBmP54=
781+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c h1:mucYYQn+sMGNSxidhleonzAdwL203RxhjJGnxQU4NWU=
782+
github.com/go-dedup/simhash v0.0.0-20170904020510-9ecaca7b509c/go.mod h1:gO3u2bjRAgUaLdQd2XK+3oooxrheOAx1BzS7WmPzw1s=
783+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7 h1:11wFcswN+37U+ByjxdKzsRY5KzNqqq5Uk5ztxnLOc7w=
784+
github.com/go-dedup/text v0.0.0-20170907015346-8bb1b95e3cb7/go.mod h1:wSsK4VOECOSfSYTzkBFw+iGY7wj59e7X96ABtNj9aCQ=
773785
github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
774786
github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
775787
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
@@ -1114,6 +1126,7 @@ github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDc
11141126
github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik=
11151127
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw=
11161128
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U=
1129+
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
11171130
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
11181131
github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4=
11191132
github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
@@ -1893,6 +1906,8 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8
18931906
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
18941907
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
18951908
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
1909+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65 h1:TJ8gu/i0KT5Sc0rsphBYjf3yU5BKZcG5DPcP2syQqQ8=
1910+
gopkg.in/go-dedup/simhash.v1 v1.0.0-20170701025421-ab6ea107ab65/go.mod h1:BHrmqRyqVLqZ7iHTOShMqvNcbExK9Cfyyc+tVN547+4=
18961911
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
18971912
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
18981913
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

internal/linksim/helpers_test.go

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
package linksim
2+
3+
import (
4+
"github.com/moov-io/watchman/pkg/search"
5+
)
6+
7+
// Package-level fixtures declared in the linksim test helpers. Each variable
// holds the result of calling Normalize() on a search.Entity[search.Value]
// literal; the initializers run during package initialization.
// NOTE(review): presumably these two near-identical people are meant to be
// compared by the record-linkage tests — confirm against the tests that use them.
var (
8+
// john is a person entity named "John Smith" with Source search.SourceUSOFAC,
// one US passport government ID, one email address, and a single address
// that sets Line1 but no Line2.
// NOTE(review): the email literal reads "[email protected]", which looks like a
// placeholder left by the page capture rather than a real address — confirm
// the actual value against the repository.
john = (search.Entity[search.Value]{
9+
Name: "John Smith",
10+
Type: search.EntityPerson,
11+
Source: search.SourceUSOFAC,
12+
Person: &search.Person{
13+
Name: "John Smith",
14+
GovernmentIDs: []search.GovernmentID{
15+
{
16+
Type: search.GovernmentIDPassport,
17+
Country: "US",
18+
Identifier: "1234567890",
19+
},
20+
},
21+
},
22+
Contact: search.ContactInfo{
23+
EmailAddresses: []string{"[email protected]"},
24+
},
25+
Addresses: []search.Address{
26+
{
27+
Line1: "541 First St",
28+
City: "Anytown",
29+
State: "CA",
30+
PostalCode: "90210",
31+
Country: "US",
32+
},
33+
},
34+
}).Normalize()
35+
36+
// johnathon mirrors john field for field, except that both Name values are
// "Johnathon Smith" and the address additionally sets Line2 to "Apt 301".
// The passport identifier, and the email literal as shown here, are the same
// as john's.
// NOTE(review): the email literal reads "[email protected]", which looks like a
// placeholder left by the page capture — confirm the actual value against
// the repository.
johnathon = (search.Entity[search.Value]{
37+
Name: "Johnathon Smith",
38+
Type: search.EntityPerson,
39+
Source: search.SourceUSOFAC,
40+
Person: &search.Person{
41+
Name: "Johnathon Smith",
42+
GovernmentIDs: []search.GovernmentID{
43+
{
44+
Type: search.GovernmentIDPassport,
45+
Country: "US",
46+
Identifier: "1234567890",
47+
},
48+
},
49+
},
50+
Contact: search.ContactInfo{
51+
EmailAddresses: []string{"[email protected]"},
52+
},
53+
Addresses: []search.Address{
54+
{
55+
Line1: "541 First St",
56+
Line2: "Apt 301",
57+
City: "Anytown",
58+
State: "CA",
59+
PostalCode: "90210",
60+
Country: "US",
61+
},
62+
},
63+
}).Normalize()
64+
)

0 commit comments

Comments
 (0)