Skip to content

Commit fb40819

Browse files
committed
Add Levenshtein distance in C as part of String algorithms
1 parent 98d20f9 commit fb40819

File tree

8 files changed

+112
-18
lines changed

8 files changed

+112
-18
lines changed

History.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
=== Jan 3, 2009
2+
3+
* Levenshtein distance in C
4+
15
=== April 3, 2009
26

37
* Finished C refactorization of SplayTree

Manifest

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
1+
History.txt
2+
Manifest
3+
README.markdown
4+
Rakefile
15
algorithms.gemspec
26
benchmarks/deque.rb
37
benchmarks/sorts.rb
48
benchmarks/treemaps.rb
9+
ext/algorithms/string/extconf.rb
10+
ext/algorithms/string/string.c
511
ext/containers/deque/deque.c
612
ext/containers/deque/extconf.rb
713
ext/containers/rbtree_map/extconf.rb
814
ext/containers/rbtree_map/rbtree.c
915
ext/containers/splaytree_map/extconf.rb
1016
ext/containers/splaytree_map/splaytree.c
11-
History.txt
17+
lib/algorithms.rb
1218
lib/algorithms/search.rb
1319
lib/algorithms/sort.rb
14-
lib/algorithms.rb
20+
lib/algorithms/string.rb
1521
lib/containers/deque.rb
1622
lib/containers/heap.rb
1723
lib/containers/kd_tree.rb
@@ -22,20 +28,20 @@ lib/containers/splay_tree_map.rb
2228
lib/containers/stack.rb
2329
lib/containers/suffix_array.rb
2430
lib/containers/trie.rb
25-
Manifest
26-
Rakefile
27-
README.markdown
2831
spec/deque_gc_mark_spec.rb
2932
spec/deque_spec.rb
3033
spec/heap_spec.rb
34+
spec/kd_expected_out.txt
35+
spec/kd_test_in.txt
3136
spec/kd_tree_spec.rb
37+
spec/map_gc_mark_spec.rb
3238
spec/priority_queue_spec.rb
3339
spec/queue_spec.rb
34-
spec/rb_tree_map_gc_mark_spec.rb
3540
spec/rb_tree_map_spec.rb
3641
spec/search_spec.rb
3742
spec/sort_spec.rb
3843
spec/splay_tree_map_spec.rb
3944
spec/stack_spec.rb
45+
spec/string_spec.rb
4046
spec/suffix_array_spec.rb
4147
spec/trie_spec.rb

Rakefile

-6
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,6 @@ Echoe.new('algorithms') do |p|
1010
p.runtime_dependencies = []
1111
end
1212

13-
task :default => :spec
14-
15-
task :spec do
16-
sh "spec spec/*.rb --color"
17-
end
18-
1913
task :push do
2014
sh "git push" # Rubyforge
2115
sh "git push --tags" # Rubyforge

ext/algorithms/string/extconf.rb

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Build configuration for the CString native extension.
# Running this script with mkmf generates the Makefile used to compile it.
require 'mkmf'

ext_name = "CString"

# Register the standard --with-CString-dir style options, then emit the Makefile.
dir_config(ext_name)
create_makefile(ext_name)

ext/algorithms/string/string.c

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include "ruby.h"
#include <limits.h>
2+
3+
/* Return the smallest of the three integers a, b and c. */
int min(int a, int b, int c) {
  int smallest = (b < a) ? b : a;
  return (c < smallest) ? c : smallest;
}
11+
12+
/*
 * Compute the Levenshtein (edit) distance between two Ruby strings.
 *
 * The comparison is byte-wise: the result is the minimum number of
 * single-byte insertions, deletions and substitutions that turn str1
 * into str2.
 *
 * str1, str2 - Ruby objects; each is converted with StringValue, so a
 *              TypeError is raised for objects that are not string-like.
 *
 * Returns the distance as an int.
 * Raises ArgumentError when either string is too long for the distance
 * and the row buffer size to be represented in an int, and NoMemoryError
 * (via ALLOC_N) when the row buffer cannot be allocated.
 */
int levenshtein_distance(VALUE str1, VALUE str2) {
  const char *s, *t;
  long s_len, t_len, i, j;
  int *buf, *prev, *curr, *swap;
  int result;

  /* Reject non-strings instead of dereferencing an arbitrary object. */
  StringValue(str1);
  StringValue(str2);
  s_len = RSTRING_LEN(str1);
  t_len = RSTRING_LEN(str2);

  /* Keep every distance, and 2 * (s_len + 1), within the range of int. */
  if (s_len > INT_MAX / 2 - 1 || t_len > INT_MAX / 2 - 1) {
    rb_raise(rb_eArgError, "string too long to compute Levenshtein distance");
  }

  if (s_len == 0) {
    return (int)t_len;
  } else if (t_len == 0) {
    return (int)s_len;
  }

  /*
   * Only the previous and the current row of the distance matrix are
   * ever read, so allocate two rows of (s_len + 1) cells in one buffer.
   * ALLOC_N raises NoMemoryError on failure rather than returning NULL.
   */
  buf = ALLOC_N(int, 2 * (s_len + 1));
  prev = buf;
  curr = buf + (s_len + 1);

  /*
   * Fetch the byte pointers only after the allocation: no Ruby API is
   * called from here until the buffer is freed, so they stay valid.
   */
  s = RSTRING_PTR(str1);
  t = RSTRING_PTR(str2);

  /* Row 0: turning the first i bytes of s into "" costs i deletions. */
  for (i = 0; i <= s_len; i++) {
    prev[i] = (int)i;
  }

  for (j = 1; j <= t_len; j++) {
    /* Column 0: turning "" into the first j bytes of t costs j insertions. */
    curr[0] = (int)j;
    for (i = 1; i <= s_len; i++) {
      if (s[i-1] == t[j-1]) {
        curr[i] = prev[i-1];
      } else {
        curr[i] = min(
          curr[i-1],
          prev[i],
          prev[i-1]
        ) + 1;
      }
    }
    /* The row just filled becomes the previous row of the next pass. */
    swap = prev;
    prev = curr;
    curr = swap;
  }

  result = prev[s_len];
  xfree(buf);
  return result;
}
55+
56+
/*
 * Ruby binding for levenshtein_distance.
 *
 * self       - receiver of the singleton method (unused).
 * str1, str2 - the two strings to compare.
 *
 * Returns the distance as a Ruby Integer. INT2NUM is used instead of
 * INT2FIX because INT2FIX performs no range check on its argument.
 */
static VALUE lev_dist(VALUE self, VALUE str1, VALUE str2) {
  return INT2NUM(levenshtein_distance( str1, str2 ));
}
59+
60+
static VALUE mAlgorithms;
61+
static VALUE mString;
62+
63+
void Init_CString() {
64+
mAlgorithms = rb_define_module("Algorithms");
65+
mString = rb_define_module_under(mAlgorithms, "String");
66+
rb_define_singleton_method(mString, "levenshtein_dist", lev_dist, 2);
67+
}
68+

lib/algorithms.rb

+3-6
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,16 @@
4343
- Shell sort - Algorithms::Sort.shell_sort
4444
- Quicksort - Algorithms::Sort.quicksort
4545
- Mergesort - Algorithms::Sort.mergesort
46+
* String algorithms
47+
- Levenshtein distance - Algorithms::String.levenshtein_dist
4648
=end
4749

4850
module Algorithms; end
4951
module Containers; end
5052

51-
begin
52-
require 'CBst'
53-
Containers::Bst = Containers::CBst
54-
rescue LoadError # C Version could not be found
55-
end
56-
5753
require 'algorithms/search'
5854
require 'algorithms/sort'
55+
require 'algorithms/string'
5956
require 'containers/heap'
6057
require 'containers/stack'
6158
require 'containers/deque'

lib/algorithms/string.rb

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
=begin rdoc
2+
This module implements string algorithms. Documentation is provided for each algorithm.
3+
4+
=end
5+
6+
# Load the native extension that defines Algorithms::String.
# A LoadError is not rescued here, so it propagates to the caller.
require 'CString'

spec/string_spec.rb

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
$: << File.join(File.expand_path(File.dirname(__FILE__)), '..', 'lib')
2+
require 'algorithms'
3+
4+
describe "string algorithms" do
  it "should do levenshtein distance" do
    # Each row is [first string, second string, expected distance].
    cases = [
      ["Hello", "Hel",   2],
      ["Hello", "",      5],
      ["",      "Hello", 5],
      ["Hello", "Hello", 0],
      ["Hello", "ello",  1],
      ["Hello", "Mello", 1]
    ]

    cases.each do |first, second, expected|
      Algorithms::String.levenshtein_dist(first, second).should eql(expected)
    end
  end
end

0 commit comments

Comments
 (0)