Skip to content

Commit 351b95b

Browse files
authored
Add Damerau-Levenshtein Distance (#460)
1 parent 9eb2196 commit 351b95b

File tree

3 files changed

+221
-0
lines changed

3 files changed

+221
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
using Algorithms.Strings.Similarity;
2+
using NUnit.Framework;
3+
4+
namespace Algorithms.Tests.Strings.Similarity;
5+
6+
/// <summary>
/// Unit tests for <see cref="DamerauLevenshteinDistance.Calculate"/>.
/// Each test feeds a fixed pair of strings and checks the returned edit distance.
/// </summary>
[TestFixture]
public class DamerauLevenshteinDistanceTests
{
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero()
    {
        const string left = "test";
        const string right = "test";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(0), "Identical strings should have a Damerau-Levenshtein distance of 0.");
    }

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString()
    {
        // No character is shared, so every position has to be substituted.
        const string left = "abc";
        const string right = "xyz";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(3), "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");
    }

    [Test]
    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString()
    {
        const string left = "test";
        const string right = "";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(4), "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");
    }

    [Test]
    public void Calculate_BothEmptyStrings_ReturnsZero()
    {
        const string left = "";
        const string right = "";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(0), "Both empty strings should have a Damerau-Levenshtein distance of 0.");
    }

    [Test]
    public void Calculate_DifferentLengths_ReturnsCorrectValue()
    {
        const string left = "short";
        const string right = "longer";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(6), "Strings of different lengths should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue()
    {
        // Only the trailing punctuation mark differs.
        const string left = "hello!";
        const string right = "hello?";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(1), "Strings with special characters should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_DifferentCases_ReturnsCorrectValue()
    {
        // 'H' and 'h' are different characters, so one substitution is expected.
        const string left = "Hello";
        const string right = "hello";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(1), "Strings with different cases should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_CommonPrefixes_ReturnsCorrectValue()
    {
        const string left = "prefix";
        const string right = "pre";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(3), "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_CommonSuffixes_ReturnsCorrectValue()
    {
        const string left = "suffix";
        const string right = "fix";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(3), "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_Transpositions_ReturnsCorrectValue()
    {
        // "bc" and "cb" are one adjacent swap apart.
        const string left = "abcd";
        const string right = "acbd";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(1), "Strings with transpositions should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_RepeatedCharacters_ReturnsCorrectValue()
    {
        const string left = "aaa";
        const string right = "aaaaa";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(2), "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");
    }

    [Test]
    public void Calculate_UnicodeCharacters_ReturnsCorrectValue()
    {
        // The third and fourth characters differ; the rest are identical.
        const string left = "こんにちは";
        const string right = "こんばんは";

        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(distance, Is.EqualTo(2), "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
using System;
2+
3+
namespace Algorithms.Strings.Similarity;
4+
5+
/// <summary>
/// Computes the Damerau-Levenshtein edit distance between two strings using the
/// restricted (optimal string alignment) recurrence, in which only two adjacent
/// characters may be transposed.
/// </summary>
public static class DamerauLevenshteinDistance
{
    /// <summary>
    /// Calculates the Damerau-Levenshtein distance between two strings.
    /// The distance is the minimum number of operations needed to transform one string into the other,
    /// where the possible operations are insertion, deletion, substitution, and transposition of
    /// two adjacent characters.
    /// </summary>
    /// <remarks>
    /// Characters are compared one <see cref="char"/> at a time with <c>==</c>, so the comparison is
    /// case-sensitive. A transposition is only applied on top of <c>distances[i - 2, j - 2]</c>, which
    /// means a transposed pair is never edited again; for example "CA" and "ABC" are at distance 3
    /// under this recurrence. The method fills a matrix of
    /// <c>(left.Length + 1) x (right.Length + 1)</c> integers.
    /// </remarks>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>The Damerau-Levenshtein distance between the two strings.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static int Calculate(string left, string right)
    {
        // Fail fast with a descriptive exception instead of a NullReferenceException further down.
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        var leftSize = left.Length;
        var rightSize = right.Length;

        // distances[i, j] holds the distance between the first i characters of left
        // and the first j characters of right.
        var distances = InitializeDistanceArray(leftSize, rightSize);

        for (var i = 1; i <= leftSize; i++)
        {
            for (var j = 1; j <= rightSize; j++)
            {
                // Matching characters cost nothing; differing characters cost one substitution.
                var cost = left[i - 1] == right[j - 1] ? 0 : 1;

                var deletion = distances[i - 1, j] + 1; // drop left[i - 1]
                var insertion = distances[i, j - 1] + 1; // add right[j - 1]
                var substitution = distances[i - 1, j - 1] + cost; // replace (or keep) left[i - 1]

                var best = Math.Min(Math.Min(deletion, insertion), substitution);

                // The last two characters of both prefixes are the same pair in swapped order,
                // so swapping them is a candidate as well.
                if (i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1])
                {
                    // A swap is always a single edit. When the swapped characters are identical
                    // the substitution path above is already at least as cheap, so charging 1
                    // here never changes the result.
                    best = Math.Min(best, distances[i - 2, j - 2] + 1);
                }

                distances[i, j] = best;
            }
        }

        return distances[leftSize, rightSize];
    }

    /// <summary>
    /// Creates the distance matrix used by <see cref="Calculate"/> and fills in its borders.
    /// The matrix has one more row and one more column than the respective string lengths.
    /// Row 0 holds the distances from an empty left prefix (<c>matrix[0, j] = j</c>), and
    /// column 0 holds the distances to an empty right prefix (<c>matrix[i, 0] = i</c>).
    /// All other cells are left at 0 for the caller to fill in.
    /// </summary>
    /// <param name="leftSize">The size of the left string.</param>
    /// <param name="rightSize">The size of the right string.</param>
    /// <returns>A matrix of distances with its first row and first column initialized.</returns>
    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
    {
        var matrix = new int[leftSize + 1, rightSize + 1];

        // First column: the right prefix is empty, so the first i characters of left
        // must all be deleted.
        for (var i = 1; i <= leftSize; i++)
        {
            matrix[i, 0] = i;
        }

        // First row: the left prefix is empty, so the first j characters of right
        // must all be inserted.
        for (var j = 1; j <= rightSize; j++)
        {
            matrix[0, j] = j;
        }

        return matrix;
    }
}

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ find more than one implementation for the same objective but using different alg
179179
* [String](./Algorithms/Strings)
180180
* [Similarity](./Algorithms/Strings/Similarity/)
181181
* [Cosine Similarity](./Algorithms/Strings/Similarity/CosineSimilarity.cs)
182+
* [Damerau-Levenshtein Distance](./Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs)
182183
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
183184
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
184185
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)

0 commit comments

Comments
 (0)