|
| 1 | +using System; |
| 2 | + |
| 3 | +namespace Algorithms.Strings.Similarity; |
| 4 | + |
/// <summary>
/// Computes the Damerau-Levenshtein edit distance between two strings, in its
/// restricted form (also known as the optimal string alignment distance).
/// </summary>
public static class DamerauLevenshteinDistance
{
    /// <summary>
    /// Calculates the minimum number of single-character insertions, deletions,
    /// substitutions, and transpositions of two adjacent characters needed to
    /// transform <paramref name="left"/> into <paramref name="right"/>.
    /// </summary>
    /// <remarks>
    /// This is the restricted (optimal string alignment) variant: a transposed pair
    /// is never edited again, so for example "ca" to "abc" yields 3 rather than 2.
    /// Characters are compared as individual <see cref="char"/> values, and the
    /// method allocates a (left.Length + 1) by (right.Length + 1) matrix.
    /// </remarks>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>The edit distance between the two strings; 0 when they are equal.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static int Calculate(string left, string right)
    {
        // Fail with a descriptive exception instead of a NullReferenceException.
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        var leftSize = left.Length;
        var rightSize = right.Length;

        // distances[i, j] holds the distance between the first i characters of
        // left and the first j characters of right.
        var distances = InitializeDistanceArray(leftSize, rightSize);

        for (var i = 1; i <= leftSize; i++)
        {
            for (var j = 1; j <= rightSize; j++)
            {
                // Matching characters can be kept for free; otherwise a substitution costs 1.
                var cost = left[i - 1] == right[j - 1] ? 0 : 1;

                var deletion = distances[i - 1, j] + 1;            // drop left[i - 1]
                var insertion = distances[i, j - 1] + 1;           // add right[j - 1]
                var substitution = distances[i - 1, j - 1] + cost; // keep or replace left[i - 1]

                var best = Math.Min(Math.Min(deletion, insertion), substitution);

                // The last two characters of both prefixes are the same pair in swapped
                // order, so a single adjacent transposition is also a candidate.
                if (i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1])
                {
                    // A transposition always costs 1. When all four characters are equal
                    // the substitution path is already at least as cheap, so this can
                    // never produce a wrong (too small) value.
                    best = Math.Min(best, distances[i - 2, j - 2] + 1);
                }

                distances[i, j] = best;
            }
        }

        return distances[leftSize, rightSize];
    }

    /// <summary>
    /// Creates the distance matrix with its base cases filled in.
    /// The matrix has one more row than the left string has characters and one
    /// more column than the right string has characters. Column 0 holds the cost
    /// of turning each left prefix into the empty string (i deletions), and row 0
    /// holds the cost of building each right prefix from the empty string
    /// (j insertions). All other cells start at 0 and are filled by the caller.
    /// </summary>
    /// <param name="leftSize">The length of the left string.</param>
    /// <param name="rightSize">The length of the right string.</param>
    /// <returns>A matrix of size (leftSize + 1) by (rightSize + 1).</returns>
    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
    {
        var matrix = new int[leftSize + 1, rightSize + 1];

        // First column: the right string is empty, so delete all i left characters.
        for (var i = 1; i <= leftSize; i++)
        {
            matrix[i, 0] = i;
        }

        // First row: the left string is empty, so insert all j right characters.
        for (var j = 1; j <= rightSize; j++)
        {
            matrix[0, j] = j;
        }

        return matrix;
    }
}
0 commit comments