diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..c82809e --- /dev/null +++ b/AUTHORS @@ -0,0 +1,10 @@ +# Below is a list of people and organizations that have contributed +# to the Diff Match Patch project. + +Google Inc. + +Duncan Cross (Lua port) +Jan Weiß (Objective C port) +Matthaeus G. Chajdas (C# port) +Mike Slemmer (C++ port) + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ae319c7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..4ccbe63 --- /dev/null +++ b/README.txt @@ -0,0 +1,43 @@ +Diff, Match and Patch Library +https://github.com/google/diff-match-patch +Neil Fraser + +This library is currently available in eight different ports, all using the same API. +Every version includes a full set of unit tests. + +C++: +* Ported by Mike Slemmer. +* Currently requires the Qt library. + +C#: +* Ported by Matthaeus G. Chajdas. + +Dart: +* The Dart language is still growing and evolving, so this port is only as + stable as the underlying language. + +Java: +* Included is both the source and a Maven package. + +JavaScript: +* diff_match_patch_uncompressed.js is the human-readable version.
+ Users of node.js should 'require' this uncompressed version since the + compressed version is not guaranteed to work outside of a web browser. +* diff_match_patch.js has been compressed using Google's internal JavaScript compressor. + Non-Google hackers who wish to recompress the source can use: + http://dean.edwards.name/packer/ + +Lua: +* Ported by Duncan Cross. +* Does not support line-mode speedup. + +Objective C: +* Ported by Jan Weiss. +* Includes speed test (this is a separate bundle for other languages). + +Python: +* Two versions, one for Python 2.x, the other for Python 3.x. +* Runs 10x faster under PyPy than CPython. + +Demos: +* Separate demos for Diff, Match and Patch in JavaScript. diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp new file mode 100644 index 0000000..4574f25 --- /dev/null +++ b/cpp/diff_match_patch.cpp @@ -0,0 +1,2105 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <algorithm> +#include <limits> +// Code known to compile and run with Qt 4.3 through Qt 4.7. +#include <QtCore> +#include <time.h> +#include "diff_match_patch.h" + + +////////////////////////// +// +// Diff Class +// +////////////////////////// + + +/** + * Constructor. Initializes the diff with the provided values.
+ * @param operation One of INSERT, DELETE or EQUAL + * @param text The text being applied + */ +Diff::Diff(Operation _operation, const QString &_text) : + operation(_operation), text(_text) { + // Construct a diff with the specified operation and text. +} + +/** + * Default constructor. Creates an EQUAL diff with empty (null) text so that + * `operation` never holds an indeterminate value. + */ +Diff::Diff() : + operation(EQUAL) { +} + + +/** + * Name of an operation, as used by toString(). + * @param op One of INSERT, DELETE or EQUAL + * @return "INSERT", "DELETE" or "EQUAL" + * @throws const char* if op is not one of the three operations + */ +QString Diff::strOperation(Operation op) { + switch (op) { + case INSERT: + return "INSERT"; + case DELETE: + return "DELETE"; + case EQUAL: + return "EQUAL"; + } + throw "Invalid operation."; +} + +/** + * Display a human-readable version of this Diff. + * @return text version + */ +QString Diff::toString() const { + QString prettyText = text; + // Replace linebreaks with Pilcrow signs. + prettyText.replace('\n', L'\u00b6'); + return QString("Diff(") + strOperation(operation) + QString(",\"") + + prettyText + QString("\")"); +} + +/** + * Is this Diff equivalent to another Diff? + * @param d Another Diff to compare against + * @return true or false + */ +bool Diff::operator==(const Diff &d) const { + return (d.operation == this->operation) && (d.text == this->text); +} + +/** + * Is this Diff different from another Diff? + * @param d Another Diff to compare against + * @return Negation of operator== + */ +bool Diff::operator!=(const Diff &d) const { + return !(operator == (d)); +} + + +///////////////////////////////////////////// +// +// Patch Class +// +///////////////////////////////////////////// + + +/** + * Constructor. Initializes with an empty list of diffs. + */ +Patch::Patch() : + start1(0), start2(0), + length1(0), length2(0) { +} + +/** + * Is this Patch still in its default-constructed state? + * @return true if all four coordinates are 0 and there are no diffs + */ +bool Patch::isNull() const { + if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 + && diffs.size() == 0) { + return true; + } + return false; +} + + +/** + * Emulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indices are printed as 1-based, not 0-based.
+ * @return The GNU diff string + */ +QString Patch::toString() { + QString coords1, coords2; + if (length1 == 0) { + coords1 = QString::number(start1) + QString(",0"); + } else if (length1 == 1) { + coords1 = QString::number(start1 + 1); + } else { + coords1 = QString::number(start1 + 1) + QString(",") + + QString::number(length1); + } + if (length2 == 0) { + coords2 = QString::number(start2) + QString(",0"); + } else if (length2 == 1) { + coords2 = QString::number(start2 + 1); + } else { + coords2 = QString::number(start2 + 1) + QString(",") + + QString::number(length2); + } + QString text; + text = QString("@@ -") + coords1 + QString(" +") + coords2 + + QString(" @@\n"); + // Escape the body of the patch with %xx notation. + foreach (Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: + text += QString('+'); + break; + case DELETE: + text += QString('-'); + break; + case EQUAL: + text += QString(' '); + break; + } + text += QString(QUrl::toPercentEncoding(aDiff.text, " !~*'();/?:@&=+$,#")) + + QString("\n"); + } + + return text; +} + + +///////////////////////////////////////////// +// +// diff_match_patch Class +// +///////////////////////////////////////////// + +/** + * Constructor. Sets every tuning parameter to its default value. + */ +diff_match_patch::diff_match_patch() : + Diff_Timeout(1.0f), + Diff_EditCost(4), + Match_Threshold(0.5f), + Match_Distance(1000), + Patch_DeleteThreshold(0.5f), + Patch_Margin(4), + Match_MaxBits(32) { +} + + +/** + * Find the differences between two texts, with the line-mode speedup + * requested (checklines = true). + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2) { + return diff_main(text1, text2, true); +} + +/** + * Find the differences between two texts, deriving the deadline from + * Diff_Timeout (a value <= 0 means no deadline). + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param checklines If true, request the line-level pre-pass + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2, bool checklines) { + // Set a deadline by which time the diff must be complete.
+ clock_t deadline; + if (Diff_Timeout <= 0) { + deadline = std::numeric_limits<clock_t>::max(); + } else { + deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); + } + return diff_main(text1, text2, checklines, deadline); +} + +/** + * Find the differences between two texts. Strips any common prefix and + * suffix, diffs the remaining middle block, then re-attaches them. + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param checklines Passed through to diff_compute + * @param deadline clock() value by which the diff should be complete + * @return List of Diff objects + * @throws const char* if either text is a null QString + */ +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2, bool checklines, clock_t deadline) { + // Check for null inputs. + if (text1.isNull() || text2.isNull()) { + throw "Null inputs. (diff_main)"; + } + + // Check for equality (speedup). + QList<Diff> diffs; + if (text1 == text2) { + if (!text1.isEmpty()) { + diffs.append(Diff(EQUAL, text1)); + } + return diffs; + } + + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix(text1, text2); + const QString &commonprefix = text1.left(commonlength); + QString textChopped1 = text1.mid(commonlength); + QString textChopped2 = text2.mid(commonlength); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(textChopped1, textChopped2); + const QString &commonsuffix = textChopped1.right(commonlength); + textChopped1 = textChopped1.left(textChopped1.length() - commonlength); + textChopped2 = textChopped2.left(textChopped2.length() - commonlength); + + // Compute the diff on the middle block. + diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); + + // Restore the prefix and suffix. + if (!commonprefix.isEmpty()) { + diffs.prepend(Diff(EQUAL, commonprefix)); + } + if (!commonsuffix.isEmpty()) { + diffs.append(Diff(EQUAL, commonsuffix)); + } + + diff_cleanupMerge(diffs); + + return diffs; +} + + +/** + * Diff the middle block of two texts. Called by diff_main after the common + * prefix and suffix have been removed. + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param checklines If true, texts that are both longer than 100 characters + * are first diffed line by line + * @param deadline clock() value by which the diff should be complete + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_compute(QString text1, QString text2, + bool checklines, clock_t deadline) { + QList<Diff> diffs; + + if (text1.isEmpty()) { + // Just add some text (speedup). + diffs.append(Diff(INSERT, text2)); + return diffs; + } + + if (text2.isEmpty()) { + // Just delete some text (speedup). + diffs.append(Diff(DELETE, text1)); + return diffs; + } + + { + const QString longtext = text1.length() > text2.length() ?
text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + const int i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; + diffs.append(Diff(op, longtext.left(i))); + diffs.append(Diff(EQUAL, shorttext)); + diffs.append(Diff(op, safeMid(longtext, i + shorttext.length()))); + return diffs; + } + + if (shorttext.length() == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.append(Diff(DELETE, text1)); + diffs.append(Diff(INSERT, text2)); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const QStringList hm = diff_halfMatch(text1, text2); + if (hm.count() > 0) { + // A half-match was found, sort out the return data. + const QString text1_a = hm[0]; + const QString text1_b = hm[1]; + const QString text2_a = hm[2]; + const QString text2_b = hm[3]; + const QString mid_common = hm[4]; + // Send both pairs off for separate processing. + const QList<Diff> diffs_a = diff_main(text1_a, text2_a, + checklines, deadline); + const QList<Diff> diffs_b = diff_main(text1_b, text2_b, + checklines, deadline); + // Merge the results. + diffs = diffs_a; + diffs.append(Diff(EQUAL, mid_common)); + diffs += diffs_b; + return diffs; + } + + // Perform a real diff. + if (checklines && text1.length() > 100 && text2.length() > 100) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); +} + + +/** + * Do a quick line-level diff on both texts, then rediff the replaced blocks + * character by character. + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param deadline clock() value by which the diff should be complete + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_lineMode(QString text1, QString text2, + clock_t deadline) { + // Scan the text on a line-by-line basis first.
+ const QList<QVariant> b = diff_linesToChars(text1, text2); + text1 = b[0].toString(); + text2 = b[1].toString(); + QStringList linearray = b[2].toStringList(); + + QList<Diff> diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.append(Diff(EQUAL, "")); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + + QMutableListIterator<Diff> pointer(diffs); + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while (thisDiff != NULL) { + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + pointer.previous(); + for (int j = 0; j < count_delete + count_insert; j++) { + pointer.previous(); + pointer.remove(); + } + foreach(Diff newDiff, + diff_main(text_delete, text_insert, false, deadline)) { + pointer.insert(newDiff); + } + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + break; + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + diffs.removeLast(); // Remove the dummy entry at the end. + + return diffs; +} + + +/** + * Walk a front path and a reverse path through the edit graph towards each + * other; where they overlap, split the problem in two with diff_bisectSplit. + * Falls back to a plain delete + insert if the deadline passes or the paths + * never overlap. + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param deadline clock() value at which to bail out + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_bisect(const QString &text1, + const QString &text2, clock_t deadline) { + // Cache the text lengths to prevent multiple calls.
+ const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int max_d = (text1_length + text2_length + 1) / 2; + const int v_offset = max_d; + const int v_length = 2 * max_d; + // v1/v2 hold the x reached on each diagonal k, stored at index + // v_offset + k; -1 marks a diagonal that has not been reached yet. + int *v1 = new int[v_length]; + int *v2 = new int[v_length]; + for (int x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + // Seed diagonal k = 1 with x = 0: the first step (d = 0, k = 0) reads + // v[v_offset + 1] as its starting x. + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + const int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + const bool front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for (int d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (clock() > deadline) { + break; + } + + // Walk the front path one step. + for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + const int k1_offset = v_offset + k1; + int x1; + if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int y1 = x1 - k1; + // Follow the diagonal for as long as the two texts keep matching. + while (x1 < text1_length && y1 < text2_length + && text1[x1] == text2[y1]) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + int k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step.
+ for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + const int k2_offset = v_offset + k2; + int x2; + if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + int y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length + && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + int k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + int x1 = v1[k1_offset]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected. + delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + } + delete [] v1; + delete [] v2; + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + QList<Diff> diffs; + diffs.append(Diff(DELETE, text1)); + diffs.append(Diff(INSERT, text2)); + return diffs; +} + +/** + * Given the location of the split point, diff the two halves separately and + * concatenate the results. + * @param text1 Old string to be diffed + * @param text2 New string to be diffed + * @param x Index of split point in text1 + * @param y Index of split point in text2 + * @param deadline clock() value by which the diff should be complete + * @return List of Diff objects + */ +QList<Diff> diff_match_patch::diff_bisectSplit(const QString &text1, + const QString &text2, int x, int y, clock_t deadline) { + QString text1a = text1.left(x); + QString text2a = text2.left(y); + QString text1b = safeMid(text1, x); + QString text2b = safeMid(text2, y); + + // Compute both diffs serially. + QList<Diff> diffs = diff_main(text1a, text2a, false, deadline); + QList<Diff> diffsb = diff_main(text1b, text2b, false, deadline); + + return diffs + diffsb; +} + +/** + * Encode both texts so that every character stands for one distinct line. + * @param text1 First string + * @param text2 Second string + * @return Three QVariants: encoded text1, encoded text2 and the QStringList + * of unique lines + */ +QList<QVariant> diff_match_patch::diff_linesToChars(const QString &text1, + const QString &text2) { + QStringList lineArray; + QMap<QString, int> lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g.
linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.append(""); + + const QString chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); + const QString chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); + + QList<QVariant> listRet; + listRet.append(QVariant::fromValue(chars1)); + listRet.append(QVariant::fromValue(chars2)); + listRet.append(QVariant::fromValue(lineArray)); + return listRet; +} + + +/** + * Encode one text as a string in which each character is the index of a + * line in lineArray. New lines are appended to lineArray and recorded in + * lineHash as they are met. + * NOTE(review): the index is narrowed to a 16-bit QChar, so this presumably + * relies on there being at most 65535 distinct lines - confirm. + * @param text String to encode + * @param lineArray List of unique lines (extended in place) + * @param lineHash Map from line to its index in lineArray (extended in place) + * @return Encoded string + */ +QString diff_match_patch::diff_linesToCharsMunge(const QString &text, + QStringList &lineArray, + QMap<QString, int> &lineHash) { + int lineStart = 0; + int lineEnd = -1; + QString line; + QString chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while (lineEnd < text.length() - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = safeMid(text, lineStart, lineEnd + 1 - lineStart); + lineStart = lineEnd + 1; + + if (lineHash.contains(line)) { + chars += QChar(static_cast<ushort>(lineHash.value(line))); + } else { + lineArray.append(line); + lineHash.insert(line, lineArray.size() - 1); + chars += QChar(static_cast<ushort>(lineArray.size() - 1)); + } + } + return chars; +} + + + +/** + * Replace each character of every diff's text with the line it indexes in + * lineArray, rewriting the diffs in place. + * @param diffs List of Diff objects whose text is line-encoded + * @param lineArray List of unique lines + */ +void diff_match_patch::diff_charsToLines(QList<Diff> &diffs, + const QStringList &lineArray) { + // Qt has no mutable foreach construct.
+ QMutableListIterator<Diff> i(diffs); + while (i.hasNext()) { + Diff &diff = i.next(); + QString text; + for (int y = 0; y < diff.text.length(); y++) { + text += lineArray.value(static_cast<int>(diff.text[y].unicode())); + } + diff.text = text; + } +} + + +/** + * Determine the common prefix of two strings. + * @param text1 First string + * @param text2 Second string + * @return The number of characters common to the start of each string + */ +int diff_match_patch::diff_commonPrefix(const QString &text1, + const QString &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int n = std::min(text1.length(), text2.length()); + for (int i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; + } + } + return n; +} + + +/** + * Determine the common suffix of two strings. + * @param text1 First string + * @param text2 Second string + * @return The number of characters common to the end of each string + */ +int diff_match_patch::diff_commonSuffix(const QString &text1, + const QString &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int n = std::min(text1_length, text2_length); + for (int i = 1; i <= n; i++) { + if (text1[text1_length - i] != text2[text2_length - i]) { + return i - 1; + } + } + return n; +} + +/** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string + * @param text2 Second string + * @return The number of characters common to the end of the first string + * and the start of the second string + */ +int diff_match_patch::diff_commonOverlap(const QString &text1, + const QString &text2) { + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + QString text1_trunc = text1; + QString text2_trunc = text2; + if (text1_length > text2_length) { + text1_trunc = text1.right(text2_length); + } else if (text1_length < text2_length) { + text2_trunc = text2.left(text1_length); + } + const int text_length = std::min(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found.
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while (true) { + QString pattern = text1_trunc.right(length); + int found = text2_trunc.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length)) { + best = length; + length++; + } + } +} + +/** + * Look for a substring shared by both texts that is at least half the + * length of the longer text. + * @param text1 First string + * @param text2 Second string + * @return Five-element list holding the prefix of text1, the suffix of + * text1, the prefix of text2, the suffix of text2 and the common middle, + * or an empty list if there was no match (or Diff_Timeout <= 0) + */ +QStringList diff_match_patch::diff_halfMatch(const QString &text1, + const QString &text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return QStringList(); + } + const QString longtext = text1.length() > text2.length() ? text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return QStringList(); // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const QStringList hm1 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 3) / 4); + // Check again based on the third quarter. + const QStringList hm2 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 1) / 2); + QStringList hm; + if (hm1.isEmpty() && hm2.isEmpty()) { + return QStringList(); + } else if (hm2.isEmpty()) { + hm = hm1; + } else if (hm1.isEmpty()) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. + if (text1.length() > text2.length()) { + return hm; + } else { + // hm is ordered (long_a, long_b, short_a, short_b, common) and text1 is + // the shorter text here, so swap the pairs to put text1's halves first. + QStringList listRet; + listRet << hm[2] << hm[3] << hm[0] << hm[1] << hm[4]; + return listRet; + } +} + + +QStringList diff_match_patch::diff_halfMatchI(const QString &longtext, + const QString &shorttext, + int i) { + // Start with a 1/4 length substring at position i as a seed.
+ const QString seed = safeMid(longtext, i, longtext.length() / 4); + int j = -1; + QString best_common; + QString best_longtext_a, best_longtext_b; + QString best_shorttext_a, best_shorttext_b; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + const int prefixLength = diff_commonPrefix(safeMid(longtext, i), + safeMid(shorttext, j)); + const int suffixLength = diff_commonSuffix(longtext.left(i), + shorttext.left(j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = safeMid(shorttext, j - suffixLength, suffixLength) + + safeMid(shorttext, j, prefixLength); + best_longtext_a = longtext.left(i - suffixLength); + best_longtext_b = safeMid(longtext, i + prefixLength); + best_shorttext_a = shorttext.left(j - suffixLength); + best_shorttext_b = safeMid(shorttext, j + prefixLength); + } + } + if (best_common.length() * 2 >= longtext.length()) { + QStringList listRet; + listRet << best_longtext_a << best_longtext_b << best_shorttext_a + << best_shorttext_b << best_common; + return listRet; + } else { + return QStringList(); + } +} + + +void diff_match_patch::diff_cleanupSemantic(QList &diffs) { + if (diffs.isEmpty()) { + return; + } + bool changes = false; + QStack equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator pointer(diffs); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while (thisDiff != NULL) { + if (thisDiff->operation == EQUAL) { + // Equality found. + equalities.push(*thisDiff); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff->text; + } else { + // An insertion or deletion. 
+ if (thisDiff->operation == INSERT) { + length_insertions2 += thisDiff->text.length(); + } else { + length_deletions2 += thisDiff->text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (!lastequality.isNull() + && (lastequality.length() + <= std::max(length_insertions1, length_deletions1)) + && (lastequality.length() + <= std::max(length_insertions2, length_deletions2))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.top()) { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue(Diff(DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.insert(Diff(INSERT, lastequality)); + + equalities.pop(); // Throw away the equality we just deleted. + if (!equalities.isEmpty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if (equalities.isEmpty()) { + // There are no previous equalities, walk back to the start. + while (pointer.hasPrevious()) { + pointer.previous(); + } + } else { + // There is a safe equality we can fall back to. + thisDiff = &equalities.top(); + while (*thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = QString(); + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. 
+ pointer.toFront(); + Diff *prevDiff = NULL; + thisDiff = NULL; + if (pointer.hasNext()) { + prevDiff = &pointer.next(); + if (pointer.hasNext()) { + thisDiff = &pointer.next(); + } + } + while (thisDiff != NULL) { + if (prevDiff->operation == DELETE && + thisDiff->operation == INSERT) { + QString deletion = prevDiff->text; + QString insertion = thisDiff->text; + int overlap_length1 = diff_commonOverlap(deletion, insertion); + int overlap_length2 = diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. Insert an equality and trim the surrounding edits. + pointer.previous(); + pointer.insert(Diff(EQUAL, insertion.left(overlap_length1))); + prevDiff->text = + deletion.left(deletion.length() - overlap_length1); + thisDiff->text = safeMid(insertion, overlap_length1); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer.previous(); + pointer.insert(Diff(EQUAL, deletion.left(overlap_length2))); + prevDiff->operation = INSERT; + prevDiff->text = + insertion.left(insertion.length() - overlap_length2); + thisDiff->operation = DELETE; + thisDiff->text = safeMid(deletion, overlap_length2); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + prevDiff = thisDiff; + thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; + } +} + + +void diff_match_patch::diff_cleanupSemanticLossless(QList &diffs) { + QString equality1, edit, equality2; + QString commonString; + int commonOffset; + int score, bestScore; + QString bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. + QMutableListIterator pointer(diffs); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. + equality1 = prevDiff->text; + edit = thisDiff->text; + equality2 = nextDiff->text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset != 0) { + commonString = safeMid(edit, edit.length() - commonOffset); + equality1 = equality1.left(equality1.length() - commonOffset); + edit = commonString + edit.left(edit.length() - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, looking for the best fit. + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + bestScore = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + while (!edit.isEmpty() && !equality2.isEmpty() + && edit[0] == equality2[0]) { + equality1 += edit[0]; + edit = safeMid(edit, 1) + equality2[0]; + equality2 = safeMid(equality2, 1); + score = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. 
+ if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (prevDiff->text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (!bestEquality1.isEmpty()) { + prevDiff->text = bestEquality1; + } else { + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + pointer.next(); // Walk past nextDiff. + } + thisDiff->text = bestEdit; + if (!bestEquality2.isEmpty()) { + nextDiff->text = bestEquality2; + } else { + pointer.remove(); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } +} + + +int diff_match_patch::diff_cleanupSemanticScore(const QString &one, + const QString &two) { + if (one.isEmpty() || two.isEmpty()) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ QChar char1 = one[one.length() - 1]; + QChar char2 = two[0]; + bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); + bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); + bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); + bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); + bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; + bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; + bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn(one) != -1; + bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn(two) != -1; + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; +} + + +// Define some regex patterns for matching boundaries. +QRegExp diff_match_patch::BLANKLINEEND = QRegExp("\\n\\r?\\n$"); +QRegExp diff_match_patch::BLANKLINESTART = QRegExp("^\\r?\\n\\r?\\n"); + + +void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { + if (diffs.isEmpty()) { + return; + } + bool changes = false; + QStack equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator pointer(diffs); + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + + Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; + Diff *safeDiff = thisDiff; + + while (thisDiff != NULL) { + if (thisDiff->operation == EQUAL) { + // Equality found. + if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { + // Candidate found. + equalities.push(*thisDiff); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff->text; + } else { + // Not a candidate, and can never become one. + equalities.clear(); + lastequality = QString(); + safeDiff = thisDiff; + } + post_ins = post_del = false; + } else { + // An insertion or deletion. + if (thisDiff->operation == DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if (!lastequality.isNull() + && ((pre_ins && pre_del && post_ins && post_del) + || ((lastequality.length() < Diff_EditCost / 2) + && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.top()) { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue(Diff(DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.insert(Diff(INSERT, lastequality)); + thisDiff = &pointer.previous(); + pointer.next(); + + equalities.pop(); // Throw away the equality we just deleted. + lastequality = QString(); + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } else { + if (!equalities.isEmpty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if (equalities.isEmpty()) { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } else { + // There is an equality we can fall back to. 
+ thisDiff = &equalities.top(); + } + while (*thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + post_ins = post_del = false; + } + + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + + if (changes) { + diff_cleanupMerge(diffs); + } +} + + +void diff_match_patch::diff_cleanupMerge(QList &diffs) { + diffs.append(Diff(EQUAL, "")); // Add a dummy entry at the end. + QMutableListIterator pointer(diffs); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *prevEqual = NULL; + int commonlength; + while (thisDiff != NULL) { + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + prevEqual = NULL; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + prevEqual = NULL; + break; + case EQUAL: + if (count_delete + count_insert > 1) { + bool both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + pointer.previous(); // Reverse direction. + while (count_delete-- > 0) { + pointer.previous(); + pointer.remove(); + } + while (count_insert-- > 0) { + pointer.previous(); + pointer.remove(); + } + if (both_types) { + // Factor out any common prefixies. + commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if (pointer.hasPrevious()) { + thisDiff = &pointer.previous(); + if (thisDiff->operation != EQUAL) { + throw "Previous diff should have been an equality."; + } + thisDiff->text += text_insert.left(commonlength); + pointer.next(); + } else { + pointer.insert(Diff(EQUAL, text_insert.left(commonlength))); + } + text_insert = safeMid(text_insert, commonlength); + text_delete = safeMid(text_delete, commonlength); + } + // Factor out any common suffixies. 
+ commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + thisDiff = &pointer.next(); + thisDiff->text = safeMid(text_insert, text_insert.length() + - commonlength) + thisDiff->text; + text_insert = text_insert.left(text_insert.length() + - commonlength); + text_delete = text_delete.left(text_delete.length() + - commonlength); + pointer.previous(); + } + } + // Insert the merged records. + if (!text_delete.isEmpty()) { + pointer.insert(Diff(DELETE, text_delete)); + } + if (!text_insert.isEmpty()) { + pointer.insert(Diff(INSERT, text_insert)); + } + // Step forward to the equality. + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + + } else if (prevEqual != NULL) { + // Merge this equality with the previous one. + prevEqual->text += thisDiff->text; + pointer.remove(); + thisDiff = &pointer.previous(); + pointer.next(); // Forward direction + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + prevEqual = thisDiff; + break; + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + if (diffs.back().text.isEmpty()) { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + /* + * Second pass: look for single edits surrounded on both sides by equalities + * which can be shifted sideways to eliminate an equality. + * e.g: ABAC -> ABAC + */ + bool changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer.toFront(); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. + if (thisDiff->text.endsWith(prevDiff->text)) { + // Shift the edit over the previous equality. 
+ thisDiff->text = prevDiff->text + + thisDiff->text.left(thisDiff->text.length() + - prevDiff->text.length()); + nextDiff->text = prevDiff->text + nextDiff->text; + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + thisDiff = &pointer.next(); // Walk past nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } else if (thisDiff->text.startsWith(nextDiff->text)) { + // Shift the edit over the next equality. + prevDiff->text += nextDiff->text; + thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) + + nextDiff->text; + pointer.remove(); // Delete nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } +} + + +int diff_match_patch::diff_xIndex(const QList &diffs, int loc) { + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff.operation == DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. 
+ return last_chars2 + (loc - last_chars1); +} + + +QString diff_match_patch::diff_prettyHtml(const QList &diffs) { + QString html; + QString text; + foreach(Diff aDiff, diffs) { + text = aDiff.text; + text.replace("&", "&").replace("<", "<") + .replace(">", ">").replace("\n", "¶
"); + switch (aDiff.operation) { + case INSERT: + html += QString("") + text + + QString(""); + break; + case DELETE: + html += QString("") + text + + QString(""); + break; + case EQUAL: + html += QString("") + text + QString(""); + break; + } + } + return html; +} + + +QString diff_match_patch::diff_text1(const QList &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != INSERT) { + text += aDiff.text; + } + } + return text; +} + + +QString diff_match_patch::diff_text2(const QList &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != DELETE) { + text += aDiff.text; + } + } + return text; +} + + +int diff_match_patch::diff_levenshtein(const QList &diffs) { + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + foreach(Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max(insertions, deletions); + return levenshtein; +} + + +QString diff_match_patch::diff_toDelta(const QList &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: { + QString encoded = QString(QUrl::toPercentEncoding(aDiff.text, + " !~*'();/?:@&=+$,#")); + text += QString("+") + encoded + QString("\t"); + break; + } + case DELETE: + text += QString("-") + QString::number(aDiff.text.length()) + + QString("\t"); + break; + case EQUAL: + text += QString("=") + QString::number(aDiff.text.length()) + + QString("\t"); + break; + } + } + if (!text.isEmpty()) { + // Strip off trailing tab character. 
+ text = text.left(text.length() - 1); + } + return text; +} + +
/**
 * Rebuild a list of diffs from the source text and a delta string.
 * The delta is a tab-separated list of tokens: "+text" inserts the
 * percent-decoded text, "-n" deletes and "=n" keeps the next n characters
 * of text1.
 * @param text1 Source string for the diff.
 * @param delta Delta text.
 * @return List of Diff objects.
 * @throws QString If a token has an unknown operation, a count is not a
 *     valid non-negative integer, or the delta does not consume exactly
 *     text1.length() characters.
 */
QList<Diff> diff_match_patch::diff_fromDelta(const QString &text1,
                                             const QString &delta) {
  QList<Diff> diffs;
  int pointer = 0;  // Cursor in text1
  const QStringList tokens = delta.split("\t");
  foreach(const QString &token, tokens) {
    if (token.isEmpty()) {
      // Blank tokens are ok (from a trailing \t).
      continue;
    }
    // Each token begins with a one character parameter which specifies the
    // operation of this token (delete, insert, equality).
    QString param = safeMid(token, 1);
    // toLatin1() replaces the deprecated QChar::toAscii().
    switch (token[0].toLatin1()) {
      case '+':
        // fromPercentEncoding() interprets its input as UTF-8, so hand it
        // UTF-8 bytes rather than the locale-dependent qPrintable() form.
        param = QUrl::fromPercentEncoding(param.toUtf8());
        diffs.append(Diff(INSERT, param));
        break;
      case '-':
        // Fall through.
      case '=': {
        bool ok = false;
        const int n = param.toInt(&ok);
        if (!ok) {
          // toInt() yields 0 on failure; do not accept that silently.
          throw QString("Invalid number in diff_fromDelta: %1").arg(param);
        }
        if (n < 0) {
          throw QString("Negative number in diff_fromDelta: %1").arg(param);
        }
        const QString text = safeMid(text1, pointer, n);
        pointer += n;
        if (token[0] == QChar('=')) {
          diffs.append(Diff(EQUAL, text));
        } else {
          diffs.append(Diff(DELETE, text));
        }
        break;
      }
      default:
        throw QString("Invalid diff operation in diff_fromDelta: %1")
            .arg(token[0]);
    }
  }
  if (pointer != text1.length()) {
    throw QString("Delta length (%1) smaller than source text length (%2)")
        .arg(pointer).arg(text1.length());
  }
  return diffs;
}


+ // MATCH FUNCTIONS + + +int diff_match_patch::match_main(const QString &text, const QString &pattern, + int loc) { + // Check for null inputs. + if (text.isNull() || pattern.isNull()) { + throw "Null inputs. (match_main)"; + } + + loc = std::max(0, std::min(loc, text.length())); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.isEmpty()) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() + && safeMid(text, loc, pattern.length()) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare.
+ return match_bitap(text, pattern, loc); + } +} + + +int diff_match_patch::match_bitap(const QString &text, const QString &pattern, + int loc) { + if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + QMap s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.indexOf(pattern, loc); + if (best_loc != -1) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + // What about in the other direction? (speedup) + best_loc = text.lastIndexOf(pattern, loc + pattern.length()); + if (best_loc != -1) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. + int matchmask = 1 << (pattern.length() - 1); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + int *rd; + int *last_rd = NULL; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) + <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + int start = std::max(1, loc - bin_mid + 1); + int finish = std::min(loc + bin_mid, text.length()) + pattern.length(); + + rd = new int[finish + 2]; + rd[finish + 1] = (1 << d) - 1; + for (int j = finish; j >= start; j--) { + int charMatch; + if (text.length() <= j - 1) { + // Out of range. 
+ charMatch = 0; + } else { + charMatch = s.value(text[j - 1], 0); + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) + | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = std::max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. + break; + } + delete [] last_rd; + last_rd = rd; + } + delete [] last_rd; + delete [] rd; + return best_loc; +} + + +double diff_match_patch::match_bitapScore(int e, int x, int loc, + const QString &pattern) { + const float accuracy = static_cast (e) / pattern.length(); + const int proximity = qAbs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? 
accuracy : 1.0; + } + return accuracy + (proximity / static_cast (Match_Distance)); +} + +
/**
 * Build the character bitmask table used by match_bitap.
 * For each distinct character of the pattern the mask has one bit set per
 * occurrence; the first pattern character maps to the highest bit,
 * 1 << (pattern.length() - 1), and the last one to bit 0.
 * @param pattern The text to encode.
 * @return Map from character to its occurrence bitmask.
 */
QMap<QChar, int> diff_match_patch::match_alphabet(const QString &pattern) {
  QMap<QChar, int> s;
  const int length = pattern.length();
  for (int i = 0; i < length; i++) {
    // operator[] value-initialises a missing entry to 0, so no separate
    // zeroing pass over the pattern is needed.
    s[pattern[i]] |= 1 << (length - i - 1);
  }
  return s;
}


+// PATCH FUNCTIONS + + +void diff_match_patch::patch_addContext(Patch &patch, const QString &text) { + if (text.isEmpty()) { + return; + } + QString pattern = safeMid(text, patch.start2, patch.length1); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while (text.indexOf(pattern) != text.lastIndexOf(pattern) + && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { + padding += Patch_Margin; + pattern = safeMid(text, std::max(0, patch.start2 - padding), + std::min(text.length(), patch.start2 + patch.length1 + padding) + - std::max(0, patch.start2 - padding)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + QString prefix = safeMid(text, std::max(0, patch.start2 - padding), + patch.start2 - std::max(0, patch.start2 - padding)); + if (!prefix.isEmpty()) { + patch.diffs.prepend(Diff(EQUAL, prefix)); + } + // Add the suffix. + QString suffix = safeMid(text, patch.start2 + patch.length1, + std::min(text.length(), patch.start2 + patch.length1 + padding) + - (patch.start2 + patch.length1)); + if (!suffix.isEmpty()) { + patch.diffs.append(Diff(EQUAL, suffix)); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths.
+ patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); +} + + +QList diff_match_patch::patch_make(const QString &text1, + const QString &text2) { + // Check for null inputs. + if (text1.isNull() || text2.isNull()) { + throw "Null inputs. (patch_make)"; + } + + // No diffs provided, compute our own. + QList diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + + return patch_make(text1, diffs); +} + + +QList diff_match_patch::patch_make(const QList &diffs) { + // No origin string provided, compute our own. + const QString text1 = diff_text1(diffs); + return patch_make(text1, diffs); +} + + +QList diff_match_patch::patch_make(const QString &text1, + const QString &text2, + const QList &diffs) { + // text2 is entirely unused. + return patch_make(text1, diffs); + + Q_UNUSED(text2) +} + + +QList diff_match_patch::patch_make(const QString &text1, + const QList &diffs) { + // Check for null inputs. + if (text1.isNull()) { + throw "Null inputs. (patch_make)"; + } + + QList patches; + if (diffs.isEmpty()) { + return patches; // Get rid of the null case. + } + Patch patch; + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + QString prepatch_text = text1; + QString postpatch_text = text1; + foreach(Diff aDiff, diffs) { + if (patch.diffs.isEmpty() && aDiff.operation != EQUAL) { + // A new patch starts here. 
+ patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case INSERT: + patch.diffs.append(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.left(char_count2) + + aDiff.text + safeMid(postpatch_text, char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.append(aDiff); + postpatch_text = postpatch_text.left(char_count2) + + safeMid(postpatch_text, char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin + && !patch.diffs.isEmpty() && !(aDiff == diffs.back())) { + // Small equality inside a patch. + patch.diffs.append(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.append(patch); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. 
+ if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.append(patch); + } + + return patches; +} + + +QList diff_match_patch::patch_deepCopy(QList &patches) { + QList patchesCopy; + foreach(Patch aPatch, patches) { + Patch patchCopy = Patch(); + foreach(Diff aDiff, aPatch.diffs) { + Diff diffCopy = Diff(aDiff.operation, aDiff.text); + patchCopy.diffs.append(diffCopy); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.append(patchCopy); + } + return patchesCopy; +} + + +QPair > diff_match_patch::patch_apply( + QList &patches, const QString &sourceText) { + QString text = sourceText; // Copy to preserve original. + if (patches.isEmpty()) { + return QPair >(text, QVector(0)); + } + + // Deep copy the patches so that no changes are made to originals. + QList patchesCopy = patch_deepCopy(patches); + + QString nullPadding = patch_addPadding(patchesCopy); + text = nullPadding + text + nullPadding; + patch_splitMax(patchesCopy); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + int delta = 0; + QVector results(patchesCopy.size()); + foreach(Patch aPatch, patchesCopy) { + int expected_loc = aPatch.start2 + delta; + QString text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.length() > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. 
+ start_loc = match_main(text, text1.left(Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, text1.right(Match_MaxBits), + expected_loc + text1.length() - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + QString text2; + if (end_loc == -1) { + text2 = safeMid(text, start_loc, text1.length()); + } else { + text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.left(start_loc) + diff_text2(aPatch.diffs) + + safeMid(text, start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + QList diffs = diff_main(text1, text2, false); + if (text1.length() > Match_MaxBits + && diff_levenshtein(diffs) / static_cast (text1.length()) + > Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + int index1 = 0; + foreach(Diff aDiff, aPatch.diffs) { + if (aDiff.operation != EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == INSERT) { + // Insertion + text = text.left(start_loc + index2) + aDiff.text + + safeMid(text, start_loc + index2); + } else if (aDiff.operation == DELETE) { + // Deletion + text = text.left(start_loc + index2) + + safeMid(text, start_loc + diff_xIndex(diffs, + index1 + aDiff.text.length())); + } + } + if (aDiff.operation != DELETE) { + index1 += aDiff.text.length(); + } + } + } + } + } + x++; + } + // Strip the padding off. + text = safeMid(text, nullPadding.length(), text.length() + - 2 * nullPadding.length()); + return QPair >(text, results); +} + + +QString diff_match_patch::patch_addPadding(QList &patches) { + short paddingLength = Patch_Margin; + QString nullPadding = ""; + for (short x = 1; x <= paddingLength; x++) { + nullPadding += QChar((ushort)x); + } + + // Bump all the patches forward. + QMutableListIterator pointer(patches); + while (pointer.hasNext()) { + Patch &aPatch = pointer.next(); + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch &firstPatch = patches.first(); + QList &firstPatchDiffs = firstPatch.diffs; + if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) { + // Add nullPadding equality. + firstPatchDiffs.prepend(Diff(EQUAL, nullPadding)); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. + firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } else if (paddingLength > firstPatchDiffs.first().text.length()) { + // Grow first equality. 
+ Diff &firstDiff = firstPatchDiffs.first(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), + paddingLength - firstDiff.text.length()) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff (the final patch, not the first). + Patch &lastPatch = patches.last(); + QList &lastPatchDiffs = lastPatch.diffs; + if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) { + // Add nullPadding equality. + lastPatchDiffs.append(Diff(EQUAL, nullPadding)); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } else if (paddingLength > lastPatchDiffs.last().text.length()) { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.last(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.left(extraLength); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } + + return nullPadding; +} + + +void diff_match_patch::patch_splitMax(QList &patches) { + short patch_size = Match_MaxBits; + QString precontext, postcontext; + Patch patch; + int start1, start2; + bool empty; + Operation diff_type; + QString diff_text; + QMutableListIterator pointer(patches); + Patch bigpatch; + + if (pointer.hasNext()) { + bigpatch = pointer.next(); + } + + while (!bigpatch.isNull()) { + if (bigpatch.length1 <= patch_size) { + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + continue; + } + // Remove the big old patch. + pointer.remove(); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = ""; + while (!bigpatch.diffs.isEmpty()) { + // Create one of several smaller patches. 
+ patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (!precontext.isEmpty()) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.append(Diff(EQUAL, precontext)); + } + while (!bigpatch.diffs.isEmpty() + && patch.length1 < patch_size - Patch_Margin) { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if (diff_type == INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.append(bigpatch.diffs.front()); + bigpatch.diffs.removeFirst(); + empty = false; + } else if (diff_type == DELETE && patch.diffs.size() == 1 + && patch.diffs.front().operation == EQUAL + && diff_text.length() > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.append(Diff(diff_type, diff_text)); + bigpatch.diffs.removeFirst(); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.left(std::min(diff_text.length(), + patch_size - patch.length1 - Patch_Margin)); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.append(Diff(diff_type, diff_text)); + if (diff_text == bigpatch.diffs.front().text) { + bigpatch.diffs.removeFirst(); + } else { + bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, + diff_text.length()); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = safeMid(precontext, precontext.length() - Patch_Margin); + // Append the end context for this patch. 
+ if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).left(Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + if (!postcontext.isEmpty()) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if (!patch.diffs.isEmpty() + && patch.diffs.back().operation == EQUAL) { + patch.diffs.back().text += postcontext; + } else { + patch.diffs.append(Diff(EQUAL, postcontext)); + } + } + if (!empty) { + pointer.insert(patch); + } + } + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + } +} + + +QString diff_match_patch::patch_toText(const QList &patches) { + QString text; + foreach(Patch aPatch, patches) { + text.append(aPatch.toString()); + } + return text; +} + + +QList diff_match_patch::patch_fromText(const QString &textline) { + QList patches; + if (textline.isEmpty()) { + return patches; + } + QStringList text = textline.split("\n", QString::SkipEmptyParts); + Patch patch; + QRegExp patchHeader("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); + char sign; + QString line; + while (!text.isEmpty()) { + if (!patchHeader.exactMatch(text.front())) { + throw QString("Invalid patch string: %1").arg(text.front()); + } + + patch = Patch(); + patch.start1 = patchHeader.cap(1).toInt(); + if (patchHeader.cap(2).isEmpty()) { + patch.start1--; + patch.length1 = 1; + } else if (patchHeader.cap(2) == "0") { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = patchHeader.cap(2).toInt(); + } + + patch.start2 = patchHeader.cap(3).toInt(); + if (patchHeader.cap(4).isEmpty()) { + patch.start2--; + patch.length2 = 1; + } else if (patchHeader.cap(4) == "0") { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = patchHeader.cap(4).toInt(); + } + text.removeFirst(); + + while (!text.isEmpty()) { + if (text.front().isEmpty()) { + text.removeFirst(); + continue; + } + sign = text.front()[0].toAscii(); + line = safeMid(text.front(), 1); + line = 
line.replace("+", "%2B"); // decode would change all "+" to " " + line = QUrl::fromPercentEncoding(qPrintable(line)); + if (sign == '-') { + // Deletion. + patch.diffs.append(Diff(DELETE, line)); + } else if (sign == '+') { + // Insertion. + patch.diffs.append(Diff(INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.append(Diff(EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // Unrecognised line prefix: reject the whole patch text. + throw QString("Invalid patch mode '%1' in: %2").arg(sign).arg(line); + } + text.removeFirst(); + } + + patches.append(patch); + + } + return patches; +} diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h new file mode 100644 index 0000000..82d3283 --- /dev/null +++ b/cpp/diff_match_patch.h @@ -0,0 +1,625 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_H +#define DIFF_MATCH_PATCH_H + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + * + * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): + * + * Code known to compile and run with Qt 4.3 through Qt 4.7. 
+ * + * Here is a trivial sample program which works properly when linked with this + * library: + * + + #include + #include + #include + #include + #include + #include "diff_match_patch.h" + int main(int argc, char **argv) { + diff_match_patch dmp; + QString str1 = QString("First string in diff"); + QString str2 = QString("Second string in diff"); + + QString strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + QPair > out + = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + QString strResult = out.first; + + // here, strResult will equal str2 above. + return 0; + } + + */ + + +/** +* The data structure representing a diff is a list of Diff objects: +* {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), +* Diff(Operation.EQUAL, " world.")} +* which means: delete "Hello", add "Goodbye" and keep " world." +*/ +enum Operation { + DELETE, INSERT, EQUAL +}; + + +/** +* Class representing one diff operation. +*/ +class Diff { + public: + Operation operation; + // One of: INSERT, DELETE or EQUAL. + QString text; + // The text associated with this diff operation. + + /** + * Constructor. Initializes the diff with the provided values. + * @param _operation One of INSERT, DELETE or EQUAL. + * @param _text The text being applied. + */ + Diff(Operation _operation, const QString &_text); + Diff(); + inline bool isNull() const; + QString toString() const; + bool operator==(const Diff &d) const; + bool operator!=(const Diff &d) const; + + static QString strOperation(Operation op); +}; + + +/** +* Class representing one patch operation. +*/ +class Patch { + public: + QList diffs; + // The diff operations that make up this patch. + int start1; + // Offset of the patch within the pre-patch (source) text. + int start2; + // Offset of the patch within the post-patch (destination) text. + int length1; + // Characters covered in the source text (equalities plus deletions). + int length2; + // Characters covered in the destination text (equalities plus insertions). + + /** + * Constructor. Initializes with an empty list of diffs. + */ + Patch(); + bool isNull() const; + QString toString(); +}; + + +/** + * Class containing the diff, match and patch methods. + * Also contains the behaviour settings. 
+ */ +class diff_match_patch { + + friend class diff_match_patch_test; + + public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int Match_Distance; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold; + // Chunk size for context length. + short Patch_Margin; + + // The number of bits in an int. + short Match_MaxBits; + + private: + // Define some regex patterns for matching boundaries. + static QRegExp BLANKLINEEND; + static QRegExp BLANKLINESTART; + + + public: + + diff_match_patch(); + + // DIFF FUNCTIONS + + + /** + * Find the differences between two texts. + * Run a faster slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. + * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + QList diff_main(const QString &text1, const QString &text2); + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. 
+ * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + QList diff_main(const QString &text1, const QString &text2, bool checklines); + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set Diff_Timeout instead. + * @return Linked List of Diff objects. + */ + private: + QList diff_main(const QString &text1, const QString &text2, bool checklines, clock_t deadline); + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private: + QList diff_compute(QString text1, QString text2, bool checklines, clock_t deadline); + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. 
+ */ + private: + QList diff_lineMode(QString text1, QString text2, clock_t deadline); + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + protected: + QList diff_bisect(const QString &text1, const QString &text2, clock_t deadline); + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ + private: + QList diff_bisectSplit(const QString &text1, const QString &text2, int x, int y, clock_t deadline); + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return Three element Object array, containing the encoded text1, the + * encoded text2 and the List of unique strings. The zeroth element + * of the List of unique strings is intentionally blank. + */ + protected: + QList diff_linesToChars(const QString &text1, const QString &text2); // return elems 0 and 1 are QString, elem 2 is QStringList + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. 
+ */ + private: + QString diff_linesToCharsMunge(const QString &text, QStringList &lineArray, + QMap &lineHash); + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ + private: + void diff_charsToLines(QList &diffs, const QStringList &lineArray); + + /** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ + public: + int diff_commonPrefix(const QString &text1, const QString &text2); + + /** + * Determine the common suffix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ + public: + int diff_commonSuffix(const QString &text1, const QString &text2); + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ + protected: + int diff_commonOverlap(const QString &text1, const QString &text2); + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ + protected: + QStringList diff_halfMatch(const QString &text1, const QString &text2); + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. 
+ * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ + private: + QStringList diff_halfMatchI(const QString &longtext, const QString &shorttext, int i); + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemantic(QList &diffs); + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemanticLossless(QList &diffs); + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ + private: + int diff_cleanupSemanticScore(const QString &one, const QString &two); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupEfficiency(QList &diffs); + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupMerge(QList &diffs); + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. 
+ */ + public: + int diff_xIndex(const QList &diffs, int loc); + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ + public: + QString diff_prettyHtml(const QList &diffs); + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ + public: + QString diff_text1(const QList &diffs); + + /** + * Compute and return the destination text (all equalities and insertions). + * @param diffs LinkedList of Diff objects. + * @return Destination text. + */ + public: + QString diff_text2(const QList &diffs); + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ + public: + int diff_levenshtein(const QList &diffs); + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of diff tuples. + * @return Delta text. + */ + public: + QString diff_toDelta(const QList &diffs); + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of diff tuples or null if invalid. + * @throws QString If invalid input. + */ + public: + QList diff_fromDelta(const QString &text1, const QString &delta); + + + // MATCH FUNCTIONS + + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. 
+ * @return Best match index or -1. + */ + public: + int match_main(const QString &text, const QString &pattern, int loc); + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + protected: + int match_bitap(const QString &text, const QString &pattern, int loc); + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ + private: + double match_bitapScore(int e, int x, int loc, const QString &pattern); + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ + protected: + QMap match_alphabet(const QString &pattern); + + + // PATCH FUNCTIONS + + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ + protected: + void patch_addContext(Patch &patch, const QString &text); + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ + public: + QList patch_make(const QString &text1, const QString &text2); + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ + public: + QList patch_make(const QList &diffs); + + /** + * Compute a list of patches to turn text1 into text2. 
+ * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param text2 Ignored. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(const QString &text1, const QList &diffs). + */ + public: + QList patch_make(const QString &text1, const QString &text2, const QList &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ + public: + QList patch_make(const QString &text1, const QList &diffs); + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of patch objects. + * @return Array of patch objects. + */ + public: + QList patch_deepCopy(QList &patches); + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of patch objects. + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values. + */ + public: + QPair > patch_apply(QList &patches, const QString &text); + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of patch objects. + * @return The padding string added to each side. + */ + public: + QString patch_addPadding(QList &patches); + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ + public: + void patch_splitMax(QList &patches); + + /** + * Take a list of patches and return a textual representation. 
+ * @param patches List of Patch objects. + * @return Text representation of patches. + */ + public: + QString patch_toText(const QList &patches); + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws QString If invalid input. + */ + public: + QList patch_fromText(const QString &textline); + + /** + * A safer version of QString.mid(pos). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ + private: + static inline QString safeMid(const QString &str, int pos) { + return (pos == str.length()) ? QString("") : str.mid(pos); + } + + /** + * A safer version of QString.mid(pos, len). This one returns "" instead of + * null when the position equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ + private: + static inline QString safeMid(const QString &str, int pos, int len) { + return (pos == str.length()) ? 
QString("") : str.mid(pos, len); + } +}; + +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp/diff_match_patch.pro b/cpp/diff_match_patch.pro new file mode 100644 index 0000000..8052797 --- /dev/null +++ b/cpp/diff_match_patch.pro @@ -0,0 +1,19 @@ +#QT += sql xml network +TEMPLATE = app +CONFIG += qt debug_and_release + +mac { + CONFIG -= app_bundle +} + +# don't embed the manifest for now (doesn't work :( ) +#CONFIG -= embed_manifest_exe + +FORMS = + +HEADERS = diff_match_patch.h diff_match_patch_test.h + +SOURCES = diff_match_patch.cpp diff_match_patch_test.cpp + +RESOURCES = + diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp new file mode 100644 index 0000000..f75b1cd --- /dev/null +++ b/cpp/diff_match_patch_test.cpp @@ -0,0 +1,1197 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Code known to compile and run with Qt 4.3 through Qt 4.7. 
+#include +#include "diff_match_patch.h" +#include "diff_match_patch_test.h" + +int main(int argc, char **argv) { + diff_match_patch_test dmp_test; + qDebug("Starting diff_match_patch unit tests."); + dmp_test.run_all_tests(); + qDebug("Done."); + return 0; + Q_UNUSED(argc) + Q_UNUSED(argv) +} + + +diff_match_patch_test::diff_match_patch_test() { +} + +void diff_match_patch_test::run_all_tests() { + QTime t; + t.start(); + try { + testDiffCommonPrefix(); + testDiffCommonSuffix(); + testDiffCommonOverlap(); + testDiffHalfmatch(); + testDiffLinesToChars(); + testDiffCharsToLines(); + testDiffCleanupMerge(); + testDiffCleanupSemanticLossless(); + testDiffCleanupSemantic(); + testDiffCleanupEfficiency(); + testDiffPrettyHtml(); + testDiffText(); + testDiffDelta(); + testDiffXIndex(); + testDiffLevenshtein(); + testDiffBisect(); + testDiffMain(); + + testMatchAlphabet(); + testMatchBitap(); + testMatchMain(); + + testPatchObj(); + testPatchFromText(); + testPatchToText(); + testPatchAddContext(); + testPatchMake(); + testPatchSplitMax(); + testPatchAddPadding(); + testPatchApply(); + qDebug("All tests passed."); + } catch (QString strCase) { + qDebug("Test failed: %s", qPrintable(strCase)); + } + qDebug("Total time: %d ms", t.elapsed()); +} + +// DIFF TEST FUNCTIONS + +void diff_match_patch_test::testDiffCommonPrefix() { + // Detect any common prefix. + assertEquals("diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix("abc", "xyz")); + + assertEquals("diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + + assertEquals("diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix("1234", "1234xyz")); +} + +void diff_match_patch_test::testDiffCommonSuffix() { + // Detect any common suffix. 
+ assertEquals("diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix("abc", "xyz")); + + assertEquals("diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + + assertEquals("diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix("1234", "xyz1234")); +} + +void diff_match_patch_test::testDiffCommonOverlap() { + // Detect any suffix/prefix overlap. + assertEquals("diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap("", "abcd")); + + assertEquals("diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap("abc", "abcd")); + + assertEquals("diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap("123456", "abcd")); + + assertEquals("diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals("diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap("fi", QString::fromWCharArray((const wchar_t*) L"\ufb01i", 2))); +} + +void diff_match_patch_test::testDiffHalfmatch() { + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty("diff_halfMatch: No match #1.", dmp.diff_halfMatch("1234567890", "abcdef")); + + assertEmpty("diff_halfMatch: No match #2.", dmp.diff_halfMatch("12345", "23")); + + assertEquals("diff_halfMatch: Single Match #1.", QString("12,90,a,z,345678").split(","), dmp.diff_halfMatch("1234567890", "a345678z")); + + assertEquals("diff_halfMatch: Single Match #2.", QString("a,z,12,90,345678").split(","), dmp.diff_halfMatch("a345678z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #3.", QString("abc,z,1234,0,56789").split(","), dmp.diff_halfMatch("abc56789z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #4.", QString("a,xyz,1,7890,23456").split(","), dmp.diff_halfMatch("a23456xyz", "1234567890")); + + assertEquals("diff_halfMatch: Multiple Matches #1.", QString("12123,123121,a,z,1234123451234").split(","), dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + + assertEquals("diff_halfMatch: Multiple Matches #2.", QString(",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=").split(","), dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + + assertEquals("diff_halfMatch: Multiple Matches #3.", QString("-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y").split(","), dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquals("diff_halfMatch: Non-optimal halfmatch.", QString("qHillo,w,x,Hulloy,HelloHe").split(","), dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + + dmp.Diff_Timeout = 0; + assertEmpty("diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); +} + +void diff_match_patch_test::testDiffLinesToChars() { + // Convert lines down to characters. 
+ QStringList tmpVector; + QList<QVariant> tmpVarList; + tmpVector.append(""); + tmpVector.append("alpha\n"); + tmpVector.append("beta\n"); + tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); //(("\u0001\u0002\u0001")); + tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // (("\u0002\u0001\u0002")); + tmpVarList << QVariant::fromValue(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append(""); + tmpVector.append("alpha\r\n"); + tmpVector.append("beta\r\n"); + tmpVector.append("\r\n"); + tmpVarList << QVariant::fromValue(QString("")); + tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)3) + QChar((ushort)3)); // (("\u0001\u0002\u0003\u0003")); + tmpVarList << QVariant::fromValue(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.append(""); + tmpVector.append("a"); + tmpVector.append("b"); + tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1)); // (("\u0001")); + tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2)); // (("\u0002")); + tmpVarList << QVariant::fromValue(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("a", "b")); + + // More than 256 to reveal any 8-bit limitations.
+ int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + QString lines; + QString chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.append(QString::number(x) + "\n"); + lines += QString::number(x) + "\n"; + chars += QChar(static_cast<ushort>(x)); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.prepend(""); + tmpVarList << QVariant::fromValue(chars); + tmpVarList << QVariant::fromValue(QString("")); + tmpVarList << QVariant::fromValue(tmpVector); + assertEquals("diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, "")); +} + +void diff_match_patch_test::testDiffCharsToLines() { + // First check that Diff equality works. + assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); + + assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); + + // Convert chars up to lines. + QList<Diff> diffs; + diffs << Diff(EQUAL, QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); // ("\u0001\u0002\u0001"); + diffs << Diff(INSERT, QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // ("\u0002\u0001\u0002"); + QStringList tmpVector; + tmpVector.append(""); + tmpVector.append("alpha\n"); + tmpVector.append("beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines:", diffList(Diff(EQUAL, "alpha\nbeta\nalpha\n"), Diff(INSERT, "beta\nalpha\nbeta\n")), diffs); + + // More than 256 to reveal any 8-bit limitations.
+ int n = 300; + tmpVector.clear(); + QList<QVariant> tmpVarList; + QString lines; + QString chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.append(QString::number(x) + "\n"); + lines += QString::number(x) + "\n"; + chars += QChar(static_cast<ushort>(x)); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.prepend(""); + diffs = diffList(Diff(DELETE, chars)); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); +} + +void diff_match_patch_test::testDiffCleanupMerge() { + // Cleanup a messy diff. + QList<Diff> diffs; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Null case.", diffList(), diffs); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: No change case.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")), diffs); + + diffs = diffList(Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge equalities.", diffList(Diff(EQUAL, "abc")), diffs); + + diffs = diffList(Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge deletions.", diffList(Diff(DELETE, "abc")), diffs); + + diffs = diffList(Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge insertions.", diffList(Diff(INSERT, "abc")), diffs); + + diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, "f")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge interweave.", diffList(Diff(DELETE, "ac"), Diff(INSERT, "bd"), Diff(EQUAL, "ef")), diffs); + + diffs = diffList(Diff(DELETE, "a"), 
Diff(INSERT, "abc"), Diff(DELETE, "dc")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "c")), diffs); + + diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc"), Diff(EQUAL, "y")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "cy")), diffs); + + diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left.", diffList(Diff(INSERT, "ab"), Diff(EQUAL, "ac")), diffs); + + diffs = diffList(Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right.", diffList(Diff(EQUAL, "ca"), Diff(INSERT, "ba")), diffs); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), Diff(DELETE, "ac"), Diff(EQUAL, "x")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left recursive.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "acx")), diffs); + + diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), Diff(DELETE, "b"), Diff(EQUAL, "a")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right recursive.", diffList(Diff(EQUAL, "xca"), Diff(DELETE, "cba")), diffs); +} + +void diff_match_patch_test::testDiffCleanupSemanticLossless() { + // Slide diffs to match logical boundaries. 
+ QList<Diff> diffs = diffList(); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); + + diffs = diffList(Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), Diff(EQUAL, "\r\nEEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Blank lines.", diffList(Diff(EQUAL, "AAA\r\n\r\n"), Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), Diff(EQUAL, "BBB\r\nEEE")), diffs); + + diffs = diffList(Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), Diff(EQUAL, " EEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Line boundaries.", diffList(Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), Diff(EQUAL, "BBB EEE")), diffs); + + diffs = diffList(Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), Diff(EQUAL, "at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), Diff(EQUAL, "at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", diffList(Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the start.", diffList(Diff(DELETE, "a"), Diff(EQUAL, "aax")), diffs); + + diffs = diffList(Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the end.", diffList(Diff(EQUAL, "xaa"), Diff(DELETE, "a")), diffs); + + diffs = diffList(Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), Diff(EQUAL, "yyy.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Sentence boundaries.", diffList(Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), Diff(EQUAL, " The yyy.")), diffs); +} + +void diff_match_patch_test::testDiffCleanupSemantic() { + // Cleanup semantically trivial equalities. + QList<Diff> diffs = diffList(); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); + + diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #1.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")), diffs); + + diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #2.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")), diffs); + + diffs = diffList(Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Simple elimination.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "b")), diffs); + + diffs = diffList(Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), Diff(EQUAL, "f"), Diff(INSERT, "g")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Backpass elimination.", diffList(Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")), diffs); + + diffs = diffList(Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Multiple elimination.", diffList(Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")), diffs); + + diffs = diffList(Diff(EQUAL, "The c"), Diff(DELETE, "ow and the 
c"), Diff(EQUAL, "at.")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No overlap elimination.", diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")), diffs); + + diffs = diffList(Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Overlap elimination.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), Diff(INSERT, "def")), diffs); + + diffs = diffList(Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", diffList(Diff(INSERT, "def"), Diff(EQUAL, "xxx"), Diff(DELETE, "abc")), diffs); + + diffs = diffList(Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Two overlap eliminations.", diffList(Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), Diff(INSERT, "efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")), diffs); +} + +void diff_match_patch_test::testDiffCleanupEfficiency() { + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + QList<Diff> diffs = diffList(); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Null case.", diffList(), diffs); + + diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: No elimination.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")), diffs); + + diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Four-edit elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")), diffs); + + diffs = diffList(Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Three-edit elimination.", diffList(Diff(DELETE, "xcd"), Diff(INSERT, "12x34")), diffs); + + diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), Diff(INSERT, "56")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Backpass elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")), diffs); + + dmp.Diff_EditCost = 5; + diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: High cost elimination.", diffList(Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")), diffs); + dmp.Diff_EditCost = 4; +} + +void diff_match_patch_test::testDiffPrettyHtml() { + // Pretty print. + QList<Diff> diffs = diffList(Diff(EQUAL, "a\n"), Diff(DELETE, "<B>b</B>"), Diff(INSERT, "c&d")); + assertEquals("diff_prettyHtml:", "<span>a&para;<br></span><del style=\"background:#ffe6e6;\">&lt;B&gt;b&lt;/B&gt;</del><ins style=\"background:#e6ffe6;\">c&amp;d</ins>", dmp.diff_prettyHtml(diffs)); +} + +void diff_match_patch_test::testDiffText() { + // Compute the source and destination texts. + QList<Diff> diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy")); + assertEquals("diff_text1:", "jumps over the lazy", dmp.diff_text1(diffs)); + assertEquals("diff_text2:", "jumped over a lazy", dmp.diff_text2(diffs)); +} + +void diff_match_patch_test::testDiffDelta() { + // Convert a diff into delta string. + QList<Diff> diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")); + QString text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Base text.", "jumps over the lazy", text1); + + QString delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + try { + dmp.diff_fromDelta(text1 + "x", delta); + assertFalse("diff_fromDelta: Too long.", true); + } catch (QString ex) { + // Exception expected. + } + + // Generates error (19 > 18). + try { + dmp.diff_fromDelta(text1.mid(1), delta); + assertFalse("diff_fromDelta: Too short.", true); + } catch (QString ex) { + // Exception expected. + } + + // Generates error (%c3%xy invalid Unicode). + /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" + try { + dmp.diff_fromDelta("", "+%c3%xy"); + assertFalse("diff_fromDelta: Invalid character.", true); + } catch (QString ex) { + // Exception expected. + } + */ + + // Test deltas with special characters.
+ diffs = diffList(Diff(EQUAL, QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %", 7)), Diff(DELETE, QString::fromWCharArray((const wchar_t*) L"\u0681 \001 \n ^", 7)), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0682 \002 \\ |", 7))); + text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Unicode text.", QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %\u0681 \001 \n ^", 14), text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta); + + assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. + diffs = diffList(Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); + QString text2 = dmp.diff_text2(diffs); + assertEquals("diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta("", delta)); +} + +void diff_match_patch_test::testDiffXIndex() { + // Translate a location in text1 to text2. 
+ QList<Diff> diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); + assertEquals("diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")); + assertEquals("diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); +} + +void diff_match_patch_test::testDiffLevenshtein() { + QList<Diff> diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); + assertEquals("diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); + + diffs = diffList(Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")); + assertEquals("diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein(diffs)); + + diffs = diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")); + assertEquals("diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein(diffs)); +} + +void diff_match_patch_test::testDiffBisect() { + // Normal. + QString a = "cat"; + QString b = "map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + QList<Diff> diffs = diffList(Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), Diff(DELETE, "t"), Diff(INSERT, "p")); + assertEquals("diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits<clock_t>::max())); + + // Timeout. + diffs = diffList(Diff(DELETE, "cat"), Diff(INSERT, "map")); + assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); +} + +void diff_match_patch_test::testDiffMain() { + // Perform a trivial diff.
+ QList<Diff> diffs = diffList(); + assertEquals("diff_main: Null case.", diffs, dmp.diff_main("", "", false)); + + diffs = diffList(Diff(EQUAL, "abc")); + assertEquals("diff_main: Equality.", diffs, dmp.diff_main("abc", "abc", false)); + + diffs = diffList(Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")); + assertEquals("diff_main: Simple insertion.", diffs, dmp.diff_main("abc", "ab123c", false)); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")); + assertEquals("diff_main: Simple deletion.", diffs, dmp.diff_main("a123bc", "abc", false)); + + diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), Diff(INSERT, "456"), Diff(EQUAL, "c")); + assertEquals("diff_main: Two insertions.", diffs, dmp.diff_main("abc", "a123b456c", false)); + + diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), Diff(DELETE, "456"), Diff(EQUAL, "c")); + assertEquals("diff_main: Two deletions.", diffs, dmp.diff_main("a123b456c", "abc", false)); + + // Perform a real diff. + // Switch off the timeout.
+ dmp.Diff_Timeout = 0; + diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b")); + assertEquals("diff_main: Simple case #1.", diffs, dmp.diff_main("a", "b", false)); + + diffs = diffList(Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")); + assertEquals("diff_main: Simple case #2.", diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + + diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0680", 1)), Diff(EQUAL, "x"), Diff(DELETE, "\t"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\000", 1))); + assertEquals("diff_main: Simple case #3.", diffs, dmp.diff_main("ax\t", QString::fromWCharArray((const wchar_t*) L"\u0680x\000", 3), false)); + + diffs = diffList(Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")); + assertEquals("diff_main: Overlap #1.", diffs, dmp.diff_main("1ayb2", "abxab", false)); + + diffs = diffList(Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")); + assertEquals("diff_main: Overlap #2.", diffs, dmp.diff_main("abcy", "xaxcxabc", false)); + + diffs = diffList(Diff(DELETE, "ABCD"), Diff(EQUAL, "a"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "bcd"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "efghijklmnopqrs"), Diff(DELETE, "EFGHIJKLMNOefg")); + assertEquals("diff_main: Overlap #3.", diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)); + + diffs = diffList(Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")); + assertEquals("diff_main: Large equality.", diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
+ QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main(a, b); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue("diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue("diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + QStringList texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + QStringList texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); + + // Test null inputs. + try { + dmp.diff_main(NULL, NULL); + assertFalse("diff_main: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } +} + + +// MATCH TEST FUNCTIONS + + +void diff_match_patch_test::testMatchAlphabet() { + // Initialise the bitmasks for Bitap. 
+ QMap<QChar, int> bitmask; + bitmask.insert('a', 4); + bitmask.insert('b', 2); + bitmask.insert('c', 1); + assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); + + bitmask = QMap<QChar, int>(); + bitmask.insert('a', 37); + bitmask.insert('b', 18); + bitmask.insert('c', 8); + assertEquals("match_alphabet: Duplicates.", bitmask, dmp.match_alphabet("abcaba")); +} + +void diff_match_patch_test::testMatchBitap() { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Exact match #1.", 5, dmp.match_bitap("abcdefghijk", "fgh", 5)); + + assertEquals("match_bitap: Exact match #2.", 5, dmp.match_bitap("abcdefghijk", "fgh", 0)); + + assertEquals("match_bitap: Fuzzy match #1.", 4, dmp.match_bitap("abcdefghijk", "efxhi", 0)); + + assertEquals("match_bitap: Fuzzy match #2.", 2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + + assertEquals("match_bitap: Fuzzy match #3.", -1, dmp.match_bitap("abcdefghijk", "bxy", 1)); + + assertEquals("match_bitap: Overflow.", 2, dmp.match_bitap("123456789xx0", "3456789x0", 2)); + + assertEquals("match_bitap: Before start match.", 0, dmp.match_bitap("abcdef", "xxabc", 4)); + + assertEquals("match_bitap: Beyond end match.", 3, dmp.match_bitap("abcdef", "defyy", 4)); + + assertEquals("match_bitap: Oversized pattern.", 0, dmp.match_bitap("abcdef", "xabcdefy", 0)); + + dmp.Match_Threshold = 0.4f; + assertEquals("match_bitap: Threshold #1.", 4, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.3f; + assertEquals("match_bitap: Threshold #2.", -1, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.0f; + assertEquals("match_bitap: Threshold #3.", 1, dmp.match_bitap("abcdefghijk", "bcdef", 1)); + + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Multiple select #1.", 0, dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + + assertEquals("match_bitap: Multiple select #2.", 8, dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + + dmp.Match_Distance 
= 10; // Strict location. + assertEquals("match_bitap: Distance test #1.", -1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + + assertEquals("match_bitap: Distance test #2.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals("match_bitap: Distance test #3.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); +} + +void diff_match_patch_test::testMatchMain() { + // Full match. + assertEquals("match_main: Equality.", 0, dmp.match_main("abcdef", "abcdef", 1000)); + + assertEquals("match_main: Null text.", -1, dmp.match_main("", "abcdef", 1)); + + assertEquals("match_main: Null pattern.", 3, dmp.match_main("abcdef", "", 3)); + + assertEquals("match_main: Exact match.", 3, dmp.match_main("abcdef", "de", 3)); + + dmp.Match_Threshold = 0.7f; + assertEquals("match_main: Complex match.", 4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)); + dmp.Match_Threshold = 0.5f; + + // Test null inputs. + try { + dmp.match_main(NULL, NULL, 0); + assertFalse("match_main: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } +} + + +// PATCH TEST FUNCTIONS + + +void diff_match_patch_test::testPatchObj() { + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, "\nlaz")); + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("Patch: toString.", strp, p.toString()); +} + +void diff_match_patch_test::testPatchFromText() { + assertTrue("patch_fromText: #0.", dmp.patch_fromText("").isEmpty()); + + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch_fromText: #1.", strp, dmp.patch_fromText(strp).value(0).toString()); + + assertEquals("patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n").value(0).toString()); + + assertEquals("patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n").value(0).toString()); + + assertEquals("patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n").value(0).toString()); + + // Generates error. + try { + dmp.patch_fromText("Bad\nPatch\n"); + assertFalse("patch_fromText: #5.", true); + } catch (QString ex) { + // Exception expected. 
+ } +} + +void diff_match_patch_test::testPatchToText() { + QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + QList patches; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); + + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Dual", strp, dmp.patch_toText(patches)); +} + +void diff_match_patch_test::testPatchAddContext() { + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); + dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); + + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); + dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); + assertEquals("patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString()); +} + +void diff_match_patch_test::testPatchMake() { + QList patches; + patches = dmp.patch_make("", ""); + assertEquals("patch_make: Null case", "", dmp.patch_toText(patches)); + + QString text1 = "The quick brown fox jumps over the lazy dog."; + QString text2 = "That quick brown fox jumped over a lazy dog."; + QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make(text2, text1); + assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText(patches)); + + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); + + QList diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals("patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, diffs); + assertEquals("patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, text2, diffs); + assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); + assertEquals("patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); + + diffs = diffList(Diff(DELETE, "`1234567890-=[]\\;',./"), Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")); + assertEquals("patch_fromText: Character decoding.", diffs, dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").value(0).diffs); + + text1 = ""; + for (int x = 0; 
x < 100; x++) { + text1 += "abcdef"; + } + text2 = text1 + "123"; + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); + + // Test null inputs. + try { + dmp.patch_make(NULL, NULL); + assertFalse("patch_make: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } +} + +void diff_match_patch_test::testPatchSplitMax() { + // Assumes that Match_MaxBits is 32. + QList patches; + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); + QString oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 
1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); +} + +void diff_match_patch_test::testPatchAddPadding() { + QList patches; + patches = dmp.patch_make("", "test"); + assertEquals("patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("XY", "XtestY"); + assertEquals("patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); + assertEquals("patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); +} + +void diff_match_patch_test::testPatchApply() { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + QList patches; + patches = dmp.patch_make("", ""); + QPair > results = dmp.patch_apply(patches, "Hello world."); + QVector boolArray = results.second; + + QString resultStr = QString("%1\t%2").arg(results.first).arg(boolArray.count()); + assertEquals("patch_apply: Null case.", "Hello world.\t0", resultStr); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); + results = dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); + assertEquals("patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); + assertEquals("patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, "I am the very model of a modern major general."); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); + assertEquals("patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); + assertEquals("patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); + assertEquals("patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); + assertEquals("patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); + results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); + assertEquals("patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make("", "test"); + QString patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ""); + assertEquals("patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make("", "test"); + results = dmp.patch_apply(patches, ""); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); + assertEquals("patch_apply: Edge exact match.", "test\ttrue", resultStr); + + patches = dmp.patch_make("XY", "XtestY"); + results = dmp.patch_apply(patches, "XY"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); + assertEquals("patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr); + + patches = dmp.patch_make("y", "y123"); + results = dmp.patch_apply(patches, "x"); + boolArray = results.second; + resultStr = results.first + "\t" + (boolArray[0] ? 
"true" : "false"); + assertEquals("patch_apply: Edge partial match.", "x123\ttrue", resultStr); +} + + +void diff_match_patch_test::assertEquals(const QString &strCase, int n1, int n2) { + if (n1 != n2) { + qDebug("%s FAIL\nExpected: %d\nActual: %d", qPrintable(strCase), n1, n2); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QString &s1, const QString &s2) { + if (s1 != s2) { + qDebug("%s FAIL\nExpected: %s\nActual: %s", + qPrintable(strCase), qPrintable(s1), qPrintable(s2)); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const Diff &d1, const Diff &d2) { + if (d1 != d2) { + qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), + qPrintable(d1.toString()), qPrintable(d2.toString())); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { + bool fail = false; + if (list1.count() == list2.count()) { + int i = 0; + foreach(Diff d1, list1) { + Diff d2 = list2.value(i); + if (d1 != d2) { + fail = true; + break; + } + i++; + } + } else { + fail = true; + } + + if (fail) { + // Build human readable description of both lists. 
+ QString listString1 = "("; + bool first = true; + foreach(Diff d1, list1) { + if (!first) { + listString1 += ", "; + } + listString1 += d1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach(Diff d2, list2) { + if (!first) { + listString2 += ", "; + } + listString2 += d2.toString(); + first = false; + } + listString2 += ")"; + qDebug("%s FAIL\nExpected: %s\nActual: %s", + qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { + bool fail = false; + if (list1.count() == list2.count()) { + int i = 0; + foreach(QVariant q1, list1) { + QVariant q2 = list2.value(i); + if (q1 != q2) { + fail = true; + break; + } + i++; + } + } else { + fail = true; + } + + if (fail) { + // Build human readable description of both lists. + QString listString1 = "("; + bool first = true; + foreach(QVariant q1, list1) { + if (!first) { + listString1 += ", "; + } + listString1 += q1.toString(); + first = false; + } + listString1 += ")"; + QString listString2 = "("; + first = true; + foreach(QVariant q2, list2) { + if (!first) { + listString2 += ", "; + } + listString2 += q2.toString(); + first = false; + } + listString2 += ")"; + qDebug("%s FAIL\nExpected: %s\nActual: %s", + qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2) { + if (var1 != var2) { + qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), + qPrintable(var1.toString()), qPrintable(var2.toString())); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QMap &m1, const QMap &m2) { + QMapIterator 
i1(m1), i2(m2); + + while (i1.hasNext() && i2.hasNext()) { + i1.next(); + i2.next(); + if (i1.key() != i2.key() || i1.value() != i2.value()) { + qDebug("%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable(strCase), + i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value()); + throw strCase; + } + } + + if (i1.hasNext()) { + i1.next(); + qDebug("%s FAIL\nExpected: (%c, %d)\nActual: none", + qPrintable(strCase), i1.key().toAscii(), i1.value()); + throw strCase; + } + if (i2.hasNext()) { + i2.next(); + qDebug("%s FAIL\nExpected: none\nActual: (%c, %d)", + qPrintable(strCase), i2.key().toAscii(), i2.value()); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2) { + if (list1 != list2) { + qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), + qPrintable(list1.join(",")), qPrintable(list2.join(","))); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertTrue(const QString &strCase, bool value) { + if (!value) { + qDebug("%s FAIL\nExpected: true\nActual: false", qPrintable(strCase)); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + +void diff_match_patch_test::assertFalse(const QString &strCase, bool value) { + if (value) { + qDebug("%s FAIL\nExpected: false\nActual: true", qPrintable(strCase)); + throw strCase; + } + qDebug("%s OK", qPrintable(strCase)); +} + + +// Construct the two texts which made up the diff originally. 
+QStringList diff_match_patch_test::diff_rebuildtexts(QList diffs) { + QStringList text; + text << QString("") << QString(""); + foreach (Diff myDiff, diffs) { + if (myDiff.operation != INSERT) { + text[0] += myDiff.text; + } + if (myDiff.operation != DELETE) { + text[1] += myDiff.text; + } + } + return text; +} + +void diff_match_patch_test::assertEmpty(const QString &strCase, const QStringList &list) { + if (!list.isEmpty()) { + throw strCase; + } +} + + +// Private function for quickly building lists of diffs. +QList diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, + Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. + QList listRet; + if (d1.operation == INSERT && d1.text == NULL) { + return listRet; + } + listRet << d1; + + if (d2.operation == INSERT && d2.text == NULL) { + return listRet; + } + listRet << d2; + + if (d3.operation == INSERT && d3.text == NULL) { + return listRet; + } + listRet << d3; + + if (d4.operation == INSERT && d4.text == NULL) { + return listRet; + } + listRet << d4; + + if (d5.operation == INSERT && d5.text == NULL) { + return listRet; + } + listRet << d5; + + if (d6.operation == INSERT && d6.text == NULL) { + return listRet; + } + listRet << d6; + + if (d7.operation == INSERT && d7.text == NULL) { + return listRet; + } + listRet << d7; + + if (d8.operation == INSERT && d8.text == NULL) { + return listRet; + } + listRet << d8; + + if (d9.operation == INSERT && d9.text == NULL) { + return listRet; + } + listRet << d9; + + if (d10.operation == INSERT && d10.text == NULL) { + return listRet; + } + listRet << d10; + + return listRet; +} + + +/* +Compile instructions for MinGW and QT4 on Windows: +qmake -project +qmake +mingw32-make +g++ -o diff_match_patch_test debug\diff_match_patch_test.o debug\diff_match_patch.o \qt4\lib\libQtCore4.a +diff_match_patch_test.exe + +Compile insructions for OS X: +qmake -spec macx-g++ +make 
+./diff_match_patch +*/ diff --git a/cpp/diff_match_patch_test.h b/cpp/diff_match_patch_test.h new file mode 100644 index 0000000..9792222 --- /dev/null +++ b/cpp/diff_match_patch_test.h @@ -0,0 +1,89 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_TEST_H +#define DIFF_MATCH_PATCH_TEST_H + +class diff_match_patch_test { + public: + diff_match_patch_test(); + void run_all_tests(); + + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); + + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); + + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); + + private: + diff_match_patch dmp; + + 
// Define equality. + void assertEquals(const QString &strCase, int n1, int n2); + void assertEquals(const QString &strCase, const QString &s1, const QString &s2); + void assertEquals(const QString &strCase, const Diff &d1, const Diff &d2); + void assertEquals(const QString &strCase, const QList &list1, const QList &list2); + void assertEquals(const QString &strCase, const QList &list1, const QList &list2); + void assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2); + void assertEquals(const QString &strCase, const QMap &m1, const QMap &m2); + void assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2); + void assertTrue(const QString &strCase, bool value); + void assertFalse(const QString &strCase, bool value); + void assertEmpty(const QString &strCase, const QStringList &list); + + // Construct the two texts which made up the diff originally. + QStringList diff_rebuildtexts(QList diffs); + // Private function for quickly building lists of diffs. + QList diffList( + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. + Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), + Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), + Diff d5 = Diff(INSERT, NULL), Diff d6 = Diff(INSERT, NULL), + Diff d7 = Diff(INSERT, NULL), Diff d8 = Diff(INSERT, NULL), + Diff d9 = Diff(INSERT, NULL), Diff d10 = Diff(INSERT, NULL)); +}; + +#endif // DIFF_MATCH_PATCH_TEST_H diff --git a/csharp/DiffMatchPatch.cs b/csharp/DiffMatchPatch.cs new file mode 100644 index 0000000..e96dc94 --- /dev/null +++ b/csharp/DiffMatchPatch.cs @@ -0,0 +1,2299 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Web;
+
+namespace DiffMatchPatch {
+  internal static class CompatibilityExtensions {
+    // JScript splice function
+    public static List<T> Splice<T>(this List<T> input, int start, int count,
+        params T[] objects) {
+      List<T> deletedRange = input.GetRange(start, count);
+      input.RemoveRange(start, count);
+      input.InsertRange(start, objects);
+
+      return deletedRange;
+    }
+
+    // Java substring function
+    public static string JavaSubstring(this string s, int begin, int end) {
+      return s.Substring(begin, end - begin);
+    }
+  }
+
+  /**-
+   * The data structure representing a diff is a List of Diff objects:
+   * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
+   *  Diff(Operation.EQUAL, " world.")}
+   * which means: delete "Hello", add "Goodbye" and keep " world."
+   */
+  public enum Operation {
+    DELETE, INSERT, EQUAL
+  }
+
+
+  /**
+   * Class representing one diff operation.
+   */
+  public class Diff {
+    public Operation operation;
+    // One of: INSERT, DELETE or EQUAL.
+    public string text;
+    // The text associated with this diff operation.
+
+    /**
+     * Constructor.  Initializes the diff with the provided values.
+     * @param operation One of INSERT, DELETE or EQUAL.
+     * @param text The text being applied.
+     */
+    public Diff(Operation operation, string text) {
+      // Construct a diff with the specified operation and text.
+      this.operation = operation;
+      this.text = text;
+    }
+
+    /**
+     * Display a human-readable version of this Diff.
+     * @return text version.
+     */
+    public override string ToString() {
+      string prettyText = this.text.Replace('\n', '\u00b6');
+      return "Diff(" + this.operation + ",\"" + prettyText + "\")";
+    }
+
+    /**
+     * Is this Diff equivalent to another Diff?
+     * @param obj Another object to compare against.
+     * @return true or false.
+     */
+    public override bool Equals(Object obj) {
+      // If parameter is null return false.
+      if (obj == null) {
+        return false;
+      }
+
+      // If parameter cannot be cast to Diff return false.
+      Diff p = obj as Diff;
+      if ((System.Object)p == null) {
+        return false;
+      }
+
+      // Return true if the fields match.
+      return p.operation == this.operation && p.text == this.text;
+    }
+
+    public bool Equals(Diff obj) {
+      // If parameter is null return false.
+      if (obj == null) {
+        return false;
+      }
+
+      // Return true if the fields match.
+      return obj.operation == this.operation && obj.text == this.text;
+    }
+
+    public override int GetHashCode() {
+      return (text ?? string.Empty).GetHashCode() ^ operation.GetHashCode();
+    }
+  }
+
+
+  /**
+   * Class representing one patch operation.
+   */
+  public class Patch {
+    public List<Diff> diffs = new List<Diff>();
+    public int start1;
+    public int start2;
+    public int length1;
+    public int length2;
+
+    /**
+     * Emulate GNU diff's format.
+     * Header: @@ -382,8 +481,9 @@
+     * Indices are printed as 1-based, not 0-based.
+     * @return The GNU diff string.
+     */
+    public override string ToString() {
+      string coords1, coords2;
+      if (this.length1 == 0) {
+        coords1 = this.start1 + ",0";
+      } else if (this.length1 == 1) {
+        coords1 = Convert.ToString(this.start1 + 1);
+      } else {
+        coords1 = (this.start1 + 1) + "," + this.length1;
+      }
+      if (this.length2 == 0) {
+        coords2 = this.start2 + ",0";
+      } else if (this.length2 == 1) {
+        coords2 = Convert.ToString(this.start2 + 1);
+      } else {
+        coords2 = (this.start2 + 1) + "," + this.length2;
+      }
+      StringBuilder text = new StringBuilder();
+      text.Append("@@ -").Append(coords1).Append(" +").Append(coords2)
+          .Append(" @@\n");
+      // Escape the body of the patch with %xx notation.
+      foreach (Diff aDiff in this.diffs) {
+        switch (aDiff.operation) {
+          case Operation.INSERT:
+            text.Append('+');
+            break;
+          case Operation.DELETE:
+            text.Append('-');
+            break;
+          case Operation.EQUAL:
+            text.Append(' ');
+            break;
+        }
+
+        text.Append(HttpUtility.UrlEncode(aDiff.text,
+            new UTF8Encoding()).Replace('+', ' ')).Append("\n");
+      }
+
+      return diff_match_patch.unescapeForEncodeUriCompatability(
+          text.ToString());
+    }
+  }
+
+
+  /**
+   * Class containing the diff, match and patch methods.
+   * Also Contains the behaviour settings.
+   */
+  public class diff_match_patch {
+    // Defaults.
+    // Set these on your diff_match_patch instance to override the defaults.
+
+    // Number of seconds to map a diff before giving up (0 for infinity).
+    public float Diff_Timeout = 1.0f;
+    // Cost of an empty edit operation in terms of edit characters.
+    public short Diff_EditCost = 4;
+    // At what point is no match declared (0.0 = perfection, 1.0 = very loose).
+    public float Match_Threshold = 0.5f;
+    // How far to search for a match (0 = exact location, 1000+ = broad match).
+    // A match this many characters away from the expected location will add
+    // 1.0 to the score (0.0 is a perfect match).
+    public int Match_Distance = 1000;
+    // When deleting a large block of text (over ~64 characters), how close
+    // do the contents have to be to match the expected contents. (0.0 =
+    // perfection, 1.0 = very loose).  Note that Match_Threshold controls
+    // how closely the end points of a delete need to match.
+    public float Patch_DeleteThreshold = 0.5f;
+    // Chunk size for context length.
+    public short Patch_Margin = 4;
+
+    // The number of bits in an int.
+    private short Match_MaxBits = 32;
+
+
+    //  DIFF FUNCTIONS
+
+
+    /**
+     * Find the differences between two texts.
+     * Run a faster, slightly less optimal diff.
+     * This method allows the 'checklines' of diff_main() to be optional.
+     * Most of the time checklines is wanted, so default to true.
+     * @param text1 Old string to be diffed.
+     * @param text2 New string to be diffed.
+     * @return List of Diff objects.
+     */
+    public List<Diff> diff_main(string text1, string text2) {
+      return diff_main(text1, text2, true);
+    }
+
+    /**
+     * Find the differences between two texts.
+     * @param text1 Old string to be diffed.
+     * @param text2 New string to be diffed.
+     * @param checklines Speedup flag.  If false, then don't run a
+     *     line-level diff first to identify the changed areas.
+     *     If true, then run a faster slightly less optimal diff.
+     * @return List of Diff objects.
+     */
+    public List<Diff> diff_main(string text1, string text2, bool checklines) {
+      // Set a deadline by which time the diff must be complete.
+      DateTime deadline;
+      if (this.Diff_Timeout <= 0) {
+        deadline = DateTime.MaxValue;
+      } else {
+        deadline = DateTime.Now +
+            new TimeSpan(((long)(Diff_Timeout * 1000)) * 10000);
+      }
+      return diff_main(text1, text2, checklines, deadline);
+    }
+
+    /**
+     * Find the differences between two texts.  Simplifies the problem by
+     * stripping any common prefix or suffix off the texts before diffing.
+     * @param text1 Old string to be diffed.
+     * @param text2 New string to be diffed.
+     * @param checklines Speedup flag.  If false, then don't run a
+     *     line-level diff first to identify the changed areas.
+     *     If true, then run a faster slightly less optimal diff.
+     * @param deadline Time when the diff should be complete by.  Used
+     *     internally for recursive calls.  Users should set Diff_Timeout
+     *     instead.
+     * @return List of Diff objects.
+     */
+    private List<Diff> diff_main(string text1, string text2, bool checklines,
+        DateTime deadline) {
+      // Check for null inputs not needed since null can't be passed in C#.
+
+      // Check for equality (speedup).
+      List<Diff> diffs;
+      if (text1 == text2) {
+        diffs = new List<Diff>();
+        if (text1.Length != 0) {
+          diffs.Add(new Diff(Operation.EQUAL, text1));
+        }
+        return diffs;
+      }
+
+      // Trim off common prefix (speedup).
+      int commonlength = diff_commonPrefix(text1, text2);
+      string commonprefix = text1.Substring(0, commonlength);
+      text1 = text1.Substring(commonlength);
+      text2 = text2.Substring(commonlength);
+
+      // Trim off common suffix (speedup).
+      commonlength = diff_commonSuffix(text1, text2);
+      string commonsuffix = text1.Substring(text1.Length - commonlength);
+      text1 = text1.Substring(0, text1.Length - commonlength);
+      text2 = text2.Substring(0, text2.Length - commonlength);
+
+      // Compute the diff on the middle block.
+      diffs = diff_compute(text1, text2, checklines, deadline);
+
+      // Restore the prefix and suffix.
+      if (commonprefix.Length != 0) {
+        diffs.Insert(0, (new Diff(Operation.EQUAL, commonprefix)));
+      }
+      if (commonsuffix.Length != 0) {
+        diffs.Add(new Diff(Operation.EQUAL, commonsuffix));
+      }
+
+      diff_cleanupMerge(diffs);
+      return diffs;
+    }
+
+    /**
+     * Find the differences between two texts.  Assumes that the texts do not
+     * have any common prefix or suffix.
+     * @param text1 Old string to be diffed.
+     * @param text2 New string to be diffed.
+     * @param checklines Speedup flag.  If false, then don't run a
+     *     line-level diff first to identify the changed areas.
+     *     If true, then run a faster slightly less optimal diff.
+ * @param deadline Time when the diff should be complete by. + * @return List of Diff objects. + */ + private List diff_compute(string text1, string text2, + bool checklines, DateTime deadline) { + List diffs = new List(); + + if (text1.Length == 0) { + // Just add some text (speedup). + diffs.Add(new Diff(Operation.INSERT, text2)); + return diffs; + } + + if (text2.Length == 0) { + // Just delete some text (speedup). + diffs.Add(new Diff(Operation.DELETE, text1)); + return diffs; + } + + string longtext = text1.Length > text2.Length ? text1 : text2; + string shorttext = text1.Length > text2.Length ? text2 : text1; + int i = longtext.IndexOf(shorttext, StringComparison.Ordinal); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + Operation op = (text1.Length > text2.Length) ? + Operation.DELETE : Operation.INSERT; + diffs.Add(new Diff(op, longtext.Substring(0, i))); + diffs.Add(new Diff(Operation.EQUAL, shorttext)); + diffs.Add(new Diff(op, longtext.Substring(i + shorttext.Length))); + return diffs; + } + + if (shorttext.Length == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.Add(new Diff(Operation.DELETE, text1)); + diffs.Add(new Diff(Operation.INSERT, text2)); + return diffs; + } + + // Check to see if the problem can be split in two. + string[] hm = diff_halfMatch(text1, text2); + if (hm != null) { + // A half-match was found, sort out the return data. + string text1_a = hm[0]; + string text1_b = hm[1]; + string text2_a = hm[2]; + string text2_b = hm[3]; + string mid_common = hm[4]; + // Send both pairs off for separate processing. + List diffs_a = diff_main(text1_a, text2_a, checklines, deadline); + List diffs_b = diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. 
/**
 * Do a quick line-level diff on both strings, then rediff the parts for
 * greater accuracy.
 * This speedup can produce non-minimal diffs.
 * @param text1 Old string to be diffed.
 * @param text2 New string to be diffed.
 * @param deadline Time when the diff should be complete by.
 * @return List of Diff objects.
 */
private List<Diff> diff_lineMode(string text1, string text2,
    DateTime deadline) {
  // Scan the text on a line-by-line basis first: each distinct line is
  // replaced by a single character so the normal diff can run on lines.
  Object[] b = diff_linesToChars(text1, text2);
  text1 = (string)b[0];
  text2 = (string)b[1];
  List<string> linearray = (List<string>)b[2];

  List<Diff> diffs = diff_main(text1, text2, false, deadline);

  // Convert the diff back to original text.
  diff_charsToLines(diffs, linearray);
  // Eliminate freak matches (e.g. blank lines)
  diff_cleanupSemantic(diffs);

  // Rediff any replacement blocks, this time character-by-character.
  // Add a dummy entry at the end so the final edit run is flushed too.
  diffs.Add(new Diff(Operation.EQUAL, string.Empty));
  int pointer = 0;
  int count_delete = 0;
  int count_insert = 0;
  string text_delete = string.Empty;
  string text_insert = string.Empty;
  while (pointer < diffs.Count) {
    switch (diffs[pointer].operation) {
      case Operation.INSERT:
        count_insert++;
        text_insert += diffs[pointer].text;
        break;
      case Operation.DELETE:
        count_delete++;
        text_delete += diffs[pointer].text;
        break;
      case Operation.EQUAL:
        // Upon reaching an equality, check for prior redundancies.
        if (count_delete >= 1 && count_insert >= 1) {
          // Delete the offending records and add the merged ones.
          diffs.RemoveRange(pointer - count_delete - count_insert,
              count_delete + count_insert);
          pointer = pointer - count_delete - count_insert;
          List<Diff> a =
              this.diff_main(text_delete, text_insert, false, deadline);
          diffs.InsertRange(pointer, a);
          pointer = pointer + a.Count;
        }
        count_insert = 0;
        count_delete = 0;
        text_delete = string.Empty;
        text_insert = string.Empty;
        break;
    }
    pointer++;
  }
  diffs.RemoveAt(diffs.Count - 1);  // Remove the dummy entry at the end.

  return diffs;
}
/**
 * Find the 'middle snake' of a diff, split the problem in two
 * and return the recursively constructed diff.
 * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
 * If the deadline passes or no overlap is found, the result is a plain
 * delete-everything / insert-everything pair.
 * @param text1 Old string to be diffed.
 * @param text2 New string to be diffed.
 * @param deadline Time at which to bail if not yet complete.
 * @return List of Diff objects.
 */
protected List<Diff> diff_bisect(string text1, string text2,
    DateTime deadline) {
  // Cache the text lengths to prevent multiple calls.
  int text1_length = text1.Length;
  int text2_length = text2.Length;
  int max_d = (text1_length + text2_length + 1) / 2;
  int v_offset = max_d;
  int v_length = 2 * max_d;
  // v1/v2 hold the furthest x reached on each diagonal k (stored at
  // index v_offset + k) for the forward and reverse searches; -1 = unset.
  int[] v1 = new int[v_length];
  int[] v2 = new int[v_length];
  for (int x = 0; x < v_length; x++) {
    v1[x] = -1;
    v2[x] = -1;
  }
  v1[v_offset + 1] = 0;
  v2[v_offset + 1] = 0;
  int delta = text1_length - text2_length;
  // If the total number of characters is odd, then the front path will
  // collide with the reverse path.
  bool front = (delta % 2 != 0);
  // Offsets for start and end of k loop.
  // Prevents mapping of space beyond the grid.
  int k1start = 0;
  int k1end = 0;
  int k2start = 0;
  int k2end = 0;
  for (int d = 0; d < max_d; d++) {
    // Bail out if deadline is reached.
    if (DateTime.Now > deadline) {
      break;
    }

    // Walk the front path one step.
    for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
      int k1_offset = v_offset + k1;
      int x1;
      if (k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) {
        x1 = v1[k1_offset + 1];
      } else {
        x1 = v1[k1_offset - 1] + 1;
      }
      int y1 = x1 - k1;
      // Follow the diagonal while characters match.
      while (x1 < text1_length && y1 < text2_length
          && text1[x1] == text2[y1]) {
        x1++;
        y1++;
      }
      v1[k1_offset] = x1;
      if (x1 > text1_length) {
        // Ran off the right of the graph.
        k1end += 2;
      } else if (y1 > text2_length) {
        // Ran off the bottom of the graph.
        k1start += 2;
      } else if (front) {
        int k2_offset = v_offset + delta - k1;
        if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
          // Mirror x2 onto top-left coordinate system.
          int x2 = text1_length - v2[k2_offset];
          if (x1 >= x2) {
            // Overlap detected.
            return diff_bisectSplit(text1, text2, x1, y1, deadline);
          }
        }
      }
    }

    // Walk the reverse path one step.
    for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
      int k2_offset = v_offset + k2;
      int x2;
      if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
        x2 = v2[k2_offset + 1];
      } else {
        x2 = v2[k2_offset - 1] + 1;
      }
      int y2 = x2 - k2;
      // Follow the diagonal backwards from the ends of both texts.
      while (x2 < text1_length && y2 < text2_length
          && text1[text1_length - x2 - 1]
          == text2[text2_length - y2 - 1]) {
        x2++;
        y2++;
      }
      v2[k2_offset] = x2;
      if (x2 > text1_length) {
        // Ran off the left of the graph.
        k2end += 2;
      } else if (y2 > text2_length) {
        // Ran off the top of the graph.
        k2start += 2;
      } else if (!front) {
        int k1_offset = v_offset + delta - k2;
        if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
          int x1 = v1[k1_offset];
          int y1 = v_offset + x1 - k1_offset;
          // Mirror x2 onto top-left coordinate system.
          x2 = text1_length - v2[k2_offset];
          if (x1 >= x2) {
            // Overlap detected.
            return diff_bisectSplit(text1, text2, x1, y1, deadline);
          }
        }
      }
    }
  }
  // Diff took too long and hit the deadline or
  // number of diffs equals number of characters, no commonality at all.
  List<Diff> diffs = new List<Diff>();
  diffs.Add(new Diff(Operation.DELETE, text1));
  diffs.Add(new Diff(Operation.INSERT, text2));
  return diffs;
}
/**
 * Given the location of the 'middle snake', split the diff in two parts
 * and recurse.
 * @param text1 Old string to be diffed.
 * @param text2 New string to be diffed.
 * @param x Index of split point in text1.
 * @param y Index of split point in text2.
 * @param deadline Time at which to bail if not yet complete.
 * @return List of Diff objects (left-half diff followed by right-half).
 */
private List<Diff> diff_bisectSplit(string text1, string text2,
    int x, int y, DateTime deadline) {
  string text1a = text1.Substring(0, x);
  string text2a = text2.Substring(0, y);
  string text1b = text1.Substring(x);
  string text2b = text2.Substring(y);

  // Compute both diffs serially.
  List<Diff> diffs = diff_main(text1a, text2a, false, deadline);
  List<Diff> diffsb = diff_main(text1b, text2b, false, deadline);

  diffs.AddRange(diffsb);
  return diffs;
}

/**
 * Split two texts into a list of strings.  Reduce the texts to a string of
 * hashes where each Unicode character represents one line.
 * @param text1 First string.
 * @param text2 Second string.
 * @return Three element Object array, containing the encoded text1, the
 *     encoded text2 and the List of unique strings.  The zeroth element
 *     of the List of unique strings is intentionally blank.
 */
protected Object[] diff_linesToChars(string text1, string text2) {
  List<string> lineArray = new List<string>();
  Dictionary<string, int> lineHash = new Dictionary<string, int>();
  // e.g. lineArray[4] == "Hello\n"
  // e.g. lineHash["Hello\n"] == 4

  // "\x00" is a valid character, but various debuggers don't like it.
  // So we'll insert a junk entry to avoid generating a null character.
  lineArray.Add(string.Empty);

  // Both texts share one table, so equal lines map to the same character.
  string chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
  string chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
  return new Object[] { chars1, chars2, lineArray };
}
/**
 * Split a text into a list of strings.  Reduce the texts to a string of
 * hashes where each Unicode character represents one line.
 * New lines are appended to lineArray and registered in lineHash as a
 * side effect.
 * @param text String to encode.
 * @param lineArray List of unique strings.
 * @param lineHash Map of strings to indices.
 * @return Encoded string.
 */
private string diff_linesToCharsMunge(string text, List<string> lineArray,
    Dictionary<string, int> lineHash) {
  int lineStart = 0;
  int lineEnd = -1;
  StringBuilder chars = new StringBuilder();
  // Walk the text, pulling out a Substring for each line.
  // text.split('\n') would temporarily double our memory footprint.
  // Modifying text would create many large strings to garbage collect.
  while (lineEnd < text.Length - 1) {
    lineEnd = text.IndexOf('\n', lineStart);
    if (lineEnd == -1) {
      // Last line has no trailing newline; take the rest of the text.
      lineEnd = text.Length - 1;
    }
    string line = text.JavaSubstring(lineStart, lineEnd + 1);
    lineStart = lineEnd + 1;

    // Single lookup: reuse the known index or register a new line.
    int index;
    if (!lineHash.TryGetValue(line, out index)) {
      lineArray.Add(line);
      index = lineArray.Count - 1;
      lineHash.Add(line, index);
    }
    // NOTE(review): the index is narrowed to a 16-bit char here, so more
    // than 65535 distinct lines would wrap around - confirm callers stay
    // below that limit.
    chars.Append((char)index);
  }
  return chars.ToString();
}

/**
 * Rehydrate the text in a diff from a string of line hashes to real lines
 * of text.  Each Diff's text is replaced in place.
 * @param diffs List of Diff objects.
 * @param lineArray List of unique strings.
 */
protected void diff_charsToLines(ICollection<Diff> diffs,
    List<string> lineArray) {
  foreach (Diff diff in diffs) {
    StringBuilder text = new StringBuilder();
    // Each character's code is an index into lineArray.
    foreach (char lineCode in diff.text) {
      text.Append(lineArray[lineCode]);
    }
    diff.text = text.ToString();
  }
}
/**
 * Determine the common prefix of two strings.
 * @param text1 First string.
 * @param text2 Second string.
 * @return The number of characters common to the start of each string.
 */
public int diff_commonPrefix(string text1, string text2) {
  // Performance analysis: http://neil.fraser.name/news/2007/10/09/
  int limit = Math.Min(text1.Length, text2.Length);
  int matched = 0;
  // Advance while both strings agree character for character.
  while (matched < limit && text1[matched] == text2[matched]) {
    matched++;
  }
  return matched;
}

/**
 * Determine the common suffix of two strings.
 * @param text1 First string.
 * @param text2 Second string.
 * @return The number of characters common to the end of each string.
 */
public int diff_commonSuffix(string text1, string text2) {
  // Performance analysis: http://neil.fraser.name/news/2007/10/09/
  int last1 = text1.Length - 1;
  int last2 = text2.Length - 1;
  int limit = Math.Min(text1.Length, text2.Length);
  int matched = 0;
  // Walk backwards from the end of both strings.
  while (matched < limit
      && text1[last1 - matched] == text2[last2 - matched]) {
    matched++;
  }
  return matched;
}

/**
 * Determine if the suffix of one string is the prefix of another.
 * @param text1 First string.
 * @param text2 Second string.
 * @return The number of characters common to the end of the first
 *     string and the start of the second string.
 */
protected int diff_commonOverlap(string text1, string text2) {
  int len1 = text1.Length;
  int len2 = text2.Length;
  // Nothing can overlap with an empty string.
  if (len1 == 0 || len2 == 0) {
    return 0;
  }
  // Only the tail of text1 and the head of text2 of equal size matter.
  if (len1 > len2) {
    text1 = text1.Substring(len1 - len2);
  } else if (len1 < len2) {
    text2 = text2.Substring(0, len1);
  }
  int window = Math.Min(len1, len2);
  // Quick check for the worst case: the whole window overlaps.
  if (text1 == text2) {
    return window;
  }

  // Start by looking for a single character match
  // and increase length until no match is found.
  // Performance analysis: http://neil.fraser.name/news/2010/11/04/
  int longest = 0;
  int candidate = 1;
  for (;;) {
    string tail = text1.Substring(window - candidate);
    int hit = text2.IndexOf(tail, StringComparison.Ordinal);
    if (hit == -1) {
      return longest;
    }
    candidate += hit;
    bool confirmed = hit == 0
        || text1.Substring(window - candidate) ==
            text2.Substring(0, candidate);
    if (confirmed) {
      longest = candidate;
      candidate++;
    }
  }
}
/**
 * Do the two texts share a Substring which is at least half the length of
 * the longer text?
 * This speedup can produce non-minimal diffs.
 * @param text1 First string.
 * @param text2 Second string.
 * @return Five element String array, containing the prefix of text1, the
 *     suffix of text1, the prefix of text2, the suffix of text2 and the
 *     common middle.  Or null if there was no match.
 */
protected string[] diff_halfMatch(string text1, string text2) {
  if (this.Diff_Timeout <= 0) {
    // Don't risk returning a non-optimal diff if we have unlimited time.
    return null;
  }
  bool firstIsLonger = text1.Length > text2.Length;
  string longtext = firstIsLonger ? text1 : text2;
  string shorttext = firstIsLonger ? text2 : text1;
  if (longtext.Length < 4 || shorttext.Length * 2 < longtext.Length) {
    return null;  // Pointless.
  }

  // Seed the search from the second quarter, then from the third quarter.
  string[] fromSecondQuarter = diff_halfMatchI(longtext, shorttext,
      (longtext.Length + 3) / 4);
  string[] fromThirdQuarter = diff_halfMatchI(longtext, shorttext,
      (longtext.Length + 1) / 2);

  // Keep whichever attempt matched; if both did, keep the longer middle.
  string[] chosen;
  if (fromSecondQuarter == null) {
    chosen = fromThirdQuarter;
  } else if (fromThirdQuarter == null) {
    chosen = fromSecondQuarter;
  } else {
    chosen = fromSecondQuarter[4].Length > fromThirdQuarter[4].Length
        ? fromSecondQuarter : fromThirdQuarter;
  }
  if (chosen == null) {
    return null;
  }

  // The helper reports long-text pieces first; put text1's pieces first.
  if (firstIsLonger) {
    return chosen;
  }
  return new string[] {
      chosen[2], chosen[3], chosen[0], chosen[1], chosen[4] };
}
/**
 * Does a Substring of shorttext exist within longtext such that the
 * Substring is at least half the length of longtext?
 * @param longtext Longer string.
 * @param shorttext Shorter string.
 * @param i Start index of quarter length Substring within longtext.
 * @return Five element string array, containing the prefix of longtext, the
 *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
 *     and the common middle.  Or null if there was no match.
 */
private string[] diff_halfMatchI(string longtext, string shorttext, int i) {
  // Start with a 1/4 length Substring at position i as a seed.
  string seed = longtext.Substring(i, longtext.Length / 4);
  string bestCommon = string.Empty;
  string bestLongHead = string.Empty;
  string bestLongTail = string.Empty;
  string bestShortHead = string.Empty;
  string bestShortTail = string.Empty;

  // Visit every occurrence of the seed in shorttext and grow each one
  // outwards in both directions.
  int j = shorttext.IndexOf(seed, 0, StringComparison.Ordinal);
  while (j != -1) {
    int forward = diff_commonPrefix(longtext.Substring(i),
        shorttext.Substring(j));
    int backward = diff_commonSuffix(longtext.Substring(0, i),
        shorttext.Substring(0, j));
    if (bestCommon.Length < backward + forward) {
      bestCommon = shorttext.Substring(j - backward, backward)
          + shorttext.Substring(j, forward);
      bestLongHead = longtext.Substring(0, i - backward);
      bestLongTail = longtext.Substring(i + forward);
      bestShortHead = shorttext.Substring(0, j - backward);
      bestShortTail = shorttext.Substring(j + forward);
    }
    if (j >= shorttext.Length) {
      break;
    }
    j = shorttext.IndexOf(seed, j + 1, StringComparison.Ordinal);
  }

  // Only report a match covering at least half of longtext.
  if (bestCommon.Length * 2 < longtext.Length) {
    return null;
  }
  return new string[]{bestLongHead, bestLongTail,
      bestShortHead, bestShortTail, bestCommon};
}
/**
 * Reduce the number of edits by eliminating semantically trivial
 * equalities.  The list is modified in place.
 * @param diffs List of Diff objects.
 */
public void diff_cleanupSemantic(List<Diff> diffs) {
  bool changes = false;
  // Stack of indices where equalities are found.
  Stack<int> equalities = new Stack<int>();
  // Text of the most recently seen equality; null when none is pending.
  string lastequality = null;
  int pointer = 0;  // Index of current position.
  // Number of characters that changed prior to the equality.
  int length_insertions1 = 0;
  int length_deletions1 = 0;
  // Number of characters that changed after the equality.
  int length_insertions2 = 0;
  int length_deletions2 = 0;
  while (pointer < diffs.Count) {
    if (diffs[pointer].operation == Operation.EQUAL) {  // Equality found.
      equalities.Push(pointer);
      length_insertions1 = length_insertions2;
      length_deletions1 = length_deletions2;
      length_insertions2 = 0;
      length_deletions2 = 0;
      lastequality = diffs[pointer].text;
    } else {  // an insertion or deletion
      if (diffs[pointer].operation == Operation.INSERT) {
        length_insertions2 += diffs[pointer].text.Length;
      } else {
        length_deletions2 += diffs[pointer].text.Length;
      }
      // Eliminate an equality that is smaller or equal to the edits on both
      // sides of it.
      if (lastequality != null && (lastequality.Length
          <= Math.Max(length_insertions1, length_deletions1))
          && (lastequality.Length
              <= Math.Max(length_insertions2, length_deletions2))) {
        // Duplicate record.
        diffs.Insert(equalities.Peek(),
                     new Diff(Operation.DELETE, lastequality));
        // Change second copy to insert.
        diffs[equalities.Peek() + 1].operation = Operation.INSERT;
        // Throw away the equality we just deleted.
        equalities.Pop();
        if (equalities.Count > 0) {
          equalities.Pop();
        }
        // Rewind to the previous surviving equality (or the list start)
        // so the merged edits are re-examined.
        pointer = equalities.Count > 0 ? equalities.Peek() : -1;
        length_insertions1 = 0;  // Reset the counters.
        length_deletions1 = 0;
        length_insertions2 = 0;
        length_deletions2 = 0;
        lastequality = null;
        changes = true;
      }
    }
    pointer++;
  }

  // Normalize the diff.
  if (changes) {
    diff_cleanupMerge(diffs);
  }
  diff_cleanupSemanticLossless(diffs);

  // Find any overlaps between deletions and insertions.
  // e.g: -[abcxxx] +[xxxdef]
  //   -> -[abc] xxx +[def]
  // e.g: -[xxxabc] +[defxxx]
  //   -> +[def] xxx -[abc]
  // Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 1;
  while (pointer < diffs.Count) {
    if (diffs[pointer - 1].operation == Operation.DELETE &&
        diffs[pointer].operation == Operation.INSERT) {
      string deletion = diffs[pointer - 1].text;
      string insertion = diffs[pointer].text;
      int overlap_length1 = diff_commonOverlap(deletion, insertion);
      int overlap_length2 = diff_commonOverlap(insertion, deletion);
      if (overlap_length1 >= overlap_length2) {
        if (overlap_length1 >= deletion.Length / 2.0 ||
            overlap_length1 >= insertion.Length / 2.0) {
          // Overlap found.
          // Insert an equality and trim the surrounding edits.
          diffs.Insert(pointer, new Diff(Operation.EQUAL,
              insertion.Substring(0, overlap_length1)));
          diffs[pointer - 1].text =
              deletion.Substring(0, deletion.Length - overlap_length1);
          diffs[pointer + 1].text = insertion.Substring(overlap_length1);
          pointer++;
        }
      } else {
        if (overlap_length2 >= deletion.Length / 2.0 ||
            overlap_length2 >= insertion.Length / 2.0) {
          // Reverse overlap found.
          // Insert an equality and swap and trim the surrounding edits.
          diffs.Insert(pointer, new Diff(Operation.EQUAL,
              deletion.Substring(0, overlap_length2)));
          diffs[pointer - 1].operation = Operation.INSERT;
          diffs[pointer - 1].text =
              insertion.Substring(0, insertion.Length - overlap_length2);
          diffs[pointer + 1].operation = Operation.DELETE;
          diffs[pointer + 1].text = deletion.Substring(overlap_length2);
          pointer++;
        }
      }
      pointer++;
    }
    pointer++;
  }
}
/**
 * Look for single edits surrounded on both sides by equalities
 * which can be shifted sideways to align the edit to a word boundary.
 * e.g: The c+[at c]ame. -> The +[cat ]came.
 * The list is modified in place.
 * @param diffs List of Diff objects.
 */
public void diff_cleanupSemanticLossless(List<Diff> diffs) {
  int pointer = 1;
  // Intentionally ignore the first and last element (don't need checking).
  while (pointer < diffs.Count - 1) {
    if (diffs[pointer - 1].operation == Operation.EQUAL &&
      diffs[pointer + 1].operation == Operation.EQUAL) {
      // This is a single edit surrounded by equalities.
      string equality1 = diffs[pointer - 1].text;
      string edit = diffs[pointer].text;
      string equality2 = diffs[pointer + 1].text;

      // First, shift the edit as far left as possible.
      int commonOffset = this.diff_commonSuffix(equality1, edit);
      if (commonOffset > 0) {
        string commonString = edit.Substring(edit.Length - commonOffset);
        equality1 = equality1.Substring(0, equality1.Length - commonOffset);
        edit = commonString + edit.Substring(0, edit.Length - commonOffset);
        equality2 = commonString + equality2;
      }

      // Second, step character by character right,
      // looking for the best fit.
      string bestEquality1 = equality1;
      string bestEdit = edit;
      string bestEquality2 = equality2;
      int bestScore = diff_cleanupSemanticScore(equality1, edit) +
          diff_cleanupSemanticScore(edit, equality2);
      while (edit.Length != 0 && equality2.Length != 0
          && edit[0] == equality2[0]) {
        equality1 += edit[0];
        edit = edit.Substring(1) + equality2[0];
        equality2 = equality2.Substring(1);
        int score = diff_cleanupSemanticScore(equality1, edit) +
            diff_cleanupSemanticScore(edit, equality2);
        // The >= encourages trailing rather than leading whitespace on
        // edits.
        if (score >= bestScore) {
          bestScore = score;
          bestEquality1 = equality1;
          bestEdit = edit;
          bestEquality2 = equality2;
        }
      }

      if (diffs[pointer - 1].text != bestEquality1) {
        // We have an improvement, save it back to the diff.
        if (bestEquality1.Length != 0) {
          diffs[pointer - 1].text = bestEquality1;
        } else {
          // The left equality was consumed entirely; drop it.
          diffs.RemoveAt(pointer - 1);
          pointer--;
        }
        diffs[pointer].text = bestEdit;
        if (bestEquality2.Length != 0) {
          diffs[pointer + 1].text = bestEquality2;
        } else {
          // The right equality was consumed entirely; drop it.
          diffs.RemoveAt(pointer + 1);
          pointer--;
        }
      }
    }
    pointer++;
  }
}
/**
 * Given two strings, compute a score representing whether the internal
 * boundary falls on logical boundaries.
 * Scores range from 6 (best) to 0 (worst).
 * @param one First string.
 * @param two Second string.
 * @return The score.
 */
private int diff_cleanupSemanticScore(string one, string two) {
  // Edges are the best.
  if (one.Length == 0 || two.Length == 0) {
    return 6;
  }

  // Each port of this function behaves slightly differently due to
  // subtle differences in each language's definition of things like
  // 'whitespace'.  Since this function's purpose is largely cosmetic,
  // the choice has been made to use each language's native features
  // rather than force total conformity.
  char left = one[one.Length - 1];
  char right = two[0];

  // Each property is only considered when the broader one already holds.
  bool leftNonAlnum = !Char.IsLetterOrDigit(left);
  bool leftSpace = leftNonAlnum && Char.IsWhiteSpace(left);
  bool leftBreak = leftSpace && Char.IsControl(left);
  bool leftBlank = leftBreak && BLANKLINEEND.IsMatch(one);

  bool rightNonAlnum = !Char.IsLetterOrDigit(right);
  bool rightSpace = rightNonAlnum && Char.IsWhiteSpace(right);
  bool rightBreak = rightSpace && Char.IsControl(right);
  bool rightBlank = rightBreak && BLANKLINESTART.IsMatch(two);

  // Five points for blank lines.
  if (leftBlank || rightBlank) {
    return 5;
  }
  // Four points for line breaks.
  if (leftBreak || rightBreak) {
    return 4;
  }
  // Three points for end of sentences.
  if (leftNonAlnum && !leftSpace && rightSpace) {
    return 3;
  }
  // Two points for whitespace.
  if (leftSpace || rightSpace) {
    return 2;
  }
  // One point for non-alphanumeric.
  if (leftNonAlnum || rightNonAlnum) {
    return 1;
  }
  return 0;
}
// Regex patterns for matching blank-line boundaries.  Static and readonly
// so they are built once rather than for every instance.
private static readonly Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z");
private static readonly Regex BLANKLINESTART =
    new Regex("\\A\\r?\\n\\r?\\n");

/**
 * Reduce the number of edits by eliminating operationally trivial
 * equalities.  The list is modified in place.
 * @param diffs List of Diff objects.
 */
public void diff_cleanupEfficiency(List<Diff> diffs) {
  bool changes = false;
  // Stack of indices where equalities are found.
  Stack<int> equalities = new Stack<int>();
  // Text of the current candidate equality; empty when there is none.
  string lastequality = string.Empty;
  int pointer = 0;  // Index of current position.
  // Is there an insertion operation before the last equality.
  bool pre_ins = false;
  // Is there a deletion operation before the last equality.
  bool pre_del = false;
  // Is there an insertion operation after the last equality.
  bool post_ins = false;
  // Is there a deletion operation after the last equality.
  bool post_del = false;
  while (pointer < diffs.Count) {
    if (diffs[pointer].operation == Operation.EQUAL) {  // Equality found.
      if (diffs[pointer].text.Length < this.Diff_EditCost
          && (post_ins || post_del)) {
        // Candidate found.
        equalities.Push(pointer);
        pre_ins = post_ins;
        pre_del = post_del;
        lastequality = diffs[pointer].text;
      } else {
        // Not a candidate, and can never become one.
        equalities.Clear();
        lastequality = string.Empty;
      }
      post_ins = post_del = false;
    } else {  // An insertion or deletion.
      if (diffs[pointer].operation == Operation.DELETE) {
        post_del = true;
      } else {
        post_ins = true;
      }
      /*
       * Five types to be split (+ = insertion, - = deletion):
       * +A -B XY +C -D
       * +A    X  +C -D
       * +A -B X  +C
       * -A    X  +C -D
       * +A -B X  -C
       */
      if ((lastequality.Length != 0)
          && ((pre_ins && pre_del && post_ins && post_del)
          || ((lastequality.Length < this.Diff_EditCost / 2)
          && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0)
          + (post_del ? 1 : 0)) == 3))) {
        // Duplicate record.
        diffs.Insert(equalities.Peek(),
                     new Diff(Operation.DELETE, lastequality));
        // Change second copy to insert.
        diffs[equalities.Peek() + 1].operation = Operation.INSERT;
        equalities.Pop();  // Throw away the equality we just deleted.
        lastequality = string.Empty;
        if (pre_ins && pre_del) {
          // No changes made which could affect previous entry, keep going.
          post_ins = post_del = true;
          equalities.Clear();
        } else {
          if (equalities.Count > 0) {
            equalities.Pop();
          }

          // Rewind to the previous candidate (or the list start).
          pointer = equalities.Count > 0 ? equalities.Peek() : -1;
          post_ins = post_del = false;
        }
        changes = true;
      }
    }
    pointer++;
  }

  if (changes) {
    diff_cleanupMerge(diffs);
  }
}
/**
 * Reorder and merge like edit sections.  Merge equalities.
 * Any edit section can move as long as it doesn't cross an equality.
 * The list is modified in place; the method calls itself again whenever
 * the second pass shifted an edit.
 * @param diffs List of Diff objects.
 */
public void diff_cleanupMerge(List<Diff> diffs) {
  // Add a dummy entry at the end so the final edit run is flushed.
  diffs.Add(new Diff(Operation.EQUAL, string.Empty));
  int pointer = 0;
  int count_delete = 0;
  int count_insert = 0;
  string text_delete = string.Empty;
  string text_insert = string.Empty;
  int commonlength;
  while (pointer < diffs.Count) {
    switch (diffs[pointer].operation) {
      case Operation.INSERT:
        count_insert++;
        text_insert += diffs[pointer].text;
        pointer++;
        break;
      case Operation.DELETE:
        count_delete++;
        text_delete += diffs[pointer].text;
        pointer++;
        break;
      case Operation.EQUAL:
        // Upon reaching an equality, check for prior redundancies.
        if (count_delete + count_insert > 1) {
          if (count_delete != 0 && count_insert != 0) {
            // Factor out any common prefixes.
            commonlength = this.diff_commonPrefix(text_insert, text_delete);
            if (commonlength != 0) {
              if ((pointer - count_delete - count_insert) > 0 &&
                diffs[pointer - count_delete - count_insert - 1].operation
                    == Operation.EQUAL) {
                diffs[pointer - count_delete - count_insert - 1].text
                    += text_insert.Substring(0, commonlength);
              } else {
                diffs.Insert(0, new Diff(Operation.EQUAL,
                    text_insert.Substring(0, commonlength)));
                pointer++;
              }
              text_insert = text_insert.Substring(commonlength);
              text_delete = text_delete.Substring(commonlength);
            }
            // Factor out any common suffixes.
            commonlength = this.diff_commonSuffix(text_insert, text_delete);
            if (commonlength != 0) {
              diffs[pointer].text = text_insert.Substring(text_insert.Length
                  - commonlength) + diffs[pointer].text;
              text_insert = text_insert.Substring(0, text_insert.Length
                  - commonlength);
              text_delete = text_delete.Substring(0, text_delete.Length
                  - commonlength);
            }
          }
          // Delete the offending records and add the merged ones.
          if (count_delete == 0) {
            diffs.Splice(pointer - count_insert,
                count_delete + count_insert,
                new Diff(Operation.INSERT, text_insert));
          } else if (count_insert == 0) {
            diffs.Splice(pointer - count_delete,
                count_delete + count_insert,
                new Diff(Operation.DELETE, text_delete));
          } else {
            diffs.Splice(pointer - count_delete - count_insert,
                count_delete + count_insert,
                new Diff(Operation.DELETE, text_delete),
                new Diff(Operation.INSERT, text_insert));
          }
          // Step past the merged record(s) and the current equality.
          pointer = pointer - count_delete - count_insert +
              (count_delete != 0 ? 1 : 0) + (count_insert != 0 ? 1 : 0) + 1;
        } else if (pointer != 0
            && diffs[pointer - 1].operation == Operation.EQUAL) {
          // Merge this equality with the previous one.
          diffs[pointer - 1].text += diffs[pointer].text;
          diffs.RemoveAt(pointer);
        } else {
          pointer++;
        }
        count_insert = 0;
        count_delete = 0;
        text_delete = string.Empty;
        text_insert = string.Empty;
        break;
    }
  }
  if (diffs[diffs.Count - 1].text.Length == 0) {
    diffs.RemoveAt(diffs.Count - 1);  // Remove the dummy entry at the end.
  }

  // Second pass: look for single edits surrounded on both sides by
  // equalities which can be shifted sideways to eliminate an equality.
  // e.g: A +[BA] C -> +[AB] AC
  bool changes = false;
  pointer = 1;
  // Intentionally ignore the first and last element (don't need checking).
  while (pointer < (diffs.Count - 1)) {
    if (diffs[pointer - 1].operation == Operation.EQUAL &&
      diffs[pointer + 1].operation == Operation.EQUAL) {
      // This is a single edit surrounded by equalities.
      if (diffs[pointer].text.EndsWith(diffs[pointer - 1].text,
          StringComparison.Ordinal)) {
        // Shift the edit over the previous equality.
        diffs[pointer].text = diffs[pointer - 1].text +
            diffs[pointer].text.Substring(0, diffs[pointer].text.Length -
                                          diffs[pointer - 1].text.Length);
        diffs[pointer + 1].text = diffs[pointer - 1].text
            + diffs[pointer + 1].text;
        diffs.Splice(pointer - 1, 1);
        changes = true;
      } else if (diffs[pointer].text.StartsWith(diffs[pointer + 1].text,
          StringComparison.Ordinal)) {
        // Shift the edit over the next equality.
        diffs[pointer - 1].text += diffs[pointer + 1].text;
        diffs[pointer].text =
            diffs[pointer].text.Substring(diffs[pointer + 1].text.Length)
            + diffs[pointer + 1].text;
        diffs.Splice(pointer + 1, 1);
        changes = true;
      }
    }
    pointer++;
  }
  // If shifts were made, the diff needs reordering and another shift sweep.
  if (changes) {
    this.diff_cleanupMerge(diffs);
  }
}
/**
 * loc is a location in text1, compute and return the equivalent location in
 * text2.
 * e.g. "The cat" vs "The big cat", 1->1, 5->8
 * @param diffs List of Diff objects.
 * @param loc Location within text1.
 * @return Location within text2.
 */
public int diff_xIndex(List<Diff> diffs, int loc) {
  int chars1 = 0;       // Characters of text1 consumed so far.
  int chars2 = 0;       // Characters of text2 consumed so far.
  int last_chars1 = 0;  // Same counters as of the previous diff.
  int last_chars2 = 0;
  Diff lastDiff = null;
  foreach (Diff aDiff in diffs) {
    if (aDiff.operation != Operation.INSERT) {
      // Equality or deletion.
      chars1 += aDiff.text.Length;
    }
    if (aDiff.operation != Operation.DELETE) {
      // Equality or insertion.
      chars2 += aDiff.text.Length;
    }
    if (chars1 > loc) {
      // Overshot the location.
      lastDiff = aDiff;
      break;
    }
    last_chars1 = chars1;
    last_chars2 = chars2;
  }
  if (lastDiff != null && lastDiff.operation == Operation.DELETE) {
    // The location was deleted.
    return last_chars2;
  }
  // Add the remaining character length.
  return last_chars2 + (loc - last_chars1);
}
/**
 * Convert a Diff list into a pretty HTML report.
 * Text is HTML-escaped, newlines are shown as a pilcrow followed by a
 * line break, insertions are wrapped in ins, deletions in del and
 * equalities in span elements.
 * @param diffs List of Diff objects.
 * @return HTML representation.
 */
public string diff_prettyHtml(List<Diff> diffs) {
  StringBuilder html = new StringBuilder();
  foreach (Diff aDiff in diffs) {
    // Escape '&' first so the entities added afterwards are not
    // themselves re-escaped.
    string text = aDiff.text.Replace("&", "&amp;").Replace("<", "&lt;")
        .Replace(">", "&gt;").Replace("\n", "&para;<br>");
    switch (aDiff.operation) {
      case Operation.INSERT:
        html.Append("<ins style=\"background:#e6ffe6;\">").Append(text)
            .Append("</ins>");
        break;
      case Operation.DELETE:
        html.Append("<del style=\"background:#ffe6e6;\">").Append(text)
            .Append("</del>");
        break;
      case Operation.EQUAL:
        html.Append("<span>").Append(text).Append("</span>");
        break;
    }
  }
  return html.ToString();
}

/**
 * Compute and return the source text (all equalities and deletions).
 * @param diffs List of Diff objects.
 * @return Source text.
 */
public string diff_text1(List<Diff> diffs) {
  StringBuilder text = new StringBuilder();
  foreach (Diff aDiff in diffs) {
    if (aDiff.operation != Operation.INSERT) {
      text.Append(aDiff.text);
    }
  }
  return text.ToString();
}

/**
 * Compute and return the destination text (all equalities and insertions).
 * @param diffs List of Diff objects.
 * @return Destination text.
 */
public string diff_text2(List<Diff> diffs) {
  StringBuilder text = new StringBuilder();
  foreach (Diff aDiff in diffs) {
    if (aDiff.operation != Operation.DELETE) {
      text.Append(aDiff.text);
    }
  }
  return text.ToString();
}

/**
 * Compute the Levenshtein distance; the number of inserted, deleted or
 * substituted characters.
 * @param diffs List of Diff objects.
 * @return Number of changes.
 */
public int diff_levenshtein(List<Diff> diffs) {
  int levenshtein = 0;
  int insertions = 0;
  int deletions = 0;
  foreach (Diff aDiff in diffs) {
    switch (aDiff.operation) {
      case Operation.INSERT:
        insertions += aDiff.text.Length;
        break;
      case Operation.DELETE:
        deletions += aDiff.text.Length;
        break;
      case Operation.EQUAL:
        // A deletion and an insertion is one substitution.
        levenshtein += Math.Max(insertions, deletions);
        insertions = 0;
        deletions = 0;
        break;
    }
  }
  // Account for edits after the last equality.
  levenshtein += Math.Max(insertions, deletions);
  return levenshtein;
}
=3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx + * notation. + * @param diffs Array of Diff objects. + * @return Delta text. + */ + public string diff_toDelta(List diffs) { + StringBuilder text = new StringBuilder(); + foreach (Diff aDiff in diffs) { + switch (aDiff.operation) { + case Operation.INSERT: + text.Append("+").Append(HttpUtility.UrlEncode(aDiff.text, + new UTF8Encoding()).Replace('+', ' ')).Append("\t"); + break; + case Operation.DELETE: + text.Append("-").Append(aDiff.text.Length).Append("\t"); + break; + case Operation.EQUAL: + text.Append("=").Append(aDiff.text.Length).Append("\t"); + break; + } + } + string delta = text.ToString(); + if (delta.Length != 0) { + // Strip off trailing tab character. + delta = delta.Substring(0, delta.Length - 1); + delta = unescapeForEncodeUriCompatability(delta); + } + return delta; + } + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, comAdde the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of Diff objects or null if invalid. + * @throws ArgumentException If invalid input. + */ + public List diff_fromDelta(string text1, string delta) { + List diffs = new List(); + int pointer = 0; // Cursor in text1 + string[] tokens = delta.Split(new string[] { "\t" }, + StringSplitOptions.None); + foreach (string token in tokens) { + if (token.Length == 0) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). 
+ string param = token.Substring(1); + switch (token[0]) { + case '+': + // decode would change all "+" to " " + param = param.Replace("+", "%2b"); + + param = HttpUtility.UrlDecode(param, new UTF8Encoding(false, true)); + //} catch (UnsupportedEncodingException e) { + // // Not likely on modern system. + // throw new Error("This system does not support UTF-8.", e); + //} catch (IllegalArgumentException e) { + // // Malformed URI sequence. + // throw new IllegalArgumentException( + // "Illegal escape in diff_fromDelta: " + param, e); + //} + diffs.Add(new Diff(Operation.INSERT, param)); + break; + case '-': + // Fall through. + case '=': + int n; + try { + n = Convert.ToInt32(param); + } catch (FormatException e) { + throw new ArgumentException( + "Invalid number in diff_fromDelta: " + param, e); + } + if (n < 0) { + throw new ArgumentException( + "Negative number in diff_fromDelta: " + param); + } + string text; + try { + text = text1.Substring(pointer, n); + pointer += n; + } catch (ArgumentOutOfRangeException e) { + throw new ArgumentException("Delta length (" + pointer + + ") larger than source text length (" + text1.Length + + ").", e); + } + if (token[0] == '=') { + diffs.Add(new Diff(Operation.EQUAL, text)); + } else { + diffs.Add(new Diff(Operation.DELETE, text)); + } + break; + default: + // Anything else is an error. + throw new ArgumentException( + "Invalid diff operation in diff_fromDelta: " + token[0]); + } + } + if (pointer != text1.Length) { + throw new ArgumentException("Delta length (" + pointer + + ") smaller than source text length (" + text1.Length + ")."); + } + return diffs; + } + + + // MATCH FUNCTIONS + + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. 
+ */ + public int match_main(string text, string pattern, int loc) { + // Check for null inputs not needed since null can't be passed in C#. + + loc = Math.Max(0, Math.Min(loc, text.Length)); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.Length == 0) { + // Nothing to match. + return -1; + } else if (loc + pattern.Length <= text.Length + && text.Substring(loc, pattern.Length) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return match_bitap(text, pattern, loc); + } + } + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + protected int match_bitap(string text, string pattern, int loc) { + // assert (Match_MaxBits == 0 || pattern.Length <= Match_MaxBits) + // : "Pattern too long for this application."; + + // Initialise the alphabet. + Dictionary s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.IndexOf(pattern, loc, StringComparison.Ordinal); + if (best_loc != -1) { + score_threshold = Math.Min(match_bitapScore(0, best_loc, loc, + pattern), score_threshold); + // What about in the other direction? (speedup) + best_loc = text.LastIndexOf(pattern, + Math.Min(loc + pattern.Length, text.Length), + StringComparison.Ordinal); + if (best_loc != -1) { + score_threshold = Math.Min(match_bitapScore(0, best_loc, loc, + pattern), score_threshold); + } + } + + // Initialise the bit arrays. 
+ int matchmask = 1 << (pattern.Length - 1); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.Length + text.Length; + // Empty initialization added to appease C# compiler. + int[] last_rd = new int[0]; + for (int d = 0; d < pattern.Length; d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) + <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + int start = Math.Max(1, loc - bin_mid + 1); + int finish = Math.Min(loc + bin_mid, text.Length) + pattern.Length; + + int[] rd = new int[finish + 2]; + rd[finish + 1] = (1 << d) - 1; + for (int j = finish; j >= start; j--) { + int charMatch; + if (text.Length <= j - 1 || !s.ContainsKey(text[j - 1])) { + // Out of range. + charMatch = 0; + } else { + charMatch = s[text[j - 1]]; + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = Math.Max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. 
+ break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. + break; + } + last_rd = rd; + } + return best_loc; + } + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ + private double match_bitapScore(int e, int x, int loc, string pattern) { + float accuracy = (float)e / pattern.Length; + int proximity = Math.Abs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + (proximity / (float) Match_Distance); + } + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ + protected Dictionary match_alphabet(string pattern) { + Dictionary s = new Dictionary(); + char[] char_pattern = pattern.ToCharArray(); + foreach (char c in char_pattern) { + if (!s.ContainsKey(c)) { + s.Add(c, 0); + } + } + int i = 0; + foreach (char c in char_pattern) { + int value = s[c] | (1 << (pattern.Length - i - 1)); + s[c] = value; + i++; + } + return s; + } + + + // PATCH FUNCTIONS + + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ + protected void patch_addContext(Patch patch, string text) { + if (text.Length == 0) { + return; + } + string pattern = text.Substring(patch.start2, patch.length1); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two + // different matches are found, increase the pattern length. 
+ while (text.IndexOf(pattern, StringComparison.Ordinal) + != text.LastIndexOf(pattern, StringComparison.Ordinal) + && pattern.Length < Match_MaxBits - Patch_Margin - Patch_Margin) { + padding += Patch_Margin; + pattern = text.JavaSubstring(Math.Max(0, patch.start2 - padding), + Math.Min(text.Length, patch.start2 + patch.length1 + padding)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + string prefix = text.JavaSubstring(Math.Max(0, patch.start2 - padding), + patch.start2); + if (prefix.Length != 0) { + patch.diffs.Insert(0, new Diff(Operation.EQUAL, prefix)); + } + // Add the suffix. + string suffix = text.JavaSubstring(patch.start2 + patch.length1, + Math.Min(text.Length, patch.start2 + patch.length1 + padding)); + if (suffix.Length != 0) { + patch.diffs.Add(new Diff(Operation.EQUAL, suffix)); + } + + // Roll back the start points. + patch.start1 -= prefix.Length; + patch.start2 -= prefix.Length; + // Extend the lengths. + patch.length1 += prefix.Length + suffix.Length; + patch.length2 += prefix.Length + suffix.Length; + } + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return List of Patch objects. + */ + public List patch_make(string text1, string text2) { + // Check for null inputs not needed since null can't be passed in C#. + // No diffs provided, comAdde our own. + List diffs = diff_main(text1, text2, true); + if (diffs.Count > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of Diff objects for text1 to text2. + * @return List of Patch objects. + */ + public List patch_make(List diffs) { + // Check for null inputs not needed since null can't be passed in C#. + // No origin string provided, comAdde our own. 
+ string text1 = diff_text1(diffs); + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text + * @param text2 Ignored. + * @param diffs Array of Diff objects for text1 to text2. + * @return List of Patch objects. + * @deprecated Prefer patch_make(string text1, List diffs). + */ + public List patch_make(string text1, string text2, + List diffs) { + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of Diff objects for text1 to text2. + * @return List of Patch objects. + */ + public List patch_make(string text1, List diffs) { + // Check for null inputs not needed since null can't be passed in C#. + List patches = new List(); + if (diffs.Count == 0) { + return patches; // Get rid of the null case. + } + Patch patch = new Patch(); + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + string prepatch_text = text1; + string postpatch_text = text1; + foreach (Diff aDiff in diffs) { + if (patch.diffs.Count == 0 && aDiff.operation != Operation.EQUAL) { + // A new patch starts here. 
+ patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case Operation.INSERT: + patch.diffs.Add(aDiff); + patch.length2 += aDiff.text.Length; + postpatch_text = postpatch_text.Insert(char_count2, aDiff.text); + break; + case Operation.DELETE: + patch.length1 += aDiff.text.Length; + patch.diffs.Add(aDiff); + postpatch_text = postpatch_text.Remove(char_count2, + aDiff.text.Length); + break; + case Operation.EQUAL: + if (aDiff.text.Length <= 2 * Patch_Margin + && patch.diffs.Count() != 0 && aDiff != diffs.Last()) { + // Small equality inside a patch. + patch.diffs.Add(aDiff); + patch.length1 += aDiff.text.Length; + patch.length2 += aDiff.text.Length; + } + + if (aDiff.text.Length >= 2 * Patch_Margin) { + // Time for a new patch. + if (patch.diffs.Count != 0) { + patch_addContext(patch, prepatch_text); + patches.Add(patch); + patch = new Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != Operation.INSERT) { + char_count1 += aDiff.text.Length; + } + if (aDiff.operation != Operation.DELETE) { + char_count2 += aDiff.text.Length; + } + } + // Pick up the leftover patch if not empty. + if (patch.diffs.Count != 0) { + patch_addContext(patch, prepatch_text); + patches.Add(patch); + } + + return patches; + } + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of Patch objects. + * @return Array of Patch objects. 
+ */ + public List patch_deepCopy(List patches) { + List patchesCopy = new List(); + foreach (Patch aPatch in patches) { + Patch patchCopy = new Patch(); + foreach (Diff aDiff in aPatch.diffs) { + Diff diffCopy = new Diff(aDiff.operation, aDiff.text); + patchCopy.diffs.Add(diffCopy); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.Add(patchCopy); + } + return patchesCopy; + } + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of Patch objects + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * bool values. + */ + public Object[] patch_apply(List patches, string text) { + if (patches.Count == 0) { + return new Object[] { text, new bool[0] }; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy(patches); + + string nullPadding = this.patch_addPadding(patches); + text = nullPadding + text + nullPadding; + patch_splitMax(patches); + + int x = 0; + // delta keeps track of the offset between the expected and actual + // location of the previous patch. If there are patches expected at + // positions 10 and 20, but the first patch was found at 12, delta is 2 + // and the second patch has an effective expected position of 22. + int delta = 0; + bool[] results = new bool[patches.Count]; + foreach (Patch aPatch in patches) { + int expected_loc = aPatch.start2 + delta; + string text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.Length > this.Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern + // in the case of a monster delete. 
+ start_loc = match_main(text, + text1.Substring(0, this.Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, + text1.Substring(text1.Length - this.Match_MaxBits), + expected_loc + text1.Length - this.Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = this.match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + string text2; + if (end_loc == -1) { + text2 = text.JavaSubstring(start_loc, + Math.Min(start_loc + text1.Length, text.Length)); + } else { + text2 = text.JavaSubstring(start_loc, + Math.Min(end_loc + this.Match_MaxBits, text.Length)); + } + if (text1 == text2) { + // Perfect match, just shove the Replacement text in. + text = text.Substring(0, start_loc) + diff_text2(aPatch.diffs) + + text.Substring(start_loc + text1.Length); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + List diffs = diff_main(text1, text2, false); + if (text1.Length > this.Match_MaxBits + && this.diff_levenshtein(diffs) / (float) text1.Length + > this.Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + int index1 = 0; + foreach (Diff aDiff in aPatch.diffs) { + if (aDiff.operation != Operation.EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == Operation.INSERT) { + // Insertion + text = text.Insert(start_loc + index2, aDiff.text); + } else if (aDiff.operation == Operation.DELETE) { + // Deletion + text = text.Remove(start_loc + index2, diff_xIndex(diffs, + index1 + aDiff.text.Length) - index2); + } + } + if (aDiff.operation != Operation.DELETE) { + index1 += aDiff.text.Length; + } + } + } + } + } + x++; + } + // Strip the padding off. + text = text.Substring(nullPadding.Length, text.Length + - 2 * nullPadding.Length); + return new Object[] { text, results }; + } + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of Patch objects. + * @return The padding string added to each side. + */ + public string patch_addPadding(List patches) { + short paddingLength = this.Patch_Margin; + string nullPadding = string.Empty; + for (short x = 1; x <= paddingLength; x++) { + nullPadding += (char)x; + } + + // Bump all the patches forward. + foreach (Patch aPatch in patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch patch = patches.First(); + List diffs = patch.diffs; + if (diffs.Count == 0 || diffs.First().operation != Operation.EQUAL) { + // Add nullPadding equality. + diffs.Insert(0, new Diff(Operation.EQUAL, nullPadding)); + patch.start1 -= paddingLength; // Should be 0. + patch.start2 -= paddingLength; // Should be 0. + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs.First().text.Length) { + // Grow first equality. 
+ Diff firstDiff = diffs.First(); + int extraLength = paddingLength - firstDiff.text.Length; + firstDiff.text = nullPadding.Substring(firstDiff.text.Length) + + firstDiff.text; + patch.start1 -= extraLength; + patch.start2 -= extraLength; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + // Add some padding on end of last diff. + patch = patches.Last(); + diffs = patch.diffs; + if (diffs.Count == 0 || diffs.Last().operation != Operation.EQUAL) { + // Add nullPadding equality. + diffs.Add(new Diff(Operation.EQUAL, nullPadding)); + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs.Last().text.Length) { + // Grow last equality. + Diff lastDiff = diffs.Last(); + int extraLength = paddingLength - lastDiff.text.Length; + lastDiff.text += nullPadding.Substring(0, extraLength); + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + return nullPadding; + } + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches List of Patch objects. + */ + public void patch_splitMax(List patches) { + short patch_size = this.Match_MaxBits; + for (int x = 0; x < patches.Count; x++) { + if (patches[x].length1 <= patch_size) { + continue; + } + Patch bigpatch = patches[x]; + // Remove the big old patch. + patches.Splice(x--, 1); + int start1 = bigpatch.start1; + int start2 = bigpatch.start2; + string precontext = string.Empty; + while (bigpatch.diffs.Count != 0) { + // Create one of several smaller patches. 
+ Patch patch = new Patch(); + bool empty = true; + patch.start1 = start1 - precontext.Length; + patch.start2 = start2 - precontext.Length; + if (precontext.Length != 0) { + patch.length1 = patch.length2 = precontext.Length; + patch.diffs.Add(new Diff(Operation.EQUAL, precontext)); + } + while (bigpatch.diffs.Count != 0 + && patch.length1 < patch_size - this.Patch_Margin) { + Operation diff_type = bigpatch.diffs[0].operation; + string diff_text = bigpatch.diffs[0].text; + if (diff_type == Operation.INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.Length; + start2 += diff_text.Length; + patch.diffs.Add(bigpatch.diffs.First()); + bigpatch.diffs.RemoveAt(0); + empty = false; + } else if (diff_type == Operation.DELETE && patch.diffs.Count == 1 + && patch.diffs.First().operation == Operation.EQUAL + && diff_text.Length > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.Length; + start1 += diff_text.Length; + empty = false; + patch.diffs.Add(new Diff(diff_type, diff_text)); + bigpatch.diffs.RemoveAt(0); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.Substring(0, Math.Min(diff_text.Length, + patch_size - patch.length1 - Patch_Margin)); + patch.length1 += diff_text.Length; + start1 += diff_text.Length; + if (diff_type == Operation.EQUAL) { + patch.length2 += diff_text.Length; + start2 += diff_text.Length; + } else { + empty = false; + } + patch.diffs.Add(new Diff(diff_type, diff_text)); + if (diff_text == bigpatch.diffs[0].text) { + bigpatch.diffs.RemoveAt(0); + } else { + bigpatch.diffs[0].text = + bigpatch.diffs[0].text.Substring(diff_text.Length); + } + } + } + // Compute the head context for the next patch. + precontext = this.diff_text2(patch.diffs); + precontext = precontext.Substring(Math.Max(0, + precontext.Length - this.Patch_Margin)); + + string postcontext = null; + // Append the end context for this patch. 
+ if (diff_text1(bigpatch.diffs).Length > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs) + .Substring(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + + if (postcontext.Length != 0) { + patch.length1 += postcontext.Length; + patch.length2 += postcontext.Length; + if (patch.diffs.Count != 0 + && patch.diffs[patch.diffs.Count - 1].operation + == Operation.EQUAL) { + patch.diffs[patch.diffs.Count - 1].text += postcontext; + } else { + patch.diffs.Add(new Diff(Operation.EQUAL, postcontext)); + } + } + if (!empty) { + patches.Splice(++x, 0, patch); + } + } + } + } + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. + */ + public string patch_toText(List patches) { + StringBuilder text = new StringBuilder(); + foreach (Patch aPatch in patches) { + text.Append(aPatch); + } + return text.ToString(); + } + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws ArgumentException If invalid input. 
+ */ + public List patch_fromText(string textline) { + List patches = new List(); + if (textline.Length == 0) { + return patches; + } + string[] text = textline.Split('\n'); + int textPointer = 0; + Patch patch; + Regex patchHeader + = new Regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); + Match m; + char sign; + string line; + while (textPointer < text.Length) { + m = patchHeader.Match(text[textPointer]); + if (!m.Success) { + throw new ArgumentException("Invalid patch string: " + + text[textPointer]); + } + patch = new Patch(); + patches.Add(patch); + patch.start1 = Convert.ToInt32(m.Groups[1].Value); + if (m.Groups[2].Length == 0) { + patch.start1--; + patch.length1 = 1; + } else if (m.Groups[2].Value == "0") { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = Convert.ToInt32(m.Groups[2].Value); + } + + patch.start2 = Convert.ToInt32(m.Groups[3].Value); + if (m.Groups[4].Length == 0) { + patch.start2--; + patch.length2 = 1; + } else if (m.Groups[4].Value == "0") { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = Convert.ToInt32(m.Groups[4].Value); + } + textPointer++; + + while (textPointer < text.Length) { + try { + sign = text[textPointer][0]; + } catch (IndexOutOfRangeException) { + // Blank line? Whatever. + textPointer++; + continue; + } + line = text[textPointer].Substring(1); + line = line.Replace("+", "%2b"); + line = HttpUtility.UrlDecode(line, new UTF8Encoding(false, true)); + if (sign == '-') { + // Deletion. + patch.diffs.Add(new Diff(Operation.DELETE, line)); + } else if (sign == '+') { + // Insertion. + patch.diffs.Add(new Diff(Operation.INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.Add(new Diff(Operation.EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ throw new ArgumentException( + "Invalid patch mode '" + sign + "' in: " + line); + } + textPointer++; + } + } + return patches; + } + + /** + * Unescape selected chars for compatability with JavaScript's encodeURI. + * In speed critical applications this could be dropped since the + * receiving application will certainly decode these fine. + * Note that this function is case-sensitive. Thus "%3F" would not be + * unescaped. But this is ok because it is only called with the output of + * HttpUtility.UrlEncode which returns lowercase hex. + * + * Example: "%3f" -> "?", "%24" -> "$", etc. + * + * @param str The string to escape. + * @return The escaped string. + */ + public static string unescapeForEncodeUriCompatability(string str) { + return str.Replace("%21", "!").Replace("%7e", "~") + .Replace("%27", "'").Replace("%28", "(").Replace("%29", ")") + .Replace("%3b", ";").Replace("%2f", "/").Replace("%3f", "?") + .Replace("%3a", ":").Replace("%40", "@").Replace("%26", "&") + .Replace("%3d", "=").Replace("%2b", "+").Replace("%24", "$") + .Replace("%2c", ",").Replace("%23", "#"); + } + } +} diff --git a/csharp/DiffMatchPatchTest.cs b/csharp/DiffMatchPatchTest.cs new file mode 100644 index 0000000..25c8dec --- /dev/null +++ b/csharp/DiffMatchPatchTest.cs @@ -0,0 +1,1179 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using DiffMatchPatch; +using System.Collections.Generic; +using System; +using System.Text; +using NUnit.Framework; + +namespace nicTest { + [TestFixture()] + public class diff_match_patchTest : diff_match_patch { + [Test()] + public void diff_commonPrefixTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Detect any common suffix. + // Null case. + Assert.AreEqual(0, dmp.diff_commonPrefix("abc", "xyz")); + + // Non-null case. + Assert.AreEqual(4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + + // Whole case. + Assert.AreEqual(4, dmp.diff_commonPrefix("1234", "1234xyz")); + } + + [Test()] + public void diff_commonSuffixTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Detect any common suffix. + // Null case. + Assert.AreEqual(0, dmp.diff_commonSuffix("abc", "xyz")); + + // Non-null case. + Assert.AreEqual(4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + + // Whole case. + Assert.AreEqual(4, dmp.diff_commonSuffix("1234", "xyz1234")); + } + + [Test()] + public void diff_commonOverlapTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Detect any suffix/prefix overlap. + // Null case. + Assert.AreEqual(0, dmp.diff_commonOverlap("", "abcd")); + + // Whole case. + Assert.AreEqual(3, dmp.diff_commonOverlap("abc", "abcd")); + + // No overlap. + Assert.AreEqual(0, dmp.diff_commonOverlap("123456", "abcd")); + + // Overlap. + Assert.AreEqual(3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + + // Unicode. + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + Assert.AreEqual(0, dmp.diff_commonOverlap("fi", "\ufb01i")); + } + + [Test()] + public void diff_halfmatchTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + dmp.Diff_Timeout = 1; + // No match. + Assert.IsNull(dmp.diff_halfMatch("1234567890", "abcdef")); + + Assert.IsNull(dmp.diff_halfMatch("12345", "23")); + + // Single Match. 
+ CollectionAssert.AreEqual(new string[] { "12", "90", "a", "z", "345678" }, dmp.diff_halfMatch("1234567890", "a345678z")); + + CollectionAssert.AreEqual(new string[] { "a", "z", "12", "90", "345678" }, dmp.diff_halfMatch("a345678z", "1234567890")); + + CollectionAssert.AreEqual(new string[] { "abc", "z", "1234", "0", "56789" }, dmp.diff_halfMatch("abc56789z", "1234567890")); + + CollectionAssert.AreEqual(new string[] { "a", "xyz", "1", "7890", "23456" }, dmp.diff_halfMatch("a23456xyz", "1234567890")); + + // Multiple Matches. + CollectionAssert.AreEqual(new string[] { "12123", "123121", "a", "z", "1234123451234" }, dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + + CollectionAssert.AreEqual(new string[] { "", "-=-=-=-=-=", "x", "", "x-=-=-=-=-=-=-=" }, dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + + CollectionAssert.AreEqual(new string[] { "-=-=-=-=-=", "", "", "y", "-=-=-=-=-=-=-=y" }, dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + + // Non-optimal halfmatch. + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + CollectionAssert.AreEqual(new string[] { "qHillo", "w", "x", "Hulloy", "HelloHe" }, dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + + // Optimal no halfmatch. + dmp.Diff_Timeout = 0; + Assert.IsNull(dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + } + + [Test()] + public void diff_linesToCharsTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Convert lines down to characters. 
+ List tmpVector = new List(); + tmpVector.Add(""); + tmpVector.Add("alpha\n"); + tmpVector.Add("beta\n"); + Object[] result = dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n"); + Assert.AreEqual("\u0001\u0002\u0001", result[0]); + Assert.AreEqual("\u0002\u0001\u0002", result[1]); + CollectionAssert.AreEqual(tmpVector, (List)result[2]); + + tmpVector.Clear(); + tmpVector.Add(""); + tmpVector.Add("alpha\r\n"); + tmpVector.Add("beta\r\n"); + tmpVector.Add("\r\n"); + result = dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n"); + Assert.AreEqual("", result[0]); + Assert.AreEqual("\u0001\u0002\u0003\u0003", result[1]); + CollectionAssert.AreEqual(tmpVector, (List)result[2]); + + tmpVector.Clear(); + tmpVector.Add(""); + tmpVector.Add("a"); + tmpVector.Add("b"); + result = dmp.diff_linesToChars("a", "b"); + Assert.AreEqual("\u0001", result[0]); + Assert.AreEqual("\u0002", result[1]); + CollectionAssert.AreEqual(tmpVector, (List)result[2]); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.Clear(); + StringBuilder lineList = new StringBuilder(); + StringBuilder charList = new StringBuilder(); + for (int x = 1; x < n + 1; x++) { + tmpVector.Add(x + "\n"); + lineList.Append(x + "\n"); + charList.Append(Convert.ToChar(x)); + } + Assert.AreEqual(n, tmpVector.Count); + string lines = lineList.ToString(); + string chars = charList.ToString(); + Assert.AreEqual(n, chars.Length); + tmpVector.Insert(0, ""); + result = dmp.diff_linesToChars(lines, ""); + Assert.AreEqual(chars, result[0]); + Assert.AreEqual("", result[1]); + CollectionAssert.AreEqual(tmpVector, (List)result[2]); + } + + [Test()] + public void diff_charsToLinesTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Convert chars up to lines. 
+ List diffs = new List { + new Diff(Operation.EQUAL, "\u0001\u0002\u0001"), + new Diff(Operation.INSERT, "\u0002\u0001\u0002")}; + List tmpVector = new List(); + tmpVector.Add(""); + tmpVector.Add("alpha\n"); + tmpVector.Add("beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "alpha\nbeta\nalpha\n"), + new Diff(Operation.INSERT, "beta\nalpha\nbeta\n")}, diffs); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.Clear(); + StringBuilder lineList = new StringBuilder(); + StringBuilder charList = new StringBuilder(); + for (int x = 1; x < n + 1; x++) { + tmpVector.Add(x + "\n"); + lineList.Append(x + "\n"); + charList.Append(Convert.ToChar (x)); + } + Assert.AreEqual(n, tmpVector.Count); + string lines = lineList.ToString(); + string chars = charList.ToString(); + Assert.AreEqual(n, chars.Length); + tmpVector.Insert(0, ""); + diffs = new List {new Diff(Operation.DELETE, chars)}; + dmp.diff_charsToLines(diffs, tmpVector); + CollectionAssert.AreEqual(new List + {new Diff(Operation.DELETE, lines)}, diffs); + } + + [Test()] + public void diff_cleanupMergeTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Cleanup a messy diff. + // Null case. + List diffs = new List(); + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List(), diffs); + + // No change case. + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "b"), new Diff(Operation.INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "b"), new Diff(Operation.INSERT, "c")}, diffs); + + // Merge equalities. + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.EQUAL, "b"), new Diff(Operation.EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "abc")}, diffs); + + // Merge deletions. 
+ diffs = new List {new Diff(Operation.DELETE, "a"), new Diff(Operation.DELETE, "b"), new Diff(Operation.DELETE, "c")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.DELETE, "abc")}, diffs); + + // Merge insertions. + diffs = new List {new Diff(Operation.INSERT, "a"), new Diff(Operation.INSERT, "b"), new Diff(Operation.INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.INSERT, "abc")}, diffs); + + // Merge interweave. + diffs = new List {new Diff(Operation.DELETE, "a"), new Diff(Operation.INSERT, "b"), new Diff(Operation.DELETE, "c"), new Diff(Operation.INSERT, "d"), new Diff(Operation.EQUAL, "e"), new Diff(Operation.EQUAL, "f")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.DELETE, "ac"), new Diff(Operation.INSERT, "bd"), new Diff(Operation.EQUAL, "ef")}, diffs); + + // Prefix and suffix detection. + diffs = new List {new Diff(Operation.DELETE, "a"), new Diff(Operation.INSERT, "abc"), new Diff(Operation.DELETE, "dc")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "d"), new Diff(Operation.INSERT, "b"), new Diff(Operation.EQUAL, "c")}, diffs); + + // Prefix and suffix detection with equalities. + diffs = new List {new Diff(Operation.EQUAL, "x"), new Diff(Operation.DELETE, "a"), new Diff(Operation.INSERT, "abc"), new Diff(Operation.DELETE, "dc"), new Diff(Operation.EQUAL, "y")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "xa"), new Diff(Operation.DELETE, "d"), new Diff(Operation.INSERT, "b"), new Diff(Operation.EQUAL, "cy")}, diffs); + + // Slide edit left. 
+ diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.INSERT, "ba"), new Diff(Operation.EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.INSERT, "ab"), new Diff(Operation.EQUAL, "ac")}, diffs); + + // Slide edit right. + diffs = new List {new Diff(Operation.EQUAL, "c"), new Diff(Operation.INSERT, "ab"), new Diff(Operation.EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "ca"), new Diff(Operation.INSERT, "ba")}, diffs); + + // Slide edit left recursive. + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "b"), new Diff(Operation.EQUAL, "c"), new Diff(Operation.DELETE, "ac"), new Diff(Operation.EQUAL, "x")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.DELETE, "abc"), new Diff(Operation.EQUAL, "acx")}, diffs); + + // Slide edit right recursive. + diffs = new List {new Diff(Operation.EQUAL, "x"), new Diff(Operation.DELETE, "ca"), new Diff(Operation.EQUAL, "c"), new Diff(Operation.DELETE, "b"), new Diff(Operation.EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + CollectionAssert.AreEqual(new List {new Diff(Operation.EQUAL, "xca"), new Diff(Operation.DELETE, "cba")}, diffs); + } + + [Test()] + public void diff_cleanupSemanticLosslessTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Slide diffs to match logical boundaries. + // Null case. + List diffs = new List(); + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List(), diffs); + + // Blank lines. 
+ diffs = new List { + new Diff(Operation.EQUAL, "AAA\r\n\r\nBBB"), + new Diff(Operation.INSERT, "\r\nDDD\r\n\r\nBBB"), + new Diff(Operation.EQUAL, "\r\nEEE") + }; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "AAA\r\n\r\n"), + new Diff(Operation.INSERT, "BBB\r\nDDD\r\n\r\n"), + new Diff(Operation.EQUAL, "BBB\r\nEEE")}, diffs); + + // Line boundaries. + diffs = new List { + new Diff(Operation.EQUAL, "AAA\r\nBBB"), + new Diff(Operation.INSERT, " DDD\r\nBBB"), + new Diff(Operation.EQUAL, " EEE")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "AAA\r\n"), + new Diff(Operation.INSERT, "BBB DDD\r\n"), + new Diff(Operation.EQUAL, "BBB EEE")}, diffs); + + // Word boundaries. + diffs = new List { + new Diff(Operation.EQUAL, "The c"), + new Diff(Operation.INSERT, "ow and the c"), + new Diff(Operation.EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "The "), + new Diff(Operation.INSERT, "cow and the "), + new Diff(Operation.EQUAL, "cat.")}, diffs); + + // Alphanumeric boundaries. + diffs = new List { + new Diff(Operation.EQUAL, "The-c"), + new Diff(Operation.INSERT, "ow-and-the-c"), + new Diff(Operation.EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "The-"), + new Diff(Operation.INSERT, "cow-and-the-"), + new Diff(Operation.EQUAL, "cat.")}, diffs); + + // Hitting the start. + diffs = new List { + new Diff(Operation.EQUAL, "a"), + new Diff(Operation.DELETE, "a"), + new Diff(Operation.EQUAL, "ax")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "a"), + new Diff(Operation.EQUAL, "aax")}, diffs); + + // Hitting the end. 
+ diffs = new List { + new Diff(Operation.EQUAL, "xa"), + new Diff(Operation.DELETE, "a"), + new Diff(Operation.EQUAL, "a")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "xaa"), + new Diff(Operation.DELETE, "a")}, diffs); + + // Sentence boundaries. + diffs = new List { + new Diff(Operation.EQUAL, "The xxx. The "), + new Diff(Operation.INSERT, "zzz. The "), + new Diff(Operation.EQUAL, "yyy.")}; + dmp.diff_cleanupSemanticLossless(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "The xxx."), + new Diff(Operation.INSERT, " The zzz."), + new Diff(Operation.EQUAL, " The yyy.")}, diffs); + } + + [Test()] + public void diff_cleanupSemanticTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Cleanup semantically trivial equalities. + // Null case. + List diffs = new List(); + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List(), diffs); + + // No elimination #1. + diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "cd"), + new Diff(Operation.EQUAL, "12"), + new Diff(Operation.DELETE, "e")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "cd"), + new Diff(Operation.EQUAL, "12"), + new Diff(Operation.DELETE, "e")}, diffs); + + // No elimination #2. + diffs = new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.INSERT, "ABC"), + new Diff(Operation.EQUAL, "1234"), + new Diff(Operation.DELETE, "wxyz")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.INSERT, "ABC"), + new Diff(Operation.EQUAL, "1234"), + new Diff(Operation.DELETE, "wxyz")}, diffs); + + // Simple elimination. 
+ diffs = new List { + new Diff(Operation.DELETE, "a"), + new Diff(Operation.EQUAL, "b"), + new Diff(Operation.DELETE, "c")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.INSERT, "b")}, diffs); + + // Backpass elimination. + diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.EQUAL, "cd"), + new Diff(Operation.DELETE, "e"), + new Diff(Operation.EQUAL, "f"), + new Diff(Operation.INSERT, "g")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abcdef"), + new Diff(Operation.INSERT, "cdfg")}, diffs); + + // Multiple eliminations. + diffs = new List { + new Diff(Operation.INSERT, "1"), + new Diff(Operation.EQUAL, "A"), + new Diff(Operation.DELETE, "B"), + new Diff(Operation.INSERT, "2"), + new Diff(Operation.EQUAL, "_"), + new Diff(Operation.INSERT, "1"), + new Diff(Operation.EQUAL, "A"), + new Diff(Operation.DELETE, "B"), + new Diff(Operation.INSERT, "2")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "AB_AB"), + new Diff(Operation.INSERT, "1A2_1A2")}, diffs); + + // Word boundaries. + diffs = new List { + new Diff(Operation.EQUAL, "The c"), + new Diff(Operation.DELETE, "ow and the c"), + new Diff(Operation.EQUAL, "at.")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.EQUAL, "The "), + new Diff(Operation.DELETE, "cow and the "), + new Diff(Operation.EQUAL, "cat.")}, diffs); + + // No overlap elimination. + diffs = new List { + new Diff(Operation.DELETE, "abcxx"), + new Diff(Operation.INSERT, "xxdef")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abcxx"), + new Diff(Operation.INSERT, "xxdef")}, diffs); + + // Overlap elimination. 
+ diffs = new List { + new Diff(Operation.DELETE, "abcxxx"), + new Diff(Operation.INSERT, "xxxdef")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.EQUAL, "xxx"), + new Diff(Operation.INSERT, "def")}, diffs); + + // Reverse overlap elimination. + diffs = new List { + new Diff(Operation.DELETE, "xxxabc"), + new Diff(Operation.INSERT, "defxxx")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.INSERT, "def"), + new Diff(Operation.EQUAL, "xxx"), + new Diff(Operation.DELETE, "abc")}, diffs); + + // Two overlap eliminations. + diffs = new List { + new Diff(Operation.DELETE, "abcd1212"), + new Diff(Operation.INSERT, "1212efghi"), + new Diff(Operation.EQUAL, "----"), + new Diff(Operation.DELETE, "A3"), + new Diff(Operation.INSERT, "3BC")}; + dmp.diff_cleanupSemantic(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abcd"), + new Diff(Operation.EQUAL, "1212"), + new Diff(Operation.INSERT, "efghi"), + new Diff(Operation.EQUAL, "----"), + new Diff(Operation.DELETE, "A"), + new Diff(Operation.EQUAL, "3"), + new Diff(Operation.INSERT, "BC")}, diffs); + } + + [Test()] + public void diff_cleanupEfficiencyTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + // Null case. + List diffs = new List (); + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List(), diffs); + + // No elimination. 
+ diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "wxyz"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "wxyz"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "34")}, diffs); + + // Four-edit elimination. + diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "xyz"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abxyzcd"), + new Diff(Operation.INSERT, "12xyz34")}, diffs); + + // Three-edit elimination. + diffs = new List { + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "x"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "xcd"), + new Diff(Operation.INSERT, "12x34")}, diffs); + + // Backpass elimination. + diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "xy"), + new Diff(Operation.INSERT, "34"), + new Diff(Operation.EQUAL, "z"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "56")}; + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abxyzcd"), + new Diff(Operation.INSERT, "12xy34z56")}, diffs); + + // High cost elimination. 
+ dmp.Diff_EditCost = 5; + diffs = new List { + new Diff(Operation.DELETE, "ab"), + new Diff(Operation.INSERT, "12"), + new Diff(Operation.EQUAL, "wxyz"), + new Diff(Operation.DELETE, "cd"), + new Diff(Operation.INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + CollectionAssert.AreEqual(new List { + new Diff(Operation.DELETE, "abwxyzcd"), + new Diff(Operation.INSERT, "12wxyz34")}, diffs); + dmp.Diff_EditCost = 4; + } + + [Test()] + public void diff_prettyHtmlTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Pretty print. + List diffs = new List { + new Diff(Operation.EQUAL, "a\n"), + new Diff(Operation.DELETE, "b"), + new Diff(Operation.INSERT, "c&d")}; + Assert.AreEqual("
<B>b</B>c&d", + dmp.diff_prettyHtml(diffs)); + } + + [Test()] + public void diff_textTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Compute the source and destination texts. + List diffs = new List { + new Diff(Operation.EQUAL, "jump"), + new Diff(Operation.DELETE, "s"), + new Diff(Operation.INSERT, "ed"), + new Diff(Operation.EQUAL, " over "), + new Diff(Operation.DELETE, "the"), + new Diff(Operation.INSERT, "a"), + new Diff(Operation.EQUAL, " lazy")}; + Assert.AreEqual("jumps over the lazy", dmp.diff_text1(diffs)); + + Assert.AreEqual("jumped over a lazy", dmp.diff_text2(diffs)); + } + + [Test()] + public void diff_deltaTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Convert a diff into delta string. + List diffs = new List { + new Diff(Operation.EQUAL, "jump"), + new Diff(Operation.DELETE, "s"), + new Diff(Operation.INSERT, "ed"), + new Diff(Operation.EQUAL, " over "), + new Diff(Operation.DELETE, "the"), + new Diff(Operation.INSERT, "a"), + new Diff(Operation.EQUAL, " lazy"), + new Diff(Operation.INSERT, "old dog")}; + string text1 = dmp.diff_text1(diffs); + Assert.AreEqual("jumps over the lazy", text1); + + string delta = dmp.diff_toDelta(diffs); + Assert.AreEqual("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + + // Convert delta string into a diff. + CollectionAssert.AreEqual(diffs, dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + try { + dmp.diff_fromDelta(text1 + "x", delta); + Assert.Fail("diff_fromDelta: Too long."); + } catch (ArgumentException) { + // Exception expected. + } + + // Generates error (19 > 18). + try { + dmp.diff_fromDelta(text1.Substring(1), delta); + Assert.Fail("diff_fromDelta: Too short."); + } catch (ArgumentException) { + // Exception expected. + } + + // Generates error (%c3%xy invalid Unicode). + try { + dmp.diff_fromDelta("", "+%c3%xy"); + Assert.Fail("diff_fromDelta: Invalid character."); + } catch (ArgumentException) { + // Exception expected. 
+ } + + // Test deltas with special characters. + char zero = (char)0; + char one = (char)1; + char two = (char)2; + diffs = new List { + new Diff(Operation.EQUAL, "\u0680 " + zero + " \t %"), + new Diff(Operation.DELETE, "\u0681 " + one + " \n ^"), + new Diff(Operation.INSERT, "\u0682 " + two + " \\ |")}; + text1 = dmp.diff_text1(diffs); + Assert.AreEqual("\u0680 " + zero + " \t %\u0681 " + one + " \n ^", text1); + + delta = dmp.diff_toDelta(diffs); + // Lowercase, due to UrlEncode uses lower. + Assert.AreEqual("=7\t-7\t+%da%82 %02 %5c %7c", delta, "diff_toDelta: Unicode."); + + CollectionAssert.AreEqual(diffs, dmp.diff_fromDelta(text1, delta), "diff_fromDelta: Unicode."); + + // Verify pool of unchanged characters. + diffs = new List { + new Diff(Operation.INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")}; + string text2 = dmp.diff_text2(diffs); + Assert.AreEqual("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2, "diff_text2: Unchanged characters."); + + delta = dmp.diff_toDelta(diffs); + Assert.AreEqual("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta, "diff_toDelta: Unchanged characters."); + + // Convert delta string into a diff. + CollectionAssert.AreEqual(diffs, dmp.diff_fromDelta("", delta), "diff_fromDelta: Unchanged characters."); + } + + [Test()] + public void diff_xIndexTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Translate a location in text1 to text2. 
+ List diffs = new List { + new Diff(Operation.DELETE, "a"), + new Diff(Operation.INSERT, "1234"), + new Diff(Operation.EQUAL, "xyz")}; + Assert.AreEqual(5, dmp.diff_xIndex(diffs, 2), "diff_xIndex: Translation on equality."); + + diffs = new List { + new Diff(Operation.EQUAL, "a"), + new Diff(Operation.DELETE, "1234"), + new Diff(Operation.EQUAL, "xyz")}; + Assert.AreEqual(1, dmp.diff_xIndex(diffs, 3), "diff_xIndex: Translation on deletion."); + } + + [Test()] + public void diff_levenshteinTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + List diffs = new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.INSERT, "1234"), + new Diff(Operation.EQUAL, "xyz")}; + Assert.AreEqual(4, dmp.diff_levenshtein(diffs), "diff_levenshtein: Levenshtein with trailing equality."); + + diffs = new List { + new Diff(Operation.EQUAL, "xyz"), + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.INSERT, "1234")}; + Assert.AreEqual(4, dmp.diff_levenshtein(diffs), "diff_levenshtein: Levenshtein with leading equality."); + + diffs = new List { + new Diff(Operation.DELETE, "abc"), + new Diff(Operation.EQUAL, "xyz"), + new Diff(Operation.INSERT, "1234")}; + Assert.AreEqual(7, dmp.diff_levenshtein(diffs), "diff_levenshtein: Levenshtein with middle equality."); + } + + [Test()] + public void diff_bisectTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Normal. + string a = "cat"; + string b = "map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + List diffs = new List {new Diff(Operation.DELETE, "c"), new Diff(Operation.INSERT, "m"), new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "t"), new Diff(Operation.INSERT, "p")}; + CollectionAssert.AreEqual(diffs, dmp.diff_bisect(a, b, DateTime.MaxValue)); + + // Timeout. 
+ diffs = new List {new Diff(Operation.DELETE, "cat"), new Diff(Operation.INSERT, "map")}; + CollectionAssert.AreEqual(diffs, dmp.diff_bisect(a, b, DateTime.MinValue)); + } + + [Test()] + public void diff_mainTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Perform a trivial diff. + List diffs = new List {}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("", "", false), "diff_main: Null case."); + + diffs = new List {new Diff(Operation.EQUAL, "abc")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("abc", "abc", false), "diff_main: Equality."); + + diffs = new List {new Diff(Operation.EQUAL, "ab"), new Diff(Operation.INSERT, "123"), new Diff(Operation.EQUAL, "c")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("abc", "ab123c", false), "diff_main: Simple insertion."); + + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "123"), new Diff(Operation.EQUAL, "bc")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("a123bc", "abc", false), "diff_main: Simple deletion."); + + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.INSERT, "123"), new Diff(Operation.EQUAL, "b"), new Diff(Operation.INSERT, "456"), new Diff(Operation.EQUAL, "c")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("abc", "a123b456c", false), "diff_main: Two insertions."); + + diffs = new List {new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "123"), new Diff(Operation.EQUAL, "b"), new Diff(Operation.DELETE, "456"), new Diff(Operation.EQUAL, "c")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("a123b456c", "abc", false), "diff_main: Two deletions."); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = new List {new Diff(Operation.DELETE, "a"), new Diff(Operation.INSERT, "b")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("a", "b", false), "diff_main: Simple case #1."); + + diffs = new List {new Diff(Operation.DELETE, "Apple"), new Diff(Operation.INSERT, "Banana"), new Diff(Operation.EQUAL, "s are a"), new Diff(Operation.INSERT, "lso"), new Diff(Operation.EQUAL, " fruit.")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false), "diff_main: Simple case #2."); + + diffs = new List {new Diff(Operation.DELETE, "a"), new Diff(Operation.INSERT, "\u0680"), new Diff(Operation.EQUAL, "x"), new Diff(Operation.DELETE, "\t"), new Diff(Operation.INSERT, new string (new char[]{(char)0}))}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("ax\t", "\u0680x" + (char)0, false), "diff_main: Simple case #3."); + + diffs = new List {new Diff(Operation.DELETE, "1"), new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "y"), new Diff(Operation.EQUAL, "b"), new Diff(Operation.DELETE, "2"), new Diff(Operation.INSERT, "xab")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("1ayb2", "abxab", false), "diff_main: Overlap #1."); + + diffs = new List {new Diff(Operation.INSERT, "xaxcx"), new Diff(Operation.EQUAL, "abc"), new Diff(Operation.DELETE, "y")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("abcy", "xaxcxabc", false), "diff_main: Overlap #2."); + + diffs = new List {new Diff(Operation.DELETE, "ABCD"), new Diff(Operation.EQUAL, "a"), new Diff(Operation.DELETE, "="), new Diff(Operation.INSERT, "-"), new Diff(Operation.EQUAL, "bcd"), new Diff(Operation.DELETE, "="), new Diff(Operation.INSERT, "-"), new Diff(Operation.EQUAL, "efghijklmnopqrs"), new Diff(Operation.DELETE, "EFGHIJKLMNOefg")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false), "diff_main: Overlap #3."); + + diffs = new List {new Diff(Operation.INSERT, 
" "), new Diff(Operation.EQUAL, "a"), new Diff(Operation.INSERT, "nd"), new Diff(Operation.EQUAL, " [[Pennsylvania]]"), new Diff(Operation.DELETE, " and [[New")}; + CollectionAssert.AreEqual(diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false), "diff_main: Large equality."); + + dmp.Diff_Timeout = 0.1f; // 100ms + string a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + string b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + DateTime startTime = DateTime.Now; + dmp.diff_main(a, b); + DateTime endTime = DateTime.Now; + // Test that we took at least the timeout period. + Assert.IsTrue(new TimeSpan(((long)(dmp.Diff_Timeout * 1000)) * 10000) <= endTime - startTime); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + Assert.IsTrue(new TimeSpan(((long)(dmp.Diff_Timeout * 1000)) * 10000 * 2) > endTime - startTime); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + CollectionAssert.AreEqual(dmp.diff_main(a, b, true), dmp.diff_main(a, b, false), "diff_main: Simple line-mode."); + + a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + CollectionAssert.AreEqual(dmp.diff_main(a, b, true), dmp.diff_main(a, b, false), "diff_main: Single line-mode."); + + a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + string[] texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + string[] texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + CollectionAssert.AreEqual(texts_textmode, texts_linemode, "diff_main: Overlap line-mode."); + + // Test null inputs -- not needed because nulls can't be passed in C#. + } + + [Test()] + public void match_alphabetTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Initialise the bitmasks for Bitap. 
+ Dictionary bitmask = new Dictionary(); + bitmask.Add('a', 4); bitmask.Add('b', 2); bitmask.Add('c', 1); + CollectionAssert.AreEqual(bitmask, dmp.match_alphabet("abc"), "match_alphabet: Unique."); + + bitmask.Clear(); + bitmask.Add('a', 37); bitmask.Add('b', 18); bitmask.Add('c', 8); + CollectionAssert.AreEqual(bitmask, dmp.match_alphabet("abcaba"), "match_alphabet: Duplicates."); + } + + [Test()] + public void match_bitapTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + Assert.AreEqual(5, dmp.match_bitap("abcdefghijk", "fgh", 5), "match_bitap: Exact match #1."); + + Assert.AreEqual(5, dmp.match_bitap("abcdefghijk", "fgh", 0), "match_bitap: Exact match #2."); + + Assert.AreEqual(4, dmp.match_bitap("abcdefghijk", "efxhi", 0), "match_bitap: Fuzzy match #1."); + + Assert.AreEqual(2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5), "match_bitap: Fuzzy match #2."); + + Assert.AreEqual(-1, dmp.match_bitap("abcdefghijk", "bxy", 1), "match_bitap: Fuzzy match #3."); + + Assert.AreEqual(2, dmp.match_bitap("123456789xx0", "3456789x0", 2), "match_bitap: Overflow."); + + Assert.AreEqual(0, dmp.match_bitap("abcdef", "xxabc", 4), "match_bitap: Before start match."); + + Assert.AreEqual(3, dmp.match_bitap("abcdef", "defyy", 4), "match_bitap: Beyond end match."); + + Assert.AreEqual(0, dmp.match_bitap("abcdef", "xabcdefy", 0), "match_bitap: Oversized pattern."); + + dmp.Match_Threshold = 0.4f; + Assert.AreEqual(4, dmp.match_bitap("abcdefghijk", "efxyhi", 1), "match_bitap: Threshold #1."); + + dmp.Match_Threshold = 0.3f; + Assert.AreEqual(-1, dmp.match_bitap("abcdefghijk", "efxyhi", 1), "match_bitap: Threshold #2."); + + dmp.Match_Threshold = 0.0f; + Assert.AreEqual(1, dmp.match_bitap("abcdefghijk", "bcdef", 1), "match_bitap: Threshold #3."); + + dmp.Match_Threshold = 0.5f; + Assert.AreEqual(0, dmp.match_bitap("abcdexyzabcde", "abccde", 3), "match_bitap: Multiple select #1."); + + 
Assert.AreEqual(8, dmp.match_bitap("abcdexyzabcde", "abccde", 5), "match_bitap: Multiple select #2."); + + dmp.Match_Distance = 10; // Strict location. + Assert.AreEqual(-1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24), "match_bitap: Distance test #1."); + + Assert.AreEqual(0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1), "match_bitap: Distance test #2."); + + dmp.Match_Distance = 1000; // Loose location. + Assert.AreEqual(0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24), "match_bitap: Distance test #3."); + } + + [Test()] + public void match_mainTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + // Full match. + Assert.AreEqual(0, dmp.match_main("abcdef", "abcdef", 1000), "match_main: Equality."); + + Assert.AreEqual(-1, dmp.match_main("", "abcdef", 1), "match_main: Null text."); + + Assert.AreEqual(3, dmp.match_main("abcdef", "", 3), "match_main: Null pattern."); + + Assert.AreEqual(3, dmp.match_main("abcdef", "de", 3), "match_main: Exact match."); + + Assert.AreEqual(3, dmp.match_main("abcdef", "defy", 4), "match_main: Beyond end match."); + + Assert.AreEqual(0, dmp.match_main("abcdef", "abcdefy", 0), "match_main: Oversized pattern."); + + dmp.Match_Threshold = 0.7f; + Assert.AreEqual(4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5), "match_main: Complex match."); + dmp.Match_Threshold = 0.5f; + + // Test null inputs -- not needed because nulls can't be passed in C#. + } + + [Test()] + public void patch_patchObjTest() { + // Patch Object. 
+ Patch p = new Patch(); + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = new List { + new Diff(Operation.EQUAL, "jump"), + new Diff(Operation.DELETE, "s"), + new Diff(Operation.INSERT, "ed"), + new Diff(Operation.EQUAL, " over "), + new Diff(Operation.DELETE, "the"), + new Diff(Operation.INSERT, "a"), + new Diff(Operation.EQUAL, "\nlaz")}; + string strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0alaz\n"; + Assert.AreEqual(strp, p.ToString(), "Patch: toString."); + } + + [Test()] + public void patch_fromTextTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + Assert.IsTrue(dmp.patch_fromText("").Count == 0, "patch_fromText: #0."); + + string strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0alaz\n"; + Assert.AreEqual(strp, dmp.patch_fromText(strp)[0].ToString(), "patch_fromText: #1."); + + Assert.AreEqual("@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n")[0].ToString(), "patch_fromText: #2."); + + Assert.AreEqual("@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n") [0].ToString(), "patch_fromText: #3."); + + Assert.AreEqual("@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n") [0].ToString(), "patch_fromText: #4."); + + // Generates error. + try { + dmp.patch_fromText("Bad\nPatch\n"); + Assert.Fail("patch_fromText: #5."); + } catch (ArgumentException) { + // Exception expected. 
+ } + } + + [Test()] + public void patch_toTextTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + string strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + List patches; + patches = dmp.patch_fromText(strp); + string result = dmp.patch_toText(patches); + Assert.AreEqual(strp, result); + + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText(strp); + result = dmp.patch_toText(patches); + Assert.AreEqual(strp, result); + } + + [Test()] + public void patch_addContextTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n") [0]; + dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); + Assert.AreEqual("@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.ToString(), "patch_addContext: Simple case."); + + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + Assert.AreEqual("@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.ToString(), "patch_addContext: Not enough trailing context."); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + Assert.AreEqual("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.ToString(), "patch_addContext: Not enough leading context."); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); + Assert.AreEqual("@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.ToString(), "patch_addContext: Ambiguity."); + } + + [Test()] + public void patch_makeTest() { + diff_match_patchTest dmp = new diff_match_patchTest(); + List patches; + patches = dmp.patch_make("", ""); + Assert.AreEqual("", dmp.patch_toText(patches), "patch_make: Null case."); + + string text1 = "The quick brown fox jumps over the lazy dog."; + string text2 = "That quick brown fox jumped over a lazy dog."; + string expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make(text2, text1); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Text2+Text1 inputs."); + + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Text1+Text2 inputs."); + + List diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Diff input."); + + patches = dmp.patch_make(text1, diffs); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Text1+Diff inputs."); + + patches = dmp.patch_make(text1, text2, diffs); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Text1+Text2+Diff inputs (deprecated)."); + + patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); + Assert.AreEqual("@@ -1,21 +1,21 @@\n-%601234567890-=%5b%5d%5c;',./\n+~!@#$%25%5e&*()_+%7b%7d%7c:%22%3c%3e?\n", + dmp.patch_toText(patches), + "patch_toText: Character encoding."); + + diffs = new List { + new Diff(Operation.DELETE, "`1234567890-=[]\\;',./"), + new Diff(Operation.INSERT, "~!@#$%^&*()_+{}|:\"<>?")}; + CollectionAssert.AreEqual(diffs, + dmp.patch_fromText("@@ -1,21 +1,21 
@@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n") [0].diffs, + "patch_fromText: Character decoding."); + + text1 = ""; + for (int x = 0; x < 100; x++) { + text1 += "abcdef"; + } + text2 = text1 + "123"; + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + Assert.AreEqual(expectedPatch, dmp.patch_toText(patches), "patch_make: Long string with repeats."); + + // Test null inputs -- not needed because nulls can't be passed in C#. + } + + [Test()] + public void patch_splitMaxTest() { + // Assumes that Match_MaxBits is 32. + diff_match_patchTest dmp = new diff_match_patchTest(); + List<Patch> patches; // Typed list: patch_make yields Patch elements, which patch_splitMax rewrites in place. + + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + Assert.AreEqual("@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); + string oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + Assert.AreEqual(oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); + dmp.patch_splitMax(patches); + Assert.AreEqual("@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); + dmp.patch_splitMax(patches); + Assert.AreEqual("@@ 
-2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); + } + + [Test()] + public void patch_addPaddingTest() { // Compares patch text before and after patch_addPadding for full, partial and no edge padding. + diff_match_patchTest dmp = new diff_match_patchTest(); + List<Patch> patches; // Typed list: patch_make yields Patch elements. + patches = dmp.patch_make("", "test"); + Assert.AreEqual("@@ -0,0 +1,4 @@\n+test\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges full."); + dmp.patch_addPadding(patches); + Assert.AreEqual("@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges full."); + + patches = dmp.patch_make("XY", "XtestY"); + Assert.AreEqual("@@ -1,2 +1,6 @@\n X\n+test\n Y\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges partial."); + dmp.patch_addPadding(patches); + Assert.AreEqual("@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges partial."); + + patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); + Assert.AreEqual("@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges none."); + dmp.patch_addPadding(patches); + Assert.AreEqual("@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches), + "patch_addPadding: Both edges none."); + } + + [Test()] + public void patch_applyTest() { // patch_apply returns { resulting text, bool[] with one flag per patch }; each case checks both. + diff_match_patchTest dmp = new diff_match_patchTest(); + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + List<Patch> patches; // Typed list: patch_make yields Patch elements. + patches = dmp.patch_make("", ""); + Object[] results = dmp.patch_apply(patches, "Hello world."); + bool[] boolArray = (bool[])results[1]; + string resultStr = results[0] + "\t" + boolArray.Length; + Assert.AreEqual("Hello world.\t0", resultStr, "patch_apply: Null case."); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); + results = dmp.patch_apply(patches, "The quick brown 
fox jumps over the lazy dog."); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("That quick brown fox jumped over a lazy dog.\tTrue\tTrue", resultStr, "patch_apply: Exact match."); + + results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("That quick red rabbit jumped over a tired tiger.\tTrue\tTrue", resultStr, "patch_apply: Partial match."); + + results = dmp.patch_apply(patches, "I am the very model of a modern major general."); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("I am the very model of a modern major general.\tFalse\tFalse", resultStr, "patch_apply: Failed match."); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("xabcy\tTrue\tTrue", resultStr, "patch_apply: Big delete, small change."); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tFalse\tTrue", resultStr, "patch_apply: Big delete, big change 1."); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + 
results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("xabcy\tTrue\tTrue", resultStr, "patch_apply: Big delete, big change 2."); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); + results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + Assert.AreEqual("ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tFalse\tTrue", resultStr, "patch_apply: Compensate for failed patch."); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make("", "test"); + string patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ""); + Assert.AreEqual(patchStr, dmp.patch_toText(patches), "patch_apply: No side effects."); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + Assert.AreEqual(patchStr, dmp.patch_toText(patches), "patch_apply: No side effects with major delete."); + + patches = dmp.patch_make("", "test"); + results = dmp.patch_apply(patches, ""); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + Assert.AreEqual("test\tTrue", resultStr, "patch_apply: Edge exact match."); + + patches = dmp.patch_make("XY", "XtestY"); + results = dmp.patch_apply(patches, "XY"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + Assert.AreEqual("XtestY\tTrue", resultStr, "patch_apply: Near 
edge exact match."); + + patches = dmp.patch_make("y", "y123"); + results = dmp.patch_apply(patches, "x"); + boolArray = (bool[])results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + Assert.AreEqual("x123\tTrue", resultStr, "patch_apply: Edge partial match."); + } + + private static string[] diff_rebuildtexts(List<Diff> diffs) { // Rebuilds both texts from a diff: text[0] skips INSERTs, text[1] skips DELETEs. + string[] text = { "", "" }; + foreach (Diff myDiff in diffs) { + if (myDiff.operation != Operation.INSERT) { + text[0] += myDiff.text; + } + if (myDiff.operation != Operation.DELETE) { + text[1] += myDiff.text; + } + } + return text; + } + } +} diff --git a/dart/DMPClass.dart b/dart/DMPClass.dart new file mode 100644 index 0000000..c460f3c --- /dev/null +++ b/dart/DMPClass.dart @@ -0,0 +1,2117 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + */ + +part of DiffMatchPatch; + +/** + * The data structure representing a diff is a List of Diff objects: + * {Diff(DIFF_DELETE, 'Hello'), Diff(DIFF_INSERT, 'Goodbye'), + * Diff(DIFF_EQUAL, ' world.')} + * which means: delete 'Hello', add 'Goodbye' and keep ' world.' 
+ */ +const DIFF_DELETE = -1; +const DIFF_INSERT = 1; +const DIFF_EQUAL = 0; + +/** + * Class containing the diff, match and patch methods. + * Also contains the behaviour settings. + */ +class DiffMatchPatch { + + // Defaults. + // Set these on your DiffMatchPatch instance to override the defaults. + + /** + * Number of seconds to map a diff before giving up (0 or less for infinity). + */ + double Diff_Timeout = 1.0; + /** + * Cost of an empty edit operation in terms of edit characters. + */ + int Diff_EditCost = 4; + /** + * At what point is no match declared (0.0 = perfection, 1.0 = very loose). + */ + double Match_Threshold = 0.5; + /** + * How far to search for a match (0 = exact location, 1000+ = broad match). + * A match this many characters away from the expected location will add + * 1.0 to the score (0.0 is a perfect match). + */ + int Match_Distance = 1000; + /** + * When deleting a large block of text (over ~64 characters), how close do + * the contents have to be to match the expected contents. (0.0 = perfection, + * 1.0 = very loose). Note that Match_Threshold controls how closely the + * end points of a delete need to match. + */ + double Patch_DeleteThreshold = 0.5; + /** + * Chunk size for context length. + */ + int Patch_Margin = 4; + + /** + * The number of bits in an int. + */ + int Match_MaxBits = 32; + + + // DIFF FUNCTIONS + + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * [text1] is the old string to be diffed. + * [text2] is the new string to be diffed. + * [checklines] is an optional speedup flag. If present and false, then don't + * run a line-level diff first to identify the changed areas. + * Defaults to true, which does a faster, slightly less optimal diff. + * [deadline] is an optional time when the diff should be complete by. Used + * internally for recursive calls. Users should set Diff_Timeout instead. 
+ * Returns a List of Diff objects; throws ArgumentError if a text is null. + */ + List diff_main(String text1, String text2, + [bool checklines = true, Date deadline]) { + // Set a deadline by which time the diff must be complete, unless one was passed in. + if (deadline == null) { + deadline = new Date.now(); + if (Diff_Timeout <= 0) { + // One year should be sufficient for 'infinity'. + deadline = deadline.add(new Duration(days: 365)); + } else { + deadline = deadline.add(new Duration( + milliseconds: (Diff_Timeout * 1000).toInt())); + } + } + // Check for null inputs. + if (text1 == null || text2 == null) { + throw new ArgumentError('Null inputs. (diff_main)'); + } + + // Check for equality (speedup): identical texts give one DIFF_EQUAL, or an empty list when both are empty. + List diffs; + if (text1 == text2) { + diffs = []; + if (!text1.isEmpty) { + diffs.add(new Diff(DIFF_EQUAL, text1)); + } + return diffs; + } + + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix(text1, text2); + String commonprefix = text1.substring(0, commonlength); + text1 = text1.substring(commonlength); + text2 = text2.substring(commonlength); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(text1, text2); + String commonsuffix = text1.substring(text1.length - commonlength); + text1 = text1.substring(0, text1.length - commonlength); + text2 = text2.substring(0, text2.length - commonlength); + + // Compute the diff on the middle block. + diffs = _diff_compute(text1, text2, checklines, deadline); + + // Restore the prefix and suffix as DIFF_EQUAL entries around the middle diff. + if (!commonprefix.isEmpty) { + diffs.insertRange(0, 1, new Diff(DIFF_EQUAL, commonprefix)); + } + if (!commonsuffix.isEmpty) { + diffs.addLast(new Diff(DIFF_EQUAL, commonsuffix)); + } + + diff_cleanupMerge(diffs); + return diffs; + } + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * [text1] is the old string to be diffed. + * [text2] is the new string to be diffed. + * [checklines] is a speedup flag. 
If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * [deadline] is the time when the diff should be complete by. + * Returns a List of Diff objects. + */ + List _diff_compute(String text1, String text2, + bool checklines, Date deadline) { + List diffs = []; + + if (text1.length == 0) { + // Just add some text (speedup). + diffs.add(new Diff(DIFF_INSERT, text2)); + return diffs; + } + + if (text2.length == 0) { + // Just delete some text (speedup). + diffs.add(new Diff(DIFF_DELETE, text1)); + return diffs; + } + + String longtext = text1.length > text2.length ? text1 : text2; + String shorttext = text1.length > text2.length ? text2 : text1; + int i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup): the rest of the longer text is a single kind of edit. + int op = (text1.length > text2.length) ? + DIFF_DELETE : DIFF_INSERT; + diffs.add(new Diff(op, longtext.substring(0, i))); + diffs.add(new Diff(DIFF_EQUAL, shorttext)); + diffs.add(new Diff(op, longtext.substring(i + shorttext.length))); + return diffs; + } + + if (shorttext.length == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.add(new Diff(DIFF_DELETE, text1)); + diffs.add(new Diff(DIFF_INSERT, text2)); + return diffs; + } + + // Check to see if the problem can be split in two. + final hm = _diff_halfMatch(text1, text2); + if (hm != null) { + // A half-match was found, sort out the return data: [text1_a, text1_b, text2_a, text2_b, mid_common]. + final text1_a = hm[0]; + final text1_b = hm[1]; + final text2_a = hm[2]; + final text2_b = hm[3]; + final mid_common = hm[4]; + // Send both pairs off for separate processing. + final diffs_a = diff_main(text1_a, text2_a, checklines, deadline); + final diffs_b = diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. 
+ diffs = diffs_a; + diffs.add(new Diff(DIFF_EQUAL, mid_common)); + diffs.addAll(diffs_b); + return diffs; + } + + if (checklines && text1.length > 100 && text2.length > 100) { + return _diff_lineMode(text1, text2, deadline); + } + + return _diff_bisect(text1, text2, deadline); + } + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * [text1] is the old string to be diffed. + * [text2] is the new string to be diffed. + * [deadline] is the time when the diff should be complete by. + * Returns a List of Diff objects. + */ + List _diff_lineMode(String text1, String text2, Date deadline) { + // Scan the text on a line-by-line basis first: each line is encoded as one character. + final a = _diff_linesToChars(text1, text2); + text1 = a['chars1']; + text2 = a['chars2']; + final linearray = a['lineArray']; + + final diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + _diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines). + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end so the final run of edits reaches the DIFF_EQUAL case. + diffs.add(new Diff(DIFF_EQUAL, '')); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + final text_delete = new StringBuffer(); + final text_insert = new StringBuffer(); + while (pointer < diffs.length) { + switch (diffs[pointer].operation) { + case DIFF_INSERT: + count_insert++; + text_insert.add(diffs[pointer].text); + break; + case DIFF_DELETE: + count_delete++; + text_delete.add(diffs[pointer].text); + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. 
+ diffs.removeRange(pointer - count_delete - count_insert, + count_delete + count_insert); + pointer = pointer - count_delete - count_insert; + final a = diff_main(text_delete.toString(), text_insert.toString(), + false, deadline); + for (int j = a.length - 1; j >= 0; j--) { + diffs.insertRange(pointer, 1, a[j]); + } + pointer = pointer + a.length; + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + pointer++; + } + diffs.removeLast(); // Remove the dummy entry at the end. + + return diffs; + } + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * [text1] is the old string to be diffed. + * [text2] is the new string to be diffed. + * [deadline] is the time at which to bail if not yet complete. + * Returns a List of Diff objects (a plain delete + insert if no split is found). + */ + List _diff_bisect(String text1, String text2, Date deadline) { + // Cache the text lengths to prevent multiple calls. + final text1_length = text1.length; + final text2_length = text2.length; + final max_d = (text1_length + text2_length + 1) ~/ 2; + final v_offset = max_d; + final v_length = 2 * max_d; + final v1 = new List(v_length); + final v2 = new List(v_length); + for (int x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + final delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + final front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for (int d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if ((new Date.now()).compareTo(deadline) == 1) { + break; + } + + // Walk the front path one step. 
+ for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + int k1_offset = v_offset + k1; + int x1; + if (k1 == -d || k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int y1 = x1 - k1; + while (x1 < text1_length && y1 < text2_length + && text1[x1] == text2[y1]) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + int k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected: the front and reverse paths have met. + return _diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. + for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + int k2_offset = v_offset + k2; + int x2; + if (k2 == -d || k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + int y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length + && text1[text1_length - x2 - 1] + == text2[text2_length - y2 - 1]) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + int k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + int x1 = v1[k1_offset]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected: the front and reverse paths have met. 
+ return _diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + return [new Diff(DIFF_DELETE, text1), new Diff(DIFF_INSERT, text2)]; + } + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * [text1] is the old string to be diffed. + * [text2] is the new string to be diffed. + * [x] is the index of split point in text1. + * [y] is the index of split point in text2. + * [deadline] is the time at which to bail if not yet complete. + * Returns a List of Diff objects. + */ + List _diff_bisectSplit(String text1, String text2, + int x, int y, Date deadline) { + final text1a = text1.substring(0, x); + final text2a = text2.substring(0, y); + final text1b = text1.substring(x); + final text2b = text2.substring(y); + + // Compute both diffs serially. + final diffs = diff_main(text1a, text2a, false, deadline); + final diffsb = diff_main(text1b, text2b, false, deadline); + + diffs.addAll(diffsb); + return diffs; + } + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * [text1] is the first string. + * [text2] is the second string. + * Returns a Map containing the encoded text1 ('chars1'), the encoded text2 + * ('chars2') and the List of unique strings ('lineArray'). The zeroth element + * of the List of unique strings is intentionally blank. + */ + Map _diff_linesToChars(String text1, String text2) { + final lineArray = []; + final lineHash = new HashMap(); + // e.g. lineArray[4] == 'Hello\n' + // e.g. lineHash['Hello\n'] == 4 + + // '\x00' is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. 
+ lineArray.add(''); + + String chars1 = _diff_linesToCharsMunge(text1, lineArray, lineHash); + String chars2 = _diff_linesToCharsMunge(text2, lineArray, lineHash); + return {'chars1': chars1, 'chars2': chars2, 'lineArray': lineArray}; + } + + /** + * Split a text into a list of strings. Reduce the text to a string of + * hashes where each Unicode character represents one line. + * [text] is the string to encode. + * [lineArray] is a List of unique strings; unseen lines are appended to it. + * [lineHash] is a Map of strings to their lineArray indices; updated in place. + * Returns an encoded string. + */ + String _diff_linesToCharsMunge(String text, List lineArray, + Map lineHash) { + int lineStart = 0; + int lineEnd = -1; + String line; + final chars = new StringBuffer(); + // Walk the text, pulling out a substring for each line. + // text.split('\n') would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while (lineEnd < text.length - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length - 1; + } + line = text.substring(lineStart, lineEnd + 1); + lineStart = lineEnd + 1; + + if (lineHash.containsKey(line)) { + chars.add(new String.fromCharCodes([lineHash[line]])); + } else { + lineArray.add(line); + lineHash[line] = lineArray.length - 1; + chars.add(new String.fromCharCodes([lineArray.length - 1])); + } + } + return chars.toString(); + } + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * [diffs] is a List of Diff objects; each Diff's text is replaced in place. + * [lineArray] is a List of unique strings. + */ + void _diff_charsToLines(List diffs, List lineArray) { + final text = new StringBuffer(); + for (Diff diff in diffs) { + for (int y = 0; y < diff.text.length; y++) { + text.add(lineArray[diff.text.charCodeAt(y)]); + } + diff.text = text.toString(); + text.clear(); + } + } + + /** + * Determine the common prefix of two strings. + * [text1] is the first string. + * [text2] is the second string. 
+ * Returns the number of characters common to the start of each string. + */ + int diff_commonPrefix(String text1, String text2) { + // TODO: Once Dart's performance stabilizes, determine if linear or binary + // search is better. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + final n = min(text1.length, text2.length); + for (int i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; + } + } + return n; + } + + /** + * Determine the common suffix of two strings. + * [text1] is the first string. + * [text2] is the second string. + * Returns the number of characters common to the end of each string. + */ + int diff_commonSuffix(String text1, String text2) { + // TODO: Once Dart's performance stabilizes, determine if linear or binary + // search is better. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + final text1_length = text1.length; + final text2_length = text2.length; + final n = min(text1_length, text2_length); + for (int i = 1; i <= n; i++) { + if (text1[text1_length - i] != text2[text2_length - i]) { + return i - 1; + } + } + return n; + } + + /** + * Determine if the suffix of one string is the prefix of another. + * [text1] is the first string. + * [text2] is the second string. + * Returns the number of characters common to the end of the first + * string and the start of the second string. + */ + int _diff_commonOverlap(String text1, String text2) { + // Eliminate the empty-string case. + if (text1.isEmpty || text2.isEmpty) { + return 0; + } + // Cache the text lengths to prevent multiple calls. + final text1_length = text1.length; + final text2_length = text2.length; + // Truncate the longer string: an overlap cannot exceed the shorter length. + if (text1_length > text2_length) { + text1 = text1.substring(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2 = text2.substring(0, text1_length); + } + final text_length = min(text1_length, text2_length); + // Quick check for the worst case: the truncated strings are identical. 
+ if (text1 == text2) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while (true) { + String pattern = text1.substring(text_length - length); + int found = text2.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1.substring(text_length - length) == + text2.substring(0, length)) { + best = length; + length++; + } + } + } + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * [text1] is the first string. + * [text2] is the second string. + * Returns a five element List of Strings, containing the prefix of text1, + * the suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ + List _diff_halfMatch(String text1, String text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return null; + } + final longtext = text1.length > text2.length ? text1 : text2; + final shorttext = text1.length > text2.length ? text2 : text1; + if (longtext.length < 4 || shorttext.length * 2 < longtext.length) { + return null; // Pointless: under 4 characters, or the shorter text is less than half the longer. + } + + // First check if the second quarter is the seed for a half-match. + final hm1 = _diff_halfMatchI(longtext, shorttext, + ((longtext.length + 3) / 4).ceil().toInt()); + // Check again based on the third quarter. + final hm2 = _diff_halfMatchI(longtext, shorttext, + ((longtext.length + 1) / 2).ceil().toInt()); + List hm; + if (hm1 == null && hm2 == null) { + return null; + } else if (hm2 == null) { + hm = hm1; + } else if (hm1 == null) { + hm = hm2; + } else { + // Both matched. Select the one with the longest common middle. + hm = hm1[4].length > hm2[4].length ? 
hm1 : hm2; + } + + // A half-match was found, sort out the return data (hm is ordered longtext first). + if (text1.length > text2.length) { + return hm; + // text1 is the longer text, so hm is already in (text1, text2) order. + } else { + return [hm[2], hm[3], hm[0], hm[1], hm[4]]; + } + } + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * [longtext] is the longer string. + * [shorttext] is the shorter string. + * [i] Start index of quarter length substring within longtext. + * Returns a five element List of Strings, containing the prefix of longtext, + * the suffix of longtext, the prefix of shorttext, the suffix of + * shorttext and the common middle. Or null if there was no match. + */ + List _diff_halfMatchI(String longtext, String shorttext, int i) { + // Start with a 1/4 length substring at position i as a seed. + final seed = longtext.substring(i, + i + (longtext.length / 4).floor().toInt()); + int j = -1; + String best_common = ''; + String best_longtext_a = '', best_longtext_b = ''; + String best_shorttext_a = '', best_shorttext_b = ''; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + int prefixLength = diff_commonPrefix(longtext.substring(i), + shorttext.substring(j)); + int suffixLength = diff_commonSuffix(longtext.substring(0, i), + shorttext.substring(0, j)); + if (best_common.length < suffixLength + prefixLength) { + best_common = '${shorttext.substring(j - suffixLength, j)}' + '${shorttext.substring(j, j + prefixLength)}'; + best_longtext_a = longtext.substring(0, i - suffixLength); + best_longtext_b = longtext.substring(i + prefixLength); + best_shorttext_a = shorttext.substring(0, j - suffixLength); + best_shorttext_b = shorttext.substring(j + prefixLength); + } + } + if (best_common.length * 2 >= longtext.length) { + return [best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common]; + } else { + return null; + } + } + + /** + * Reduce the number of edits by eliminating semantically 
/**
 * Reduce the number of edits by eliminating semantically trivial equalities.
 * An equality is folded into the surrounding edits when it is no longer
 * than the larger of the insert/delete lengths on each side of it.
 * Afterwards the diff is re-merged (if anything changed), shifted to
 * logical boundaries, and overlapping delete/insert pairs are split.
 * [diffs] is a List of Diff objects; it is modified in place.
 */
void diff_cleanupSemantic(List diffs) {
  bool changes = false;
  // Stack of indices where equalities are found.
  final equalities = [];
  // Always equal to diffs[equalities.last].text, or null when there is
  // no candidate equality.
  String lastequality = null;
  int pointer = 0;  // Index of current position.
  // Number of characters that changed prior to the equality.
  int length_insertions1 = 0;
  int length_deletions1 = 0;
  // Number of characters that changed after the equality.
  int length_insertions2 = 0;
  int length_deletions2 = 0;
  while (pointer < diffs.length) {
    if (diffs[pointer].operation == DIFF_EQUAL) {  // Equality found.
      equalities.addLast(pointer);
      // What was "after" the previous equality is now "before" this one.
      length_insertions1 = length_insertions2;
      length_deletions1 = length_deletions2;
      length_insertions2 = 0;
      length_deletions2 = 0;
      lastequality = diffs[pointer].text;
    } else {  // An insertion or deletion.
      if (diffs[pointer].operation == DIFF_INSERT) {
        length_insertions2 += diffs[pointer].text.length;
      } else {
        length_deletions2 += diffs[pointer].text.length;
      }
      // Eliminate an equality that is smaller or equal to the edits on both
      // sides of it.
      if (lastequality != null && (lastequality.length
          <= max(length_insertions1, length_deletions1))
          && (lastequality.length <= max(length_insertions2,
                                         length_deletions2))) {
        // Duplicate record.
        // NOTE(review): insertRange(start, length, fill) is presumably the
        // early-SDK List API used throughout this file -- confirm.
        diffs.insertRange(equalities.last, 1,
            new Diff(DIFF_DELETE, lastequality));
        // Change second copy to insert.
        diffs[equalities.last + 1].operation = DIFF_INSERT;
        // Throw away the equality we just deleted.
        equalities.removeLast();
        // Throw away the previous equality (it needs to be reevaluated).
        if (!equalities.isEmpty) {
          equalities.removeLast();
        }
        // Rewind to the last surviving equality; -1 becomes 0 after the
        // pointer++ at the bottom of the loop.
        pointer = equalities.isEmpty ? -1 : equalities.last;
        length_insertions1 = 0;  // Reset the counters.
        length_deletions1 = 0;
        length_insertions2 = 0;
        length_deletions2 = 0;
        lastequality = null;
        changes = true;
      }
    }
    pointer++;
  }

  // Normalize the diff.
  if (changes) {
    diff_cleanupMerge(diffs);
  }
  _diff_cleanupSemanticLossless(diffs);

  // Find any overlaps between deletions and insertions.
  // e.g: delete "abcxxx", insert "xxxdef"
  //   -> delete "abc", equal "xxx", insert "def"
  // e.g: delete "xxxabc", insert "defxxx"
  //   -> insert "def", equal "xxx", delete "abc"
  // Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 1;
  while (pointer < diffs.length) {
    if (diffs[pointer - 1].operation == DIFF_DELETE
        && diffs[pointer].operation == DIFF_INSERT) {
      String deletion = diffs[pointer - 1].text;
      String insertion = diffs[pointer].text;
      // overlap_length1 is used below as an end-of-deletion /
      // start-of-insertion overlap; overlap_length2 is the reverse.
      int overlap_length1 = _diff_commonOverlap(deletion, insertion);
      int overlap_length2 = _diff_commonOverlap(insertion, deletion);
      if (overlap_length1 >= overlap_length2) {
        if (overlap_length1 >= deletion.length / 2 ||
            overlap_length1 >= insertion.length / 2) {
          // Overlap found.
          // Insert an equality and trim the surrounding edits.
          diffs.insertRange(pointer, 1,
              new Diff(DIFF_EQUAL, insertion.substring(0, overlap_length1)));
          diffs[pointer - 1].text =
              deletion.substring(0, deletion.length - overlap_length1);
          diffs[pointer + 1].text = insertion.substring(overlap_length1);
          // Skip the equality that was just inserted.
          pointer++;
        }
      } else {
        if (overlap_length2 >= deletion.length / 2 ||
            overlap_length2 >= insertion.length / 2) {
          // Reverse overlap found.
          // Insert an equality and swap and trim the surrounding edits.
          diffs.insertRange(pointer, 1,
              new Diff(DIFF_EQUAL, deletion.substring(0, overlap_length2)));
          diffs[pointer - 1] = new Diff(DIFF_INSERT,
              insertion.substring(0, insertion.length - overlap_length2));
          diffs[pointer + 1] = new Diff(DIFF_DELETE,
              deletion.substring(overlap_length2));
          // Skip the equality that was just inserted.
          pointer++;
        }
      }
      // Step past the insertion half of the pair.
      pointer++;
    }
    pointer++;
  }
}
+ String char1 = one[one.length - 1]; + String char2 = two[0]; + bool nonAlphaNumeric1 = char1.contains(nonAlphaNumericRegex_); + bool nonAlphaNumeric2 = char2.contains(nonAlphaNumericRegex_); + bool whitespace1 = nonAlphaNumeric1 && char1.contains(whitespaceRegex_); + bool whitespace2 = nonAlphaNumeric2 && char2.contains(whitespaceRegex_); + bool lineBreak1 = whitespace1 && char1.contains(linebreakRegex_); + bool lineBreak2 = whitespace2 && char2.contains(linebreakRegex_); + bool blankLine1 = lineBreak1 && one.contains(blanklineEndRegex_); + bool blankLine2 = lineBreak2 && two.contains(blanklineStartRegex_); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; + } + + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diffs.length - 1) { + if (diffs[pointer - 1].operation == DIFF_EQUAL + && diffs[pointer + 1].operation == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + String equality1 = diffs[pointer - 1].text; + String edit = diffs[pointer].text; + String equality2 = diffs[pointer + 1].text; + + // First, shift the edit as far left as possible. 
+ int commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset != 0) { + String commonString = edit.substring(edit.length - commonOffset); + equality1 = equality1.substring(0, equality1.length - commonOffset); + edit = + '$commonString${edit.substring(0, edit.length - commonOffset)}'; + equality2 = '$commonString$equality2'; + } + + // Second, step character by character right, looking for the best fit. + String bestEquality1 = equality1; + String bestEdit = edit; + String bestEquality2 = equality2; + int bestScore = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2); + while (!edit.isEmpty && !equality2.isEmpty + && edit[0] == equality2[0]) { + equality1 = '$equality1${edit[0]}'; + edit = '${edit.substring(1)}${equality2[0]}'; + equality2 = equality2.substring(1); + int score = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (diffs[pointer - 1].text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (!bestEquality1.isEmpty) { + diffs[pointer - 1].text = bestEquality1; + } else { + diffs.removeRange(pointer - 1, 1); + pointer--; + } + diffs[pointer].text = bestEdit; + if (!bestEquality2.isEmpty) { + diffs[pointer + 1].text = bestEquality2; + } else { + diffs.removeRange(pointer + 1, 1); + pointer--; + } + } + } + pointer++; + } + } + + // Define some regex patterns for matching boundaries. 
/**
 * Reduce the number of edits by eliminating operationally trivial equalities.
 * An equality shorter than Diff_EditCost that has edits around it is
 * rewritten as a delete plus an insert when the flag test below says the
 * split is worthwhile; the diff is then re-merged if anything changed.
 * [diffs] is a List of Diff objects; it is modified in place.
 */
void diff_cleanupEfficiency(List diffs) {
  bool changes = false;
  // Stack of indices where equalities are found.
  final equalities = [];
  // Always equal to diffs[equalities.last].text, or null when there is
  // no candidate equality.
  String lastequality = null;
  int pointer = 0;  // Index of current position.
  // Is there an insertion operation before the last equality.
  bool pre_ins = false;
  // Is there a deletion operation before the last equality.
  bool pre_del = false;
  // Is there an insertion operation after the last equality.
  bool post_ins = false;
  // Is there a deletion operation after the last equality.
  bool post_del = false;
  while (pointer < diffs.length) {
    if (diffs[pointer].operation == DIFF_EQUAL) {  // Equality found.
      if (diffs[pointer].text.length < Diff_EditCost
          && (post_ins || post_del)) {
        // Candidate found.
        equalities.addLast(pointer);
        // Edits seen since the previous equality now precede this one.
        pre_ins = post_ins;
        pre_del = post_del;
        lastequality = diffs[pointer].text;
      } else {
        // Not a candidate, and can never become one.
        equalities.clear();
        lastequality = null;
      }
      post_ins = post_del = false;
    } else {  // An insertion or deletion.
      if (diffs[pointer].operation == DIFF_DELETE) {
        post_del = true;
      } else {
        post_ins = true;
      }
      /*
       * Five types to be split (ins = insertion, del = deletion, X / XY is
       * the equality being removed):
       * ins(A) del(B) XY ins(C) del(D)   -- all four flags set
       * ins(A)        X  ins(C) del(D)   -- three flags, short equality
       * ins(A) del(B) X  ins(C)
       *        del(A) X  ins(C) del(D)
       * ins(A) del(B) X         del(C)
       */
      if (lastequality != null
          && ((pre_ins && pre_del && post_ins && post_del)
          || ((lastequality.length < Diff_EditCost / 2)
          && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0)
          + (post_del ? 1 : 0)) == 3))) {
        // Duplicate record.
        diffs.insertRange(equalities.last, 1,
            new Diff(DIFF_DELETE, lastequality));
        // Change second copy to insert.
        diffs[equalities.last + 1].operation = DIFF_INSERT;
        equalities.removeLast();  // Throw away the equality we just deleted.
        lastequality = null;
        if (pre_ins && pre_del) {
          // No changes made which could affect previous entry, keep going.
          post_ins = post_del = true;
          equalities.clear();
        } else {
          // The previous equality must be re-examined, so drop it as well.
          if (!equalities.isEmpty) {
            equalities.removeLast();
          }
          // Rewind; -1 becomes 0 after the pointer++ at the bottom.
          pointer = equalities.isEmpty ? -1 : equalities.last;
          post_ins = post_del = false;
        }
        changes = true;
      }
    }
    pointer++;
  }

  if (changes) {
    diff_cleanupMerge(diffs);
  }
}
+ commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if ((pointer - count_delete - count_insert) > 0 + && diffs[pointer - count_delete - count_insert - 1] + .operation == DIFF_EQUAL) { + final i = pointer - count_delete - count_insert - 1; + diffs[i].text = '${diffs[i].text}' + '${text_insert.substring(0, commonlength)}'; + } else { + diffs.insertRange(0, 1, new Diff(DIFF_EQUAL, + text_insert.substring(0, commonlength))); + pointer++; + } + text_insert = text_insert.substring(commonlength); + text_delete = text_delete.substring(commonlength); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + diffs[pointer].text = + '${text_insert.substring(text_insert.length + - commonlength)}${diffs[pointer].text}'; + text_insert = text_insert.substring(0, text_insert.length + - commonlength); + text_delete = text_delete.substring(0, text_delete.length + - commonlength); + } + } + // Delete the offending records and add the merged ones. + if (count_delete == 0) { + diffs.removeRange(pointer - count_insert, count_insert); + diffs.insertRange(pointer - count_insert, 1, + new Diff(DIFF_INSERT, text_insert)); + } else if (count_insert == 0) { + diffs.removeRange(pointer - count_delete, count_delete); + diffs.insertRange(pointer - count_delete, 1, + new Diff(DIFF_DELETE, text_delete)); + } else { + diffs.removeRange(pointer - count_delete - count_insert, + count_delete + count_insert); + diffs.insertRange(pointer - count_delete - count_insert, 1, + new Diff(DIFF_INSERT, text_insert)); + diffs.insertRange(pointer - count_delete - count_insert, 1, + new Diff(DIFF_DELETE, text_delete)); + } + pointer = pointer - count_delete - count_insert + + (count_delete == 0 ? 0 : 1) + + (count_insert == 0 ? 0 : 1) + 1; + } else if (pointer != 0 && diffs[pointer - 1].operation + == DIFF_EQUAL) { + // Merge this equality with the previous one. 
+ diffs[pointer - 1].text = + '${diffs[pointer - 1].text}${diffs[pointer].text}'; + diffs.removeRange(pointer, 1); + } else { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + } + if (diffs.last.text.isEmpty) { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by equalities + // which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diffs.length - 1) { + if (diffs[pointer - 1].operation == DIFF_EQUAL + && diffs[pointer + 1].operation == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + if (diffs[pointer].text.endsWith(diffs[pointer - 1].text)) { + // Shift the edit over the previous equality. + diffs[pointer].text = '${diffs[pointer - 1].text}' + '${diffs[pointer].text.substring(0, + diffs[pointer].text.length - diffs[pointer - 1].text.length)}'; + diffs[pointer + 1].text = + '${diffs[pointer - 1].text}${diffs[pointer + 1].text}'; + diffs.removeRange(pointer - 1, 1); + changes = true; + } else if (diffs[pointer].text.startsWith(diffs[pointer + 1].text)) { + // Shift the edit over the next equality. + diffs[pointer - 1].text = + '${diffs[pointer - 1].text}${diffs[pointer + 1].text}'; + diffs[pointer].text = + '${diffs[pointer].text.substring(diffs[pointer + 1].text.length)}' + '${diffs[pointer + 1].text}'; + diffs.removeRange(pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } + } + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * [diffs] is a List of Diff objects. + * [loc] is the location within text1. 
/**
 * Convert a Diff list into a pretty HTML report.
 * Diff text is HTML-escaped before being wrapped in markup, so text
 * containing '&', '<' or '>' cannot inject tags into the report.
 * [diffs] is a List of Diff objects.
 * Returns an HTML representation.
 */
String diff_prettyHtml(List diffs) {
  final html = new StringBuffer();
  for (Diff aDiff in diffs) {
    // '&' must be escaped first, otherwise the ampersands produced by the
    // later replacements would themselves be escaped a second time.
    // Newlines become a visible pilcrow followed by an HTML line break.
    String text = aDiff.text.replaceAll('&', '&amp;').replaceAll('<', '&lt;')
        .replaceAll('>', '&gt;').replaceAll('\n', '&para;<br>');
    switch (aDiff.operation) {
      case DIFF_INSERT:
        // Insertions: green background.
        html.add('<ins style="background:#e6ffe6;">').add(text)
            .add('</ins>');
        break;
      case DIFF_DELETE:
        // Deletions: red background.
        html.add('<del style="background:#ffe6e6;">').add(text)
            .add('</del>');
        break;
      case DIFF_EQUAL:
        // Unchanged text: plain span.
        html.add('<span>').add(text).add('</span>');
        break;
    }
  }
  return html.toString();
}
/**
 * Crush the diff into an encoded string which describes the operations
 * required to transform text1 into text2.
 * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
 * Operations are tab-separated. Inserted text is escaped using %xx
 * notation, except that encoded spaces are turned back into ' '.
 * [diffs] is a List of Diff objects.
 * Returns the delta text.
 */
String diff_toDelta(List diffs) {
  final tokens = new StringBuffer();
  for (Diff entry in diffs) {
    final op = entry.operation;
    if (op == DIFF_INSERT) {
      // Inserted text travels inside the delta itself, URI-encoded.
      tokens.add('+');
      tokens.add(encodeUri(entry.text));
      tokens.add('\t');
    } else if (op == DIFF_DELETE) {
      // Deletions only record how many characters to drop.
      tokens.add('-');
      tokens.add(entry.text.length);
      tokens.add('\t');
    } else if (op == DIFF_EQUAL) {
      // Equalities only record how many characters to keep.
      tokens.add('=');
      tokens.add(entry.text.length);
      tokens.add('\t');
    }
  }
  final String joined = tokens.toString();
  // Every token was written with a trailing tab; drop the final one.
  final String trimmed =
      joined.isEmpty ? joined : joined.substring(0, joined.length - 1);
  return trimmed.replaceAll('%20', ' ');
}
/**
 * Locate the best instance of 'pattern' in 'text' near 'loc'.
 * [text] is the text to search.
 * [pattern] is the pattern to search for.
 * [loc] is the location to search around; it is clamped to the text.
 * Returns the best match index, or -1 if no match was found.
 * Throws ArgumentError if [text] or [pattern] is null.
 */
int match_main(String text, String pattern, int loc) {
  // Reject null inputs up front.
  if (text == null || pattern == null) {
    throw new ArgumentError('Null inputs. (match_main)');
  }

  // Clamp the requested location into [0, text.length].
  final int anchor = max(0, min(loc, text.length));

  if (text == pattern) {
    // Shortcut (potentially not guaranteed by the algorithm)
    return 0;
  }
  if (text.isEmpty) {
    // Nothing to match.
    return -1;
  }

  final int windowEnd = anchor + pattern.length;
  if (windowEnd <= text.length
      && text.substring(anchor, windowEnd) == pattern) {
    // Perfect match at the perfect spot!  (Includes an empty pattern.)
    return anchor;
  }

  // Do a fuzzy compare.
  return _match_bitap(text, pattern, anchor);
}
/**
 * Locate the best instance of 'pattern' in 'text' near 'loc' using the
 * Bitap algorithm.  Returns -1 if no match found.
 * [text] is the text to search.
 * [pattern] is the pattern to search for.
 * [loc] is the location to search around.
 * Returns the best match index or -1.
 */
int _match_bitap(String text, String pattern, int loc) {
  // NOTE(review): Expect looks like a test-support helper; confirm it is
  // meant to be used for argument checking in library code.
  Expect.isTrue(Match_MaxBits == 0 || pattern.length <= Match_MaxBits,
      'Pattern too long for this application.');

  // Initialise the alphabet.
  Map s = _match_alphabet(pattern);

  // Highest score beyond which we give up.
  double score_threshold = Match_Threshold;
  // Is there a nearby exact match? (speedup)
  int best_loc = text.indexOf(pattern, loc);
  if (best_loc != -1) {
    score_threshold = min(_match_bitapScore(0, best_loc, loc, pattern),
        score_threshold);
    // What about in the other direction? (speedup)
    best_loc = text.lastIndexOf(pattern, loc + pattern.length);
    if (best_loc != -1) {
      score_threshold = min(_match_bitapScore(0, best_loc, loc, pattern),
          score_threshold);
    }
  }

  // Initialise the bit arrays.
  // matchmask selects the bit that is tested below to detect a match.
  final matchmask = 1 << (pattern.length - 1);
  // The exact-match probes above only tightened the threshold; the
  // search proper starts with no location found.
  best_loc = -1;

  int bin_min, bin_mid;
  int bin_max = pattern.length + text.length;
  // Bit array from the previous error level (unset while d == 0).
  List last_rd;
  for (int d = 0; d < pattern.length; d++) {
    // Scan for the best match; each iteration allows for one more error.
    // Run a binary search to determine how far from 'loc' we can stray at
    // this error level.
    bin_min = 0;
    bin_mid = bin_max;
    while (bin_min < bin_mid) {
      if (_match_bitapScore(d, loc + bin_mid, loc, pattern)
          <= score_threshold) {
        bin_min = bin_mid;
      } else {
        bin_max = bin_mid;
      }
      bin_mid = ((bin_max - bin_min) / 2 + bin_min).toInt();
    }
    // Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid;
    int start = max(1, loc - bin_mid + 1);
    int finish = min(loc + bin_mid, text.length) + pattern.length;

    // rd is indexed 1-based against text: rd[j] is computed from
    // text[j - 1]; rd[finish + 1] is the seed for the right-to-left scan.
    final rd = new List(finish + 2);
    rd[finish + 1] = (1 << d) - 1;
    for (int j = finish; j >= start; j--) {
      int charMatch;
      if (text.length <= j - 1 || !s.containsKey(text[j - 1])) {
        // Out of range, or a character that does not occur in pattern.
        charMatch = 0;
      } else {
        charMatch = s[text[j - 1]];
      }
      if (d == 0) {
        // First pass: exact match.
        rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
      } else {
        // Subsequent passes: fuzzy match.
        rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
            | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1];
      }
      if ((rd[j] & matchmask) != 0) {
        double score = _match_bitapScore(d, j - 1, loc, pattern);
        // This match will almost certainly be better than any existing
        // match.  But check anyway.
        if (score <= score_threshold) {
          // Told you so.
          score_threshold = score;
          best_loc = j - 1;
          if (best_loc > loc) {
            // When passing loc, don't exceed our current distance from loc.
            start = max(1, 2 * loc - best_loc);
          } else {
            // Already passed loc, downhill from here on in.
            break;
          }
        }
      }
    }
    if (_match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) {
      // No hope for a (better) match at greater error levels.
      break;
    }
    last_rd = rd;
  }
  return best_loc;
}
/**
 * Initialise the alphabet for the Bitap algorithm.
 * Each character of the pattern maps to a bitmask with one bit set per
 * position at which it occurs; the first pattern character owns the
 * highest bit and the last character owns bit 0.
 * [pattern] is the text to encode.
 * Returns a Map of character locations.
 */
Map _match_alphabet(String pattern) {
  final masks = new HashMap();
  final int lastIndex = pattern.length - 1;
  for (int pos = 0; pos <= lastIndex; pos++) {
    final String ch = pattern[pos];
    // Characters not seen yet start from an empty mask.
    final int seen = masks.containsKey(ch) ? masks[ch] : 0;
    masks[ch] = seen | (1 << (lastIndex - pos));
  }
  return masks;
}
+ patch.length1 += prefix.length + suffix.length; + patch.length2 += prefix.length + suffix.length; + } + + /** + * Compute a list of patches to turn text1 into text2. + * Use diffs if provided, otherwise compute it ourselves. + * There are four ways to call this function, depending on what data is + * available to the caller: + * Method 1: + * [a] = text1, [opt_b] = text2 + * Method 2: + * [a] = diffs + * Method 3 (optimal): + * [a] = text1, [opt_b] = diffs + * Method 4 (deprecated, use method 3): + * [a] = text1, [opt_b] = text2, [opt_c] = diffs + * Returns a List of Patch objects. + */ + List patch_make(a, [opt_b, opt_c]) { + String text1; + List diffs; + if (a is String && opt_b is String && opt_c == null) { + // Method 1: text1, text2 + // Compute diffs from text1 and text2. + text1 = a; + diffs = diff_main(text1, opt_b, true); + if (diffs.length > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + } else if (a is List && opt_b == null && opt_c == null) { + // Method 2: diffs + // Compute text1 from diffs. + diffs = a; + text1 = diff_text1(diffs); + } else if (a is String && opt_b is List && opt_c == null) { + // Method 3: text1, diffs + text1 = a; + diffs = opt_b; + } else if (a is String && opt_b is String && opt_c is List) { + // Method 4: text1, text2, diffs + // text2 is not used. + text1 = a; + diffs = opt_c; + } else { + throw new ArgumentError('Unknown call format to patch_make.'); + } + + final patches = []; + if (diffs.isEmpty) { + return patches; // Get rid of the null case. + } + Patch patch = new Patch(); + final postpatch_buffer = new StringBuffer(); + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. 
+ String prepatch_text = text1; + String postpatch_text = text1; + for (Diff aDiff in diffs) { + if (patch.diffs.isEmpty && aDiff.operation != DIFF_EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case DIFF_INSERT: + patch.diffs.add(aDiff); + patch.length2 += aDiff.text.length; + postpatch_buffer.clear(); + postpatch_text = + postpatch_buffer.add(postpatch_text.substring(0, char_count2)) + .add(aDiff.text).add(postpatch_text.substring(char_count2)) + .toString(); + break; + case DIFF_DELETE: + patch.length1 += aDiff.text.length; + patch.diffs.add(aDiff); + postpatch_buffer.clear(); + postpatch_text = + postpatch_buffer.add(postpatch_text.substring(0, char_count2)) + .add(postpatch_text.substring(char_count2 + aDiff.text.length)) + .toString(); + break; + case DIFF_EQUAL: + if (aDiff.text.length <= 2 * Patch_Margin + && !patch.diffs.isEmpty && aDiff != diffs.last) { + // Small equality inside a patch. + patch.diffs.add(aDiff); + patch.length1 += aDiff.text.length; + patch.length2 += aDiff.text.length; + } + + if (aDiff.text.length >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.isEmpty) { + _patch_addContext(patch, prepatch_text); + patches.add(patch); + patch = new Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != DIFF_INSERT) { + char_count1 += aDiff.text.length; + } + if (aDiff.operation != DIFF_DELETE) { + char_count2 += aDiff.text.length; + } + } + // Pick up the leftover patch if not empty. 
/**
 * Given an array of patches, return another array that is identical.
 * Every Patch and every Diff inside it is a fresh object, so changes to
 * the copy do not reach the originals.
 * [patches] is a List of Patch objects.
 * Returns a List of Patch objects.
 */
List patch_deepCopy(List patches) {
  final clones = [];
  for (int p = 0; p < patches.length; p++) {
    final Patch source = patches[p];
    final Patch clone = new Patch();
    // Copy the position and length fields.
    clone.start1 = source.start1;
    clone.start2 = source.start2;
    clone.length1 = source.length1;
    clone.length2 = source.length2;
    // Rebuild each Diff rather than sharing the original instance.
    for (int d = 0; d < source.diffs.length; d++) {
      final Diff original = source.diffs[d];
      clone.diffs.add(new Diff(original.operation, original.text));
    }
    clones.add(clone);
  }
  return clones;
}
+ int delta = 0; + final results = new List(patches.length); + for (Patch aPatch in patches) { + int expected_loc = aPatch.start2 + delta; + String text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.length > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = match_main(text, + text1.substring(0, Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, + text1.substring(text1.length - Match_MaxBits), + expected_loc + text1.length - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + String text2; + if (end_loc == -1) { + text2 = text.substring(start_loc, + min(start_loc + text1.length, text.length)); + } else { + text2 = text.substring(start_loc, + min(end_loc + Match_MaxBits, text.length)); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text_buffer.clear(); + text = text_buffer.add(text.substring(0, start_loc)) + .add(diff_text2(aPatch.diffs)) + .add(text.substring(start_loc + text1.length)).toString(); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + final diffs = diff_main(text1, text2, false); + if (text1.length > Match_MaxBits + && diff_levenshtein(diffs) / text1.length + > Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + _diff_cleanupSemanticLossless(diffs); + int index1 = 0; + for (Diff aDiff in aPatch.diffs) { + if (aDiff.operation != DIFF_EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == DIFF_INSERT) { + // Insertion + text_buffer.clear(); + text = text_buffer.add(text.substring(0, start_loc + index2)) + .add(aDiff.text) + .add(text.substring(start_loc + index2)).toString(); + } else if (aDiff.operation == DIFF_DELETE) { + // Deletion + text_buffer.clear(); + text = text_buffer.add(text.substring(0, start_loc + index2)) + .add(text.substring(start_loc + diff_xIndex(diffs, + index1 + aDiff.text.length))).toString(); + } + } + if (aDiff.operation != DIFF_DELETE) { + index1 += aDiff.text.length; + } + } + } + } + } + x++; + } + // Strip the padding off. + text = text.substring(nullPadding.length, text.length - nullPadding.length); + return [text, results]; + } + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * [patches] is a List of Patch objects. + * Returns the padding string added to each side. + */ + String patch_addPadding(List patches) { + final paddingLength = Patch_Margin; + final paddingCodes = []; + for (int x = 1; x <= paddingLength; x++) { + paddingCodes.add(x); + } + String nullPadding = new String.fromCharCodes(paddingCodes); + + // Bump all the patches forward. + for (Patch aPatch in patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch patch = patches[0]; + List diffs = patch.diffs; + if (diffs.isEmpty || diffs[0].operation != DIFF_EQUAL) { + // Add nullPadding equality. + diffs.insertRange(0, 1, new Diff(DIFF_EQUAL, nullPadding)); + patch.start1 -= paddingLength; // Should be 0. + patch.start2 -= paddingLength; // Should be 0. 
+ patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs[0].text.length) { + // Grow first equality. + Diff firstDiff = diffs[0]; + int extraLength = paddingLength - firstDiff.text.length; + firstDiff.text = + '${nullPadding.substring(firstDiff.text.length)}${firstDiff.text}'; + patch.start1 -= extraLength; + patch.start2 -= extraLength; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + // Add some padding on end of last diff. + patch = patches.last; + diffs = patch.diffs; + if (diffs.isEmpty || diffs.last.operation != DIFF_EQUAL) { + // Add nullPadding equality. + diffs.addLast(new Diff(DIFF_EQUAL, nullPadding)); + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs.last.text.length) { + // Grow last equality. + Diff lastDiff = diffs.last; + int extraLength = paddingLength - lastDiff.text.length; + lastDiff.text = + '${lastDiff.text}${nullPadding.substring(0, extraLength)}'; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + return nullPadding; + } + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * [patches] is a List of Patch objects. + */ + patch_splitMax(List patches) { + final patch_size = Match_MaxBits; + for (var x = 0; x < patches.length; x++) { + if (patches[x].length1 <= patch_size) { + continue; + } + Patch bigpatch = patches[x]; + // Remove the big old patch. + patches.removeRange(x--, 1); + int start1 = bigpatch.start1; + int start2 = bigpatch.start2; + String precontext = ''; + while (!bigpatch.diffs.isEmpty) { + // Create one of several smaller patches. 
+ final patch = new Patch(); + bool empty = true; + patch.start1 = start1 - precontext.length; + patch.start2 = start2 - precontext.length; + if (!precontext.isEmpty) { + patch.length1 = patch.length2 = precontext.length; + patch.diffs.add(new Diff(DIFF_EQUAL, precontext)); + } + while (!bigpatch.diffs.isEmpty + && patch.length1 < patch_size - Patch_Margin) { + int diff_type = bigpatch.diffs[0].operation; + String diff_text = bigpatch.diffs[0].text; + if (diff_type == DIFF_INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length; + start2 += diff_text.length; + patch.diffs.addLast(bigpatch.diffs[0]); + bigpatch.diffs.removeRange(0, 1); + empty = false; + } else if (diff_type == DIFF_DELETE && patch.diffs.length == 1 + && patch.diffs[0].operation == DIFF_EQUAL + && diff_text.length > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length; + start1 += diff_text.length; + empty = false; + patch.diffs.add(new Diff(diff_type, diff_text)); + bigpatch.diffs.removeRange(0, 1); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substring(0, min(diff_text.length, + patch_size - patch.length1 - Patch_Margin)); + patch.length1 += diff_text.length; + start1 += diff_text.length; + if (diff_type == DIFF_EQUAL) { + patch.length2 += diff_text.length; + start2 += diff_text.length; + } else { + empty = false; + } + patch.diffs.add(new Diff(diff_type, diff_text)); + if (diff_text == bigpatch.diffs[0].text) { + bigpatch.diffs.removeRange(0, 1); + } else { + bigpatch.diffs[0].text = bigpatch.diffs[0].text + .substring(diff_text.length); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = precontext.substring(max(0, precontext.length + - Patch_Margin)); + // Append the end context for this patch. 
+ String postcontext; + if (diff_text1(bigpatch.diffs).length > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + if (!postcontext.isEmpty) { + patch.length1 += postcontext.length; + patch.length2 += postcontext.length; + if (!patch.diffs.isEmpty + && patch.diffs.last.operation == DIFF_EQUAL) { + patch.diffs.last.text = '${patch.diffs.last.text}$postcontext'; + } else { + patch.diffs.add(new Diff(DIFF_EQUAL, postcontext)); + } + } + if (!empty) { + patches.insertRange(++x, 1, patch); + } + } + } + } + + /** + * Take a list of patches and return a textual representation. + * [patches] is a List of Patch objects. + * Returns a text representation of patches. + */ + String patch_toText(List patches) { + final text = new StringBuffer(); + for (Patch aPatch in patches) { + text.add(aPatch); + } + return text.toString(); + } + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * [textline] is a text representation of patches. + * Returns a List of Patch objects. + * Throws ArgumentError if invalid input. 
+ */ + List patch_fromText(String textline) { + final patches = []; + if (textline.isEmpty) { + return patches; + } + final text = textline.split('\n'); + int textPointer = 0; + final patchHeader + = new RegExp('^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@\$'); + while (textPointer < text.length) { + Match m = patchHeader.firstMatch(text[textPointer]); + if (m == null) { + throw new ArgumentError( + 'Invalid patch string: ${text[textPointer]}'); + } + final patch = new Patch(); + patches.add(patch); + patch.start1 = int.parse(m.group(1)); + if (m.group(2).isEmpty) { + patch.start1--; + patch.length1 = 1; + } else if (m.group(2) == '0') { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = int.parse(m.group(2)); + } + + patch.start2 = int.parse(m.group(3)); + if (m.group(4).isEmpty) { + patch.start2--; + patch.length2 = 1; + } else if (m.group(4) == '0') { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = int.parse(m.group(4)); + } + textPointer++; + + while (textPointer < text.length) { + if (!text[textPointer].isEmpty) { + final sign = text[textPointer][0]; + String line; + try { + line = decodeUri(text[textPointer].substring(1)); + } on ArgumentError catch (e) { + // Malformed URI sequence. + throw new ArgumentError( + 'Illegal escape in patch_fromText: $line'); + } + if (sign == '-') { + // Deletion. + patch.diffs.add(new Diff(DIFF_DELETE, line)); + } else if (sign == '+') { + // Insertion. + patch.diffs.add(new Diff(DIFF_INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.add(new Diff(DIFF_EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ throw new ArgumentError( + 'Invalid patch mode "$sign" in: $line'); + } + } + textPointer++; + } + } + return patches; + } +} diff --git a/dart/DiffClass.dart b/dart/DiffClass.dart new file mode 100644 index 0000000..4c8a8f3 --- /dev/null +++ b/dart/DiffClass.dart @@ -0,0 +1,57 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +part of DiffMatchPatch; + +/** + * Class representing one diff operation. + */ +class Diff { + /** + * One of: DIFF_INSERT, DIFF_DELETE or DIFF_EQUAL. + */ + int operation; + /** + * The text associated with this diff operation. + */ + String text; + + /** + * Constructor. Initializes the diff with the provided values. + * [operation] is one of DIFF_INSERT, DIFF_DELETE or DIFF_EQUAL. + * [text] is the text being applied. + */ + Diff(this.operation, this.text); + + /** + * Display a human-readable version of this Diff. + * Returns a text version. + */ + String toString() { + String prettyText = this.text.replaceAll('\n', '\u00b6'); + return 'Diff(${this.operation},"$prettyText")'; + } + + /** + * Is this Diff equivalent to another Diff? + * [other] is another Diff to compare against. + * Returns true or false. 
+ */ + bool operator ==(Diff other) => + operation == other.operation && text == other.text; +} diff --git a/dart/DiffMatchPatch.dart b/dart/DiffMatchPatch.dart new file mode 100644 index 0000000..4383359 --- /dev/null +++ b/dart/DiffMatchPatch.dart @@ -0,0 +1,26 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +library DiffMatchPatch; + +import 'dart:math'; +import 'dart:uri'; + +part 'DMPClass.dart'; +part 'DiffClass.dart'; +part 'PatchClass.dart'; diff --git a/dart/DiffMatchPatchTest.dart b/dart/DiffMatchPatchTest.dart new file mode 100644 index 0000000..fbf630d --- /dev/null +++ b/dart/DiffMatchPatchTest.dart @@ -0,0 +1,871 @@ +/** + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the 'License'); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an 'AS IS' BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Can't import DiffMatchPatch library since the private functions would be +// unavailable. Instead, import all the source files. +import 'dart:math'; +import 'dart:uri'; +part 'DMPClass.dart'; +part 'DiffClass.dart'; +part 'PatchClass.dart'; + +List _diff_rebuildtexts(diffs) { + // Construct the two texts which made up the diff originally. + final text1 = new StringBuffer(); + final text2 = new StringBuffer(); + for (int x = 0; x < diffs.length; x++) { + if (diffs[x].operation != DIFF_INSERT) { + text1.add(diffs[x].text); + } + if (diffs[x].operation != DIFF_DELETE) { + text2.add(diffs[x].text); + } + } + return [text1.toString(), text2.toString()]; +} + +DiffMatchPatch dmp; + +// DIFF TEST FUNCTIONS + + +void testDiffCommonPrefix() { + // Detect any common prefix. + Expect.equals(0, dmp.diff_commonPrefix('abc', 'xyz'), 'diff_commonPrefix: Null case.'); + + Expect.equals(4, dmp.diff_commonPrefix('1234abcdef', '1234xyz'), 'diff_commonPrefix: Non-null case.'); + + Expect.equals(4, dmp.diff_commonPrefix('1234', '1234xyz'), 'diff_commonPrefix: Whole case.'); +} + +void testDiffCommonSuffix() { + // Detect any common suffix. + Expect.equals(0, dmp.diff_commonSuffix('abc', 'xyz'), 'diff_commonSuffix: Null case.'); + + Expect.equals(4, dmp.diff_commonSuffix('abcdef1234', 'xyz1234'), 'diff_commonSuffix: Non-null case.'); + + Expect.equals(4, dmp.diff_commonSuffix('1234', 'xyz1234'), 'diff_commonSuffix: Whole case.'); +} + +void testDiffCommonOverlap() { + // Detect any suffix/prefix overlap. 
+ Expect.equals(0, dmp._diff_commonOverlap('', 'abcd'), 'diff_commonOverlap: Null case.'); + + Expect.equals(3, dmp._diff_commonOverlap('abc', 'abcd'), 'diff_commonOverlap: Whole case.'); + + Expect.equals(0, dmp._diff_commonOverlap('123456', 'abcd'), 'diff_commonOverlap: No overlap.'); + + Expect.equals(3, dmp._diff_commonOverlap('123456xxx', 'xxxabcd'), 'diff_commonOverlap: Overlap.'); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + Expect.equals(0, dmp._diff_commonOverlap('fi', '\ufb01i'), 'diff_commonOverlap: Unicode.'); +} + +void testDiffHalfmatch() { + // Detect a halfmatch. + dmp.Diff_Timeout = 1.0; + Expect.isNull(dmp._diff_halfMatch('1234567890', 'abcdef'), 'diff_halfMatch: No match #1.'); + + Expect.isNull(dmp._diff_halfMatch('12345', '23'), 'diff_halfMatch: No match #2.'); + + Expect.listEquals(['12', '90', 'a', 'z', '345678'], dmp._diff_halfMatch('1234567890', 'a345678z'), 'diff_halfMatch: Single Match #1.'); + + Expect.listEquals(['a', 'z', '12', '90', '345678'], dmp._diff_halfMatch('a345678z', '1234567890'), 'diff_halfMatch: Single Match #2.'); + + Expect.listEquals(['abc', 'z', '1234', '0', '56789'], dmp._diff_halfMatch('abc56789z', '1234567890'), 'diff_halfMatch: Single Match #3.'); + + Expect.listEquals(['a', 'xyz', '1', '7890', '23456'], dmp._diff_halfMatch('a23456xyz', '1234567890'), 'diff_halfMatch: Single Match #4.'); + + Expect.listEquals(['12123', '123121', 'a', 'z', '1234123451234'], dmp._diff_halfMatch('121231234123451234123121', 'a1234123451234z'), 'diff_halfMatch: Multiple Matches #1.'); + + Expect.listEquals(['', '-=-=-=-=-=', 'x', '', 'x-=-=-=-=-=-=-='], dmp._diff_halfMatch('x-=-=-=-=-=-=-=-=-=-=-=-=', 'xx-=-=-=-=-=-=-='), 'diff_halfMatch: Multiple Matches #2.'); + + Expect.listEquals(['-=-=-=-=-=', '', '', 'y', '-=-=-=-=-=-=-=y'], dmp._diff_halfMatch('-=-=-=-=-=-=-=-=-=-=-=-=y', '-=-=-=-=-=-=-=yy'), 'diff_halfMatch: Multiple Matches #3.'); + + // Optimal diff 
would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + Expect.listEquals(['qHillo', 'w', 'x', 'Hulloy', 'HelloHe'], dmp._diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy'), 'diff_halfMatch: Non-optimal halfmatch.'); + + dmp.Diff_Timeout = 0.0; + Expect.isNull(dmp._diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy'), 'diff_halfMatch: Optimal no halfmatch.'); +} + +void testDiffLinesToChars() { + void assertLinesToCharsResultEquals(Map a, Map b, String error_msg) { + Expect.equals(a['chars1'], b['chars1'], error_msg); + Expect.equals(a['chars2'], b['chars2'], error_msg); + Expect.listEquals(a['lineArray'], b['lineArray'], error_msg); + } + + // Convert lines down to characters. + assertLinesToCharsResultEquals({'chars1': '\u0001\u0002\u0001', 'chars2': '\u0002\u0001\u0002', 'lineArray': ['', 'alpha\n', 'beta\n']}, dmp._diff_linesToChars('alpha\nbeta\nalpha\n', 'beta\nalpha\nbeta\n'), 'diff_linesToChars: Shared lines.'); + + assertLinesToCharsResultEquals({'chars1': '', 'chars2': '\u0001\u0002\u0003\u0003', 'lineArray': ['', 'alpha\r\n', 'beta\r\n', '\r\n']}, dmp._diff_linesToChars('', 'alpha\r\nbeta\r\n\r\n\r\n'), 'diff_linesToChars: Empty string and blank lines.'); + + assertLinesToCharsResultEquals({'chars1': '\u0001', 'chars2': '\u0002', 'lineArray': ['', 'a', 'b']}, dmp._diff_linesToChars('a', 'b'), 'diff_linesToChars: No linebreaks.'); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + List lineList = []; + StringBuffer charList = new StringBuffer(); + for (int x = 1; x < n + 1; x++) { + lineList.add('$x\n'); + charList.add(new String.fromCharCodes([x])); + } + Expect.equals(n, lineList.length); + String lines = Strings.join(lineList, ''); + String chars = charList.toString(); + Expect.equals(n, chars.length); + lineList.insertRange(0, 1, ''); + assertLinesToCharsResultEquals({'chars1': chars, 'chars2': '', 'lineArray': lineList}, dmp._diff_linesToChars(lines, ''), 'diff_linesToChars: More than 256.'); +} + +void testDiffCharsToLines() { + // First check that Diff equality works. + Expect.isTrue(new Diff(DIFF_EQUAL, 'a') == new Diff(DIFF_EQUAL, 'a'), 'diff_charsToLines: Equality #1.'); + + Expect.equals(new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_EQUAL, 'a'), 'diff_charsToLines: Equality #2.'); + + // Convert chars up to lines. + List diffs = [new Diff(DIFF_EQUAL, '\u0001\u0002\u0001'), new Diff(DIFF_INSERT, '\u0002\u0001\u0002')]; + dmp._diff_charsToLines(diffs, ['', 'alpha\n', 'beta\n']); + Expect.listEquals([new Diff(DIFF_EQUAL, 'alpha\nbeta\nalpha\n'), new Diff(DIFF_INSERT, 'beta\nalpha\nbeta\n')], diffs, 'diff_charsToLines: Shared lines.'); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + List lineList = []; + StringBuffer charList = new StringBuffer(); + for (int x = 1; x < n + 1; x++) { + lineList.add('$x\n'); + charList.add(new String.fromCharCodes([x])); + } + Expect.equals(n, lineList.length); + String lines = Strings.join(lineList, ''); + String chars = charList.toString(); + Expect.equals(n, chars.length); + lineList.insertRange(0, 1, ''); + diffs = [new Diff(DIFF_DELETE, chars)]; + dmp._diff_charsToLines(diffs, lineList); + Expect.listEquals([new Diff(DIFF_DELETE, lines)], diffs, 'diff_charsToLines: More than 256.'); +} + +void testDiffCleanupMerge() { + // Cleanup a messy diff. 
+ List diffs = []; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([], diffs, 'diff_cleanupMerge: Null case.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_INSERT, 'c')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_INSERT, 'c')], diffs, 'diff_cleanupMerge: No change case.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_EQUAL, 'b'), new Diff(DIFF_EQUAL, 'c')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'abc')], diffs, 'diff_cleanupMerge: Merge equalities.'); + + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_DELETE, 'c')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abc')], diffs, 'diff_cleanupMerge: Merge deletions.'); + + diffs = [new Diff(DIFF_INSERT, 'a'), new Diff(DIFF_INSERT, 'b'), new Diff(DIFF_INSERT, 'c')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_INSERT, 'abc')], diffs, 'diff_cleanupMerge: Merge insertions.'); + + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, 'b'), new Diff(DIFF_DELETE, 'c'), new Diff(DIFF_INSERT, 'd'), new Diff(DIFF_EQUAL, 'e'), new Diff(DIFF_EQUAL, 'f')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'ac'), new Diff(DIFF_INSERT, 'bd'), new Diff(DIFF_EQUAL, 'ef')], diffs, 'diff_cleanupMerge: Merge interweave.'); + + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, 'abc'), new Diff(DIFF_DELETE, 'dc')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'd'), new Diff(DIFF_INSERT, 'b'), new Diff(DIFF_EQUAL, 'c')], diffs, 'diff_cleanupMerge: Prefix and suffix detection.'); + + diffs = [new Diff(DIFF_EQUAL, 'x'), new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, 'abc'), new Diff(DIFF_DELETE, 'dc'), new Diff(DIFF_EQUAL, 'y')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new 
Diff(DIFF_EQUAL, 'xa'), new Diff(DIFF_DELETE, 'd'), new Diff(DIFF_INSERT, 'b'), new Diff(DIFF_EQUAL, 'cy')], diffs, 'diff_cleanupMerge: Prefix and suffix detection with equalities.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_INSERT, 'ba'), new Diff(DIFF_EQUAL, 'c')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_INSERT, 'ab'), new Diff(DIFF_EQUAL, 'ac')], diffs, 'diff_cleanupMerge: Slide edit left.'); + + diffs = [new Diff(DIFF_EQUAL, 'c'), new Diff(DIFF_INSERT, 'ab'), new Diff(DIFF_EQUAL, 'a')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'ca'), new Diff(DIFF_INSERT, 'ba')], diffs, 'diff_cleanupMerge: Slide edit right.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_EQUAL, 'c'), new Diff(DIFF_DELETE, 'ac'), new Diff(DIFF_EQUAL, 'x')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_EQUAL, 'acx')], diffs, 'diff_cleanupMerge: Slide edit left recursive.'); + + diffs = [new Diff(DIFF_EQUAL, 'x'), new Diff(DIFF_DELETE, 'ca'), new Diff(DIFF_EQUAL, 'c'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_EQUAL, 'a')]; + dmp.diff_cleanupMerge(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'xca'), new Diff(DIFF_DELETE, 'cba')], diffs, 'diff_cleanupMerge: Slide edit right recursive.'); +} + +void testDiffCleanupSemanticLossless() { + // Slide diffs to match logical boundaries. 
+ List diffs = []; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([], diffs, 'diff_cleanupSemanticLossless: Null case.'); + + diffs = [new Diff(DIFF_EQUAL, 'AAA\r\n\r\nBBB'), new Diff(DIFF_INSERT, '\r\nDDD\r\n\r\nBBB'), new Diff(DIFF_EQUAL, '\r\nEEE')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'AAA\r\n\r\n'), new Diff(DIFF_INSERT, 'BBB\r\nDDD\r\n\r\n'), new Diff(DIFF_EQUAL, 'BBB\r\nEEE')], diffs, 'diff_cleanupSemanticLossless: Blank lines.'); + + diffs = [new Diff(DIFF_EQUAL, 'AAA\r\nBBB'), new Diff(DIFF_INSERT, ' DDD\r\nBBB'), new Diff(DIFF_EQUAL, ' EEE')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'AAA\r\n'), new Diff(DIFF_INSERT, 'BBB DDD\r\n'), new Diff(DIFF_EQUAL, 'BBB EEE')], diffs, 'diff_cleanupSemanticLossless: Line boundaries.'); + + diffs = [new Diff(DIFF_EQUAL, 'The c'), new Diff(DIFF_INSERT, 'ow and the c'), new Diff(DIFF_EQUAL, 'at.')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'The '), new Diff(DIFF_INSERT, 'cow and the '), new Diff(DIFF_EQUAL, 'cat.')], diffs, 'diff_cleanupSemanticLossless: Word boundaries.'); + + diffs = [new Diff(DIFF_EQUAL, 'The-c'), new Diff(DIFF_INSERT, 'ow-and-the-c'), new Diff(DIFF_EQUAL, 'at.')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'The-'), new Diff(DIFF_INSERT, 'cow-and-the-'), new Diff(DIFF_EQUAL, 'cat.')], diffs, 'diff_cleanupSemanticLossless: Alphanumeric boundaries.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_EQUAL, 'ax')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_EQUAL, 'aax')], diffs, 'diff_cleanupSemanticLossless: Hitting the start.'); + + diffs = [new Diff(DIFF_EQUAL, 'xa'), new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_EQUAL, 'a')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 
'xaa'), new Diff(DIFF_DELETE, 'a')], diffs, 'diff_cleanupSemanticLossless: Hitting the end.'); + + diffs = [new Diff(DIFF_EQUAL, 'The xxx. The '), new Diff(DIFF_INSERT, 'zzz. The '), new Diff(DIFF_EQUAL, 'yyy.')]; + dmp._diff_cleanupSemanticLossless(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'The xxx.'), new Diff(DIFF_INSERT, ' The zzz.'), new Diff(DIFF_EQUAL, ' The yyy.')], diffs, 'diff_cleanupSemanticLossless: Sentence boundaries.'); +} + +void testDiffCleanupSemantic() { + // Cleanup semantically trivial equalities. + List diffs = []; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([], diffs, 'diff_cleanupSemantic: Null case.'); + + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, 'cd'), new Diff(DIFF_EQUAL, '12'), new Diff(DIFF_DELETE, 'e')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, 'cd'), new Diff(DIFF_EQUAL, '12'), new Diff(DIFF_DELETE, 'e')], diffs, 'diff_cleanupSemantic: No elimination #1.'); + + diffs = [new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_INSERT, 'ABC'), new Diff(DIFF_EQUAL, '1234'), new Diff(DIFF_DELETE, 'wxyz')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_INSERT, 'ABC'), new Diff(DIFF_EQUAL, '1234'), new Diff(DIFF_DELETE, 'wxyz')], diffs, 'diff_cleanupSemantic: No elimination #2.'); + + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_EQUAL, 'b'), new Diff(DIFF_DELETE, 'c')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_INSERT, 'b')], diffs, 'diff_cleanupSemantic: Simple elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_EQUAL, 'cd'), new Diff(DIFF_DELETE, 'e'), new Diff(DIFF_EQUAL, 'f'), new Diff(DIFF_INSERT, 'g')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abcdef'), new Diff(DIFF_INSERT, 'cdfg')], diffs, 'diff_cleanupSemantic: Backpass elimination.'); + + diffs = [new Diff(DIFF_INSERT, 
'1'), new Diff(DIFF_EQUAL, 'A'), new Diff(DIFF_DELETE, 'B'), new Diff(DIFF_INSERT, '2'), new Diff(DIFF_EQUAL, '_'), new Diff(DIFF_INSERT, '1'), new Diff(DIFF_EQUAL, 'A'), new Diff(DIFF_DELETE, 'B'), new Diff(DIFF_INSERT, '2')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'AB_AB'), new Diff(DIFF_INSERT, '1A2_1A2')], diffs, 'diff_cleanupSemantic: Multiple elimination.'); + + diffs = [new Diff(DIFF_EQUAL, 'The c'), new Diff(DIFF_DELETE, 'ow and the c'), new Diff(DIFF_EQUAL, 'at.')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_EQUAL, 'The '), new Diff(DIFF_DELETE, 'cow and the '), new Diff(DIFF_EQUAL, 'cat.')], diffs, 'diff_cleanupSemantic: Word boundaries.'); + + diffs = [new Diff(DIFF_DELETE, 'abcxx'), new Diff(DIFF_INSERT, 'xxdef')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abcxx'), new Diff(DIFF_INSERT, 'xxdef')], diffs, 'diff_cleanupSemantic: No overlap elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'abcxxx'), new Diff(DIFF_INSERT, 'xxxdef')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_EQUAL, 'xxx'), new Diff(DIFF_INSERT, 'def')], diffs, 'diff_cleanupSemantic: Overlap elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'xxxabc'), new Diff(DIFF_INSERT, 'defxxx')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_INSERT, 'def'), new Diff(DIFF_EQUAL, 'xxx'), new Diff(DIFF_DELETE, 'abc')], diffs, 'diff_cleanupSemantic: Reverse overlap elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'abcd1212'), new Diff(DIFF_INSERT, '1212efghi'), new Diff(DIFF_EQUAL, '----'), new Diff(DIFF_DELETE, 'A3'), new Diff(DIFF_INSERT, '3BC')]; + dmp.diff_cleanupSemantic(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abcd'), new Diff(DIFF_EQUAL, '1212'), new Diff(DIFF_INSERT, 'efghi'), new Diff(DIFF_EQUAL, '----'), new Diff(DIFF_DELETE, 'A'), new Diff(DIFF_EQUAL, '3'), new Diff(DIFF_INSERT, 'BC')], diffs, 
'diff_cleanupSemantic: Two overlap eliminations.'); +} + +void testDiffCleanupEfficiency() { + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + List diffs = []; + dmp.diff_cleanupEfficiency(diffs); + Expect.listEquals([], diffs, 'diff_cleanupEfficiency: Null case.'); + + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'wxyz'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '34')]; + dmp.diff_cleanupEfficiency(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'wxyz'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '34')], diffs, 'diff_cleanupEfficiency: No elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'xyz'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '34')]; + dmp.diff_cleanupEfficiency(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abxyzcd'), new Diff(DIFF_INSERT, '12xyz34')], diffs, 'diff_cleanupEfficiency: Four-edit elimination.'); + + diffs = [new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'x'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '34')]; + dmp.diff_cleanupEfficiency(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'xcd'), new Diff(DIFF_INSERT, '12x34')], diffs, 'diff_cleanupEfficiency: Three-edit elimination.'); + + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'xy'), new Diff(DIFF_INSERT, '34'), new Diff(DIFF_EQUAL, 'z'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '56')]; + dmp.diff_cleanupEfficiency(diffs); + Expect.listEquals([new Diff(DIFF_DELETE, 'abxyzcd'), new Diff(DIFF_INSERT, '12xy34z56')], diffs, 'diff_cleanupEfficiency: Backpass elimination.'); + + dmp.Diff_EditCost = 5; + diffs = [new Diff(DIFF_DELETE, 'ab'), new Diff(DIFF_INSERT, '12'), new Diff(DIFF_EQUAL, 'wxyz'), new Diff(DIFF_DELETE, 'cd'), new Diff(DIFF_INSERT, '34')]; + dmp.diff_cleanupEfficiency(diffs); + 
Expect.listEquals([new Diff(DIFF_DELETE, 'abwxyzcd'), new Diff(DIFF_INSERT, '12wxyz34')], diffs, 'diff_cleanupEfficiency: High cost elimination.'); + dmp.Diff_EditCost = 4; +} + +void testDiffPrettyHtml() { + // Pretty print. + List diffs = [new Diff(DIFF_EQUAL, 'a\n'), new Diff(DIFF_DELETE, 'b'), new Diff(DIFF_INSERT, 'c&d')]; + Expect.equals('
<B>b</B>c&d', dmp.diff_prettyHtml(diffs), 'diff_prettyHtml:'); +} + +void testDiffText() { + // Compute the source and destination texts. + List diffs = [new Diff(DIFF_EQUAL, 'jump'), new Diff(DIFF_DELETE, 's'), new Diff(DIFF_INSERT, 'ed'), new Diff(DIFF_EQUAL, ' over '), new Diff(DIFF_DELETE, 'the'), new Diff(DIFF_INSERT, 'a'), new Diff(DIFF_EQUAL, ' lazy')]; + Expect.equals('jumps over the lazy', dmp.diff_text1(diffs), 'diff_text1:'); + Expect.equals('jumped over a lazy', dmp.diff_text2(diffs), 'diff_text2:'); +} + +void testDiffDelta() { + // Convert a diff into delta string. + List diffs = [new Diff(DIFF_EQUAL, 'jump'), new Diff(DIFF_DELETE, 's'), new Diff(DIFF_INSERT, 'ed'), new Diff(DIFF_EQUAL, ' over '), new Diff(DIFF_DELETE, 'the'), new Diff(DIFF_INSERT, 'a'), new Diff(DIFF_EQUAL, ' lazy'), new Diff(DIFF_INSERT, 'old dog')]; + String text1 = dmp.diff_text1(diffs); + Expect.equals('jumps over the lazy', text1, 'diff_text1: Base text.'); + + String delta = dmp.diff_toDelta(diffs); + Expect.equals('=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog', delta, 'diff_toDelta:'); + + // Convert delta string into a diff. + Expect.listEquals(diffs, dmp.diff_fromDelta(text1, delta), 'diff_fromDelta: Normal.'); + + // Generates error (19 < 20). + Expect.throws(() => dmp.diff_fromDelta('${text1}x', delta), null, 'diff_fromDelta: Too long.'); + + // Generates error (19 > 18). + Expect.throws(() => dmp.diff_fromDelta(text1.substring(1), delta), null, 'diff_fromDelta: Too short.'); + + // Generates error (%c3%xy invalid Unicode). + Expect.throws(() => dmp.diff_fromDelta('', '+%c3%xy'), null, 'diff_fromDelta: Invalid character.'); + + // Test deltas with special characters. 
+ diffs = [new Diff(DIFF_EQUAL, '\u0680 \x00 \t %'), new Diff(DIFF_DELETE, '\u0681 \x01 \n ^'), new Diff(DIFF_INSERT, '\u0682 \x02 \\ |')]; + text1 = dmp.diff_text1(diffs); + Expect.equals('\u0680 \x00 \t %\u0681 \x01 \n ^', text1, 'diff_text1: Unicode text.'); + + delta = dmp.diff_toDelta(diffs); + Expect.equals('=7\t-7\t+%DA%82 %02 %5C %7C', delta, 'diff_toDelta: Unicode.'); + + Expect.listEquals(diffs, dmp.diff_fromDelta(text1, delta), 'diff_fromDelta: Unicode.'); + + // Verify pool of unchanged characters. + diffs = [new Diff(DIFF_INSERT, 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + \$ , # ')]; + String text2 = dmp.diff_text2(diffs); + Expect.equals('A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + \$ , # ', text2, 'diff_text2: Unchanged characters.'); + + delta = dmp.diff_toDelta(diffs); + Expect.equals('+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + \$ , # ', delta, 'diff_toDelta: Unchanged characters.'); + + // Convert delta string into a diff. + Expect.listEquals(diffs, dmp.diff_fromDelta('', delta), 'diff_fromDelta: Unchanged characters.'); +} + +void testDiffXIndex() { + // Translate a location in text1 to text2. 
+ List diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, '1234'), new Diff(DIFF_EQUAL, 'xyz')]; + Expect.equals(5, dmp.diff_xIndex(diffs, 2), 'diff_xIndex: Translation on equality.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, '1234'), new Diff(DIFF_EQUAL, 'xyz')]; + Expect.equals(1, dmp.diff_xIndex(diffs, 3), 'diff_xIndex: Translation on deletion.'); +} + +void testDiffLevenshtein() { + List diffs = [new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_INSERT, '1234'), new Diff(DIFF_EQUAL, 'xyz')]; + Expect.equals(4, dmp.diff_levenshtein(diffs), 'Levenshtein with trailing equality.'); + + diffs = [new Diff(DIFF_EQUAL, 'xyz'), new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_INSERT, '1234')]; + Expect.equals(4, dmp.diff_levenshtein(diffs), 'Levenshtein with leading equality.'); + + diffs = [new Diff(DIFF_DELETE, 'abc'), new Diff(DIFF_EQUAL, 'xyz'), new Diff(DIFF_INSERT, '1234')]; + Expect.equals(7, dmp.diff_levenshtein(diffs), 'Levenshtein with middle equality.'); +} + +void testDiffBisect() { + // Normal. + String a = 'cat'; + String b = 'map'; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + List diffs = [new Diff(DIFF_DELETE, 'c'), new Diff(DIFF_INSERT, 'm'), new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 't'), new Diff(DIFF_INSERT, 'p')]; + // One year should be sufficient. + Date deadline = new Date.now().add(new Duration(days : 365)); + Expect.listEquals(diffs, dmp._diff_bisect(a, b, deadline), 'diff_bisect: Normal.'); + + // Timeout. + diffs = [new Diff(DIFF_DELETE, 'cat'), new Diff(DIFF_INSERT, 'map')]; + // Set deadline to one year ago. + deadline = new Date.now().subtract(new Duration(days : 365)); + Expect.listEquals(diffs, dmp._diff_bisect(a, b, deadline), 'diff_bisect: Timeout.'); +} + +void testDiffMain() { + // Perform a trivial diff. 
+ List diffs = []; + Expect.listEquals(diffs, dmp.diff_main('', '', false), 'diff_main: Null case.'); + + diffs = [new Diff(DIFF_EQUAL, 'abc')]; + Expect.listEquals(diffs, dmp.diff_main('abc', 'abc', false), 'diff_main: Equality.'); + + diffs = [new Diff(DIFF_EQUAL, 'ab'), new Diff(DIFF_INSERT, '123'), new Diff(DIFF_EQUAL, 'c')]; + Expect.listEquals(diffs, dmp.diff_main('abc', 'ab123c', false), 'diff_main: Simple insertion.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, '123'), new Diff(DIFF_EQUAL, 'bc')]; + Expect.listEquals(diffs, dmp.diff_main('a123bc', 'abc', false), 'diff_main: Simple deletion.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_INSERT, '123'), new Diff(DIFF_EQUAL, 'b'), new Diff(DIFF_INSERT, '456'), new Diff(DIFF_EQUAL, 'c')]; + Expect.listEquals(diffs, dmp.diff_main('abc', 'a123b456c', false), 'diff_main: Two insertions.'); + + diffs = [new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, '123'), new Diff(DIFF_EQUAL, 'b'), new Diff(DIFF_DELETE, '456'), new Diff(DIFF_EQUAL, 'c')]; + Expect.listEquals(diffs, dmp.diff_main('a123b456c', 'abc', false), 'diff_main: Two deletions.'); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0.0; + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, 'b')]; + Expect.listEquals(diffs, dmp.diff_main('a', 'b', false), 'diff_main: Simple case #1.'); + + diffs = [new Diff(DIFF_DELETE, 'Apple'), new Diff(DIFF_INSERT, 'Banana'), new Diff(DIFF_EQUAL, 's are a'), new Diff(DIFF_INSERT, 'lso'), new Diff(DIFF_EQUAL, ' fruit.')]; + Expect.listEquals(diffs, dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false), 'diff_main: Simple case #2.'); + + diffs = [new Diff(DIFF_DELETE, 'a'), new Diff(DIFF_INSERT, '\u0680'), new Diff(DIFF_EQUAL, 'x'), new Diff(DIFF_DELETE, '\t'), new Diff(DIFF_INSERT, '\000')]; + Expect.listEquals(diffs, dmp.diff_main('ax\t', '\u0680x\000', false), 'diff_main: Simple case #3.'); + + diffs = [new Diff(DIFF_DELETE, '1'), new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, 'y'), new Diff(DIFF_EQUAL, 'b'), new Diff(DIFF_DELETE, '2'), new Diff(DIFF_INSERT, 'xab')]; + Expect.listEquals(diffs, dmp.diff_main('1ayb2', 'abxab', false), 'diff_main: Overlap #1.'); + + diffs = [new Diff(DIFF_INSERT, 'xaxcx'), new Diff(DIFF_EQUAL, 'abc'), new Diff(DIFF_DELETE, 'y')]; + Expect.listEquals(diffs, dmp.diff_main('abcy', 'xaxcxabc', false), 'diff_main: Overlap #2.'); + + diffs = [new Diff(DIFF_DELETE, 'ABCD'), new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_DELETE, '='), new Diff(DIFF_INSERT, '-'), new Diff(DIFF_EQUAL, 'bcd'), new Diff(DIFF_DELETE, '='), new Diff(DIFF_INSERT, '-'), new Diff(DIFF_EQUAL, 'efghijklmnopqrs'), new Diff(DIFF_DELETE, 'EFGHIJKLMNOefg')]; + Expect.listEquals(diffs, dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false), 'diff_main: Overlap #3.'); + + diffs = [new Diff(DIFF_INSERT, ' '), new Diff(DIFF_EQUAL, 'a'), new Diff(DIFF_INSERT, 'nd'), new Diff(DIFF_EQUAL, ' [[Pennsylvania]]'), new Diff(DIFF_DELETE, ' and [[New')]; + Expect.listEquals(diffs, dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false), 'diff_main: Large equality.'); + + 
dmp.Diff_Timeout = 0.1; // 100ms + String a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n'; + String b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n'; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = '$a$a'; + b = '$b$b'; + } + Date startTime = new Date.now(); + dmp.diff_main(a, b); + Date endTime = new Date.now(); + double elapsedSeconds = endTime.difference(startTime).inMilliseconds / 1000; + // Test that we took at least the timeout period. + Expect.isTrue(dmp.Diff_Timeout <= elapsedSeconds, 'diff_main: Timeout min.'); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // ************* + // Dart Note: Currently (2011) Dart's performance is out of control, so this + // diff takes 3.5 seconds on a 0.1 second timeout. Commented out. + // ************* + // Expect.isTrue(dmp.Diff_Timeout * 2 > elapsedSeconds, 'diff_main: Timeout max.'); + dmp.Diff_Timeout = 0.0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; + b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n'; + Expect.listEquals(dmp.diff_main(a, b, true), dmp.diff_main(a, b, false), 'diff_main: Simple line-mode.'); + + a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'; + b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij'; + Expect.listEquals(dmp.diff_main(a, b, true), dmp.diff_main(a, b, false), 'diff_main: Single line-mode.'); + + a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; + b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n'; + List texts_linemode = _diff_rebuildtexts(dmp.diff_main(a, b, true)); + List texts_textmode = _diff_rebuildtexts(dmp.diff_main(a, b, false)); + Expect.listEquals(texts_textmode, texts_linemode, 'diff_main: Overlap line-mode.'); + + // Test null inputs. + Expect.throws(() => dmp.diff_main(null, null), null, 'diff_main: Null inputs.'); +} + + +// MATCH TEST FUNCTIONS + +void testMatchAlphabet() { + void assertMapEquals(Map a, Map b, String error_msg) { + Expect.setEquals(a.keys, b.keys, error_msg); + for (var x in a.keys) { + Expect.equals(a[x], b[x], "$error_msg [Key: $x]"); + } + } + + // Initialise the bitmasks for Bitap. 
+ Map bitmask = {'a': 4, 'b': 2, 'c': 1}; + assertMapEquals(bitmask, dmp._match_alphabet('abc'), 'match_alphabet: Unique.'); + + bitmask = {'a': 37, 'b': 18, 'c': 8}; + assertMapEquals(bitmask, dmp._match_alphabet('abcaba'), 'match_alphabet: Duplicates.'); +} + +void testMatchBitap() { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5; + Expect.equals(5, dmp._match_bitap('abcdefghijk', 'fgh', 5), 'match_bitap: Exact match #1.'); + + Expect.equals(5, dmp._match_bitap('abcdefghijk', 'fgh', 0), 'match_bitap: Exact match #2.'); + + Expect.equals(4, dmp._match_bitap('abcdefghijk', 'efxhi', 0), 'match_bitap: Fuzzy match #1.'); + + Expect.equals(2, dmp._match_bitap('abcdefghijk', 'cdefxyhijk', 5), 'match_bitap: Fuzzy match #2.'); + + Expect.equals(-1, dmp._match_bitap('abcdefghijk', 'bxy', 1), 'match_bitap: Fuzzy match #3.'); + + Expect.equals(2, dmp._match_bitap('123456789xx0', '3456789x0', 2), 'match_bitap: Overflow.'); + + Expect.equals(0, dmp._match_bitap('abcdef', 'xxabc', 4), 'match_bitap: Before start match.'); + + Expect.equals(3, dmp._match_bitap('abcdef', 'defyy', 4), 'match_bitap: Beyond end match.'); + + Expect.equals(0, dmp._match_bitap('abcdef', 'xabcdefy', 0), 'match_bitap: Oversized pattern.'); + + dmp.Match_Threshold = 0.4; + Expect.equals(4, dmp._match_bitap('abcdefghijk', 'efxyhi', 1), 'match_bitap: Threshold #1.'); + + dmp.Match_Threshold = 0.3; + Expect.equals(-1, dmp._match_bitap('abcdefghijk', 'efxyhi', 1), 'match_bitap: Threshold #2.'); + + dmp.Match_Threshold = 0.0; + Expect.equals(1, dmp._match_bitap('abcdefghijk', 'bcdef', 1), 'match_bitap: Threshold #3.'); + + dmp.Match_Threshold = 0.5; + Expect.equals(0, dmp._match_bitap('abcdexyzabcde', 'abccde', 3), 'match_bitap: Multiple select #1.'); + + Expect.equals(8, dmp._match_bitap('abcdexyzabcde', 'abccde', 5), 'match_bitap: Multiple select #2.'); + + dmp.Match_Distance = 10; // Strict location. 
+ Expect.equals(-1, dmp._match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24), 'match_bitap: Distance test #1.'); + + Expect.equals(0, dmp._match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdxxefg', 1), 'match_bitap: Distance test #2.'); + + dmp.Match_Distance = 1000; // Loose location. + Expect.equals(0, dmp._match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24), 'match_bitap: Distance test #3.'); +} + +void testMatchMain() { + // Full match. + Expect.equals(0, dmp.match_main('abcdef', 'abcdef', 1000), 'match_main: Equality.'); + + Expect.equals(-1, dmp.match_main('', 'abcdef', 1), 'match_main: Null text.'); + + Expect.equals(3, dmp.match_main('abcdef', '', 3), 'match_main: Null pattern.'); + + Expect.equals(3, dmp.match_main('abcdef', 'de', 3), 'match_main: Exact match.'); + + Expect.equals(3, dmp.match_main('abcdef', 'defy', 4), 'match_main: Beyond end match.'); + + Expect.equals(0, dmp.match_main('abcdef', 'abcdefy', 0), 'match_main: Oversized pattern.'); + + dmp.Match_Threshold = 0.7; + Expect.equals(4, dmp.match_main('I am the very model of a modern major general.', ' that berry ', 5), 'match_main: Complex match.'); + dmp.Match_Threshold = 0.5; + + // Test null inputs. + Expect.throws(() => dmp.match_main(null, null, 0), null, 'match_main: Null inputs.'); +} + + +// PATCH TEST FUNCTIONS + + +void testPatchObj() { + // Patch Object. 
+ Patch p = new Patch(); + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = [new Diff(DIFF_EQUAL, 'jump'), new Diff(DIFF_DELETE, 's'), new Diff(DIFF_INSERT, 'ed'), new Diff(DIFF_EQUAL, ' over '), new Diff(DIFF_DELETE, 'the'), new Diff(DIFF_INSERT, 'a'), new Diff(DIFF_EQUAL, '\nlaz')]; + String strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n'; + Expect.equals(strp, p.toString(), 'Patch: toString.'); +} + +void testPatchFromText() { + Expect.isTrue(dmp.patch_fromText('').isEmpty, 'patch_fromText: #0.'); + + String strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n'; + Expect.equals(strp, dmp.patch_fromText(strp)[0].toString(), 'patch_fromText: #1.'); + + Expect.equals('@@ -1 +1 @@\n-a\n+b\n', dmp.patch_fromText('@@ -1 +1 @@\n-a\n+b\n')[0].toString(), 'patch_fromText: #2.'); + + Expect.equals('@@ -1,3 +0,0 @@\n-abc\n', dmp.patch_fromText('@@ -1,3 +0,0 @@\n-abc\n')[0].toString(), 'patch_fromText: #3.'); + + Expect.equals('@@ -0,0 +1,3 @@\n+abc\n', dmp.patch_fromText('@@ -0,0 +1,3 @@\n+abc\n')[0].toString(), 'patch_fromText: #4.'); + + // Generates error. 
+ Expect.throws(() => dmp.patch_fromText('Bad\nPatch\n'), null, 'patch_fromText: #5.'); +} + +void testPatchToText() { + String strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n'; + List patches; + patches = dmp.patch_fromText(strp); + Expect.equals(strp, dmp.patch_toText(patches), 'patch_toText: Single.'); + + strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n'; + patches = dmp.patch_fromText(strp); + Expect.equals(strp, dmp.patch_toText(patches), 'patch_toText: Dual.'); +} + +void testPatchAddContext() { + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[0]; + dmp._patch_addContext(p, 'The quick brown fox jumps over the lazy dog.'); + Expect.equals('@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n', p.toString(), 'patch_addContext: Simple case.'); + + p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[0]; + dmp._patch_addContext(p, 'The quick brown fox jumps.'); + Expect.equals('@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n', p.toString(), 'patch_addContext: Not enough trailing context.'); + + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[0]; + dmp._patch_addContext(p, 'The quick brown fox jumps.'); + Expect.equals('@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n', p.toString(), 'patch_addContext: Not enough leading context.'); + + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[0]; + dmp._patch_addContext(p, 'The quick brown fox jumps. The quick brown fox crashes.'); + Expect.equals('@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n', p.toString(), 'patch_addContext: Ambiguity.'); +} + +void testPatchMake() { + List patches; + patches = dmp.patch_make('', ''); + Expect.equals('', dmp.patch_toText(patches), 'patch_make: Null case.'); + + String text1 = 'The quick brown fox jumps over the lazy dog.'; + String text2 = 'That quick brown fox jumped over a lazy dog.'; + String expectedPatch = '@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n'; + // The second patch must be '-21,17 +21,18', not '-22,17 +21,18' due to rolling context. + patches = dmp.patch_make(text2, text1); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Text2+Text1 inputs.'); + + expectedPatch = '@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n'; + patches = dmp.patch_make(text1, text2); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Text1+Text2 inputs.'); + + List diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Diff input.'); + + patches = dmp.patch_make(text1, diffs); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Text1+Diff inputs.'); + + patches = dmp.patch_make(text1, text2, diffs); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Text1+Text2+Diff inputs (deprecated).'); + + patches = dmp.patch_make('`1234567890-=[]\\;\',./', '~!@#\$%^&*()_+{}|:"<>?'); + Expect.equals('@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!@#\$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n', dmp.patch_toText(patches), 'patch_toText: Character encoding.'); + + diffs = [new Diff(DIFF_DELETE, '`1234567890-=[]\\;\',./'), new Diff(DIFF_INSERT, '~!@#\$%^&*()_+{}|:"<>?')]; + Expect.listEquals(diffs, dmp.patch_fromText('@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!@#\$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n')[0].diffs, 'patch_fromText: Character decoding.'); + 
+ final sb = new StringBuffer(); + for (int x = 0; x < 100; x++) { + sb.add('abcdef'); + } + text1 = sb.toString(); + text2 = '${text1}123'; + expectedPatch = '@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n'; + patches = dmp.patch_make(text1, text2); + Expect.equals(expectedPatch, dmp.patch_toText(patches), 'patch_make: Long string with repeats.'); + + // Test null inputs. + Expect.throws(() => dmp.patch_make(null), null, 'patch_make: Null inputs.'); +} + +void testPatchSplitMax() { + // Assumes that Match_MaxBits is 32. + List patches; + patches = dmp.patch_make('abcdefghijklmnopqrstuvwxyz01234567890', 'XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0'); + dmp.patch_splitMax(patches); + Expect.equals('@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n', dmp.patch_toText(patches), 'patch_splitMax: #1.'); + + patches = dmp.patch_make('abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz', 'abcdefuvwxyz'); + String oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + Expect.equals(oldToText, dmp.patch_toText(patches), 'patch_splitMax: #2.'); + + patches = dmp.patch_make('1234567890123456789012345678901234567890123456789012345678901234567890', 'abc'); + dmp.patch_splitMax(patches); + Expect.equals('@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n', dmp.patch_toText(patches), 'patch_splitMax: #3.'); + + patches = dmp.patch_make('abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1', 'abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1'); + dmp.patch_splitMax(patches); + Expect.equals('@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n 
bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n', dmp.patch_toText(patches), 'patch_splitMax: #4.'); +} + +void testPatchAddPadding() { + List patches; + patches = dmp.patch_make('', 'test'); + Expect.equals('@@ -0,0 +1,4 @@\n+test\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges full.'); + dmp.patch_addPadding(patches); + Expect.equals('@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges full.'); + + patches = dmp.patch_make('XY', 'XtestY'); + Expect.equals('@@ -1,2 +1,6 @@\n X\n+test\n Y\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges partial.'); + dmp.patch_addPadding(patches); + Expect.equals('@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges partial.'); + + patches = dmp.patch_make('XXXXYYYY', 'XXXXtestYYYY'); + Expect.equals('@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges none.'); + dmp.patch_addPadding(patches); + Expect.equals('@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches), 'patch_addPadding: Both edges none.'); +} + +void testPatchApply() { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5; + dmp.Patch_DeleteThreshold = 0.5; + List patches; + patches = dmp.patch_make('', ''); + List results = dmp.patch_apply(patches, 'Hello world.'); + List boolArray = results[1]; + String resultStr = '${results[0]}\t${boolArray.length}'; + Expect.equals('Hello world.\t0', resultStr, 'patch_apply: Null case.'); + + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', 'That quick brown fox jumped over a lazy dog.'); + results = dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('That quick brown fox jumped over a lazy dog.\ttrue\ttrue', resultStr, 'patch_apply: Exact match.'); + + results = 
dmp.patch_apply(patches, 'The quick red rabbit jumps over the tired tiger.'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('That quick red rabbit jumped over a tired tiger.\ttrue\ttrue', resultStr, 'patch_apply: Partial match.'); + + results = dmp.patch_apply(patches, 'I am the very model of a modern major general.'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('I am the very model of a modern major general.\tfalse\tfalse', resultStr, 'patch_apply: Failed match.'); + + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('xabcy\ttrue\ttrue', resultStr, 'patch_apply: Big delete, small change.'); + + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x12345678901234567890---------------++++++++++---------------12345678901234567890y'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue', resultStr, 'patch_apply: Big delete, big change 1.'); + + dmp.Patch_DeleteThreshold = 0.6; + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x12345678901234567890---------------++++++++++---------------12345678901234567890y'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('xabcy\ttrue\ttrue', resultStr, 'patch_apply: Big delete, big change 2.'); + 
dmp.Patch_DeleteThreshold = 0.5; + + // Compensate for failed patch. + dmp.Match_Threshold = 0.0; + dmp.Match_Distance = 0; + patches = dmp.patch_make('abcdefghijklmnopqrstuvwxyz--------------------1234567890', 'abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890'); + results = dmp.patch_apply(patches, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}\t${boolArray[1]}'; + Expect.equals('ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue', resultStr, 'patch_apply: Compensate for failed patch.'); + dmp.Match_Threshold = 0.5; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make('', 'test'); + String patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ''); + Expect.equals(patchStr, dmp.patch_toText(patches), 'patch_apply: No side effects.'); + + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', 'Woof'); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.'); + Expect.equals(patchStr, dmp.patch_toText(patches), 'patch_apply: No side effects with major delete.'); + + patches = dmp.patch_make('', 'test'); + results = dmp.patch_apply(patches, ''); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}'; + Expect.equals('test\ttrue', resultStr, 'patch_apply: Edge exact match.'); + + patches = dmp.patch_make('XY', 'XtestY'); + results = dmp.patch_apply(patches, 'XY'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}'; + Expect.equals('XtestY\ttrue', resultStr, 'patch_apply: Near edge exact match.'); + + patches = dmp.patch_make('y', 'y123'); + results = dmp.patch_apply(patches, 'x'); + boolArray = results[1]; + resultStr = '${results[0]}\t${boolArray[0]}'; + Expect.equals('x123\ttrue', resultStr, 'patch_apply: Edge partial match.'); +} + +// Run each test. 
+// TODO: Use the Dart unit test framework (once it is published). +main() { + dmp = new DiffMatchPatch(); + + testDiffCommonPrefix(); + testDiffCommonSuffix(); + testDiffCommonOverlap(); + testDiffHalfmatch(); + testDiffLinesToChars(); + testDiffCharsToLines(); + testDiffCleanupMerge(); + testDiffCleanupSemanticLossless(); + testDiffCleanupSemantic(); + testDiffCleanupEfficiency(); + testDiffPrettyHtml(); + testDiffText(); + testDiffDelta(); + testDiffXIndex(); + testDiffLevenshtein(); + testDiffBisect(); + testDiffMain(); + + testMatchAlphabet(); + testMatchBitap(); + testMatchMain(); + + testPatchObj(); + testPatchFromText(); + testPatchToText(); + testPatchAddContext(); + testPatchMake(); + testPatchSplitMax(); + testPatchAddPadding(); + testPatchApply(); + + print('All tests passed.'); +} diff --git a/dart/PatchClass.dart b/dart/PatchClass.dart new file mode 100644 index 0000000..9b9d404 --- /dev/null +++ b/dart/PatchClass.dart @@ -0,0 +1,78 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +part of DiffMatchPatch; + +/** + * Class representing one patch operation. + */ +class Patch { + List diffs; + int start1; + int start2; + int length1 = 0; + int length2 = 0; + + /** + * Constructor. Initializes with an empty list of diffs. + */ + Patch() { + this.diffs = []; + } + + /** + * Emulate GNU diff's format. 
+ * Header: @@ -382,8 +481,9 @@ + * Indices are printed as 1-based, not 0-based. + * Returns the GNU diff string. + */ + String toString() { + String coords1, coords2; + if (this.length1 == 0) { + coords1 = '${this.start1},0'; + } else if (this.length1 == 1) { + coords1 = (this.start1 + 1).toString(); + } else { + coords1 = '${this.start1 + 1},${this.length1}'; + } + if (this.length2 == 0) { + coords2 = '${this.start2},0'; + } else if (this.length2 == 1) { + coords2 = (this.start2 + 1).toString(); + } else { + coords2 = '${this.start2 + 1},${this.length2}'; + } + final text = new StringBuffer('@@ -$coords1 +$coords2 @@\n'); + // Escape the body of the patch with %xx notation. + for (Diff aDiff in this.diffs) { + switch (aDiff.operation) { + case DIFF_INSERT: + text.add('+'); + break; + case DIFF_DELETE: + text.add('-'); + break; + case DIFF_EQUAL: + text.add(' '); + break; + } + text.add(encodeUri(aDiff.text)).add('\n'); + } + return text.toString().replaceAll('%20', ' '); + } +} diff --git a/dart/README.txt b/dart/README.txt new file mode 100644 index 0000000..2f05cc1 --- /dev/null +++ b/dart/README.txt @@ -0,0 +1,13 @@ +Warning: + +The Dart language is currently (as of December 2011) a Technology Preview. +Until Dart has its first official release, this port of Diff Match Patch may +change from version to version to take advantage of new Dart features. + +Therefore if you use this Diff Match Patch library there may be API changes +between versions and you may need to make minor updates to your code. + +For example, if Dart adds enums, then DIFF_INSERT/DIFF_DELETE/DIFF_EQUAL will +become an enum. + +-- Neil \ No newline at end of file diff --git a/demos/demo_diff.html b/demos/demo_diff.html new file mode 100644 index 0000000..97a11ce --- /dev/null +++ b/demos/demo_diff.html @@ -0,0 +1,87 @@ + + + + Diff, Match and Patch: Demo of Diff + + + + +

Diff, Match and Patch

+

Demo of Diff

+ +

Diff takes two texts and finds the differences. This implementation works on a character by character basis. +The result of any diff may contain 'chaff', irrelevant small commonalities which complicate the output. +A post-diff cleanup algorithm factors out these trivial commonalities.

+ + + +
+ + + +
+

Text Version 1:

+
+

Text Version 2:

+
+ +

Diff timeout:

+

seconds
+If the mapping phase of the diff computation takes longer than this, then the computation +is truncated and the best solution to date is returned. While guaranteed to be correct, +it may not be optimal. A timeout of '0' allows for unlimited computation.

+ +

Post-diff cleanup:

+
+
+
+
Increase human readability by factoring out commonalities which are likely to be +coincidental.
+
+, +edit cost: +
Increase computational efficiency by factoring out short commonalities which are +not worth the overhead. The larger the edit cost, the more aggressive the cleanup.
+
+
+
Raw output.
+
+ +

+
+ +
+ +
+Back to Diff, Match and Patch + + + diff --git a/demos/demo_match.html b/demos/demo_match.html new file mode 100644 index 0000000..249467a --- /dev/null +++ b/demos/demo_match.html @@ -0,0 +1,92 @@ + + + + Diff, Match and Patch: Demo of Match + + + + +

Diff, Match and Patch

+

Demo of Match

+ +

Match looks for a pattern within a larger text. +This implementation of match is fuzzy, meaning it can find a match even if the +pattern contains errors and doesn't exactly match what is found in the text. +This implementation also accepts an expected location, near which the match should be found. +The candidate matches are scored based on a) the number of spelling differences between the +pattern and the text and b) the distance between the candidate match and the expected location. +The match distance parameter sets the relative importance of these two metrics.

+ +
+

Text:

+ + +

Fuzzy pattern:

+


+Approximate pattern to search for in the text. Due to limitations of the Bitap algorithm, the pattern has a limited length.

+ +

Fuzzy location:

+


+Approximately where in the text is the pattern expected to be found?

+ +

Match distance:

+


+Determines how close the match must be to the fuzzy location (specified above). An exact letter match which is 'distance' characters away from the fuzzy location would +score as a complete mismatch. A distance of '0' requires the match be at the exact location specified, a distance of '1000' +would require a perfect match to be within 800 characters of the fuzzy location to be found using a 0.8 threshold.

+ +

Match threshold:

+


+At what point does the match algorithm give up? A threshold of '0.0' requires a perfect match (of both letters and location), a threshold of '1.0' would match anything.

+ + +
+ +
+ +
+ + + +
+Back to Diff, Match and Patch + + + diff --git a/demos/demo_patch.html b/demos/demo_patch.html new file mode 100644 index 0000000..845808a --- /dev/null +++ b/demos/demo_patch.html @@ -0,0 +1,121 @@ + + + + Diff, Match and Patch: Demo of Patch + + + + +

Diff, Match and Patch

+

Demo of Patch

+ +

Two texts can be diffed against each other, generating a list of patches. +These patches can then be applied against a third text. If the third text has edits of its own, this version of patch +will apply its changes on a best-effort basis, reporting which patches succeeded and which failed.

+ +

In this scenario Shakespeare wrote Hamlet in Early Modern English, the source document. Then two derivative +works were created. One is Hamlet updated to Modern English. The other is a Star Trek parody in Early Modern English. +This demonstration creates a list of patches between the source and the Modern English version. Then it +applies those patches onto the Star Trek parody, thus creating a Star Trek parody in +Modern English.

+ + + +
+

Shakespeare's copy:

+ + + +
Old Version:
New Version:
+

+
+ +
+ +

Trekkie's copy:

+ + + +
Old Version:
New Version:
+ +
+ +
    +
    + +
    +Back to Diff, Match and Patch + + + + diff --git a/java/name/fraser/neil/plaintext/diff_match_patch.java b/java/name/fraser/neil/plaintext/diff_match_patch.java new file mode 100644 index 0000000..f4ea2a4 --- /dev/null +++ b/java/name/fraser/neil/plaintext/diff_match_patch.java @@ -0,0 +1,2470 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package name.fraser.neil.plaintext; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.net.URLDecoder; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Stack; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + */ + +/** + * Class containing the diff, match and patch methods. + * Also contains the behaviour settings. + */ +public class diff_match_patch { + + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + /** + * Number of seconds to map a diff before giving up (0 for infinity). 
+ */ + public float Diff_Timeout = 1.0f; + /** + * Cost of an empty edit operation in terms of edit characters. + */ + public short Diff_EditCost = 4; + /** + * At what point is no match declared (0.0 = perfection, 1.0 = very loose). + */ + public float Match_Threshold = 0.5f; + /** + * How far to search for a match (0 = exact location, 1000+ = broad match). + * A match this many characters away from the expected location will add + * 1.0 to the score (0.0 is a perfect match). + */ + public int Match_Distance = 1000; + /** + * When deleting a large block of text (over ~64 characters), how close do + * the contents have to be to match the expected contents. (0.0 = perfection, + * 1.0 = very loose). Note that Match_Threshold controls how closely the + * end points of a delete need to match. + */ + public float Patch_DeleteThreshold = 0.5f; + /** + * Chunk size for context length. + */ + public short Patch_Margin = 4; + + /** + * The number of bits in an int. + */ + private short Match_MaxBits = 32; + + /** + * Internal class for returning results from diff_linesToChars(). + * Other less paranoid languages just use a three-element array. + */ + protected static class LinesToCharsResult { + protected String chars1; + protected String chars2; + protected List lineArray; + + protected LinesToCharsResult(String chars1, String chars2, + List lineArray) { + this.chars1 = chars1; + this.chars2 = chars2; + this.lineArray = lineArray; + } + } + + + // DIFF FUNCTIONS + + + /** + * The data structure representing a diff is a Linked list of Diff objects: + * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), + * Diff(Operation.EQUAL, " world.")} + * which means: delete "Hello", add "Goodbye" and keep " world." + */ + public enum Operation { + DELETE, INSERT, EQUAL + } + + /** + * Find the differences between two texts. + * Run a faster, slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. 
+ * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + public LinkedList diff_main(String text1, String text2) { + return diff_main(text1, text2, true); + } + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + public LinkedList diff_main(String text1, String text2, + boolean checklines) { + // Set a deadline by which time the diff must be complete. + long deadline; + if (Diff_Timeout <= 0) { + deadline = Long.MAX_VALUE; + } else { + deadline = System.currentTimeMillis() + (long) (Diff_Timeout * 1000); + } + return diff_main(text1, text2, checklines, deadline); + } + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set DiffTimeout instead. + * @return Linked List of Diff objects. + */ + private LinkedList diff_main(String text1, String text2, + boolean checklines, long deadline) { + // Check for null inputs. + if (text1 == null || text2 == null) { + throw new IllegalArgumentException("Null inputs. (diff_main)"); + } + + // Check for equality (speedup). 
+ LinkedList diffs; + if (text1.equals(text2)) { + diffs = new LinkedList(); + if (text1.length() != 0) { + diffs.add(new Diff(Operation.EQUAL, text1)); + } + return diffs; + } + + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix(text1, text2); + String commonprefix = text1.substring(0, commonlength); + text1 = text1.substring(commonlength); + text2 = text2.substring(commonlength); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(text1, text2); + String commonsuffix = text1.substring(text1.length() - commonlength); + text1 = text1.substring(0, text1.length() - commonlength); + text2 = text2.substring(0, text2.length() - commonlength); + + // Compute the diff on the middle block. + diffs = diff_compute(text1, text2, checklines, deadline); + + // Restore the prefix and suffix. + if (commonprefix.length() != 0) { + diffs.addFirst(new Diff(Operation.EQUAL, commonprefix)); + } + if (commonsuffix.length() != 0) { + diffs.addLast(new Diff(Operation.EQUAL, commonsuffix)); + } + + diff_cleanupMerge(diffs); + return diffs; + } + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private LinkedList diff_compute(String text1, String text2, + boolean checklines, long deadline) { + LinkedList diffs = new LinkedList(); + + if (text1.length() == 0) { + // Just add some text (speedup). + diffs.add(new Diff(Operation.INSERT, text2)); + return diffs; + } + + if (text2.length() == 0) { + // Just delete some text (speedup). 
+ diffs.add(new Diff(Operation.DELETE, text1)); + return diffs; + } + + String longtext = text1.length() > text2.length() ? text1 : text2; + String shorttext = text1.length() > text2.length() ? text2 : text1; + int i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + Operation op = (text1.length() > text2.length()) ? + Operation.DELETE : Operation.INSERT; + diffs.add(new Diff(op, longtext.substring(0, i))); + diffs.add(new Diff(Operation.EQUAL, shorttext)); + diffs.add(new Diff(op, longtext.substring(i + shorttext.length()))); + return diffs; + } + + if (shorttext.length() == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.add(new Diff(Operation.DELETE, text1)); + diffs.add(new Diff(Operation.INSERT, text2)); + return diffs; + } + + // Check to see if the problem can be split in two. + String[] hm = diff_halfMatch(text1, text2); + if (hm != null) { + // A half-match was found, sort out the return data. + String text1_a = hm[0]; + String text1_b = hm[1]; + String text2_a = hm[2]; + String text2_b = hm[3]; + String mid_common = hm[4]; + // Send both pairs off for separate processing. + LinkedList diffs_a = diff_main(text1_a, text2_a, + checklines, deadline); + LinkedList diffs_b = diff_main(text1_b, text2_b, + checklines, deadline); + // Merge the results. + diffs = diffs_a; + diffs.add(new Diff(Operation.EQUAL, mid_common)); + diffs.addAll(diffs_b); + return diffs; + } + + if (checklines && text1.length() > 100 && text2.length() > 100) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); + } + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. 
+ * @return Linked List of Diff objects. + */ + private LinkedList diff_lineMode(String text1, String text2, + long deadline) { + // Scan the text on a line-by-line basis first. + LinesToCharsResult b = diff_linesToChars(text1, text2); + text1 = b.chars1; + text2 = b.chars2; + List linearray = b.lineArray; + + LinkedList diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.add(new Diff(Operation.EQUAL, "")); + int count_delete = 0; + int count_insert = 0; + String text_delete = ""; + String text_insert = ""; + ListIterator pointer = diffs.listIterator(); + Diff thisDiff = pointer.next(); + while (thisDiff != null) { + switch (thisDiff.operation) { + case INSERT: + count_insert++; + text_insert += thisDiff.text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff.text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + pointer.previous(); + for (int j = 0; j < count_delete + count_insert; j++) { + pointer.previous(); + pointer.remove(); + } + for (Diff newDiff : diff_main(text_delete, text_insert, false, + deadline)) { + pointer.add(newDiff); + } + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + break; + } + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + diffs.removeLast(); // Remove the dummy entry at the end. + + return diffs; + } + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. 
+ * @param text2 New string to be diffed. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ + protected LinkedList diff_bisect(String text1, String text2, + long deadline) { + // Cache the text lengths to prevent multiple calls. + int text1_length = text1.length(); + int text2_length = text2.length(); + int max_d = (text1_length + text2_length + 1) / 2; + int v_offset = max_d; + int v_length = 2 * max_d; + int[] v1 = new int[v_length]; + int[] v2 = new int[v_length]; + for (int x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + boolean front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for (int d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (System.currentTimeMillis() > deadline) { + break; + } + + // Walk the front path one step. + for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + int k1_offset = v_offset + k1; + int x1; + if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int y1 = x1 - k1; + while (x1 < text1_length && y1 < text2_length + && text1.charAt(x1) == text2.charAt(y1)) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + int k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. 
+ return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. + for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + int k2_offset = v_offset + k2; + int x2; + if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + int y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length + && text1.charAt(text1_length - x2 - 1) + == text2.charAt(text2_length - y2 - 1)) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + int k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + int x1 = v1[k1_offset]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected. + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + LinkedList diffs = new LinkedList(); + diffs.add(new Diff(Operation.DELETE, text1)); + diffs.add(new Diff(Operation.INSERT, text2)); + return diffs; + } + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. 
+ */ + private LinkedList diff_bisectSplit(String text1, String text2, + int x, int y, long deadline) { + String text1a = text1.substring(0, x); + String text2a = text2.substring(0, y); + String text1b = text1.substring(x); + String text2b = text2.substring(y); + + // Compute both diffs serially. + LinkedList diffs = diff_main(text1a, text2a, false, deadline); + LinkedList diffsb = diff_main(text1b, text2b, false, deadline); + + diffs.addAll(diffsb); + return diffs; + } + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return An object containing the encoded text1, the encoded text2 and + * the List of unique strings. The zeroth element of the List of + * unique strings is intentionally blank. + */ + protected LinesToCharsResult diff_linesToChars(String text1, String text2) { + List lineArray = new ArrayList(); + Map lineHash = new HashMap(); + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.add(""); + + String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); + String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); + return new LinesToCharsResult(chars1, chars2, lineArray); + } + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. 
+ */ + private String diff_linesToCharsMunge(String text, List lineArray, + Map lineHash) { + int lineStart = 0; + int lineEnd = -1; + String line; + StringBuilder chars = new StringBuilder(); + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while (lineEnd < text.length() - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = text.substring(lineStart, lineEnd + 1); + lineStart = lineEnd + 1; + + if (lineHash.containsKey(line)) { + chars.append(String.valueOf((char) (int) lineHash.get(line))); + } else { + lineArray.add(line); + lineHash.put(line, lineArray.size() - 1); + chars.append(String.valueOf((char) (lineArray.size() - 1))); + } + } + return chars.toString(); + } + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ + protected void diff_charsToLines(LinkedList diffs, + List lineArray) { + StringBuilder text; + for (Diff diff : diffs) { + text = new StringBuilder(); + for (int y = 0; y < diff.text.length(); y++) { + text.append(lineArray.get(diff.text.charAt(y))); + } + diff.text = text.toString(); + } + } + + /** + * Determine the common prefix of two strings + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ + public int diff_commonPrefix(String text1, String text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + int n = Math.min(text1.length(), text2.length()); + for (int i = 0; i < n; i++) { + if (text1.charAt(i) != text2.charAt(i)) { + return i; + } + } + return n; + } + + /** + * Determine the common suffix of two strings + * @param text1 First string. + * @param text2 Second string. 
+ * @return The number of characters common to the end of each string. + */ + public int diff_commonSuffix(String text1, String text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + int text1_length = text1.length(); + int text2_length = text2.length(); + int n = Math.min(text1_length, text2_length); + for (int i = 1; i <= n; i++) { + if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) { + return i - 1; + } + } + return n; + } + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ + protected int diff_commonOverlap(String text1, String text2) { + // Cache the text lengths to prevent multiple calls. + int text1_length = text1.length(); + int text2_length = text2.length(); + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + if (text1_length > text2_length) { + text1 = text1.substring(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2 = text2.substring(0, text1_length); + } + int text_length = Math.min(text1_length, text2_length); + // Quick check for the worst case. + if (text1.equals(text2)) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while (true) { + String pattern = text1.substring(text_length - length); + int found = text2.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1.substring(text_length - length).equals( + text2.substring(0, length))) { + best = length; + length++; + } + } + } + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ + protected String[] diff_halfMatch(String text1, String text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return null; + } + String longtext = text1.length() > text2.length() ? text1 : text2; + String shorttext = text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return null; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + String[] hm1 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 3) / 4); + // Check again based on the third quarter. + String[] hm2 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 1) / 2); + String[] hm; + if (hm1 == null && hm2 == null) { + return null; + } else if (hm2 == null) { + hm = hm1; + } else if (hm1 == null) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if (text1.length() > text2.length()) { + return hm; + //return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; + } else { + return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]}; + } + } + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. + * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ + private String[] diff_halfMatchI(String longtext, String shorttext, int i) { + // Start with a 1/4 length substring at position i as a seed. + String seed = longtext.substring(i, i + longtext.length() / 4); + int j = -1; + String best_common = ""; + String best_longtext_a = "", best_longtext_b = ""; + String best_shorttext_a = "", best_shorttext_b = ""; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + int prefixLength = diff_commonPrefix(longtext.substring(i), + shorttext.substring(j)); + int suffixLength = diff_commonSuffix(longtext.substring(0, i), + shorttext.substring(0, j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = shorttext.substring(j - suffixLength, j) + + shorttext.substring(j, j + prefixLength); + best_longtext_a = longtext.substring(0, i - suffixLength); + best_longtext_b = longtext.substring(i + prefixLength); + best_shorttext_a = shorttext.substring(0, j - suffixLength); + best_shorttext_b = shorttext.substring(j + prefixLength); + } + } + if (best_common.length() * 2 >= longtext.length()) { + return new String[]{best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common}; + } else { + return null; + } + } + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. 
+ * @param diffs LinkedList of Diff objects. + */ + public void diff_cleanupSemantic(LinkedList diffs) { + if (diffs.isEmpty()) { + return; + } + boolean changes = false; + Stack equalities = new Stack(); // Stack of qualities. + String lastequality = null; // Always equal to equalities.lastElement().text + ListIterator pointer = diffs.listIterator(); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + Diff thisDiff = pointer.next(); + while (thisDiff != null) { + if (thisDiff.operation == Operation.EQUAL) { + // Equality found. + equalities.push(thisDiff); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff.text; + } else { + // An insertion or deletion. + if (thisDiff.operation == Operation.INSERT) { + length_insertions2 += thisDiff.text.length(); + } else { + length_deletions2 += thisDiff.text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (lastequality != null && (lastequality.length() + <= Math.max(length_insertions1, length_deletions1)) + && (lastequality.length() + <= Math.max(length_insertions2, length_deletions2))) { + //System.out.println("Splitting: '" + lastequality + "'"); + // Walk back to offending equality. + while (thisDiff != equalities.lastElement()) { + thisDiff = pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.set(new Diff(Operation.DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.add(new Diff(Operation.INSERT, lastequality)); + + equalities.pop(); // Throw away the equality we just deleted. + if (!equalities.empty()) { + // Throw away the previous equality (it needs to be reevaluated). 
+ equalities.pop(); + } + if (equalities.empty()) { + // There are no previous equalities, walk back to the start. + while (pointer.hasPrevious()) { + pointer.previous(); + } + } else { + // There is a safe equality we can fall back to. + thisDiff = equalities.lastElement(); + while (thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_insertions2 = 0; + length_deletions1 = 0; + length_deletions2 = 0; + lastequality = null; + changes = true; + } + } + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = diffs.listIterator(); + Diff prevDiff = null; + thisDiff = null; + if (pointer.hasNext()) { + prevDiff = pointer.next(); + if (pointer.hasNext()) { + thisDiff = pointer.next(); + } + } + while (thisDiff != null) { + if (prevDiff.operation == Operation.DELETE && + thisDiff.operation == Operation.INSERT) { + String deletion = prevDiff.text; + String insertion = thisDiff.text; + int overlap_length1 = this.diff_commonOverlap(deletion, insertion); + int overlap_length2 = this.diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. Insert an equality and trim the surrounding edits. 
+ pointer.previous(); + pointer.add(new Diff(Operation.EQUAL, + insertion.substring(0, overlap_length1))); + prevDiff.text = + deletion.substring(0, deletion.length() - overlap_length1); + thisDiff.text = insertion.substring(overlap_length1); + // pointer.add inserts the element before the cursor, so there is + // no need to step past the new element. + } + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer.previous(); + pointer.add(new Diff(Operation.EQUAL, + deletion.substring(0, overlap_length2))); + prevDiff.operation = Operation.INSERT; + prevDiff.text = + insertion.substring(0, insertion.length() - overlap_length2); + thisDiff.operation = Operation.DELETE; + thisDiff.text = deletion.substring(overlap_length2); + // pointer.add inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + prevDiff = thisDiff; + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + } + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param diffs LinkedList of Diff objects. + */ + public void diff_cleanupSemanticLossless(LinkedList diffs) { + String equality1, edit, equality2; + String commonString; + int commonOffset; + int score, bestScore; + String bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. + ListIterator pointer = diffs.listIterator(); + Diff prevDiff = pointer.hasNext() ? pointer.next() : null; + Diff thisDiff = pointer.hasNext() ? pointer.next() : null; + Diff nextDiff = pointer.hasNext() ? pointer.next() : null; + // Intentionally ignore the first and last element (don't need checking). 
+ while (nextDiff != null) { + if (prevDiff.operation == Operation.EQUAL && + nextDiff.operation == Operation.EQUAL) { + // This is a single edit surrounded by equalities. + equality1 = prevDiff.text; + edit = thisDiff.text; + equality2 = nextDiff.text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset != 0) { + commonString = edit.substring(edit.length() - commonOffset); + equality1 = equality1.substring(0, equality1.length() - commonOffset); + edit = commonString + edit.substring(0, edit.length() - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, looking for the best fit. + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + bestScore = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + while (edit.length() != 0 && equality2.length() != 0 + && edit.charAt(0) == equality2.charAt(0)) { + equality1 += edit.charAt(0); + edit = edit.substring(1) + equality2.charAt(0); + equality2 = equality2.substring(1); + score = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (!prevDiff.text.equals(bestEquality1)) { + // We have an improvement, save it back to the diff. + if (bestEquality1.length() != 0) { + prevDiff.text = bestEquality1; + } else { + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + pointer.next(); // Walk past nextDiff. 
+ } + thisDiff.text = bestEdit; + if (bestEquality2.length() != 0) { + nextDiff.text = bestEquality2; + } else { + pointer.remove(); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? pointer.next() : null; + } + } + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ + private int diff_cleanupSemanticScore(String one, String two) { + if (one.length() == 0 || two.length() == 0) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + char char1 = one.charAt(one.length() - 1); + char char2 = two.charAt(0); + boolean nonAlphaNumeric1 = !Character.isLetterOrDigit(char1); + boolean nonAlphaNumeric2 = !Character.isLetterOrDigit(char2); + boolean whitespace1 = nonAlphaNumeric1 && Character.isWhitespace(char1); + boolean whitespace2 = nonAlphaNumeric2 && Character.isWhitespace(char2); + boolean lineBreak1 = whitespace1 + && Character.getType(char1) == Character.CONTROL; + boolean lineBreak2 = whitespace2 + && Character.getType(char2) == Character.CONTROL; + boolean blankLine1 = lineBreak1 && BLANKLINEEND.matcher(one).find(); + boolean blankLine2 = lineBreak2 && BLANKLINESTART.matcher(two).find(); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. 
+ return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; + } + + // Define some regex patterns for matching boundaries. + private Pattern BLANKLINEEND + = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL); + private Pattern BLANKLINESTART + = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public void diff_cleanupEfficiency(LinkedList diffs) { + if (diffs.isEmpty()) { + return; + } + boolean changes = false; + Stack equalities = new Stack(); // Stack of equalities. + String lastequality = null; // Always equal to equalities.lastElement().text + ListIterator pointer = diffs.listIterator(); + // Is there an insertion operation before the last equality. + boolean pre_ins = false; + // Is there a deletion operation before the last equality. + boolean pre_del = false; + // Is there an insertion operation after the last equality. + boolean post_ins = false; + // Is there a deletion operation after the last equality. + boolean post_del = false; + Diff thisDiff = pointer.next(); + Diff safeDiff = thisDiff; // The last Diff that is known to be unsplitable. + while (thisDiff != null) { + if (thisDiff.operation == Operation.EQUAL) { + // Equality found. + if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) { + // Candidate found. + equalities.push(thisDiff); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff.text; + } else { + // Not a candidate, and can never become one. + equalities.clear(); + lastequality = null; + safeDiff = thisDiff; + } + post_ins = post_del = false; + } else { + // An insertion or deletion. 
+ if (thisDiff.operation == Operation.DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if (lastequality != null + && ((pre_ins && pre_del && post_ins && post_del) + || ((lastequality.length() < Diff_EditCost / 2) + && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { + //System.out.println("Splitting: '" + lastequality + "'"); + // Walk back to offending equality. + while (thisDiff != equalities.lastElement()) { + thisDiff = pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.set(new Diff(Operation.DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality)); + + equalities.pop(); // Throw away the equality we just deleted. + lastequality = null; + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } else { + if (!equalities.empty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if (equalities.empty()) { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } else { + // There is an equality we can fall back to. + thisDiff = equalities.lastElement(); + } + while (thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + post_ins = post_del = false; + } + + changes = true; + } + } + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + + if (changes) { + diff_cleanupMerge(diffs); + } + } + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects. 
+ */ + public void diff_cleanupMerge(LinkedList diffs) { + diffs.add(new Diff(Operation.EQUAL, "")); // Add a dummy entry at the end. + ListIterator pointer = diffs.listIterator(); + int count_delete = 0; + int count_insert = 0; + String text_delete = ""; + String text_insert = ""; + Diff thisDiff = pointer.next(); + Diff prevEqual = null; + int commonlength; + while (thisDiff != null) { + switch (thisDiff.operation) { + case INSERT: + count_insert++; + text_insert += thisDiff.text; + prevEqual = null; + break; + case DELETE: + count_delete++; + text_delete += thisDiff.text; + prevEqual = null; + break; + case EQUAL: + if (count_delete + count_insert > 1) { + boolean both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + pointer.previous(); // Reverse direction. + while (count_delete-- > 0) { + pointer.previous(); + pointer.remove(); + } + while (count_insert-- > 0) { + pointer.previous(); + pointer.remove(); + } + if (both_types) { + // Factor out any common prefixies. + commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if (pointer.hasPrevious()) { + thisDiff = pointer.previous(); + assert thisDiff.operation == Operation.EQUAL + : "Previous diff should have been an equality."; + thisDiff.text += text_insert.substring(0, commonlength); + pointer.next(); + } else { + pointer.add(new Diff(Operation.EQUAL, + text_insert.substring(0, commonlength))); + } + text_insert = text_insert.substring(commonlength); + text_delete = text_delete.substring(commonlength); + } + // Factor out any common suffixies. 
+ commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + thisDiff = pointer.next(); + thisDiff.text = text_insert.substring(text_insert.length() + - commonlength) + thisDiff.text; + text_insert = text_insert.substring(0, text_insert.length() + - commonlength); + text_delete = text_delete.substring(0, text_delete.length() + - commonlength); + pointer.previous(); + } + } + // Insert the merged records. + if (text_delete.length() != 0) { + pointer.add(new Diff(Operation.DELETE, text_delete)); + } + if (text_insert.length() != 0) { + pointer.add(new Diff(Operation.INSERT, text_insert)); + } + // Step forward to the equality. + thisDiff = pointer.hasNext() ? pointer.next() : null; + } else if (prevEqual != null) { + // Merge this equality with the previous one. + prevEqual.text += thisDiff.text; + pointer.remove(); + thisDiff = pointer.previous(); + pointer.next(); // Forward direction + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + prevEqual = thisDiff; + break; + } + thisDiff = pointer.hasNext() ? pointer.next() : null; + } + if (diffs.getLast().text.length() == 0) { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + /* + * Second pass: look for single edits surrounded on both sides by equalities + * which can be shifted sideways to eliminate an equality. + * e.g: ABAC -> ABAC + */ + boolean changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer = diffs.listIterator(); + Diff prevDiff = pointer.hasNext() ? pointer.next() : null; + thisDiff = pointer.hasNext() ? pointer.next() : null; + Diff nextDiff = pointer.hasNext() ? pointer.next() : null; + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != null) { + if (prevDiff.operation == Operation.EQUAL && + nextDiff.operation == Operation.EQUAL) { + // This is a single edit surrounded by equalities. 
+ if (thisDiff.text.endsWith(prevDiff.text)) { + // Shift the edit over the previous equality. + thisDiff.text = prevDiff.text + + thisDiff.text.substring(0, thisDiff.text.length() + - prevDiff.text.length()); + nextDiff.text = prevDiff.text + nextDiff.text; + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + thisDiff = pointer.next(); // Walk past nextDiff. + nextDiff = pointer.hasNext() ? pointer.next() : null; + changes = true; + } else if (thisDiff.text.startsWith(nextDiff.text)) { + // Shift the edit over the next equality. + prevDiff.text += nextDiff.text; + thisDiff.text = thisDiff.text.substring(nextDiff.text.length()) + + nextDiff.text; + pointer.remove(); // Delete nextDiff. + nextDiff = pointer.hasNext() ? pointer.next() : null; + changes = true; + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? pointer.next() : null; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } + } + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ + public int diff_xIndex(LinkedList diffs, int loc) { + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff = null; + for (Diff aDiff : diffs) { + if (aDiff.operation != Operation.INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != Operation.DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. 
+ lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff != null && lastDiff.operation == Operation.DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); + } + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ + public String diff_prettyHtml(LinkedList diffs) { + StringBuilder html = new StringBuilder(); + for (Diff aDiff : diffs) { + String text = aDiff.text.replace("&", "&").replace("<", "<") + .replace(">", ">").replace("\n", "¶
    "); + switch (aDiff.operation) { + case INSERT: + html.append("").append(text) + .append(""); + break; + case DELETE: + html.append("").append(text) + .append(""); + break; + case EQUAL: + html.append("").append(text).append(""); + break; + } + } + return html.toString(); + } + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ + public String diff_text1(LinkedList diffs) { + StringBuilder text = new StringBuilder(); + for (Diff aDiff : diffs) { + if (aDiff.operation != Operation.INSERT) { + text.append(aDiff.text); + } + } + return text.toString(); + } + + /** + * Compute and return the destination text (all equalities and insertions). + * @param diffs LinkedList of Diff objects. + * @return Destination text. + */ + public String diff_text2(LinkedList diffs) { + StringBuilder text = new StringBuilder(); + for (Diff aDiff : diffs) { + if (aDiff.operation != Operation.DELETE) { + text.append(aDiff.text); + } + } + return text.toString(); + } + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ + public int diff_levenshtein(LinkedList diffs) { + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + for (Diff aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += Math.max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += Math.max(insertions, deletions); + return levenshtein; + } + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 
+ * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of Diff objects. + * @return Delta text. + */ + public String diff_toDelta(LinkedList diffs) { + StringBuilder text = new StringBuilder(); + for (Diff aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + try { + text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8") + .replace('+', ' ')).append("\t"); + } catch (UnsupportedEncodingException e) { + // Not likely on modern system. + throw new Error("This system does not support UTF-8.", e); + } + break; + case DELETE: + text.append("-").append(aDiff.text.length()).append("\t"); + break; + case EQUAL: + text.append("=").append(aDiff.text.length()).append("\t"); + break; + } + } + String delta = text.toString(); + if (delta.length() != 0) { + // Strip off trailing tab character. + delta = delta.substring(0, delta.length() - 1); + delta = unescapeForEncodeUriCompatability(delta); + } + return delta; + } + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of Diff objects or null if invalid. + * @throws IllegalArgumentException If invalid input. + */ + public LinkedList diff_fromDelta(String text1, String delta) + throws IllegalArgumentException { + LinkedList diffs = new LinkedList(); + int pointer = 0; // Cursor in text1 + String[] tokens = delta.split("\t"); + for (String token : tokens) { + if (token.length() == 0) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). 
+ String param = token.substring(1); + switch (token.charAt(0)) { + case '+': + // decode would change all "+" to " " + param = param.replace("+", "%2B"); + try { + param = URLDecoder.decode(param, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Not likely on modern system. + throw new Error("This system does not support UTF-8.", e); + } catch (IllegalArgumentException e) { + // Malformed URI sequence. + throw new IllegalArgumentException( + "Illegal escape in diff_fromDelta: " + param, e); + } + diffs.add(new Diff(Operation.INSERT, param)); + break; + case '-': + // Fall through. + case '=': + int n; + try { + n = Integer.parseInt(param); + } catch (NumberFormatException e) { + throw new IllegalArgumentException( + "Invalid number in diff_fromDelta: " + param, e); + } + if (n < 0) { + throw new IllegalArgumentException( + "Negative number in diff_fromDelta: " + param); + } + String text; + try { + text = text1.substring(pointer, pointer += n); + } catch (StringIndexOutOfBoundsException e) { + throw new IllegalArgumentException("Delta length (" + pointer + + ") larger than source text length (" + text1.length() + + ").", e); + } + if (token.charAt(0) == '=') { + diffs.add(new Diff(Operation.EQUAL, text)); + } else { + diffs.add(new Diff(Operation.DELETE, text)); + } + break; + default: + // Anything else is an error. + throw new IllegalArgumentException( + "Invalid diff operation in diff_fromDelta: " + token.charAt(0)); + } + } + if (pointer != text1.length()) { + throw new IllegalArgumentException("Delta length (" + pointer + + ") smaller than source text length (" + text1.length() + ")."); + } + return diffs; + } + + + // MATCH FUNCTIONS + + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. 
+ */ + public int match_main(String text, String pattern, int loc) { + // Check for null inputs. + if (text == null || pattern == null) { + throw new IllegalArgumentException("Null inputs. (match_main)"); + } + + loc = Math.max(0, Math.min(loc, text.length())); + if (text.equals(pattern)) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.length() == 0) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() + && text.substring(loc, loc + pattern.length()).equals(pattern)) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return match_bitap(text, pattern, loc); + } + } + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + protected int match_bitap(String text, String pattern, int loc) { + assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits) + : "Pattern too long for this application."; + + // Initialise the alphabet. + Map s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.indexOf(pattern, loc); + if (best_loc != -1) { + score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + // What about in the other direction? (speedup) + best_loc = text.lastIndexOf(pattern, loc + pattern.length()); + if (best_loc != -1) { + score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. 
+ int matchmask = 1 << (pattern.length() - 1); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + // Empty initialization added to appease Java compiler. + int[] last_rd = new int[0]; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) + <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + int start = Math.max(1, loc - bin_mid + 1); + int finish = Math.min(loc + bin_mid, text.length()) + pattern.length(); + + int[] rd = new int[finish + 2]; + rd[finish + 1] = (1 << d) - 1; + for (int j = finish; j >= start; j--) { + int charMatch; + if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) { + // Out of range. + charMatch = 0; + } else { + charMatch = s.get(text.charAt(j - 1)); + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = Math.max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. 
+ break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. + break; + } + last_rd = rd; + } + return best_loc; + } + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ + private double match_bitapScore(int e, int x, int loc, String pattern) { + float accuracy = (float) e / pattern.length(); + int proximity = Math.abs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + (proximity / (float) Match_Distance); + } + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ + protected Map match_alphabet(String pattern) { + Map s = new HashMap(); + char[] char_pattern = pattern.toCharArray(); + for (char c : char_pattern) { + s.put(c, 0); + } + int i = 0; + for (char c : char_pattern) { + s.put(c, s.get(c) | (1 << (pattern.length() - i - 1))); + i++; + } + return s; + } + + + // PATCH FUNCTIONS + + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ + protected void patch_addContext(Patch patch, String text) { + if (text.length() == 0) { + return; + } + String pattern = text.substring(patch.start2, patch.start2 + patch.length1); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. 
+ while (text.indexOf(pattern) != text.lastIndexOf(pattern) + && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { + padding += Patch_Margin; + pattern = text.substring(Math.max(0, patch.start2 - padding), + Math.min(text.length(), patch.start2 + patch.length1 + padding)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + String prefix = text.substring(Math.max(0, patch.start2 - padding), + patch.start2); + if (prefix.length() != 0) { + patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix)); + } + // Add the suffix. + String suffix = text.substring(patch.start2 + patch.length1, + Math.min(text.length(), patch.start2 + patch.length1 + padding)); + if (suffix.length() != 0) { + patch.diffs.addLast(new Diff(Operation.EQUAL, suffix)); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); + } + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ + public LinkedList patch_make(String text1, String text2) { + if (text1 == null || text2 == null) { + throw new IllegalArgumentException("Null inputs. (patch_make)"); + } + // No diffs provided, compute our own. + LinkedList diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of Diff objects for text1 to text2. + * @return LinkedList of Patch objects. + */ + public LinkedList patch_make(LinkedList diffs) { + if (diffs == null) { + throw new IllegalArgumentException("Null inputs. 
(patch_make)"); + } + // No origin string provided, compute our own. + String text1 = diff_text1(diffs); + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text + * @param text2 Ignored. + * @param diffs Array of Diff objects for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(String text1, LinkedList diffs). + */ + public LinkedList patch_make(String text1, String text2, + LinkedList diffs) { + return patch_make(text1, diffs); + } + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of Diff objects for text1 to text2. + * @return LinkedList of Patch objects. + */ + public LinkedList patch_make(String text1, LinkedList diffs) { + if (text1 == null || diffs == null) { + throw new IllegalArgumentException("Null inputs. (patch_make)"); + } + + LinkedList patches = new LinkedList(); + if (diffs.isEmpty()) { + return patches; // Get rid of the null case. + } + Patch patch = new Patch(); + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + String prepatch_text = text1; + String postpatch_text = text1; + for (Diff aDiff : diffs) { + if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) { + // A new patch starts here. 
+ patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case INSERT: + patch.diffs.add(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.substring(0, char_count2) + + aDiff.text + postpatch_text.substring(char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.add(aDiff); + postpatch_text = postpatch_text.substring(0, char_count2) + + postpatch_text.substring(char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin + && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) { + // Small equality inside a patch. + patch.diffs.add(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.add(patch); + patch = new Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != Operation.INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != Operation.DELETE) { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. + if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.add(patch); + } + + return patches; + } + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of Patch objects. + * @return Array of Patch objects. 
+ */ + public LinkedList patch_deepCopy(LinkedList patches) { + LinkedList patchesCopy = new LinkedList(); + for (Patch aPatch : patches) { + Patch patchCopy = new Patch(); + for (Diff aDiff : aPatch.diffs) { + Diff diffCopy = new Diff(aDiff.operation, aDiff.text); + patchCopy.diffs.add(diffCopy); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.add(patchCopy); + } + return patchesCopy; + } + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of Patch objects + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values. + */ + public Object[] patch_apply(LinkedList patches, String text) { + if (patches.isEmpty()) { + return new Object[]{text, new boolean[0]}; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy(patches); + + String nullPadding = patch_addPadding(patches); + text = nullPadding + text + nullPadding; + patch_splitMax(patches); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + int delta = 0; + boolean[] results = new boolean[patches.size()]; + for (Patch aPatch : patches) { + int expected_loc = aPatch.start2 + delta; + String text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.length() > this.Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. 
+ start_loc = match_main(text, + text1.substring(0, this.Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, + text1.substring(text1.length() - this.Match_MaxBits), + expected_loc + text1.length() - this.Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + String text2; + if (end_loc == -1) { + text2 = text.substring(start_loc, + Math.min(start_loc + text1.length(), text.length())); + } else { + text2 = text.substring(start_loc, + Math.min(end_loc + this.Match_MaxBits, text.length())); + } + if (text1.equals(text2)) { + // Perfect match, just shove the replacement text in. + text = text.substring(0, start_loc) + diff_text2(aPatch.diffs) + + text.substring(start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + LinkedList diffs = diff_main(text1, text2, false); + if (text1.length() > this.Match_MaxBits + && diff_levenshtein(diffs) / (float) text1.length() + > this.Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + int index1 = 0; + for (Diff aDiff : aPatch.diffs) { + if (aDiff.operation != Operation.EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == Operation.INSERT) { + // Insertion + text = text.substring(0, start_loc + index2) + aDiff.text + + text.substring(start_loc + index2); + } else if (aDiff.operation == Operation.DELETE) { + // Deletion + text = text.substring(0, start_loc + index2) + + text.substring(start_loc + diff_xIndex(diffs, + index1 + aDiff.text.length())); + } + } + if (aDiff.operation != Operation.DELETE) { + index1 += aDiff.text.length(); + } + } + } + } + } + x++; + } + // Strip the padding off. + text = text.substring(nullPadding.length(), text.length() + - nullPadding.length()); + return new Object[]{text, results}; + } + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of Patch objects. + * @return The padding string added to each side. + */ + public String patch_addPadding(LinkedList patches) { + short paddingLength = this.Patch_Margin; + String nullPadding = ""; + for (short x = 1; x <= paddingLength; x++) { + nullPadding += String.valueOf((char) x); + } + + // Bump all the patches forward. + for (Patch aPatch : patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch patch = patches.getFirst(); + LinkedList diffs = patch.diffs; + if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) { + // Add nullPadding equality. + diffs.addFirst(new Diff(Operation.EQUAL, nullPadding)); + patch.start1 -= paddingLength; // Should be 0. + patch.start2 -= paddingLength; // Should be 0. + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs.getFirst().text.length()) { + // Grow first equality. 
+ Diff firstDiff = diffs.getFirst(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = nullPadding.substring(firstDiff.text.length()) + + firstDiff.text; + patch.start1 -= extraLength; + patch.start2 -= extraLength; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + // Add some padding on end of last diff. + patch = patches.getLast(); + diffs = patch.diffs; + if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) { + // Add nullPadding equality. + diffs.addLast(new Diff(Operation.EQUAL, nullPadding)); + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > diffs.getLast().text.length()) { + // Grow last equality. + Diff lastDiff = diffs.getLast(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.substring(0, extraLength); + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + return nullPadding; + } + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ + public void patch_splitMax(LinkedList patches) { + short patch_size = Match_MaxBits; + String precontext, postcontext; + Patch patch; + int start1, start2; + boolean empty; + Operation diff_type; + String diff_text; + ListIterator pointer = patches.listIterator(); + Patch bigpatch = pointer.hasNext() ? pointer.next() : null; + while (bigpatch != null) { + if (bigpatch.length1 <= Match_MaxBits) { + bigpatch = pointer.hasNext() ? pointer.next() : null; + continue; + } + // Remove the big old patch. + pointer.remove(); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = ""; + while (!bigpatch.diffs.isEmpty()) { + // Create one of several smaller patches. 
+ patch = new Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (precontext.length() != 0) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.add(new Diff(Operation.EQUAL, precontext)); + } + while (!bigpatch.diffs.isEmpty() + && patch.length1 < patch_size - Patch_Margin) { + diff_type = bigpatch.diffs.getFirst().operation; + diff_text = bigpatch.diffs.getFirst().text; + if (diff_type == Operation.INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.addLast(bigpatch.diffs.removeFirst()); + empty = false; + } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1 + && patch.diffs.getFirst().operation == Operation.EQUAL + && diff_text.length() > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.add(new Diff(diff_type, diff_text)); + bigpatch.diffs.removeFirst(); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substring(0, Math.min(diff_text.length(), + patch_size - patch.length1 - Patch_Margin)); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == Operation.EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.add(new Diff(diff_type, diff_text)); + if (diff_text.equals(bigpatch.diffs.getFirst().text)) { + bigpatch.diffs.removeFirst(); + } else { + bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text + .substring(diff_text.length()); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = precontext.substring(Math.max(0, precontext.length() + - Patch_Margin)); + // Append the end context for this patch. 
+ if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + if (postcontext.length() != 0) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if (!patch.diffs.isEmpty() + && patch.diffs.getLast().operation == Operation.EQUAL) { + patch.diffs.getLast().text += postcontext; + } else { + patch.diffs.add(new Diff(Operation.EQUAL, postcontext)); + } + } + if (!empty) { + pointer.add(patch); + } + } + bigpatch = pointer.hasNext() ? pointer.next() : null; + } + } + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. + */ + public String patch_toText(List patches) { + StringBuilder text = new StringBuilder(); + for (Patch aPatch : patches) { + text.append(aPatch); + } + return text.toString(); + } + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws IllegalArgumentException If invalid input. 
+ */ + public List patch_fromText(String textline) + throws IllegalArgumentException { + List patches = new LinkedList(); + if (textline.length() == 0) { + return patches; + } + List textList = Arrays.asList(textline.split("\n")); + LinkedList text = new LinkedList(textList); + Patch patch; + Pattern patchHeader + = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); + Matcher m; + char sign; + String line; + while (!text.isEmpty()) { + m = patchHeader.matcher(text.getFirst()); + if (!m.matches()) { + throw new IllegalArgumentException( + "Invalid patch string: " + text.getFirst()); + } + patch = new Patch(); + patches.add(patch); + patch.start1 = Integer.parseInt(m.group(1)); + if (m.group(2).length() == 0) { + patch.start1--; + patch.length1 = 1; + } else if (m.group(2).equals("0")) { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = Integer.parseInt(m.group(2)); + } + + patch.start2 = Integer.parseInt(m.group(3)); + if (m.group(4).length() == 0) { + patch.start2--; + patch.length2 = 1; + } else if (m.group(4).equals("0")) { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = Integer.parseInt(m.group(4)); + } + text.removeFirst(); + + while (!text.isEmpty()) { + try { + sign = text.getFirst().charAt(0); + } catch (IndexOutOfBoundsException e) { + // Blank line? Whatever. + text.removeFirst(); + continue; + } + line = text.getFirst().substring(1); + line = line.replace("+", "%2B"); // decode would change all "+" to " " + try { + line = URLDecoder.decode(line, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Not likely on modern system. + throw new Error("This system does not support UTF-8.", e); + } catch (IllegalArgumentException e) { + // Malformed URI sequence. + throw new IllegalArgumentException( + "Illegal escape in patch_fromText: " + line, e); + } + if (sign == '-') { + // Deletion. + patch.diffs.add(new Diff(Operation.DELETE, line)); + } else if (sign == '+') { + // Insertion. 
+ patch.diffs.add(new Diff(Operation.INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.add(new Diff(Operation.EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? + throw new IllegalArgumentException( + "Invalid patch mode '" + sign + "' in: " + line); + } + text.removeFirst(); + } + } + return patches; + } + + + /** + * Class representing one diff operation. + */ + public static class Diff { + /** + * One of: INSERT, DELETE or EQUAL. + */ + public Operation operation; + /** + * The text associated with this diff operation. + */ + public String text; + + /** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL. + * @param text The text being applied. + */ + public Diff(Operation operation, String text) { + // Construct a diff with the specified operation and text. + this.operation = operation; + this.text = text; + } + + /** + * Display a human-readable version of this Diff. + * @return text version. + */ + public String toString() { + String prettyText = this.text.replace('\n', '\u00b6'); + return "Diff(" + this.operation + ",\"" + prettyText + "\")"; + } + + /** + * Create a numeric hash value for a Diff. + * This function is not used by DMP. + * @return Hash value. + */ + @Override + public int hashCode() { + final int prime = 31; + int result = (operation == null) ? 0 : operation.hashCode(); + result += prime * ((text == null) ? 0 : text.hashCode()); + return result; + } + + /** + * Is this Diff equivalent to another Diff? + * @param obj Another Diff to compare against. + * @return true or false. 
+ */ + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Diff other = (Diff) obj; + if (operation != other.operation) { + return false; + } + if (text == null) { + if (other.text != null) { + return false; + } + } else if (!text.equals(other.text)) { + return false; + } + return true; + } + } + + + /** + * Class representing one patch operation. + */ + public static class Patch { + public LinkedList diffs; + public int start1; + public int start2; + public int length1; + public int length2; + + /** + * Constructor. Initializes with an empty list of diffs. + */ + public Patch() { + this.diffs = new LinkedList(); + } + + /** + * Emmulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indicies are printed as 1-based, not 0-based. + * @return The GNU diff string. + */ + public String toString() { + String coords1, coords2; + if (this.length1 == 0) { + coords1 = this.start1 + ",0"; + } else if (this.length1 == 1) { + coords1 = Integer.toString(this.start1 + 1); + } else { + coords1 = (this.start1 + 1) + "," + this.length1; + } + if (this.length2 == 0) { + coords2 = this.start2 + ",0"; + } else if (this.length2 == 1) { + coords2 = Integer.toString(this.start2 + 1); + } else { + coords2 = (this.start2 + 1) + "," + this.length2; + } + StringBuilder text = new StringBuilder(); + text.append("@@ -").append(coords1).append(" +").append(coords2) + .append(" @@\n"); + // Escape the body of the patch with %xx notation. + for (Diff aDiff : this.diffs) { + switch (aDiff.operation) { + case INSERT: + text.append('+'); + break; + case DELETE: + text.append('-'); + break; + case EQUAL: + text.append(' '); + break; + } + try { + text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' ')) + .append("\n"); + } catch (UnsupportedEncodingException e) { + // Not likely on modern system. 
+ throw new Error("This system does not support UTF-8.", e); + } + } + return unescapeForEncodeUriCompatability(text.toString()); + } + } + + /** + * Unescape selected chars for compatability with JavaScript's encodeURI. + * In speed critical applications this could be dropped since the + * receiving application will certainly decode these fine. + * Note that this function is case-sensitive. Thus "%3f" would not be + * unescaped. But this is ok because it is only called with the output of + * URLEncoder.encode which returns uppercase hex. + * + * Example: "%3F" -> "?", "%24" -> "$", etc. + * + * @param str The string to escape. + * @return The escaped string. + */ + private static String unescapeForEncodeUriCompatability(String str) { + return str.replace("%21", "!").replace("%7E", "~") + .replace("%27", "'").replace("%28", "(").replace("%29", ")") + .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?") + .replace("%3A", ":").replace("%40", "@").replace("%26", "&") + .replace("%3D", "=").replace("%2B", "+").replace("%24", "$") + .replace("%2C", ",").replace("%23", "#"); + } +} diff --git a/java/name/fraser/neil/plaintext/diff_match_patch_test.java b/java/name/fraser/neil/plaintext/diff_match_patch_test.java new file mode 100644 index 0000000..85f1d0a --- /dev/null +++ b/java/name/fraser/neil/plaintext/diff_match_patch_test.java @@ -0,0 +1,910 @@ +/* + * Diff Match and Patch -- Test harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package name.fraser.neil.plaintext; + +import junit.framework.TestCase; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import name.fraser.neil.plaintext.diff_match_patch.Diff; +import name.fraser.neil.plaintext.diff_match_patch.LinesToCharsResult; +import name.fraser.neil.plaintext.diff_match_patch.Patch; + +public class diff_match_patch_test extends TestCase { + + private diff_match_patch dmp; + private diff_match_patch.Operation DELETE = diff_match_patch.Operation.DELETE; + private diff_match_patch.Operation EQUAL = diff_match_patch.Operation.EQUAL; + private diff_match_patch.Operation INSERT = diff_match_patch.Operation.INSERT; + + protected void setUp() { + // Create an instance of the diff_match_patch object. + dmp = new diff_match_patch(); + } + + + // DIFF TEST FUNCTIONS + + + public void testDiffCommonPrefix() { + // Detect any common prefix. + assertEquals("diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix("abc", "xyz")); + + assertEquals("diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + + assertEquals("diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix("1234", "1234xyz")); + } + + public void testDiffCommonSuffix() { + // Detect any common suffix. + assertEquals("diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix("abc", "xyz")); + + assertEquals("diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + + assertEquals("diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix("1234", "xyz1234")); + } + + public void testDiffCommonOverlap() { + // Detect any suffix/prefix overlap. 
+ assertEquals("diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap("", "abcd")); + + assertEquals("diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap("abc", "abcd")); + + assertEquals("diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap("123456", "abcd")); + + assertEquals("diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals("diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap("fi", "\ufb01i")); + } + + public void testDiffHalfmatch() { + // Detect a halfmatch. + dmp.Diff_Timeout = 1; + assertNull("diff_halfMatch: No match #1.", dmp.diff_halfMatch("1234567890", "abcdef")); + + assertNull("diff_halfMatch: No match #2.", dmp.diff_halfMatch("12345", "23")); + + assertArrayEquals("diff_halfMatch: Single Match #1.", new String[]{"12", "90", "a", "z", "345678"}, dmp.diff_halfMatch("1234567890", "a345678z")); + + assertArrayEquals("diff_halfMatch: Single Match #2.", new String[]{"a", "z", "12", "90", "345678"}, dmp.diff_halfMatch("a345678z", "1234567890")); + + assertArrayEquals("diff_halfMatch: Single Match #3.", new String[]{"abc", "z", "1234", "0", "56789"}, dmp.diff_halfMatch("abc56789z", "1234567890")); + + assertArrayEquals("diff_halfMatch: Single Match #4.", new String[]{"a", "xyz", "1", "7890", "23456"}, dmp.diff_halfMatch("a23456xyz", "1234567890")); + + assertArrayEquals("diff_halfMatch: Multiple Matches #1.", new String[]{"12123", "123121", "a", "z", "1234123451234"}, dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + + assertArrayEquals("diff_halfMatch: Multiple Matches #2.", new String[]{"", "-=-=-=-=-=", "x", "", "x-=-=-=-=-=-=-="}, dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + + assertArrayEquals("diff_halfMatch: Multiple Matches #3.", new String[]{"-=-=-=-=-=", "", "", "y", "-=-=-=-=-=-=-=y"}, 
dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertArrayEquals("diff_halfMatch: Non-optimal halfmatch.", new String[]{"qHillo", "w", "x", "Hulloy", "HelloHe"}, dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + + dmp.Diff_Timeout = 0; + assertNull("diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + } + + public void testDiffLinesToChars() { + // Convert lines down to characters. + ArrayList tmpVector = new ArrayList(); + tmpVector.add(""); + tmpVector.add("alpha\n"); + tmpVector.add("beta\n"); + assertLinesToCharsResultEquals("diff_linesToChars: Shared lines.", new LinesToCharsResult("\u0001\u0002\u0001", "\u0002\u0001\u0002", tmpVector), dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); + + tmpVector.clear(); + tmpVector.add(""); + tmpVector.add("alpha\r\n"); + tmpVector.add("beta\r\n"); + tmpVector.add("\r\n"); + assertLinesToCharsResultEquals("diff_linesToChars: Empty string and blank lines.", new LinesToCharsResult("", "\u0001\u0002\u0003\u0003", tmpVector), dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); + + tmpVector.clear(); + tmpVector.add(""); + tmpVector.add("a"); + tmpVector.add("b"); + assertLinesToCharsResultEquals("diff_linesToChars: No linebreaks.", new LinesToCharsResult("\u0001", "\u0002", tmpVector), dmp.diff_linesToChars("a", "b")); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + StringBuilder lineList = new StringBuilder(); + StringBuilder charList = new StringBuilder(); + for (int x = 1; x < n + 1; x++) { + tmpVector.add(x + "\n"); + lineList.append(x + "\n"); + charList.append(String.valueOf((char) x)); + } + assertEquals(n, tmpVector.size()); + String lines = lineList.toString(); + String chars = charList.toString(); + assertEquals(n, chars.length()); + tmpVector.add(0, ""); + assertLinesToCharsResultEquals("diff_linesToChars: More than 256.", new LinesToCharsResult(chars, "", tmpVector), dmp.diff_linesToChars(lines, "")); + } + + public void testDiffCharsToLines() { + // First check that Diff equality works. + assertTrue("diff_charsToLines: Equality #1.", new Diff(EQUAL, "a").equals(new Diff(EQUAL, "a"))); + + assertEquals("diff_charsToLines: Equality #2.", new Diff(EQUAL, "a"), new Diff(EQUAL, "a")); + + // Convert chars up to lines. + LinkedList diffs = diffList(new Diff(EQUAL, "\u0001\u0002\u0001"), new Diff(INSERT, "\u0002\u0001\u0002")); + ArrayList tmpVector = new ArrayList(); + tmpVector.add(""); + tmpVector.add("alpha\n"); + tmpVector.add("beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines: Shared lines.", diffList(new Diff(EQUAL, "alpha\nbeta\nalpha\n"), new Diff(INSERT, "beta\nalpha\nbeta\n")), diffs); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + StringBuilder lineList = new StringBuilder(); + StringBuilder charList = new StringBuilder(); + for (int x = 1; x < n + 1; x++) { + tmpVector.add(x + "\n"); + lineList.append(x + "\n"); + charList.append(String.valueOf((char) x)); + } + assertEquals(n, tmpVector.size()); + String lines = lineList.toString(); + String chars = charList.toString(); + assertEquals(n, chars.length()); + tmpVector.add(0, ""); + diffs = diffList(new Diff(DELETE, chars)); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines: More than 256.", diffList(new Diff(DELETE, lines)), diffs); + } + + public void testDiffCleanupMerge() { + // Cleanup a messy diff. + LinkedList diffs = diffList(); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Null case.", diffList(), diffs); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "b"), new Diff(INSERT, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: No change case.", diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "b"), new Diff(INSERT, "c")), diffs); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(EQUAL, "b"), new Diff(EQUAL, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge equalities.", diffList(new Diff(EQUAL, "abc")), diffs); + + diffs = diffList(new Diff(DELETE, "a"), new Diff(DELETE, "b"), new Diff(DELETE, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge deletions.", diffList(new Diff(DELETE, "abc")), diffs); + + diffs = diffList(new Diff(INSERT, "a"), new Diff(INSERT, "b"), new Diff(INSERT, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge insertions.", diffList(new Diff(INSERT, "abc")), diffs); + + diffs = diffList(new Diff(DELETE, "a"), new Diff(INSERT, "b"), new Diff(DELETE, "c"), new Diff(INSERT, "d"), new Diff(EQUAL, "e"), new Diff(EQUAL, "f")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge interweave.", 
diffList(new Diff(DELETE, "ac"), new Diff(INSERT, "bd"), new Diff(EQUAL, "ef")), diffs); + + diffs = diffList(new Diff(DELETE, "a"), new Diff(INSERT, "abc"), new Diff(DELETE, "dc")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection.", diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "d"), new Diff(INSERT, "b"), new Diff(EQUAL, "c")), diffs); + + diffs = diffList(new Diff(EQUAL, "x"), new Diff(DELETE, "a"), new Diff(INSERT, "abc"), new Diff(DELETE, "dc"), new Diff(EQUAL, "y")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(new Diff(EQUAL, "xa"), new Diff(DELETE, "d"), new Diff(INSERT, "b"), new Diff(EQUAL, "cy")), diffs); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(INSERT, "ba"), new Diff(EQUAL, "c")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left.", diffList(new Diff(INSERT, "ab"), new Diff(EQUAL, "ac")), diffs); + + diffs = diffList(new Diff(EQUAL, "c"), new Diff(INSERT, "ab"), new Diff(EQUAL, "a")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right.", diffList(new Diff(EQUAL, "ca"), new Diff(INSERT, "ba")), diffs); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "b"), new Diff(EQUAL, "c"), new Diff(DELETE, "ac"), new Diff(EQUAL, "x")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left recursive.", diffList(new Diff(DELETE, "abc"), new Diff(EQUAL, "acx")), diffs); + + diffs = diffList(new Diff(EQUAL, "x"), new Diff(DELETE, "ca"), new Diff(EQUAL, "c"), new Diff(DELETE, "b"), new Diff(EQUAL, "a")); + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right recursive.", diffList(new Diff(EQUAL, "xca"), new Diff(DELETE, "cba")), diffs); + } + + public void testDiffCleanupSemanticLossless() { + // Slide diffs to match logical boundaries. 
+ LinkedList diffs = diffList(); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Null case.", diffList(), diffs); + + diffs = diffList(new Diff(EQUAL, "AAA\r\n\r\nBBB"), new Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), new Diff(EQUAL, "\r\nEEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Blank lines.", diffList(new Diff(EQUAL, "AAA\r\n\r\n"), new Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), new Diff(EQUAL, "BBB\r\nEEE")), diffs); + + diffs = diffList(new Diff(EQUAL, "AAA\r\nBBB"), new Diff(INSERT, " DDD\r\nBBB"), new Diff(EQUAL, " EEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Line boundaries.", diffList(new Diff(EQUAL, "AAA\r\n"), new Diff(INSERT, "BBB DDD\r\n"), new Diff(EQUAL, "BBB EEE")), diffs); + + diffs = diffList(new Diff(EQUAL, "The c"), new Diff(INSERT, "ow and the c"), new Diff(EQUAL, "at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Word boundaries.", diffList(new Diff(EQUAL, "The "), new Diff(INSERT, "cow and the "), new Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(new Diff(EQUAL, "The-c"), new Diff(INSERT, "ow-and-the-c"), new Diff(EQUAL, "at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Alphanumeric boundaries.", diffList(new Diff(EQUAL, "The-"), new Diff(INSERT, "cow-and-the-"), new Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "a"), new Diff(EQUAL, "ax")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Hitting the start.", diffList(new Diff(DELETE, "a"), new Diff(EQUAL, "aax")), diffs); + + diffs = diffList(new Diff(EQUAL, "xa"), new Diff(DELETE, "a"), new Diff(EQUAL, "a")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Hitting the end.", diffList(new Diff(EQUAL, "xaa"), new Diff(DELETE, "a")), diffs); 
+ + diffs = diffList(new Diff(EQUAL, "The xxx. The "), new Diff(INSERT, "zzz. The "), new Diff(EQUAL, "yyy.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Sentence boundaries.", diffList(new Diff(EQUAL, "The xxx."), new Diff(INSERT, " The zzz."), new Diff(EQUAL, " The yyy.")), diffs); + } + + public void testDiffCleanupSemantic() { + // Cleanup semantically trivial equalities. + LinkedList diffs = diffList(); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); + + diffs = diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "cd"), new Diff(EQUAL, "12"), new Diff(DELETE, "e")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #1.", diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "cd"), new Diff(EQUAL, "12"), new Diff(DELETE, "e")), diffs); + + diffs = diffList(new Diff(DELETE, "abc"), new Diff(INSERT, "ABC"), new Diff(EQUAL, "1234"), new Diff(DELETE, "wxyz")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #2.", diffList(new Diff(DELETE, "abc"), new Diff(INSERT, "ABC"), new Diff(EQUAL, "1234"), new Diff(DELETE, "wxyz")), diffs); + + diffs = diffList(new Diff(DELETE, "a"), new Diff(EQUAL, "b"), new Diff(DELETE, "c")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Simple elimination.", diffList(new Diff(DELETE, "abc"), new Diff(INSERT, "b")), diffs); + + diffs = diffList(new Diff(DELETE, "ab"), new Diff(EQUAL, "cd"), new Diff(DELETE, "e"), new Diff(EQUAL, "f"), new Diff(INSERT, "g")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Backpass elimination.", diffList(new Diff(DELETE, "abcdef"), new Diff(INSERT, "cdfg")), diffs); + + diffs = diffList(new Diff(INSERT, "1"), new Diff(EQUAL, "A"), new Diff(DELETE, "B"), new Diff(INSERT, "2"), new Diff(EQUAL, "_"), new Diff(INSERT, "1"), new Diff(EQUAL, "A"), new Diff(DELETE, "B"), new Diff(INSERT, 
"2")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Multiple elimination.", diffList(new Diff(DELETE, "AB_AB"), new Diff(INSERT, "1A2_1A2")), diffs); + + diffs = diffList(new Diff(EQUAL, "The c"), new Diff(DELETE, "ow and the c"), new Diff(EQUAL, "at.")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(new Diff(EQUAL, "The "), new Diff(DELETE, "cow and the "), new Diff(EQUAL, "cat.")), diffs); + + diffs = diffList(new Diff(DELETE, "abcxx"), new Diff(INSERT, "xxdef")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No overlap elimination.", diffList(new Diff(DELETE, "abcxx"), new Diff(INSERT, "xxdef")), diffs); + + diffs = diffList(new Diff(DELETE, "abcxxx"), new Diff(INSERT, "xxxdef")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Overlap elimination.", diffList(new Diff(DELETE, "abc"), new Diff(EQUAL, "xxx"), new Diff(INSERT, "def")), diffs); + + diffs = diffList(new Diff(DELETE, "xxxabc"), new Diff(INSERT, "defxxx")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", diffList(new Diff(INSERT, "def"), new Diff(EQUAL, "xxx"), new Diff(DELETE, "abc")), diffs); + + diffs = diffList(new Diff(DELETE, "abcd1212"), new Diff(INSERT, "1212efghi"), new Diff(EQUAL, "----"), new Diff(DELETE, "A3"), new Diff(INSERT, "3BC")); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Two overlap eliminations.", diffList(new Diff(DELETE, "abcd"), new Diff(EQUAL, "1212"), new Diff(INSERT, "efghi"), new Diff(EQUAL, "----"), new Diff(DELETE, "A"), new Diff(EQUAL, "3"), new Diff(INSERT, "BC")), diffs); + } + + public void testDiffCleanupEfficiency() { + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + LinkedList diffs = diffList(); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Null case.", diffList(), diffs); + + diffs = diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "12"), new Diff(EQUAL, "wxyz"), new Diff(DELETE, "cd"), new Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: No elimination.", diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "12"), new Diff(EQUAL, "wxyz"), new Diff(DELETE, "cd"), new Diff(INSERT, "34")), diffs); + + diffs = diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "12"), new Diff(EQUAL, "xyz"), new Diff(DELETE, "cd"), new Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Four-edit elimination.", diffList(new Diff(DELETE, "abxyzcd"), new Diff(INSERT, "12xyz34")), diffs); + + diffs = diffList(new Diff(INSERT, "12"), new Diff(EQUAL, "x"), new Diff(DELETE, "cd"), new Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Three-edit elimination.", diffList(new Diff(DELETE, "xcd"), new Diff(INSERT, "12x34")), diffs); + + diffs = diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "12"), new Diff(EQUAL, "xy"), new Diff(INSERT, "34"), new Diff(EQUAL, "z"), new Diff(DELETE, "cd"), new Diff(INSERT, "56")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Backpass elimination.", diffList(new Diff(DELETE, "abxyzcd"), new Diff(INSERT, "12xy34z56")), diffs); + + dmp.Diff_EditCost = 5; + diffs = diffList(new Diff(DELETE, "ab"), new Diff(INSERT, "12"), new Diff(EQUAL, "wxyz"), new Diff(DELETE, "cd"), new Diff(INSERT, "34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: High cost elimination.", diffList(new Diff(DELETE, "abwxyzcd"), new Diff(INSERT, "12wxyz34")), diffs); + dmp.Diff_EditCost = 4; + } + + public void testDiffPrettyHtml() { + // Pretty print. 
+ LinkedList diffs = diffList(new Diff(EQUAL, "a\n"), new Diff(DELETE, "b"), new Diff(INSERT, "c&d")); + assertEquals("diff_prettyHtml:", "
    <B>b</B>c&d", dmp.diff_prettyHtml(diffs)); + } + + public void testDiffText() { + // Compute the source and destination texts. + LinkedList diffs = diffList(new Diff(EQUAL, "jump"), new Diff(DELETE, "s"), new Diff(INSERT, "ed"), new Diff(EQUAL, " over "), new Diff(DELETE, "the"), new Diff(INSERT, "a"), new Diff(EQUAL, " lazy")); + assertEquals("diff_text1:", "jumps over the lazy", dmp.diff_text1(diffs)); + assertEquals("diff_text2:", "jumped over a lazy", dmp.diff_text2(diffs)); + } + + public void testDiffDelta() { + // Convert a diff into delta string. + LinkedList diffs = diffList(new Diff(EQUAL, "jump"), new Diff(DELETE, "s"), new Diff(INSERT, "ed"), new Diff(EQUAL, " over "), new Diff(DELETE, "the"), new Diff(INSERT, "a"), new Diff(EQUAL, " lazy"), new Diff(INSERT, "old dog")); + String text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Base text.", "jumps over the lazy", text1); + + String delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + try { + dmp.diff_fromDelta(text1 + "x", delta); + fail("diff_fromDelta: Too long."); + } catch (IllegalArgumentException ex) { + // Exception expected. + } + + // Generates error (19 > 18). + try { + dmp.diff_fromDelta(text1.substring(1), delta); + fail("diff_fromDelta: Too short."); + } catch (IllegalArgumentException ex) { + // Exception expected. + } + + // Generates error (%c3%xy invalid Unicode). + try { + dmp.diff_fromDelta("", "+%c3%xy"); + fail("diff_fromDelta: Invalid character."); + } catch (IllegalArgumentException ex) { + // Exception expected. + } + + // Test deltas with special characters. 
+ diffs = diffList(new Diff(EQUAL, "\u0680 \000 \t %"), new Diff(DELETE, "\u0681 \001 \n ^"), new Diff(INSERT, "\u0682 \002 \\ |")); + text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Unicode text.", "\u0680 \000 \t %\u0681 \001 \n ^", text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta); + + assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. + diffs = diffList(new Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); + String text2 = dmp.diff_text2(diffs); + assertEquals("diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta("", delta)); + } + + public void testDiffXIndex() { + // Translate a location in text1 to text2. 
+ LinkedList diffs = diffList(new Diff(DELETE, "a"), new Diff(INSERT, "1234"), new Diff(EQUAL, "xyz")); + assertEquals("diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "1234"), new Diff(EQUAL, "xyz")); + assertEquals("diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); + } + + public void testDiffLevenshtein() { + LinkedList diffs = diffList(new Diff(DELETE, "abc"), new Diff(INSERT, "1234"), new Diff(EQUAL, "xyz")); + assertEquals("Levenshtein with trailing equality.", 4, dmp.diff_levenshtein(diffs)); + + diffs = diffList(new Diff(EQUAL, "xyz"), new Diff(DELETE, "abc"), new Diff(INSERT, "1234")); + assertEquals("Levenshtein with leading equality.", 4, dmp.diff_levenshtein(diffs)); + + diffs = diffList(new Diff(DELETE, "abc"), new Diff(EQUAL, "xyz"), new Diff(INSERT, "1234")); + assertEquals("Levenshtein with middle equality.", 7, dmp.diff_levenshtein(diffs)); + } + + public void testDiffBisect() { + // Normal. + String a = "cat"; + String b = "map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + LinkedList diffs = diffList(new Diff(DELETE, "c"), new Diff(INSERT, "m"), new Diff(EQUAL, "a"), new Diff(DELETE, "t"), new Diff(INSERT, "p")); + assertEquals("diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, Long.MAX_VALUE)); + + // Timeout. + diffs = diffList(new Diff(DELETE, "cat"), new Diff(INSERT, "map")); + assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); + } + + public void testDiffMain() { + // Perform a trivial diff. 
+ LinkedList diffs = diffList(); + assertEquals("diff_main: Null case.", diffs, dmp.diff_main("", "", false)); + + diffs = diffList(new Diff(EQUAL, "abc")); + assertEquals("diff_main: Equality.", diffs, dmp.diff_main("abc", "abc", false)); + + diffs = diffList(new Diff(EQUAL, "ab"), new Diff(INSERT, "123"), new Diff(EQUAL, "c")); + assertEquals("diff_main: Simple insertion.", diffs, dmp.diff_main("abc", "ab123c", false)); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "123"), new Diff(EQUAL, "bc")); + assertEquals("diff_main: Simple deletion.", diffs, dmp.diff_main("a123bc", "abc", false)); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(INSERT, "123"), new Diff(EQUAL, "b"), new Diff(INSERT, "456"), new Diff(EQUAL, "c")); + assertEquals("diff_main: Two insertions.", diffs, dmp.diff_main("abc", "a123b456c", false)); + + diffs = diffList(new Diff(EQUAL, "a"), new Diff(DELETE, "123"), new Diff(EQUAL, "b"), new Diff(DELETE, "456"), new Diff(EQUAL, "c")); + assertEquals("diff_main: Two deletions.", diffs, dmp.diff_main("a123b456c", "abc", false)); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = diffList(new Diff(DELETE, "a"), new Diff(INSERT, "b")); + assertEquals("diff_main: Simple case #1.", diffs, dmp.diff_main("a", "b", false)); + + diffs = diffList(new Diff(DELETE, "Apple"), new Diff(INSERT, "Banana"), new Diff(EQUAL, "s are a"), new Diff(INSERT, "lso"), new Diff(EQUAL, " fruit.")); + assertEquals("diff_main: Simple case #2.", diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + + diffs = diffList(new Diff(DELETE, "a"), new Diff(INSERT, "\u0680"), new Diff(EQUAL, "x"), new Diff(DELETE, "\t"), new Diff(INSERT, "\000")); + assertEquals("diff_main: Simple case #3.", diffs, dmp.diff_main("ax\t", "\u0680x\000", false)); + + diffs = diffList(new Diff(DELETE, "1"), new Diff(EQUAL, "a"), new Diff(DELETE, "y"), new Diff(EQUAL, "b"), new Diff(DELETE, "2"), new Diff(INSERT, "xab")); + assertEquals("diff_main: Overlap #1.", diffs, dmp.diff_main("1ayb2", "abxab", false)); + + diffs = diffList(new Diff(INSERT, "xaxcx"), new Diff(EQUAL, "abc"), new Diff(DELETE, "y")); + assertEquals("diff_main: Overlap #2.", diffs, dmp.diff_main("abcy", "xaxcxabc", false)); + + diffs = diffList(new Diff(DELETE, "ABCD"), new Diff(EQUAL, "a"), new Diff(DELETE, "="), new Diff(INSERT, "-"), new Diff(EQUAL, "bcd"), new Diff(DELETE, "="), new Diff(INSERT, "-"), new Diff(EQUAL, "efghijklmnopqrs"), new Diff(DELETE, "EFGHIJKLMNOefg")); + assertEquals("diff_main: Overlap #3.", diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)); + + diffs = diffList(new Diff(INSERT, " "), new Diff(EQUAL, "a"), new Diff(INSERT, "nd"), new Diff(EQUAL, " [[Pennsylvania]]"), new Diff(DELETE, " and [[New")); + assertEquals("diff_main: Large equality.", diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)); + + dmp.Diff_Timeout = 0.1f; // 100ms + String a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths 
outgrabe.\n"; + String b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + long startTime = System.currentTimeMillis(); + dmp.diff_main(a, b); + long endTime = System.currentTimeMillis(); + // Test that we took at least the timeout period. + assertTrue("diff_main: Timeout min.", dmp.Diff_Timeout * 1000 <= endTime - startTime); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + assertTrue("diff_main: Timeout max.", dmp.Diff_Timeout * 1000 * 2 > endTime - startTime); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + String[] texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + String[] texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertArrayEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); + + // Test null inputs. + try { + dmp.diff_main(null, null); + fail("diff_main: Null inputs."); + } catch (IllegalArgumentException ex) { + // Error expected. + } + } + + + // MATCH TEST FUNCTIONS + + + public void testMatchAlphabet() { + // Initialise the bitmasks for Bitap. 
+ Map bitmask; + bitmask = new HashMap(); + bitmask.put('a', 4); bitmask.put('b', 2); bitmask.put('c', 1); + assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); + + bitmask = new HashMap(); + bitmask.put('a', 37); bitmask.put('b', 18); bitmask.put('c', 8); + assertEquals("match_alphabet: Duplicates.", bitmask, dmp.match_alphabet("abcaba")); + } + + public void testMatchBitap() { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Exact match #1.", 5, dmp.match_bitap("abcdefghijk", "fgh", 5)); + + assertEquals("match_bitap: Exact match #2.", 5, dmp.match_bitap("abcdefghijk", "fgh", 0)); + + assertEquals("match_bitap: Fuzzy match #1.", 4, dmp.match_bitap("abcdefghijk", "efxhi", 0)); + + assertEquals("match_bitap: Fuzzy match #2.", 2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + + assertEquals("match_bitap: Fuzzy match #3.", -1, dmp.match_bitap("abcdefghijk", "bxy", 1)); + + assertEquals("match_bitap: Overflow.", 2, dmp.match_bitap("123456789xx0", "3456789x0", 2)); + + assertEquals("match_bitap: Before start match.", 0, dmp.match_bitap("abcdef", "xxabc", 4)); + + assertEquals("match_bitap: Beyond end match.", 3, dmp.match_bitap("abcdef", "defyy", 4)); + + assertEquals("match_bitap: Oversized pattern.", 0, dmp.match_bitap("abcdef", "xabcdefy", 0)); + + dmp.Match_Threshold = 0.4f; + assertEquals("match_bitap: Threshold #1.", 4, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.3f; + assertEquals("match_bitap: Threshold #2.", -1, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.0f; + assertEquals("match_bitap: Threshold #3.", 1, dmp.match_bitap("abcdefghijk", "bcdef", 1)); + + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Multiple select #1.", 0, dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + + assertEquals("match_bitap: Multiple select #2.", 8, dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + + dmp.Match_Distance = 10; 
// Strict location. + assertEquals("match_bitap: Distance test #1.", -1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + + assertEquals("match_bitap: Distance test #2.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals("match_bitap: Distance test #3.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + } + + public void testMatchMain() { + // Full match. + assertEquals("match_main: Equality.", 0, dmp.match_main("abcdef", "abcdef", 1000)); + + assertEquals("match_main: Null text.", -1, dmp.match_main("", "abcdef", 1)); + + assertEquals("match_main: Null pattern.", 3, dmp.match_main("abcdef", "", 3)); + + assertEquals("match_main: Exact match.", 3, dmp.match_main("abcdef", "de", 3)); + + assertEquals("match_main: Beyond end match.", 3, dmp.match_main("abcdef", "defy", 4)); + + assertEquals("match_main: Oversized pattern.", 0, dmp.match_main("abcdef", "abcdefy", 0)); + + dmp.Match_Threshold = 0.7f; + assertEquals("match_main: Complex match.", 4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)); + dmp.Match_Threshold = 0.5f; + + // Test null inputs. + try { + dmp.match_main(null, null, 0); + fail("match_main: Null inputs."); + } catch (IllegalArgumentException ex) { + // Error expected. + } + } + + + // PATCH TEST FUNCTIONS + + + public void testPatchObj() { + // Patch Object. 
+ Patch p = new Patch(); + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = diffList(new Diff(EQUAL, "jump"), new Diff(DELETE, "s"), new Diff(INSERT, "ed"), new Diff(EQUAL, " over "), new Diff(DELETE, "the"), new Diff(INSERT, "a"), new Diff(EQUAL, "\nlaz")); + String strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("Patch: toString.", strp, p.toString()); + } + + public void testPatchFromText() { + assertTrue("patch_fromText: #0.", dmp.patch_fromText("").isEmpty()); + + String strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch_fromText: #1.", strp, dmp.patch_fromText(strp).get(0).toString()); + + assertEquals("patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n").get(0).toString()); + + assertEquals("patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n").get(0).toString()); + + assertEquals("patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n").get(0).toString()); + + // Generates error. + try { + dmp.patch_fromText("Bad\nPatch\n"); + fail("patch_fromText: #5."); + } catch (IllegalArgumentException ex) { + // Exception expected. 
+ } + } + + public void testPatchToText() { + String strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + List patches; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Single.", strp, dmp.patch_toText(patches)); + + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Dual.", strp, dmp.patch_toText(patches)); + } + + public void testPatchAddContext() { + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").get(0); + dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); + + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").get(0); + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").get(0); + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").get(0); + dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); + assertEquals("patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString()); + } + + @SuppressWarnings("deprecation") + public void testPatchMake() { + LinkedList patches; + patches = dmp.patch_make("", ""); + assertEquals("patch_make: Null case.", "", dmp.patch_toText(patches)); + + String text1 = "The quick brown fox jumps over the lazy dog."; + String text2 = "That quick brown fox jumped over a lazy dog."; + String expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make(text2, text1); + assertEquals("patch_make: Text2+Text1 inputs.", expectedPatch, dmp.patch_toText(patches)); + + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Text1+Text2 inputs.", expectedPatch, dmp.patch_toText(patches)); + + LinkedList diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals("patch_make: Diff input.", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, diffs); + assertEquals("patch_make: Text1+Diff inputs.", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, text2, diffs); + assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated).", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); + assertEquals("patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); + + diffs = diffList(new Diff(DELETE, "`1234567890-=[]\\;',./"), new Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")); + assertEquals("patch_fromText: Character decoding.", diffs, dmp.patch_fromText("@@ -1,21 +1,21 
@@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").get(0).diffs); + + text1 = ""; + for (int x = 0; x < 100; x++) { + text1 += "abcdef"; + } + text2 = text1 + "123"; + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); + + // Test null inputs. + try { + dmp.patch_make(null); + fail("patch_make: Null inputs."); + } catch (IllegalArgumentException ex) { + // Error expected. + } + } + + public void testPatchSplitMax() { + // Assumes that Match_MaxBits is 32. + LinkedList patches; + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); + String oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); + 
dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); + } + + public void testPatchAddPadding() { + LinkedList patches; + patches = dmp.patch_make("", "test"); + assertEquals("patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("XY", "XtestY"); + assertEquals("patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); + assertEquals("patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); + } + + public void testPatchApply() { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + LinkedList patches; + patches = dmp.patch_make("", ""); + Object[] results = dmp.patch_apply(patches, "Hello world."); + boolean[] boolArray = (boolean[]) results[1]; + String resultStr = results[0] + "\t" + boolArray.length; + assertEquals("patch_apply: Null case.", "Hello world.\t0", resultStr); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); + results = dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + boolArray = (boolean[]) results[1]; + resultStr = 
results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, "I am the very model of a modern major general."); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr); + + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Big delete, big change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); + results = dmp.patch_apply(patches, 
"x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Big delete, big change 2.", "xabcy\ttrue\ttrue", resultStr); + dmp.Patch_DeleteThreshold = 0.5f; + + // Compensate for failed patch. + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); + results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0] + "\t" + boolArray[1]; + assertEquals("patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make("", "test"); + String patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ""); + assertEquals("patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make("", "test"); + results = dmp.patch_apply(patches, ""); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + assertEquals("patch_apply: Edge exact match.", "test\ttrue", resultStr); + + patches = dmp.patch_make("XY", "XtestY"); + results = dmp.patch_apply(patches, "XY"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + assertEquals("patch_apply: Near edge exact match.", 
"XtestY\ttrue", resultStr); + + patches = dmp.patch_make("y", "y123"); + results = dmp.patch_apply(patches, "x"); + boolArray = (boolean[]) results[1]; + resultStr = results[0] + "\t" + boolArray[0]; + assertEquals("patch_apply: Edge partial match.", "x123\ttrue", resultStr); + } + + private void assertArrayEquals(String error_msg, Object[] a, Object[] b) { + List list_a = Arrays.asList(a); + List list_b = Arrays.asList(b); + assertEquals(error_msg, list_a, list_b); + } + + private void assertLinesToCharsResultEquals(String error_msg, + LinesToCharsResult a, LinesToCharsResult b) { + assertEquals(error_msg, a.chars1, b.chars1); + assertEquals(error_msg, a.chars2, b.chars2); + assertEquals(error_msg, a.lineArray, b.lineArray); + } + + // Construct the two texts which made up the diff originally. + private static String[] diff_rebuildtexts(LinkedList diffs) { + String[] text = {"", ""}; + for (Diff myDiff : diffs) { + if (myDiff.operation != diff_match_patch.Operation.INSERT) { + text[0] += myDiff.text; + } + if (myDiff.operation != diff_match_patch.Operation.DELETE) { + text[1] += myDiff.text; + } + } + return text; + } + + // Private function for quickly building lists of diffs. + private static LinkedList diffList(Diff... diffs) { + LinkedList myDiffList = new LinkedList(); + for (Diff myDiff : diffs) { + myDiffList.add(myDiff); + } + return myDiffList; + } +} diff --git a/javascript/diff_match_patch.js b/javascript/diff_match_patch.js new file mode 100644 index 0000000..c41b513 --- /dev/null +++ b/javascript/diff_match_patch.js @@ -0,0 +1,49 @@ +(function(){function diff_match_patch(){this.Diff_Timeout=1;this.Diff_EditCost=4;this.Match_Threshold=0.5;this.Match_Distance=1E3;this.Patch_DeleteThreshold=0.5;this.Patch_Margin=4;this.Match_MaxBits=32} +diff_match_patch.prototype.diff_main=function(a,b,c,d){"undefined"==typeof d&&(d=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. 
(diff_main)");if(a==b)return a?[[0,a]]:[];"undefined"==typeof c&&(c=!0);var e=c,f=this.diff_commonPrefix(a,b);c=a.substring(0,f);a=a.substring(f);b=b.substring(f);var f=this.diff_commonSuffix(a,b),g=a.substring(a.length-f);a=a.substring(0,a.length-f);b=b.substring(0,b.length-f);a=this.diff_compute_(a, +b,e,d);c&&a.unshift([0,c]);g&&a.push([0,g]);this.diff_cleanupMerge(a);return a}; +diff_match_patch.prototype.diff_compute_=function(a,b,c,d){if(!a)return[[1,b]];if(!b)return[[-1,a]];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(c=[[1,e.substring(0,g)],[0,f],[1,e.substring(g+f.length)]],a.length>b.length&&(c[0][0]=c[2][0]=-1),c):1==f.length?[[-1,a],[1,b]]:(e=this.diff_halfMatch_(a,b))?(f=e[0],a=e[1],g=e[2],b=e[3],e=e[4],f=this.diff_main(f,g,c,d),c=this.diff_main(a,b,c,d),f.concat([[0,e]],c)):c&&100c);v++){for(var n=-v+r;n<=v-t;n+=2){var l=g+n,m;m=n==-v||n!=v&&j[l-1]d)t+=2;else if(s>e)r+=2;else if(q&&(l=g+k-n,0<=l&&l= +u)return this.diff_bisectSplit_(a,b,m,s,c)}}for(n=-v+p;n<=v-w;n+=2){l=g+n;u=n==-v||n!=v&&i[l-1]d)w+=2;else if(m>e)p+=2;else if(!q&&(l=g+k-n,0<=l&&(l=u)))return this.diff_bisectSplit_(a,b,m,s,c)}}return[[-1,a],[1,b]]}; +diff_match_patch.prototype.diff_bisectSplit_=function(a,b,c,d,e){var f=a.substring(0,c),g=b.substring(0,d);a=a.substring(c);b=b.substring(d);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)}; +diff_match_patch.prototype.diff_linesToChars_=function(a,b){function c(a){for(var b="",c=0,f=-1,g=d.length;fd?a=a.substring(c-d):c=a.length?[h,j,n,l,g]:null}if(0>=this.Diff_Timeout)return null; +var d=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>d.length||2*e.lengthd[4].length?g:d:d:g;var j;a.length>b.length?(g=h[0],d=h[1],e=h[2],j=h[3]):(e=h[0],j=h[1],g=h[2],d=h[3]);h=h[4];return[g,d,e,j,h]}; +diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var 
b=!1,c=[],d=0,e=null,f=0,g=0,h=0,j=0,i=0;f=e){if(d>=b.length/2||d>=c.length/2)a.splice(f,0,[0,c.substring(0,d)]),a[f-1][1]=b.substring(0,b.length-d),a[f+1][1]=c.substring(d),f++}else if(e>=b.length/2||e>=c.length/2)a.splice(f,0,[0,b.substring(0,e)]),a[f-1][0]=1,a[f-1][1]=c.substring(0,c.length-e),a[f+1][0]=-1,a[f+1][1]=b.substring(e),f++;f++}f++}}; +diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(a,b){if(!a||!b)return 6;var c=a.charAt(a.length-1),d=b.charAt(0),e=c.match(diff_match_patch.nonAlphaNumericRegex_),f=d.match(diff_match_patch.nonAlphaNumericRegex_),g=e&&c.match(diff_match_patch.whitespaceRegex_),h=f&&d.match(diff_match_patch.whitespaceRegex_),c=g&&c.match(diff_match_patch.linebreakRegex_),d=h&&d.match(diff_match_patch.linebreakRegex_),i=c&&a.match(diff_match_patch.blanklineEndRegex_),j=d&&b.match(diff_match_patch.blanklineStartRegex_); +return i||j?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var c=1;c=i&&(i=k,g=d,h=e,j=f)}a[c-1][1]!=g&&(g?a[c-1][1]=g:(a.splice(c-1,1),c--),a[c][1]= +h,j?a[c+1][1]=j:(a.splice(c+1,1),c--))}c++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/; +diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=!1,h=!1,j=!1,i=!1;fb)break;e=c;f=d}return a.length!=g&&-1===a[g][0]?f:f+(b-e)}; +diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],c=/&/g,d=//g,f=/\n/g,g=0;g");switch(h){case 1:b[g]=''+j+"";break;case -1:b[g]=''+j+"";break;case 0:b[g]=""+j+""}}return b.join("")}; +diff_match_patch.prototype.diff_text1=function(a){for(var b=[],c=0;cthis.Match_MaxBits)throw Error("Pattern too long for this browser.");var 
e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,c);-1!=h&&(g=Math.min(d(0,h),g),h=a.lastIndexOf(b,c+b.length),-1!=h&&(g=Math.min(d(0,h),g)));for(var j=1<=i;p--){var w=e[a.charAt(p-1)];k[p]=0===t?(k[p+1]<<1|1)&w:(k[p+1]<<1|1)&w|((r[p+1]|r[p])<<1|1)|r[p+1];if(k[p]&j&&(w=d(t,p-1),w<=g))if(g=w,h=p-1,h>c)i=Math.max(1,2*c-h);else break}if(d(t+1,c)>g)break;r=k}return h}; +diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},c=0;c=2*this.Patch_Margin&& +e&&(this.patch_addContext_(a,h),c.push(a),a=new diff_match_patch.patch_obj,e=0,h=d,f=g)}1!==i&&(f+=k.length);-1!==i&&(g+=k.length)}e&&(this.patch_addContext_(a,h),c.push(a));return c};diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],c=0;cthis.Match_MaxBits){if(j=this.match_main(b,h.substring(0,this.Match_MaxBits),g),-1!=j&&(i=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==i||j>=i))j=-1}else j=this.match_main(b,h,g); +if(-1==j)e[f]=!1,d-=a[f].length2-a[f].length1;else if(e[f]=!0,d=j-g,g=-1==i?b.substring(j,j+h.length):b.substring(j,i+this.Match_MaxBits),h==g)b=b.substring(0,j)+this.diff_text2(a[f].diffs)+b.substring(j+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);for(var h=0,k,i=0;ie[0][1].length){var f=b-e[0][1].length;e[0][1]=c.substring(e[0][1].length)+e[0][1];d.start1-=f;d.start2-=f;d.length1+=f;d.length2+=f}d=a[a.length-1];e=d.diffs;0==e.length||0!=e[e.length-1][0]?(e.push([0, +c]),d.length1+=b,d.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=c.substring(0,f),d.length1+=f,d.length2+=f);return c}; +diff_match_patch.prototype.patch_splitMax=function(a){for(var 
b=this.Match_MaxBits,c=0;c2*b?(h.length1+=i.length,e+=i.length,j=!1,h.diffs.push([g,i]),d.diffs.shift()):(i=i.substring(0,b-h.length1-this.Patch_Margin),h.length1+=i.length,e+=i.length,0===g?(h.length2+=i.length,f+=i.length):j=!1,h.diffs.push([g,i]),i==d.diffs[0][1]?d.diffs.shift():d.diffs[0][1]=d.diffs[0][1].substring(i.length))}g=this.diff_text2(h.diffs);g=g.substring(g.length-this.Patch_Margin);i=this.diff_text1(d.diffs).substring(0,this.Patch_Margin);""!==i&& +(h.length1+=i.length,h.length2+=i.length,0!==h.diffs.length&&0===h.diffs[h.diffs.length-1][0]?h.diffs[h.diffs.length-1][1]+=i:h.diffs.push([0,i]));j||a.splice(++c,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],c=0;c + + + + + + + + + + + + + + +

    If debugging errors, start with the first reported error; + subsequent tests often rely on earlier ones.

    + + + + diff --git a/javascript/diff_match_patch_test.js b/javascript/diff_match_patch_test.js new file mode 100644 index 0000000..9652197 --- /dev/null +++ b/javascript/diff_match_patch_test.js @@ -0,0 +1,937 @@ +/** + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// If expected and actual are equivalent, pass the test. +function assertEquivalent(msg, expected, actual) { + if (typeof actual == 'undefined') { + // msg is optional. + actual = expected; + expected = msg; + msg = 'Expected: \'' + expected + '\' Actual: \'' + actual + '\''; + } + if (_equivalent(expected, actual)) { + assertEquals(msg, String.toString(expected), String.toString(actual)); + } else { + assertEquals(msg, expected, actual); + } +} + + +// Are a and b equivalent? -- Recursive. +function _equivalent(a, b) { + if (a == b) { + return true; + } + if (typeof a == 'object' && typeof b == 'object' && a !== null && b !== null) { + if (a.toString() != b.toString()) { + return false; + } + for (var p in a) { + if (!_equivalent(a[p], b[p])) { + return false; + } + } + for (var p in b) { + if (!_equivalent(a[p], b[p])) { + return false; + } + } + return true; + } + return false; +} + + +function diff_rebuildtexts(diffs) { + // Construct the two texts which made up the diff originally. 
+ var text1 = ''; + var text2 = ''; + for (var x = 0; x < diffs.length; x++) { + if (diffs[x][0] != DIFF_INSERT) { + text1 += diffs[x][1]; + } + if (diffs[x][0] != DIFF_DELETE) { + text2 += diffs[x][1]; + } + } + return [text1, text2]; +} + +var dmp = new diff_match_patch(); + + +// DIFF TEST FUNCTIONS + + +function testDiffCommonPrefix() { + // Detect any common prefix. + // Null case. + assertEquals(0, dmp.diff_commonPrefix('abc', 'xyz')); + + // Non-null case. + assertEquals(4, dmp.diff_commonPrefix('1234abcdef', '1234xyz')); + + // Whole case. + assertEquals(4, dmp.diff_commonPrefix('1234', '1234xyz')); +} + +function testDiffCommonSuffix() { + // Detect any common suffix. + // Null case. + assertEquals(0, dmp.diff_commonSuffix('abc', 'xyz')); + + // Non-null case. + assertEquals(4, dmp.diff_commonSuffix('abcdef1234', 'xyz1234')); + + // Whole case. + assertEquals(4, dmp.diff_commonSuffix('1234', 'xyz1234')); +} + +function testDiffCommonOverlap() { + // Detect any suffix/prefix overlap. + // Null case. + assertEquals(0, dmp.diff_commonOverlap_('', 'abcd')); + + // Whole case. + assertEquals(3, dmp.diff_commonOverlap_('abc', 'abcd')); + + // No overlap. + assertEquals(0, dmp.diff_commonOverlap_('123456', 'abcd')); + + // Overlap. + assertEquals(3, dmp.diff_commonOverlap_('123456xxx', 'xxxabcd')); + + // Unicode. + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals(0, dmp.diff_commonOverlap_('fi', '\ufb01i')); +} + +function testDiffHalfMatch() { + // Detect a halfmatch. + dmp.Diff_Timeout = 1; + // No match. + assertEquals(null, dmp.diff_halfMatch_('1234567890', 'abcdef')); + + assertEquals(null, dmp.diff_halfMatch_('12345', '23')); + + // Single Match. 
+ assertEquivalent(['12', '90', 'a', 'z', '345678'], dmp.diff_halfMatch_('1234567890', 'a345678z')); + + assertEquivalent(['a', 'z', '12', '90', '345678'], dmp.diff_halfMatch_('a345678z', '1234567890')); + + assertEquivalent(['abc', 'z', '1234', '0', '56789'], dmp.diff_halfMatch_('abc56789z', '1234567890')); + + assertEquivalent(['a', 'xyz', '1', '7890', '23456'], dmp.diff_halfMatch_('a23456xyz', '1234567890')); + + // Multiple Matches. + assertEquivalent(['12123', '123121', 'a', 'z', '1234123451234'], dmp.diff_halfMatch_('121231234123451234123121', 'a1234123451234z')); + + assertEquivalent(['', '-=-=-=-=-=', 'x', '', 'x-=-=-=-=-=-=-='], dmp.diff_halfMatch_('x-=-=-=-=-=-=-=-=-=-=-=-=', 'xx-=-=-=-=-=-=-=')); + + assertEquivalent(['-=-=-=-=-=', '', '', 'y', '-=-=-=-=-=-=-=y'], dmp.diff_halfMatch_('-=-=-=-=-=-=-=-=-=-=-=-=y', '-=-=-=-=-=-=-=yy')); + + // Non-optimal halfmatch. + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquivalent(['qHillo', 'w', 'x', 'Hulloy', 'HelloHe'], dmp.diff_halfMatch_('qHilloHelloHew', 'xHelloHeHulloy')); + + // Optimal no halfmatch. + dmp.Diff_Timeout = 0; + assertEquals(null, dmp.diff_halfMatch_('qHilloHelloHew', 'xHelloHeHulloy')); +} + +function testDiffLinesToChars() { + function assertLinesToCharsResultEquals(a, b) { + assertEquals(a.chars1, b.chars1); + assertEquals(a.chars2, b.chars2); + assertEquivalent(a.lineArray, b.lineArray); + } + + // Convert lines down to characters. 
+ assertLinesToCharsResultEquals({chars1: '\x01\x02\x01', chars2: '\x02\x01\x02', lineArray: ['', 'alpha\n', 'beta\n']}, dmp.diff_linesToChars_('alpha\nbeta\nalpha\n', 'beta\nalpha\nbeta\n')); + + assertLinesToCharsResultEquals({chars1: '', chars2: '\x01\x02\x03\x03', lineArray: ['', 'alpha\r\n', 'beta\r\n', '\r\n']}, dmp.diff_linesToChars_('', 'alpha\r\nbeta\r\n\r\n\r\n')); + + assertLinesToCharsResultEquals({chars1: '\x01', chars2: '\x02', lineArray: ['', 'a', 'b']}, dmp.diff_linesToChars_('a', 'b')); + + // More than 256 to reveal any 8-bit limitations. + var n = 300; + var lineList = []; + var charList = []; + for (var x = 1; x < n + 1; x++) { + lineList[x - 1] = x + '\n'; + charList[x - 1] = String.fromCharCode(x); + } + assertEquals(n, lineList.length); + var lines = lineList.join(''); + var chars = charList.join(''); + assertEquals(n, chars.length); + lineList.unshift(''); + assertLinesToCharsResultEquals({chars1: chars, chars2: '', lineArray: lineList}, dmp.diff_linesToChars_(lines, '')); +} + +function testDiffCharsToLines() { + // Convert chars up to lines. + var diffs = [[DIFF_EQUAL, '\x01\x02\x01'], [DIFF_INSERT, '\x02\x01\x02']]; + dmp.diff_charsToLines_(diffs, ['', 'alpha\n', 'beta\n']); + assertEquivalent([[DIFF_EQUAL, 'alpha\nbeta\nalpha\n'], [DIFF_INSERT, 'beta\nalpha\nbeta\n']], diffs); + + // More than 256 to reveal any 8-bit limitations. + var n = 300; + var lineList = []; + var charList = []; + for (var x = 1; x < n + 1; x++) { + lineList[x - 1] = x + '\n'; + charList[x - 1] = String.fromCharCode(x); + } + assertEquals(n, lineList.length); + var lines = lineList.join(''); + var chars = charList.join(''); + assertEquals(n, chars.length); + lineList.unshift(''); + var diffs = [[DIFF_DELETE, chars]]; + dmp.diff_charsToLines_(diffs, lineList); + assertEquivalent([[DIFF_DELETE, lines]], diffs); +} + +function testDiffCleanupMerge() { + // Cleanup a messy diff. + // Null case. 
+ var diffs = []; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([], diffs); + + // No change case. + diffs = [[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'b'], [DIFF_INSERT, 'c']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'b'], [DIFF_INSERT, 'c']], diffs); + + // Merge equalities. + diffs = [[DIFF_EQUAL, 'a'], [DIFF_EQUAL, 'b'], [DIFF_EQUAL, 'c']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'abc']], diffs); + + // Merge deletions. + diffs = [[DIFF_DELETE, 'a'], [DIFF_DELETE, 'b'], [DIFF_DELETE, 'c']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_DELETE, 'abc']], diffs); + + // Merge insertions. + diffs = [[DIFF_INSERT, 'a'], [DIFF_INSERT, 'b'], [DIFF_INSERT, 'c']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_INSERT, 'abc']], diffs); + + // Merge interweave. + diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b'], [DIFF_DELETE, 'c'], [DIFF_INSERT, 'd'], [DIFF_EQUAL, 'e'], [DIFF_EQUAL, 'f']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_DELETE, 'ac'], [DIFF_INSERT, 'bd'], [DIFF_EQUAL, 'ef']], diffs); + + // Prefix and suffix detection. + diffs = [[DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'c']], diffs); + + // Prefix and suffix detection with equalities. + diffs = [[DIFF_EQUAL, 'x'], [DIFF_DELETE, 'a'], [DIFF_INSERT, 'abc'], [DIFF_DELETE, 'dc'], [DIFF_EQUAL, 'y']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'xa'], [DIFF_DELETE, 'd'], [DIFF_INSERT, 'b'], [DIFF_EQUAL, 'cy']], diffs); + + // Slide edit left. + diffs = [[DIFF_EQUAL, 'a'], [DIFF_INSERT, 'ba'], [DIFF_EQUAL, 'c']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'ac']], diffs); + + // Slide edit right. 
+ diffs = [[DIFF_EQUAL, 'c'], [DIFF_INSERT, 'ab'], [DIFF_EQUAL, 'a']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'ca'], [DIFF_INSERT, 'ba']], diffs); + + // Slide edit left recursive. + diffs = [[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'b'], [DIFF_EQUAL, 'c'], [DIFF_DELETE, 'ac'], [DIFF_EQUAL, 'x']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_DELETE, 'abc'], [DIFF_EQUAL, 'acx']], diffs); + + // Slide edit right recursive. + diffs = [[DIFF_EQUAL, 'x'], [DIFF_DELETE, 'ca'], [DIFF_EQUAL, 'c'], [DIFF_DELETE, 'b'], [DIFF_EQUAL, 'a']]; + dmp.diff_cleanupMerge(diffs); + assertEquivalent([[DIFF_EQUAL, 'xca'], [DIFF_DELETE, 'cba']], diffs); +} + +function testDiffCleanupSemanticLossless() { + // Slide diffs to match logical boundaries. + // Null case. + var diffs = []; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([], diffs); + + // Blank lines. + diffs = [[DIFF_EQUAL, 'AAA\r\n\r\nBBB'], [DIFF_INSERT, '\r\nDDD\r\n\r\nBBB'], [DIFF_EQUAL, '\r\nEEE']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'AAA\r\n\r\n'], [DIFF_INSERT, 'BBB\r\nDDD\r\n\r\n'], [DIFF_EQUAL, 'BBB\r\nEEE']], diffs); + + // Line boundaries. + diffs = [[DIFF_EQUAL, 'AAA\r\nBBB'], [DIFF_INSERT, ' DDD\r\nBBB'], [DIFF_EQUAL, ' EEE']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'AAA\r\n'], [DIFF_INSERT, 'BBB DDD\r\n'], [DIFF_EQUAL, 'BBB EEE']], diffs); + + // Word boundaries. + diffs = [[DIFF_EQUAL, 'The c'], [DIFF_INSERT, 'ow and the c'], [DIFF_EQUAL, 'at.']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'The '], [DIFF_INSERT, 'cow and the '], [DIFF_EQUAL, 'cat.']], diffs); + + // Alphanumeric boundaries. + diffs = [[DIFF_EQUAL, 'The-c'], [DIFF_INSERT, 'ow-and-the-c'], [DIFF_EQUAL, 'at.']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'The-'], [DIFF_INSERT, 'cow-and-the-'], [DIFF_EQUAL, 'cat.']], diffs); + + // Hitting the start. 
+ diffs = [[DIFF_EQUAL, 'a'], [DIFF_DELETE, 'a'], [DIFF_EQUAL, 'ax']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_EQUAL, 'aax']], diffs); + + // Hitting the end. + diffs = [[DIFF_EQUAL, 'xa'], [DIFF_DELETE, 'a'], [DIFF_EQUAL, 'a']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'xaa'], [DIFF_DELETE, 'a']], diffs); + + // Sentence boundaries. + diffs = [[DIFF_EQUAL, 'The xxx. The '], [DIFF_INSERT, 'zzz. The '], [DIFF_EQUAL, 'yyy.']]; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquivalent([[DIFF_EQUAL, 'The xxx.'], [DIFF_INSERT, ' The zzz.'], [DIFF_EQUAL, ' The yyy.']], diffs); +} + +function testDiffCleanupSemantic() { + // Cleanup semantically trivial equalities. + // Null case. + var diffs = []; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([], diffs); + + // No elimination #1. + diffs = [[DIFF_DELETE, 'ab'], [DIFF_INSERT, 'cd'], [DIFF_EQUAL, '12'], [DIFF_DELETE, 'e']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'ab'], [DIFF_INSERT, 'cd'], [DIFF_EQUAL, '12'], [DIFF_DELETE, 'e']], diffs); + + // No elimination #2. + diffs = [[DIFF_DELETE, 'abc'], [DIFF_INSERT, 'ABC'], [DIFF_EQUAL, '1234'], [DIFF_DELETE, 'wxyz']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abc'], [DIFF_INSERT, 'ABC'], [DIFF_EQUAL, '1234'], [DIFF_DELETE, 'wxyz']], diffs); + + // Simple elimination. + diffs = [[DIFF_DELETE, 'a'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, 'c']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abc'], [DIFF_INSERT, 'b']], diffs); + + // Backpass elimination. + diffs = [[DIFF_DELETE, 'ab'], [DIFF_EQUAL, 'cd'], [DIFF_DELETE, 'e'], [DIFF_EQUAL, 'f'], [DIFF_INSERT, 'g']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abcdef'], [DIFF_INSERT, 'cdfg']], diffs); + + // Multiple eliminations. 
+ diffs = [[DIFF_INSERT, '1'], [DIFF_EQUAL, 'A'], [DIFF_DELETE, 'B'], [DIFF_INSERT, '2'], [DIFF_EQUAL, '_'], [DIFF_INSERT, '1'], [DIFF_EQUAL, 'A'], [DIFF_DELETE, 'B'], [DIFF_INSERT, '2']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'AB_AB'], [DIFF_INSERT, '1A2_1A2']], diffs); + + // Word boundaries. + diffs = [[DIFF_EQUAL, 'The c'], [DIFF_DELETE, 'ow and the c'], [DIFF_EQUAL, 'at.']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_EQUAL, 'The '], [DIFF_DELETE, 'cow and the '], [DIFF_EQUAL, 'cat.']], diffs); + + // No overlap elimination. + diffs = [[DIFF_DELETE, 'abcxx'], [DIFF_INSERT, 'xxdef']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abcxx'], [DIFF_INSERT, 'xxdef']], diffs); + + // Overlap elimination. + diffs = [[DIFF_DELETE, 'abcxxx'], [DIFF_INSERT, 'xxxdef']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abc'], [DIFF_EQUAL, 'xxx'], [DIFF_INSERT, 'def']], diffs); + + // Reverse overlap elimination. + diffs = [[DIFF_DELETE, 'xxxabc'], [DIFF_INSERT, 'defxxx']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_INSERT, 'def'], [DIFF_EQUAL, 'xxx'], [DIFF_DELETE, 'abc']], diffs); + + // Two overlap eliminations. + diffs = [[DIFF_DELETE, 'abcd1212'], [DIFF_INSERT, '1212efghi'], [DIFF_EQUAL, '----'], [DIFF_DELETE, 'A3'], [DIFF_INSERT, '3BC']]; + dmp.diff_cleanupSemantic(diffs); + assertEquivalent([[DIFF_DELETE, 'abcd'], [DIFF_EQUAL, '1212'], [DIFF_INSERT, 'efghi'], [DIFF_EQUAL, '----'], [DIFF_DELETE, 'A'], [DIFF_EQUAL, '3'], [DIFF_INSERT, 'BC']], diffs); +} + +function testDiffCleanupEfficiency() { + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + // Null case. + var diffs = []; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([], diffs); + + // No elimination. 
+ diffs = [[DIFF_DELETE, 'ab'], [DIFF_INSERT, '12'], [DIFF_EQUAL, 'wxyz'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '34']]; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([[DIFF_DELETE, 'ab'], [DIFF_INSERT, '12'], [DIFF_EQUAL, 'wxyz'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '34']], diffs); + + // Four-edit elimination. + diffs = [[DIFF_DELETE, 'ab'], [DIFF_INSERT, '12'], [DIFF_EQUAL, 'xyz'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '34']]; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([[DIFF_DELETE, 'abxyzcd'], [DIFF_INSERT, '12xyz34']], diffs); + + // Three-edit elimination. + diffs = [[DIFF_INSERT, '12'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '34']]; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([[DIFF_DELETE, 'xcd'], [DIFF_INSERT, '12x34']], diffs); + + // Backpass elimination. + diffs = [[DIFF_DELETE, 'ab'], [DIFF_INSERT, '12'], [DIFF_EQUAL, 'xy'], [DIFF_INSERT, '34'], [DIFF_EQUAL, 'z'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '56']]; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([[DIFF_DELETE, 'abxyzcd'], [DIFF_INSERT, '12xy34z56']], diffs); + + // High cost elimination. + dmp.Diff_EditCost = 5; + diffs = [[DIFF_DELETE, 'ab'], [DIFF_INSERT, '12'], [DIFF_EQUAL, 'wxyz'], [DIFF_DELETE, 'cd'], [DIFF_INSERT, '34']]; + dmp.diff_cleanupEfficiency(diffs); + assertEquivalent([[DIFF_DELETE, 'abwxyzcd'], [DIFF_INSERT, '12wxyz34']], diffs); + dmp.Diff_EditCost = 4; +} + +function testDiffPrettyHtml() { + // Pretty print. + var diffs = [[DIFF_EQUAL, 'a\n'], [DIFF_DELETE, 'b'], [DIFF_INSERT, 'c&d']]; + assertEquals('
    <B>b</B>c&d', dmp.diff_prettyHtml(diffs)); +} + +function testDiffText() { + // Compute the source and destination texts. + var diffs = [[DIFF_EQUAL, 'jump'], [DIFF_DELETE, 's'], [DIFF_INSERT, 'ed'], [DIFF_EQUAL, ' over '], [DIFF_DELETE, 'the'], [DIFF_INSERT, 'a'], [DIFF_EQUAL, ' lazy']]; + assertEquals('jumps over the lazy', dmp.diff_text1(diffs)); + + assertEquals('jumped over a lazy', dmp.diff_text2(diffs)); +} + +function testDiffDelta() { + // Convert a diff into delta string. + var diffs = [[DIFF_EQUAL, 'jump'], [DIFF_DELETE, 's'], [DIFF_INSERT, 'ed'], [DIFF_EQUAL, ' over '], [DIFF_DELETE, 'the'], [DIFF_INSERT, 'a'], [DIFF_EQUAL, ' lazy'], [DIFF_INSERT, 'old dog']]; + var text1 = dmp.diff_text1(diffs); + assertEquals('jumps over the lazy', text1); + + var delta = dmp.diff_toDelta(diffs); + assertEquals('=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog', delta); + + // Convert delta string into a diff. + assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 != 20). + try { + dmp.diff_fromDelta(text1 + 'x', delta); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } + + // Generates error (19 != 18). + try { + dmp.diff_fromDelta(text1.substring(1), delta); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } + + // Generates error (%c3%xy invalid Unicode). + try { + dmp.diff_fromDelta('', '+%c3%xy'); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } + + // Test deltas with special characters. + diffs = [[DIFF_EQUAL, '\u0680 \x00 \t %'], [DIFF_DELETE, '\u0681 \x01 \n ^'], [DIFF_INSERT, '\u0682 \x02 \\ |']]; + text1 = dmp.diff_text1(diffs); + assertEquals('\u0680 \x00 \t %\u0681 \x01 \n ^', text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals('=7\t-7\t+%DA%82 %02 %5C %7C', delta); + + // Convert delta string into a diff. + assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. 
+ diffs = [[DIFF_INSERT, 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ']]; + var text2 = dmp.diff_text2(diffs); + assertEquals('A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ', text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals('+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ', delta); + + // Convert delta string into a diff. + assertEquivalent(diffs, dmp.diff_fromDelta('', delta)); +} + +function testDiffXIndex() { + // Translate a location in text1 to text2. + // Translation on equality. + assertEquals(5, dmp.diff_xIndex([[DIFF_DELETE, 'a'], [DIFF_INSERT, '1234'], [DIFF_EQUAL, 'xyz']], 2)); + + // Translation on deletion. + assertEquals(1, dmp.diff_xIndex([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '1234'], [DIFF_EQUAL, 'xyz']], 3)); +} + +function testDiffLevenshtein() { + // Levenshtein with trailing equality. + assertEquals(4, dmp.diff_levenshtein([[DIFF_DELETE, 'abc'], [DIFF_INSERT, '1234'], [DIFF_EQUAL, 'xyz']])); + // Levenshtein with leading equality. + assertEquals(4, dmp.diff_levenshtein([[DIFF_EQUAL, 'xyz'], [DIFF_DELETE, 'abc'], [DIFF_INSERT, '1234']])); + // Levenshtein with middle equality. + assertEquals(7, dmp.diff_levenshtein([[DIFF_DELETE, 'abc'], [DIFF_EQUAL, 'xyz'], [DIFF_INSERT, '1234']])); +} + +function testDiffBisect() { + // Normal. + var a = 'cat'; + var b = 'map'; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + assertEquivalent([[DIFF_DELETE, 'c'], [DIFF_INSERT, 'm'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 't'], [DIFF_INSERT, 'p']], dmp.diff_bisect_(a, b, Number.MAX_VALUE)); + + // Timeout. + assertEquivalent([[DIFF_DELETE, 'cat'], [DIFF_INSERT, 'map']], dmp.diff_bisect_(a, b, 0)); +} + +function testDiffMain() { + // Perform a trivial diff. + // Null case. + assertEquivalent([], dmp.diff_main('', '', false)); + + // Equality. 
+ assertEquivalent([[DIFF_EQUAL, 'abc']], dmp.diff_main('abc', 'abc', false)); + + // Simple insertion. + assertEquivalent([[DIFF_EQUAL, 'ab'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'ab123c', false)); + + // Simple deletion. + assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'bc']], dmp.diff_main('a123bc', 'abc', false)); + + // Two insertions. + assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_INSERT, '123'], [DIFF_EQUAL, 'b'], [DIFF_INSERT, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('abc', 'a123b456c', false)); + + // Two deletions. + assertEquivalent([[DIFF_EQUAL, 'a'], [DIFF_DELETE, '123'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '456'], [DIFF_EQUAL, 'c']], dmp.diff_main('a123b456c', 'abc', false)); + + // Perform a real diff. + // Switch off the timeout. + dmp.Diff_Timeout = 0; + // Simple cases. + assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, 'b']], dmp.diff_main('a', 'b', false)); + + assertEquivalent([[DIFF_DELETE, 'Apple'], [DIFF_INSERT, 'Banana'], [DIFF_EQUAL, 's are a'], [DIFF_INSERT, 'lso'], [DIFF_EQUAL, ' fruit.']], dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false)); + + assertEquivalent([[DIFF_DELETE, 'a'], [DIFF_INSERT, '\u0680'], [DIFF_EQUAL, 'x'], [DIFF_DELETE, '\t'], [DIFF_INSERT, '\0']], dmp.diff_main('ax\t', '\u0680x\0', false)); + + // Overlaps. 
+ assertEquivalent([[DIFF_DELETE, '1'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, 'y'], [DIFF_EQUAL, 'b'], [DIFF_DELETE, '2'], [DIFF_INSERT, 'xab']], dmp.diff_main('1ayb2', 'abxab', false)); + + assertEquivalent([[DIFF_INSERT, 'xaxcx'], [DIFF_EQUAL, 'abc'], [DIFF_DELETE, 'y']], dmp.diff_main('abcy', 'xaxcxabc', false)); + + assertEquivalent([[DIFF_DELETE, 'ABCD'], [DIFF_EQUAL, 'a'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'bcd'], [DIFF_DELETE, '='], [DIFF_INSERT, '-'], [DIFF_EQUAL, 'efghijklmnopqrs'], [DIFF_DELETE, 'EFGHIJKLMNOefg']], dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', 'a-bcd-efghijklmnopqrs', false)); + + // Large equality. + assertEquivalent([[DIFF_INSERT, ' '], [DIFF_EQUAL, 'a'], [DIFF_INSERT, 'nd'], [DIFF_EQUAL, ' [[Pennsylvania]]'], [DIFF_DELETE, ' and [[New']], dmp.diff_main('a [[Pennsylvania]] and [[New', ' and [[Pennsylvania]]', false)); + + // Timeout. + dmp.Diff_Timeout = 0.1; // 100ms + var a = '`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n'; + var b = 'I am the very model of a modern major general,\nI\'ve information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n'; + // Increase the text lengths by 1024 times to ensure a timeout. + for (var x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + var startTime = (new Date()).getTime(); + dmp.diff_main(a, b); + var endTime = (new Date()).getTime(); + // Test that we took at least the timeout period. + assertTrue(dmp.Diff_Timeout * 1000 <= endTime - startTime); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // **** + // TODO(fraser): For unknown reasons this is taking 500 ms on Google's + // internal test system. Whereas browsers take 140 ms. 
+ //assertTrue(dmp.Diff_Timeout * 1000 * 2 > endTime - startTime); + // **** + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. + // Simple line-mode. + a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; + b = 'abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n'; + assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)); + + // Single line-mode. + a = '1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890'; + b = 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij'; + assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)); + + // Overlap line-mode. + a = '1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n'; + b = 'abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n'; + var texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + var texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertEquivalent(texts_textmode, texts_linemode); + + // Test null inputs. + try { + dmp.diff_main(null, null); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } +} + + +// MATCH TEST FUNCTIONS + + +function testMatchAlphabet() { + // Initialise the bitmasks for Bitap. + // Unique. + assertEquivalent({'a':4, 'b':2, 'c':1}, dmp.match_alphabet_('abc')); + + // Duplicates. 
+ assertEquivalent({'a':37, 'b':18, 'c':8}, dmp.match_alphabet_('abcaba')); +} + +function testMatchBitap() { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5; + // Exact matches. + assertEquals(5, dmp.match_bitap_('abcdefghijk', 'fgh', 5)); + + assertEquals(5, dmp.match_bitap_('abcdefghijk', 'fgh', 0)); + + // Fuzzy matches. + assertEquals(4, dmp.match_bitap_('abcdefghijk', 'efxhi', 0)); + + assertEquals(2, dmp.match_bitap_('abcdefghijk', 'cdefxyhijk', 5)); + + assertEquals(-1, dmp.match_bitap_('abcdefghijk', 'bxy', 1)); + + // Overflow. + assertEquals(2, dmp.match_bitap_('123456789xx0', '3456789x0', 2)); + + // Threshold test. + dmp.Match_Threshold = 0.4; + assertEquals(4, dmp.match_bitap_('abcdefghijk', 'efxyhi', 1)); + + dmp.Match_Threshold = 0.3; + assertEquals(-1, dmp.match_bitap_('abcdefghijk', 'efxyhi', 1)); + + dmp.Match_Threshold = 0.0; + assertEquals(1, dmp.match_bitap_('abcdefghijk', 'bcdef', 1)); + dmp.Match_Threshold = 0.5; + + // Multiple select. + assertEquals(0, dmp.match_bitap_('abcdexyzabcde', 'abccde', 3)); + + assertEquals(8, dmp.match_bitap_('abcdexyzabcde', 'abccde', 5)); + + // Distance test. + dmp.Match_Distance = 10; // Strict location. + assertEquals(-1, dmp.match_bitap_('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24)); + + assertEquals(0, dmp.match_bitap_('abcdefghijklmnopqrstuvwxyz', 'abcdxxefg', 1)); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals(0, dmp.match_bitap_('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 24)); +} + +function testMatchMain() { + // Full match. + // Shortcut matches. + assertEquals(0, dmp.match_main('abcdef', 'abcdef', 1000)); + + assertEquals(-1, dmp.match_main('', 'abcdef', 1)); + + assertEquals(3, dmp.match_main('abcdef', '', 3)); + + assertEquals(3, dmp.match_main('abcdef', 'de', 3)); + + // Beyond end match. + assertEquals(3, dmp.match_main("abcdef", "defy", 4)); + + // Oversized pattern. + assertEquals(0, dmp.match_main("abcdef", "abcdefy", 0)); + + // Complex match. 
+ assertEquals(4, dmp.match_main('I am the very model of a modern major general.', ' that berry ', 5)); + + // Test null inputs. + try { + dmp.match_main(null, null, 0); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } +} + + +// PATCH TEST FUNCTIONS + + +function testPatchObj() { + // Patch Object. + var p = new diff_match_patch.patch_obj(); + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = [[DIFF_EQUAL, 'jump'], [DIFF_DELETE, 's'], [DIFF_INSERT, 'ed'], [DIFF_EQUAL, ' over '], [DIFF_DELETE, 'the'], [DIFF_INSERT, 'a'], [DIFF_EQUAL, '\nlaz']]; + var strp = p.toString(); + assertEquals('@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n', strp); +} + +function testPatchFromText() { + assertEquivalent([], dmp.patch_fromText(strp)); + + var strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n'; + assertEquals(strp, dmp.patch_fromText(strp)[0].toString()); + + assertEquals('@@ -1 +1 @@\n-a\n+b\n', dmp.patch_fromText('@@ -1 +1 @@\n-a\n+b\n')[0].toString()); + + assertEquals('@@ -1,3 +0,0 @@\n-abc\n', dmp.patch_fromText('@@ -1,3 +0,0 @@\n-abc\n')[0].toString()); + + assertEquals('@@ -0,0 +1,3 @@\n+abc\n', dmp.patch_fromText('@@ -0,0 +1,3 @@\n+abc\n')[0].toString()); + + // Generates error. + try { + dmp.patch_fromText('Bad\nPatch\n'); + assertEquals(Error, null); + } catch (e) { + // Exception expected. 
+ } +} + +function testPatchToText() { + var strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n'; + var p = dmp.patch_fromText(strp); + assertEquals(strp, dmp.patch_toText(p)); + + strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n'; + p = dmp.patch_fromText(strp); + assertEquals(strp, dmp.patch_toText(p)); +} + +function testPatchAddContext() { + dmp.Patch_Margin = 4; + var p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[0]; + dmp.patch_addContext_(p, 'The quick brown fox jumps over the lazy dog.'); + assertEquals('@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n', p.toString()); + + // Same, but not enough trailing context. + p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[0]; + dmp.patch_addContext_(p, 'The quick brown fox jumps.'); + assertEquals('@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n', p.toString()); + + // Same, but not enough leading context. + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[0]; + dmp.patch_addContext_(p, 'The quick brown fox jumps.'); + assertEquals('@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n', p.toString()); + + // Same, but with ambiguity. + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[0]; + dmp.patch_addContext_(p, 'The quick brown fox jumps. The quick brown fox crashes.'); + assertEquals('@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n', p.toString()); +} + +function testPatchMake() { + // Null case. + var patches = dmp.patch_make('', ''); + assertEquals('', dmp.patch_toText(patches)); + + var text1 = 'The quick brown fox jumps over the lazy dog.'; + var text2 = 'That quick brown fox jumped over a lazy dog.'; + // Text2+Text1 inputs. + var expectedPatch = '@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n'; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. 
+ patches = dmp.patch_make(text2, text1); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Text1+Text2 inputs. + expectedPatch = '@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n'; + patches = dmp.patch_make(text1, text2); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Diff input. + var diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Text1+Diff inputs. + patches = dmp.patch_make(text1, diffs); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Text1+Text2+Diff inputs (deprecated). + patches = dmp.patch_make(text1, text2, diffs); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Character encoding. + patches = dmp.patch_make('`1234567890-=[]\\;\',./', '~!@#$%^&*()_+{}|:"<>?'); + assertEquals('@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n', dmp.patch_toText(patches)); + + // Character decoding. + diffs = [[DIFF_DELETE, '`1234567890-=[]\\;\',./'], [DIFF_INSERT, '~!@#$%^&*()_+{}|:"<>?']]; + assertEquivalent(diffs, dmp.patch_fromText('@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;\',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n')[0].diffs); + + // Long string with repeats. + text1 = ''; + for (var x = 0; x < 100; x++) { + text1 += 'abcdef'; + } + text2 = text1 + '123'; + expectedPatch = '@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n'; + patches = dmp.patch_make(text1, text2); + assertEquals(expectedPatch, dmp.patch_toText(patches)); + + // Test null inputs. + try { + dmp.patch_make(null); + assertEquals(Error, null); + } catch (e) { + // Exception expected. + } +} + +function testPatchSplitMax() { + // Assumes that dmp.Match_MaxBits is 32. 
+ var patches = dmp.patch_make('abcdefghijklmnopqrstuvwxyz01234567890', 'XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0'); + dmp.patch_splitMax(patches); + assertEquals('@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n', dmp.patch_toText(patches)); + + patches = dmp.patch_make('abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz', 'abcdefuvwxyz'); + var oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals(oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make('1234567890123456789012345678901234567890123456789012345678901234567890', 'abc'); + dmp.patch_splitMax(patches); + assertEquals('@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n', dmp.patch_toText(patches)); + + patches = dmp.patch_make('abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1', 'abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1'); + dmp.patch_splitMax(patches); + assertEquals('@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n', dmp.patch_toText(patches)); +} + +function testPatchAddPadding() { + // Both edges full. + var patches = dmp.patch_make('', 'test'); + assertEquals('@@ -0,0 +1,4 @@\n+test\n', dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals('@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n', dmp.patch_toText(patches)); + + // Both edges partial. 
+ patches = dmp.patch_make('XY', 'XtestY'); + assertEquals('@@ -1,2 +1,6 @@\n X\n+test\n Y\n', dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals('@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n', dmp.patch_toText(patches)); + + // Both edges none. + patches = dmp.patch_make('XXXXYYYY', 'XXXXtestYYYY'); + assertEquals('@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals('@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches)); +} + +function testPatchApply() { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5; + dmp.Patch_DeleteThreshold = 0.5; + // Null case. + var patches = dmp.patch_make('', ''); + var results = dmp.patch_apply(patches, 'Hello world.'); + assertEquivalent(['Hello world.', []], results); + + // Exact match. + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', 'That quick brown fox jumped over a lazy dog.'); + results = dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.'); + assertEquivalent(['That quick brown fox jumped over a lazy dog.', [true, true]], results); + + // Partial match. + results = dmp.patch_apply(patches, 'The quick red rabbit jumps over the tired tiger.'); + assertEquivalent(['That quick red rabbit jumped over a tired tiger.', [true, true]], results); + + // Failed match. + results = dmp.patch_apply(patches, 'I am the very model of a modern major general.'); + assertEquivalent(['I am the very model of a modern major general.', [false, false]], results); + + // Big delete, small change. + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y'); + assertEquivalent(['xabcy', [true, true]], results); + + // Big delete, big change 1. 
+ patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x12345678901234567890---------------++++++++++---------------12345678901234567890y'); + assertEquivalent(['xabc12345678901234567890---------------++++++++++---------------12345678901234567890y', [false, true]], results); + + // Big delete, big change 2. + dmp.Patch_DeleteThreshold = 0.6; + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789012345678901234567890y', 'xabcy'); + results = dmp.patch_apply(patches, 'x12345678901234567890---------------++++++++++---------------12345678901234567890y'); + assertEquivalent(['xabcy', [true, true]], results); + dmp.Patch_DeleteThreshold = 0.5; + + // Compensate for failed patch. + dmp.Match_Threshold = 0.0; + dmp.Match_Distance = 0; + patches = dmp.patch_make('abcdefghijklmnopqrstuvwxyz--------------------1234567890', 'abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890'); + results = dmp.patch_apply(patches, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890'); + assertEquivalent(['ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890', [false, true]], results); + dmp.Match_Threshold = 0.5; + dmp.Match_Distance = 1000; + + // No side effects. + patches = dmp.patch_make('', 'test'); + var patchstr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ''); + assertEquals(patchstr, dmp.patch_toText(patches)); + + // No side effects with major delete. + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', 'Woof'); + patchstr = dmp.patch_toText(patches); + dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.'); + assertEquals(patchstr, dmp.patch_toText(patches)); + + // Edge exact match. + patches = dmp.patch_make('', 'test'); + results = dmp.patch_apply(patches, ''); + assertEquivalent(['test', [true]], results); + + // Near edge exact match. 
+ patches = dmp.patch_make('XY', 'XtestY'); + results = dmp.patch_apply(patches, 'XY'); + assertEquivalent(['XtestY', [true]], results); + + // Edge partial match. + patches = dmp.patch_make('y', 'y123'); + results = dmp.patch_apply(patches, 'x'); + assertEquivalent(['x123', [true]], results); +} diff --git a/javascript/diff_match_patch_uncompressed.js b/javascript/diff_match_patch_uncompressed.js new file mode 100644 index 0000000..d56f2e7 --- /dev/null +++ b/javascript/diff_match_patch_uncompressed.js @@ -0,0 +1,2192 @@ +/** + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @fileoverview Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * @author fraser@google.com (Neil Fraser) + */ + +/** + * Class containing the diff, match and patch methods. + * @constructor + */ +function diff_match_patch() { + + // Defaults. + // Redefine these in your program to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + this.Diff_Timeout = 1.0; + // Cost of an empty edit operation in terms of edit characters. + this.Diff_EditCost = 4; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + this.Match_Threshold = 0.5; + // How far to search for a match (0 = exact location, 1000+ = broad match). 
+ // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + this.Match_Distance = 1000; + // When deleting a large block of text (over ~64 characters), how close do + // the contents have to be to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + this.Patch_DeleteThreshold = 0.5; + // Chunk size for context length. + this.Patch_Margin = 4; + + // The number of bits in an int. + this.Match_MaxBits = 32; +} + + +// DIFF FUNCTIONS + + +/** + * The data structure representing a diff is an array of tuples: + * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']] + * which means: delete 'Hello', add 'Goodbye' and keep ' world.' + */ +var DIFF_DELETE = -1; +var DIFF_INSERT = 1; +var DIFF_EQUAL = 0; + +/** @typedef {{0: number, 1: string}} */ +diff_match_patch.Diff; + + +/** + * Find the differences between two texts. Simplifies the problem by stripping + * any common prefix or suffix off the texts before diffing. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {boolean=} opt_checklines Optional speedup flag. If present and false, + * then don't run a line-level diff first to identify the changed areas. + * Defaults to true, which does a faster, slightly less optimal diff. + * @param {number} opt_deadline Optional time when the diff should be complete + * by. Used internally for recursive calls. Users should set DiffTimeout + * instead. + * @return {!Array.} Array of diff tuples. + */ +diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines, + opt_deadline) { + // Set a deadline by which time the diff must be complete. 
+ if (typeof opt_deadline == 'undefined') { + if (this.Diff_Timeout <= 0) { + opt_deadline = Number.MAX_VALUE; + } else { + opt_deadline = (new Date).getTime() + this.Diff_Timeout * 1000; + } + } + var deadline = opt_deadline; + + // Check for null inputs. + if (text1 == null || text2 == null) { + throw new Error('Null input. (diff_main)'); + } + + // Check for equality (speedup). + if (text1 == text2) { + if (text1) { + return [[DIFF_EQUAL, text1]]; + } + return []; + } + + if (typeof opt_checklines == 'undefined') { + opt_checklines = true; + } + var checklines = opt_checklines; + + // Trim off common prefix (speedup). + var commonlength = this.diff_commonPrefix(text1, text2); + var commonprefix = text1.substring(0, commonlength); + text1 = text1.substring(commonlength); + text2 = text2.substring(commonlength); + + // Trim off common suffix (speedup). + commonlength = this.diff_commonSuffix(text1, text2); + var commonsuffix = text1.substring(text1.length - commonlength); + text1 = text1.substring(0, text1.length - commonlength); + text2 = text2.substring(0, text2.length - commonlength); + + // Compute the diff on the middle block. + var diffs = this.diff_compute_(text1, text2, checklines, deadline); + + // Restore the prefix and suffix. + if (commonprefix) { + diffs.unshift([DIFF_EQUAL, commonprefix]); + } + if (commonsuffix) { + diffs.push([DIFF_EQUAL, commonsuffix]); + } + this.diff_cleanupMerge(diffs); + return diffs; +}; + + +/** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {boolean} checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster, slightly less optimal diff. + * @param {number} deadline Time when the diff should be complete by. + * @return {!Array.} Array of diff tuples. 
+ * @private + */ +diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines, + deadline) { + var diffs; + + if (!text1) { + // Just add some text (speedup). + return [[DIFF_INSERT, text2]]; + } + + if (!text2) { + // Just delete some text (speedup). + return [[DIFF_DELETE, text1]]; + } + + var longtext = text1.length > text2.length ? text1 : text2; + var shorttext = text1.length > text2.length ? text2 : text1; + var i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + diffs = [[DIFF_INSERT, longtext.substring(0, i)], + [DIFF_EQUAL, shorttext], + [DIFF_INSERT, longtext.substring(i + shorttext.length)]]; + // Swap insertions for deletions if diff is reversed. + if (text1.length > text2.length) { + diffs[0][0] = diffs[2][0] = DIFF_DELETE; + } + return diffs; + } + + if (shorttext.length == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]]; + } + + // Check to see if the problem can be split in two. + var hm = this.diff_halfMatch_(text1, text2); + if (hm) { + // A half-match was found, sort out the return data. + var text1_a = hm[0]; + var text1_b = hm[1]; + var text2_a = hm[2]; + var text2_b = hm[3]; + var mid_common = hm[4]; + // Send both pairs off for separate processing. + var diffs_a = this.diff_main(text1_a, text2_a, checklines, deadline); + var diffs_b = this.diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. + return diffs_a.concat([[DIFF_EQUAL, mid_common]], diffs_b); + } + + if (checklines && text1.length > 100 && text2.length > 100) { + return this.diff_lineMode_(text1, text2, deadline); + } + + return this.diff_bisect_(text1, text2, deadline); +}; + + +/** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param {string} text1 Old string to be diffed. 
+ * @param {string} text2 New string to be diffed. + * @param {number} deadline Time when the diff should be complete by. + * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_lineMode_ = function(text1, text2, deadline) { + // Scan the text on a line-by-line basis first. + var a = this.diff_linesToChars_(text1, text2); + text1 = a.chars1; + text2 = a.chars2; + var linearray = a.lineArray; + + var diffs = this.diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + this.diff_charsToLines_(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + this.diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.push([DIFF_EQUAL, '']); + var pointer = 0; + var count_delete = 0; + var count_insert = 0; + var text_delete = ''; + var text_insert = ''; + while (pointer < diffs.length) { + switch (diffs[pointer][0]) { + case DIFF_INSERT: + count_insert++; + text_insert += diffs[pointer][1]; + break; + case DIFF_DELETE: + count_delete++; + text_delete += diffs[pointer][1]; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + diffs.splice(pointer - count_delete - count_insert, + count_delete + count_insert); + pointer = pointer - count_delete - count_insert; + var a = this.diff_main(text_delete, text_insert, false, deadline); + for (var j = a.length - 1; j >= 0; j--) { + diffs.splice(pointer, 0, a[j]); + } + pointer = pointer + a.length; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + pointer++; + } + diffs.pop(); // Remove the dummy entry at the end. + + return diffs; +}; + + +/** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. 
+ * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
+ * @param {string} text1 Old string to be diffed.
+ * @param {string} text2 New string to be diffed.
+ * @param {number} deadline Time at which to bail if not yet complete.
+ * @return {!Array.<!diff_match_patch.Diff>} Array of diff tuples.
+ * @private
+ */
+diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
+  // Cache the text lengths to prevent multiple calls.
+  var text1_length = text1.length;
+  var text2_length = text2.length;
+  var max_d = Math.ceil((text1_length + text2_length) / 2);  // Bound on d.
+  var v_offset = max_d;  // Shifts a (possibly negative) diagonal to an index.
+  var v_length = 2 * max_d;
+  var v1 = new Array(v_length);  // Front path: x reached on each diagonal.
+  var v2 = new Array(v_length);  // Reverse path, same layout.
+  // Setting all elements to -1 is faster in Chrome & Firefox than mixing
+  // integers and undefined.  -1 also marks a diagonal as not yet reached.
+  for (var x = 0; x < v_length; x++) {
+    v1[x] = -1;
+    v2[x] = -1;
+  }
+  v1[v_offset + 1] = 0;  // Seed: the d = 0, k = 0 step reads this as x = 0.
+  v2[v_offset + 1] = 0;
+  var delta = text1_length - text2_length;
+  // If the total number of characters is odd, then the front path will collide
+  // with the reverse path.
+  var front = (delta % 2 != 0);  // true: the front loop checks for overlap.
+  // Offsets for start and end of k loop.
+  // Prevents mapping of space beyond the grid.
+  var k1start = 0;
+  var k1end = 0;
+  var k2start = 0;
+  var k2end = 0;
+  for (var d = 0; d < max_d; d++) {
+    // Bail out if deadline is reached (falls through to the final return).
+    if ((new Date()).getTime() > deadline) {
+      break;
+    }
+
+    // Walk the front path one step, visiting every second diagonal.
+    for (var k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
+      var k1_offset = v_offset + k1;
+      var x1;
+      if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {
+        x1 = v1[k1_offset + 1];  // Arrive from diagonal k1 + 1, x unchanged.
+      } else {
+        x1 = v1[k1_offset - 1] + 1;  // Arrive from diagonal k1 - 1, x + 1.
+      }
+      var y1 = x1 - k1;  // On diagonal k1, x - y == k1.
+      while (x1 < text1_length && y1 < text2_length &&
+             text1.charAt(x1) == text2.charAt(y1)) {
+        x1++;  // Follow matching characters along the diagonal.
+        y1++;
+      }
+      v1[k1_offset] = x1;
+      if (x1 > text1_length) {
+        // Ran off the right of the graph.
+        k1end += 2;
+      } else if (y1 > text2_length) {
+        // Ran off the bottom of the graph.
+        k1start += 2;
+      } else if (front) {
+        var k2_offset = v_offset + delta - k1;  // Reverse-path slot to compare.
+        if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
+          // Mirror x2 onto top-left coordinate system.
+          var x2 = text1_length - v2[k2_offset];
+          if (x1 >= x2) {
+            // Overlap detected: split both texts at (x1, y1) and recurse.
+            return this.diff_bisectSplit_(text1, text2, x1, y1, deadline);
+          }
+        }
+      }
+    }
+
+    // Walk the reverse path one step (same walk, measured from the text ends).
+    for (var k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
+      var k2_offset = v_offset + k2;
+      var x2;
+      if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) {
+        x2 = v2[k2_offset + 1];
+      } else {
+        x2 = v2[k2_offset - 1] + 1;
+      }
+      var y2 = x2 - k2;
+      while (x2 < text1_length && y2 < text2_length &&
+             text1.charAt(text1_length - x2 - 1) ==
+             text2.charAt(text2_length - y2 - 1)) {
+        x2++;  // Follow matching characters backwards from the ends.
+        y2++;
+      }
+      v2[k2_offset] = x2;
+      if (x2 > text1_length) {
+        // Ran off the left of the graph.
+        k2end += 2;
+      } else if (y2 > text2_length) {
+        // Ran off the top of the graph.
+        k2start += 2;
+      } else if (!front) {
+        var k1_offset = v_offset + delta - k2;  // Front-path slot to compare.
+        if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
+          var x1 = v1[k1_offset];
+          var y1 = v_offset + x1 - k1_offset;  // y = x - (k1_offset - v_offset).
+          // Mirror x2 onto top-left coordinate system.
+          x2 = text1_length - x2;
+          if (x1 >= x2) {
+            // Overlap detected: split both texts at (x1, y1) and recurse.
+            return this.diff_bisectSplit_(text1, text2, x1, y1, deadline);
+          }
+        }
+      }
+    }
+  }
+  // Diff took too long and hit the deadline or
+  // number of diffs equals number of characters, no commonality at all.
+  return [[DIFF_DELETE, text1], [DIFF_INSERT, text2]];
+};
+
+
+/**
+ * Given the location of the 'middle snake', split the diff in two parts
+ * and recurse.
+ * @param {string} text1 Old string to be diffed.
+ * @param {string} text2 New string to be diffed.
+ * @param {number} x Index of split point in text1.
+ * @param {number} y Index of split point in text2.
+ * @param {number} deadline Time at which to bail if not yet complete.
+ * @return {!Array.} Array of diff tuples. + * @private + */ +diff_match_patch.prototype.diff_bisectSplit_ = function(text1, text2, x, y, + deadline) { + var text1a = text1.substring(0, x); + var text2a = text2.substring(0, y); + var text1b = text1.substring(x); + var text2b = text2.substring(y); + + // Compute both diffs serially. + var diffs = this.diff_main(text1a, text2a, false, deadline); + var diffsb = this.diff_main(text1b, text2b, false, deadline); + + return diffs.concat(diffsb); +}; + + +/** + * Split two texts into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {{chars1: string, chars2: string, lineArray: !Array.}} + * An object containing the encoded text1, the encoded text2 and + * the array of unique strings. + * The zeroth element of the array of unique strings is intentionally blank. + * @private + */ +diff_match_patch.prototype.diff_linesToChars_ = function(text1, text2) { + var lineArray = []; // e.g. lineArray[4] == 'Hello\n' + var lineHash = {}; // e.g. lineHash['Hello\n'] == 4 + + // '\x00' is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray[0] = ''; + + /** + * Split a text into an array of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * Modifies linearray and linehash through being a closure. + * @param {string} text String to encode. + * @return {string} Encoded string. + * @private + */ + function diff_linesToCharsMunge_(text) { + var chars = ''; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. 
+ var lineStart = 0; + var lineEnd = -1; + // Keeping our own length variable is faster than looking it up. + var lineArrayLength = lineArray.length; + while (lineEnd < text.length - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length - 1; + } + var line = text.substring(lineStart, lineEnd + 1); + lineStart = lineEnd + 1; + + if (lineHash.hasOwnProperty ? lineHash.hasOwnProperty(line) : + (lineHash[line] !== undefined)) { + chars += String.fromCharCode(lineHash[line]); + } else { + chars += String.fromCharCode(lineArrayLength); + lineHash[line] = lineArrayLength; + lineArray[lineArrayLength++] = line; + } + } + return chars; + } + + var chars1 = diff_linesToCharsMunge_(text1); + var chars2 = diff_linesToCharsMunge_(text2); + return {chars1: chars1, chars2: chars2, lineArray: lineArray}; +}; + + +/** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param {!Array.} diffs Array of diff tuples. + * @param {!Array.} lineArray Array of unique strings. + * @private + */ +diff_match_patch.prototype.diff_charsToLines_ = function(diffs, lineArray) { + for (var x = 0; x < diffs.length; x++) { + var chars = diffs[x][1]; + var text = []; + for (var y = 0; y < chars.length; y++) { + text[y] = lineArray[chars.charCodeAt(y)]; + } + diffs[x][1] = text.join(''); + } +}; + + +/** + * Determine the common prefix of two strings. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the start of each + * string. + */ +diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) { + // Quick check for common null cases. + if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) { + return 0; + } + // Binary search. 
+ // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + var pointermin = 0; + var pointermax = Math.min(text1.length, text2.length); + var pointermid = pointermax; + var pointerstart = 0; + while (pointermin < pointermid) { + if (text1.substring(pointerstart, pointermid) == + text2.substring(pointerstart, pointermid)) { + pointermin = pointermid; + pointerstart = pointermin; + } else { + pointermax = pointermid; + } + pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); + } + return pointermid; +}; + + +/** + * Determine the common suffix of two strings. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the end of each string. + */ +diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) { + // Quick check for common null cases. + if (!text1 || !text2 || + text1.charAt(text1.length - 1) != text2.charAt(text2.length - 1)) { + return 0; + } + // Binary search. + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + var pointermin = 0; + var pointermax = Math.min(text1.length, text2.length); + var pointermid = pointermax; + var pointerend = 0; + while (pointermin < pointermid) { + if (text1.substring(text1.length - pointermid, text1.length - pointerend) == + text2.substring(text2.length - pointermid, text2.length - pointerend)) { + pointermin = pointermid; + pointerend = pointermin; + } else { + pointermax = pointermid; + } + pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); + } + return pointermid; +}; + + +/** + * Determine if the suffix of one string is the prefix of another. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {number} The number of characters common to the end of the first + * string and the start of the second string. 
+ * @private + */ +diff_match_patch.prototype.diff_commonOverlap_ = function(text1, text2) { + // Cache the text lengths to prevent multiple calls. + var text1_length = text1.length; + var text2_length = text2.length; + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + if (text1_length > text2_length) { + text1 = text1.substring(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2 = text2.substring(0, text1_length); + } + var text_length = Math.min(text1_length, text2_length); + // Quick check for the worst case. + if (text1 == text2) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + var best = 0; + var length = 1; + while (true) { + var pattern = text1.substring(text_length - length); + var found = text2.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1.substring(text_length - length) == + text2.substring(0, length)) { + best = length; + length++; + } + } +}; + + +/** + * Do the two texts share a substring which is at least half the length of the + * longer text? + * This speedup can produce non-minimal diffs. + * @param {string} text1 First string. + * @param {string} text2 Second string. + * @return {Array.} Five element Array, containing the prefix of + * text1, the suffix of text1, the prefix of text2, the suffix of + * text2 and the common middle. Or null if there was no match. + * @private + */ +diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) { + if (this.Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return null; + } + var longtext = text1.length > text2.length ? text1 : text2; + var shorttext = text1.length > text2.length ? 
text2 : text1; + if (longtext.length < 4 || shorttext.length * 2 < longtext.length) { + return null; // Pointless. + } + var dmp = this; // 'this' becomes 'window' in a closure. + + /** + * Does a substring of shorttext exist within longtext such that the substring + * is at least half the length of longtext? + * Closure, but does not reference any external variables. + * @param {string} longtext Longer string. + * @param {string} shorttext Shorter string. + * @param {number} i Start index of quarter length substring within longtext. + * @return {Array.} Five element Array, containing the prefix of + * longtext, the suffix of longtext, the prefix of shorttext, the suffix + * of shorttext and the common middle. Or null if there was no match. + * @private + */ + function diff_halfMatchI_(longtext, shorttext, i) { + // Start with a 1/4 length substring at position i as a seed. + var seed = longtext.substring(i, i + Math.floor(longtext.length / 4)); + var j = -1; + var best_common = ''; + var best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + var prefixLength = dmp.diff_commonPrefix(longtext.substring(i), + shorttext.substring(j)); + var suffixLength = dmp.diff_commonSuffix(longtext.substring(0, i), + shorttext.substring(0, j)); + if (best_common.length < suffixLength + prefixLength) { + best_common = shorttext.substring(j - suffixLength, j) + + shorttext.substring(j, j + prefixLength); + best_longtext_a = longtext.substring(0, i - suffixLength); + best_longtext_b = longtext.substring(i + prefixLength); + best_shorttext_a = shorttext.substring(0, j - suffixLength); + best_shorttext_b = shorttext.substring(j + prefixLength); + } + } + if (best_common.length * 2 >= longtext.length) { + return [best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common]; + } else { + return null; + } + } + + // First check if the second quarter is the seed for a half-match. 
  var hm1 = diff_halfMatchI_(longtext, shorttext,
                             Math.ceil(longtext.length / 4));
  // Check again based on the third quarter.
  var hm2 = diff_halfMatchI_(longtext, shorttext,
                             Math.ceil(longtext.length / 2));
  var hm;
  if (!hm1 && !hm2) {
    return null;
  } else if (!hm2) {
    hm = hm1;
  } else if (!hm1) {
    hm = hm2;
  } else {
    // Both matched.  Select the longest.
    hm = hm1[4].length > hm2[4].length ? hm1 : hm2;
  }

  // A half-match was found, sort out the return data.
  // hm is ordered (longtext, shorttext); map it back to (text1, text2).
  var text1_a, text1_b, text2_a, text2_b;
  if (text1.length > text2.length) {
    text1_a = hm[0];
    text1_b = hm[1];
    text2_a = hm[2];
    text2_b = hm[3];
  } else {
    text2_a = hm[0];
    text2_b = hm[1];
    text1_a = hm[2];
    text1_b = hm[3];
  }
  var mid_common = hm[4];
  return [text1_a, text1_b, text2_a, text2_b, mid_common];
};


/**
 * Reduce the number of edits by eliminating semantically trivial equalities.
 * The array is modified in place; nothing is returned.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 */
diff_match_patch.prototype.diff_cleanupSemantic = function(diffs) {
  var changes = false;
  var equalities = [];  // Stack of indices where equalities are found.
  var equalitiesLength = 0;  // Keeping our own length var is faster in JS.
  /** @type {?string} */
  var lastequality = null;
  // Always equal to diffs[equalities[equalitiesLength - 1]][1]
  var pointer = 0;  // Index of current position.
  // Number of characters that changed prior to the equality.
  var length_insertions1 = 0;
  var length_deletions1 = 0;
  // Number of characters that changed after the equality.
  var length_insertions2 = 0;
  var length_deletions2 = 0;
  while (pointer < diffs.length) {
    if (diffs[pointer][0] == DIFF_EQUAL) {  // Equality found.
      equalities[equalitiesLength++] = pointer;
      // The edits counted "after" the previous equality are now "before"
      // this one.
      length_insertions1 = length_insertions2;
      length_deletions1 = length_deletions2;
      length_insertions2 = 0;
      length_deletions2 = 0;
      lastequality = diffs[pointer][1];
    } else {  // An insertion or deletion.
      if (diffs[pointer][0] == DIFF_INSERT) {
        length_insertions2 += diffs[pointer][1].length;
      } else {
        length_deletions2 += diffs[pointer][1].length;
      }
      // Eliminate an equality that is smaller or equal to the edits on both
      // sides of it.
      if (lastequality && (lastequality.length <=
          Math.max(length_insertions1, length_deletions1)) &&
          (lastequality.length <= Math.max(length_insertions2,
                                           length_deletions2))) {
        // Duplicate record.
        diffs.splice(equalities[equalitiesLength - 1], 0,
                     [DIFF_DELETE, lastequality]);
        // Change second copy to insert.
        diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT;
        // Throw away the equality we just deleted.
        equalitiesLength--;
        // Throw away the previous equality (it needs to be reevaluated).
        equalitiesLength--;
        // Rewind; -1 becomes 0 after the pointer++ at the bottom of the loop.
        pointer = equalitiesLength > 0 ? equalities[equalitiesLength - 1] : -1;
        length_insertions1 = 0;  // Reset the counters.
        length_deletions1 = 0;
        length_insertions2 = 0;
        length_deletions2 = 0;
        lastequality = null;
        changes = true;
      }
    }
    pointer++;
  }

  // Normalize the diff.
  if (changes) {
    this.diff_cleanupMerge(diffs);
  }
  this.diff_cleanupSemanticLossless(diffs);

  // Find any overlaps between deletions and insertions.
  // e.g: <del>abcxxx</del><ins>xxxdef</ins>
  //   -> <del>abc</del>xxx<ins>def</ins>
  // e.g: <del>xxxabc</del><ins>defxxx</ins>
  //   -> <ins>def</ins>xxx<del>abc</del>
  // Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 1;
  while (pointer < diffs.length) {
    if (diffs[pointer - 1][0] == DIFF_DELETE &&
        diffs[pointer][0] == DIFF_INSERT) {
      var deletion = diffs[pointer - 1][1];
      var insertion = diffs[pointer][1];
      var overlap_length1 = this.diff_commonOverlap_(deletion, insertion);
      var overlap_length2 = this.diff_commonOverlap_(insertion, deletion);
      if (overlap_length1 >= overlap_length2) {
        if (overlap_length1 >= deletion.length / 2 ||
            overlap_length1 >= insertion.length / 2) {
          // Overlap found.  Insert an equality and trim the surrounding edits.
          diffs.splice(pointer, 0,
              [DIFF_EQUAL, insertion.substring(0, overlap_length1)]);
          diffs[pointer - 1][1] =
              deletion.substring(0, deletion.length - overlap_length1);
          diffs[pointer + 1][1] = insertion.substring(overlap_length1);
          pointer++;
        }
      } else {
        if (overlap_length2 >= deletion.length / 2 ||
            overlap_length2 >= insertion.length / 2) {
          // Reverse overlap found.
          // Insert an equality and swap and trim the surrounding edits.
          diffs.splice(pointer, 0,
              [DIFF_EQUAL, deletion.substring(0, overlap_length2)]);
          diffs[pointer - 1][0] = DIFF_INSERT;
          diffs[pointer - 1][1] =
              insertion.substring(0, insertion.length - overlap_length2);
          diffs[pointer + 1][0] = DIFF_DELETE;
          diffs[pointer + 1][1] =
              deletion.substring(overlap_length2);
          pointer++;
        }
      }
      pointer++;
    }
    pointer++;
  }
};


/**
 * Look for single edits surrounded on both sides by equalities
 * which can be shifted sideways to align the edit to a word boundary.
 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
 * The array is modified in place; nothing is returned.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 */
diff_match_patch.prototype.diff_cleanupSemanticLossless = function(diffs) {
  /**
   * Given two strings, compute a score representing whether the internal
   * boundary falls on logical boundaries.
   * Scores range from 6 (best) to 0 (worst).
   * Closure, but does not reference any external variables.
   * @param {string} one First string.
   * @param {string} two Second string.
   * @return {number} The score.
   * @private
   */
  function diff_cleanupSemanticScore_(one, two) {
    if (!one || !two) {
      // Edges are the best.
      return 6;
    }

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'.  Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    var char1 = one.charAt(one.length - 1);
    var char2 = two.charAt(0);
    // Each test below is only attempted when the broader one before it
    // matched, so the flags form a chain from general to specific.
    var nonAlphaNumeric1 = char1.match(diff_match_patch.nonAlphaNumericRegex_);
    var nonAlphaNumeric2 = char2.match(diff_match_patch.nonAlphaNumericRegex_);
    var whitespace1 = nonAlphaNumeric1 &&
        char1.match(diff_match_patch.whitespaceRegex_);
    var whitespace2 = nonAlphaNumeric2 &&
        char2.match(diff_match_patch.whitespaceRegex_);
    var lineBreak1 = whitespace1 &&
        char1.match(diff_match_patch.linebreakRegex_);
    var lineBreak2 = whitespace2 &&
        char2.match(diff_match_patch.linebreakRegex_);
    var blankLine1 = lineBreak1 &&
        one.match(diff_match_patch.blanklineEndRegex_);
    var blankLine2 = lineBreak2 &&
        two.match(diff_match_patch.blanklineStartRegex_);

    if (blankLine1 || blankLine2) {
      // Five points for blank lines.
      return 5;
    } else if (lineBreak1 || lineBreak2) {
      // Four points for line breaks.
      return 4;
    } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
      // Three points for end of sentences.
      return 3;
    } else if (whitespace1 || whitespace2) {
      // Two points for whitespace.
      return 2;
    } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
      // One point for non-alphanumeric.
      return 1;
    }
    return 0;
  }

  var pointer = 1;
  // Intentionally ignore the first and last element (don't need checking).
  while (pointer < diffs.length - 1) {
    if (diffs[pointer - 1][0] == DIFF_EQUAL &&
        diffs[pointer + 1][0] == DIFF_EQUAL) {
      // This is a single edit surrounded by equalities.
      var equality1 = diffs[pointer - 1][1];
      var edit = diffs[pointer][1];
      var equality2 = diffs[pointer + 1][1];

      // First, shift the edit as far left as possible.
      var commonOffset = this.diff_commonSuffix(equality1, edit);
      if (commonOffset) {
        var commonString = edit.substring(edit.length - commonOffset);
        equality1 = equality1.substring(0, equality1.length - commonOffset);
        edit = commonString + edit.substring(0, edit.length - commonOffset);
        equality2 = commonString + equality2;
      }

      // Second, step character by character right, looking for the best fit.
      var bestEquality1 = equality1;
      var bestEdit = edit;
      var bestEquality2 = equality2;
      var bestScore = diff_cleanupSemanticScore_(equality1, edit) +
          diff_cleanupSemanticScore_(edit, equality2);
      while (edit.charAt(0) === equality2.charAt(0)) {
        equality1 += edit.charAt(0);
        edit = edit.substring(1) + equality2.charAt(0);
        equality2 = equality2.substring(1);
        var score = diff_cleanupSemanticScore_(equality1, edit) +
            diff_cleanupSemanticScore_(edit, equality2);
        // The >= encourages trailing rather than leading whitespace on edits.
        if (score >= bestScore) {
          bestScore = score;
          bestEquality1 = equality1;
          bestEdit = edit;
          bestEquality2 = equality2;
        }
      }

      if (diffs[pointer - 1][1] != bestEquality1) {
        // We have an improvement, save it back to the diff.
        if (bestEquality1) {
          diffs[pointer - 1][1] = bestEquality1;
        } else {
          // The leading equality was consumed entirely; drop its tuple.
          diffs.splice(pointer - 1, 1);
          pointer--;
        }
        diffs[pointer][1] = bestEdit;
        if (bestEquality2) {
          diffs[pointer + 1][1] = bestEquality2;
        } else {
          // The trailing equality was consumed entirely; drop its tuple.
          diffs.splice(pointer + 1, 1);
          pointer--;
        }
      }
    }
    pointer++;
  }
};

// Define some regex patterns for matching boundaries.
diff_match_patch.nonAlphaNumericRegex_ = /[^a-zA-Z0-9]/;
diff_match_patch.whitespaceRegex_ = /\s/;
diff_match_patch.linebreakRegex_ = /[\r\n]/;
diff_match_patch.blanklineEndRegex_ = /\n\r?\n$/;
diff_match_patch.blanklineStartRegex_ = /^\r?\n\r?\n/;

/**
 * Reduce the number of edits by eliminating operationally trivial equalities.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
+ */ +diff_match_patch.prototype.diff_cleanupEfficiency = function(diffs) { + var changes = false; + var equalities = []; // Stack of indices where equalities are found. + var equalitiesLength = 0; // Keeping our own length var is faster in JS. + /** @type {?string} */ + var lastequality = null; + // Always equal to diffs[equalities[equalitiesLength - 1]][1] + var pointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + var pre_ins = false; + // Is there a deletion operation before the last equality. + var pre_del = false; + // Is there an insertion operation after the last equality. + var post_ins = false; + // Is there a deletion operation after the last equality. + var post_del = false; + while (pointer < diffs.length) { + if (diffs[pointer][0] == DIFF_EQUAL) { // Equality found. + if (diffs[pointer][1].length < this.Diff_EditCost && + (post_ins || post_del)) { + // Candidate found. + equalities[equalitiesLength++] = pointer; + pre_ins = post_ins; + pre_del = post_del; + lastequality = diffs[pointer][1]; + } else { + // Not a candidate, and can never become one. + equalitiesLength = 0; + lastequality = null; + } + post_ins = post_del = false; + } else { // An insertion or deletion. + if (diffs[pointer][0] == DIFF_DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if (lastequality && ((pre_ins && pre_del && post_ins && post_del) || + ((lastequality.length < this.Diff_EditCost / 2) && + (pre_ins + pre_del + post_ins + post_del) == 3))) { + // Duplicate record. + diffs.splice(equalities[equalitiesLength - 1], 0, + [DIFF_DELETE, lastequality]); + // Change second copy to insert. 
+ diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT; + equalitiesLength--; // Throw away the equality we just deleted; + lastequality = null; + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalitiesLength = 0; + } else { + equalitiesLength--; // Throw away the previous equality. + pointer = equalitiesLength > 0 ? + equalities[equalitiesLength - 1] : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if (changes) { + this.diff_cleanupMerge(diffs); + } +}; + + +/** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param {!Array.} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupMerge = function(diffs) { + diffs.push([DIFF_EQUAL, '']); // Add a dummy entry at the end. + var pointer = 0; + var count_delete = 0; + var count_insert = 0; + var text_delete = ''; + var text_insert = ''; + var commonlength; + while (pointer < diffs.length) { + switch (diffs[pointer][0]) { + case DIFF_INSERT: + count_insert++; + text_insert += diffs[pointer][1]; + pointer++; + break; + case DIFF_DELETE: + count_delete++; + text_delete += diffs[pointer][1]; + pointer++; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete + count_insert > 1) { + if (count_delete !== 0 && count_insert !== 0) { + // Factor out any common prefixies. 
+ commonlength = this.diff_commonPrefix(text_insert, text_delete); + if (commonlength !== 0) { + if ((pointer - count_delete - count_insert) > 0 && + diffs[pointer - count_delete - count_insert - 1][0] == + DIFF_EQUAL) { + diffs[pointer - count_delete - count_insert - 1][1] += + text_insert.substring(0, commonlength); + } else { + diffs.splice(0, 0, [DIFF_EQUAL, + text_insert.substring(0, commonlength)]); + pointer++; + } + text_insert = text_insert.substring(commonlength); + text_delete = text_delete.substring(commonlength); + } + // Factor out any common suffixies. + commonlength = this.diff_commonSuffix(text_insert, text_delete); + if (commonlength !== 0) { + diffs[pointer][1] = text_insert.substring(text_insert.length - + commonlength) + diffs[pointer][1]; + text_insert = text_insert.substring(0, text_insert.length - + commonlength); + text_delete = text_delete.substring(0, text_delete.length - + commonlength); + } + } + // Delete the offending records and add the merged ones. + if (count_delete === 0) { + diffs.splice(pointer - count_insert, + count_delete + count_insert, [DIFF_INSERT, text_insert]); + } else if (count_insert === 0) { + diffs.splice(pointer - count_delete, + count_delete + count_insert, [DIFF_DELETE, text_delete]); + } else { + diffs.splice(pointer - count_delete - count_insert, + count_delete + count_insert, [DIFF_DELETE, text_delete], + [DIFF_INSERT, text_insert]); + } + pointer = pointer - count_delete - count_insert + + (count_delete ? 1 : 0) + (count_insert ? 1 : 0) + 1; + } else if (pointer !== 0 && diffs[pointer - 1][0] == DIFF_EQUAL) { + // Merge this equality with the previous one. + diffs[pointer - 1][1] += diffs[pointer][1]; + diffs.splice(pointer, 1); + } else { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete = ''; + text_insert = ''; + break; + } + } + if (diffs[diffs.length - 1][1] === '') { + diffs.pop(); // Remove the dummy entry at the end. 
+ } + + // Second pass: look for single edits surrounded on both sides by equalities + // which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + var changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diffs.length - 1) { + if (diffs[pointer - 1][0] == DIFF_EQUAL && + diffs[pointer + 1][0] == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + if (diffs[pointer][1].substring(diffs[pointer][1].length - + diffs[pointer - 1][1].length) == diffs[pointer - 1][1]) { + // Shift the edit over the previous equality. + diffs[pointer][1] = diffs[pointer - 1][1] + + diffs[pointer][1].substring(0, diffs[pointer][1].length - + diffs[pointer - 1][1].length); + diffs[pointer + 1][1] = diffs[pointer - 1][1] + diffs[pointer + 1][1]; + diffs.splice(pointer - 1, 1); + changes = true; + } else if (diffs[pointer][1].substring(0, diffs[pointer + 1][1].length) == + diffs[pointer + 1][1]) { + // Shift the edit over the next equality. + diffs[pointer - 1][1] += diffs[pointer + 1][1]; + diffs[pointer][1] = + diffs[pointer][1].substring(diffs[pointer + 1][1].length) + + diffs[pointer + 1][1]; + diffs.splice(pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + this.diff_cleanupMerge(diffs); + } +}; + + +/** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. 'The cat' vs 'The big cat', 1->1, 5->8 + * @param {!Array.} diffs Array of diff tuples. + * @param {number} loc Location within text1. + * @return {number} Location within text2. + */ +diff_match_patch.prototype.diff_xIndex = function(diffs, loc) { + var chars1 = 0; + var chars2 = 0; + var last_chars1 = 0; + var last_chars2 = 0; + var x; + for (x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_INSERT) { // Equality or deletion. 
+ chars1 += diffs[x][1].length; + } + if (diffs[x][0] !== DIFF_DELETE) { // Equality or insertion. + chars2 += diffs[x][1].length; + } + if (chars1 > loc) { // Overshot the location. + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + // Was the location was deleted? + if (diffs.length != x && diffs[x][0] === DIFF_DELETE) { + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +}; + + +/** + * Convert a diff array into a pretty HTML report. + * @param {!Array.} diffs Array of diff tuples. + * @return {string} HTML representation. + */ +diff_match_patch.prototype.diff_prettyHtml = function(diffs) { + var html = []; + var pattern_amp = /&/g; + var pattern_lt = //g; + var pattern_para = /\n/g; + for (var x = 0; x < diffs.length; x++) { + var op = diffs[x][0]; // Operation (insert, delete, equal) + var data = diffs[x][1]; // Text of change. + var text = data.replace(pattern_amp, '&').replace(pattern_lt, '<') + .replace(pattern_gt, '>').replace(pattern_para, '¶
    '); + switch (op) { + case DIFF_INSERT: + html[x] = '' + text + ''; + break; + case DIFF_DELETE: + html[x] = '' + text + ''; + break; + case DIFF_EQUAL: + html[x] = '' + text + ''; + break; + } + } + return html.join(''); +}; + + +/** + * Compute and return the source text (all equalities and deletions). + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Source text. + */ +diff_match_patch.prototype.diff_text1 = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_INSERT) { + text[x] = diffs[x][1]; + } + } + return text.join(''); +}; + + +/** + * Compute and return the destination text (all equalities and insertions). + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Destination text. + */ +diff_match_patch.prototype.diff_text2 = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + if (diffs[x][0] !== DIFF_DELETE) { + text[x] = diffs[x][1]; + } + } + return text.join(''); +}; + + +/** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param {!Array.} diffs Array of diff tuples. + * @return {number} Number of changes. + */ +diff_match_patch.prototype.diff_levenshtein = function(diffs) { + var levenshtein = 0; + var insertions = 0; + var deletions = 0; + for (var x = 0; x < diffs.length; x++) { + var op = diffs[x][0]; + var data = diffs[x][1]; + switch (op) { + case DIFF_INSERT: + insertions += data.length; + break; + case DIFF_DELETE: + deletions += data.length; + break; + case DIFF_EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += Math.max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += Math.max(insertions, deletions); + return levenshtein; +}; + + +/** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. 
=3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param {!Array.} diffs Array of diff tuples. + * @return {string} Delta text. + */ +diff_match_patch.prototype.diff_toDelta = function(diffs) { + var text = []; + for (var x = 0; x < diffs.length; x++) { + switch (diffs[x][0]) { + case DIFF_INSERT: + text[x] = '+' + encodeURI(diffs[x][1]); + break; + case DIFF_DELETE: + text[x] = '-' + diffs[x][1].length; + break; + case DIFF_EQUAL: + text[x] = '=' + diffs[x][1].length; + break; + } + } + return text.join('\t').replace(/%20/g, ' '); +}; + + +/** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param {string} text1 Source string for the diff. + * @param {string} delta Delta text. + * @return {!Array.} Array of diff tuples. + * @throws {!Error} If invalid input. + */ +diff_match_patch.prototype.diff_fromDelta = function(text1, delta) { + var diffs = []; + var diffsLength = 0; // Keeping our own length var is faster in JS. + var pointer = 0; // Cursor in text1 + var tokens = delta.split(/\t/g); + for (var x = 0; x < tokens.length; x++) { + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + var param = tokens[x].substring(1); + switch (tokens[x].charAt(0)) { + case '+': + try { + diffs[diffsLength++] = [DIFF_INSERT, decodeURI(param)]; + } catch (ex) { + // Malformed URI sequence. + throw new Error('Illegal escape in diff_fromDelta: ' + param); + } + break; + case '-': + // Fall through. 
+ case '=': + var n = parseInt(param, 10); + if (isNaN(n) || n < 0) { + throw new Error('Invalid number in diff_fromDelta: ' + param); + } + var text = text1.substring(pointer, pointer += n); + if (tokens[x].charAt(0) == '=') { + diffs[diffsLength++] = [DIFF_EQUAL, text]; + } else { + diffs[diffsLength++] = [DIFF_DELETE, text]; + } + break; + default: + // Blank tokens are ok (from a trailing \t). + // Anything else is an error. + if (tokens[x]) { + throw new Error('Invalid diff operation in diff_fromDelta: ' + + tokens[x]); + } + } + } + if (pointer != text1.length) { + throw new Error('Delta length (' + pointer + + ') does not equal source text length (' + text1.length + ').'); + } + return diffs; +}; + + +// MATCH FUNCTIONS + + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * @param {string} text The text to search. + * @param {string} pattern The pattern to search for. + * @param {number} loc The location to search around. + * @return {number} Best match index or -1. + */ +diff_match_patch.prototype.match_main = function(text, pattern, loc) { + // Check for null inputs. + if (text == null || pattern == null || loc == null) { + throw new Error('Null input. (match_main)'); + } + + loc = Math.max(0, Math.min(loc, text.length)); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (!text.length) { + // Nothing to match. + return -1; + } else if (text.substring(loc, loc + pattern.length) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return this.match_bitap_(text, pattern, loc); + } +}; + + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. + * @param {string} text The text to search. + * @param {string} pattern The pattern to search for. + * @param {number} loc The location to search around. + * @return {number} Best match index or -1. 
 * @throws {!Error} If the pattern is longer than Match_MaxBits.
 * @private
 */
diff_match_patch.prototype.match_bitap_ = function(text, pattern, loc) {
  if (pattern.length > this.Match_MaxBits) {
    throw new Error('Pattern too long for this browser.');
  }

  // Initialise the alphabet.
  var s = this.match_alphabet_(pattern);

  var dmp = this;  // 'this' becomes 'window' in a closure.

  /**
   * Compute and return the score for a match with e errors and x location.
   * Accesses loc and pattern through being a closure.
   * @param {number} e Number of errors in match.
   * @param {number} x Location of match.
   * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
   * @private
   */
  function match_bitapScore_(e, x) {
    var accuracy = e / pattern.length;
    var proximity = Math.abs(loc - x);
    if (!dmp.Match_Distance) {
      // Dodge divide by zero error.
      return proximity ? 1.0 : accuracy;
    }
    return accuracy + (proximity / dmp.Match_Distance);
  }

  // Highest score beyond which we give up.
  var score_threshold = this.Match_Threshold;
  // Is there a nearby exact match? (speedup)
  var best_loc = text.indexOf(pattern, loc);
  if (best_loc != -1) {
    score_threshold = Math.min(match_bitapScore_(0, best_loc), score_threshold);
    // What about in the other direction? (speedup)
    best_loc = text.lastIndexOf(pattern, loc + pattern.length);
    if (best_loc != -1) {
      score_threshold =
          Math.min(match_bitapScore_(0, best_loc), score_threshold);
    }
  }

  // Initialise the bit arrays.
  // matchmask selects the bit that, when set in rd[j], signals a match.
  var matchmask = 1 << (pattern.length - 1);
  best_loc = -1;

  var bin_min, bin_mid;
  var bin_max = pattern.length + text.length;
  var last_rd;
  for (var d = 0; d < pattern.length; d++) {
    // Scan for the best match; each iteration allows for one more error.
    // Run a binary search to determine how far from 'loc' we can stray at this
    // error level.
    bin_min = 0;
    bin_mid = bin_max;
    while (bin_min < bin_mid) {
      if (match_bitapScore_(d, loc + bin_mid) <= score_threshold) {
        bin_min = bin_mid;
      } else {
        bin_max = bin_mid;
      }
      bin_mid = Math.floor((bin_max - bin_min) / 2 + bin_min);
    }
    // Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid;
    var start = Math.max(1, loc - bin_mid + 1);
    var finish = Math.min(loc + bin_mid, text.length) + pattern.length;

    var rd = Array(finish + 2);
    rd[finish + 1] = (1 << d) - 1;
    // Walk the window from right to left.
    for (var j = finish; j >= start; j--) {
      // The alphabet (s) is a sparse hash, so the following line generates
      // warnings.
      var charMatch = s[text.charAt(j - 1)];
      if (d === 0) {  // First pass: exact match.
        rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
      } else {  // Subsequent passes: fuzzy match.
        rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) |
                (((last_rd[j + 1] | last_rd[j]) << 1) | 1) |
                last_rd[j + 1];
      }
      if (rd[j] & matchmask) {
        var score = match_bitapScore_(d, j - 1);
        // This match will almost certainly be better than any existing match.
        // But check anyway.
        if (score <= score_threshold) {
          // Told you so.
          score_threshold = score;
          best_loc = j - 1;
          if (best_loc > loc) {
            // When passing loc, don't exceed our current distance from loc.
            start = Math.max(1, 2 * loc - best_loc);
          } else {
            // Already passed loc, downhill from here on in.
            break;
          }
        }
      }
    }
    // No hope for a (better) match at greater error levels.
    if (match_bitapScore_(d + 1, loc) > score_threshold) {
      break;
    }
    last_rd = rd;
  }
  return best_loc;
};


/**
 * Initialise the alphabet for the Bitap algorithm.
 * @param {string} pattern The text to encode.
 * @return {!Object} Hash of character locations.
+ * @private + */ +diff_match_patch.prototype.match_alphabet_ = function(pattern) { + var s = {}; + for (var i = 0; i < pattern.length; i++) { + s[pattern.charAt(i)] = 0; + } + for (var i = 0; i < pattern.length; i++) { + s[pattern.charAt(i)] |= 1 << (pattern.length - i - 1); + } + return s; +}; + + +// PATCH FUNCTIONS + + +/** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param {!diff_match_patch.patch_obj} patch The patch to grow. + * @param {string} text Source text. + * @private + */ +diff_match_patch.prototype.patch_addContext_ = function(patch, text) { + if (text.length == 0) { + return; + } + var pattern = text.substring(patch.start2, patch.start2 + patch.length1); + var padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while (text.indexOf(pattern) != text.lastIndexOf(pattern) && + pattern.length < this.Match_MaxBits - this.Patch_Margin - + this.Patch_Margin) { + padding += this.Patch_Margin; + pattern = text.substring(patch.start2 - padding, + patch.start2 + patch.length1 + padding); + } + // Add one chunk for good luck. + padding += this.Patch_Margin; + + // Add the prefix. + var prefix = text.substring(patch.start2 - padding, patch.start2); + if (prefix) { + patch.diffs.unshift([DIFF_EQUAL, prefix]); + } + // Add the suffix. + var suffix = text.substring(patch.start2 + patch.length1, + patch.start2 + patch.length1 + padding); + if (suffix) { + patch.diffs.push([DIFF_EQUAL, suffix]); + } + + // Roll back the start points. + patch.start1 -= prefix.length; + patch.start2 -= prefix.length; + // Extend the lengths. + patch.length1 += prefix.length + suffix.length; + patch.length2 += prefix.length + suffix.length; +}; + + +/** + * Compute a list of patches to turn text1 into text2. + * Use diffs if provided, otherwise compute it ourselves. 
+ * There are four ways to call this function, depending on what data is + * available to the caller: + * Method 1: + * a = text1, b = text2 + * Method 2: + * a = diffs + * Method 3 (optimal): + * a = text1, b = diffs + * Method 4 (deprecated, use method 3): + * a = text1, b = text2, c = diffs + * + * @param {string|!Array.} a text1 (methods 1,3,4) or + * Array of diff tuples for text1 to text2 (method 2). + * @param {string|!Array.} opt_b text2 (methods 1,4) or + * Array of diff tuples for text1 to text2 (method 3) or undefined (method 2). + * @param {string|!Array.} opt_c Array of diff tuples + * for text1 to text2 (method 4) or undefined (methods 1,2,3). + * @return {!Array.} Array of Patch objects. + */ +diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) { + var text1, diffs; + if (typeof a == 'string' && typeof opt_b == 'string' && + typeof opt_c == 'undefined') { + // Method 1: text1, text2 + // Compute diffs from text1 and text2. + text1 = /** @type {string} */(a); + diffs = this.diff_main(text1, /** @type {string} */(opt_b), true); + if (diffs.length > 2) { + this.diff_cleanupSemantic(diffs); + this.diff_cleanupEfficiency(diffs); + } + } else if (a && typeof a == 'object' && typeof opt_b == 'undefined' && + typeof opt_c == 'undefined') { + // Method 2: diffs + // Compute text1 from diffs. + diffs = /** @type {!Array.} */(a); + text1 = this.diff_text1(diffs); + } else if (typeof a == 'string' && opt_b && typeof opt_b == 'object' && + typeof opt_c == 'undefined') { + // Method 3: text1, diffs + text1 = /** @type {string} */(a); + diffs = /** @type {!Array.} */(opt_b); + } else if (typeof a == 'string' && typeof opt_b == 'string' && + opt_c && typeof opt_c == 'object') { + // Method 4: text1, text2, diffs + // text2 is not used. + text1 = /** @type {string} */(a); + diffs = /** @type {!Array.} */(opt_c); + } else { + throw new Error('Unknown call format to patch_make.'); + } + + if (diffs.length === 0) { + return []; // Get rid of the null case. 
+ } + var patches = []; + var patch = new diff_match_patch.patch_obj(); + var patchDiffLength = 0; // Keeping our own length var is faster in JS. + var char_count1 = 0; // Number of characters into the text1 string. + var char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + var prepatch_text = text1; + var postpatch_text = text1; + for (var x = 0; x < diffs.length; x++) { + var diff_type = diffs[x][0]; + var diff_text = diffs[x][1]; + + if (!patchDiffLength && diff_type !== DIFF_EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (diff_type) { + case DIFF_INSERT: + patch.diffs[patchDiffLength++] = diffs[x]; + patch.length2 += diff_text.length; + postpatch_text = postpatch_text.substring(0, char_count2) + diff_text + + postpatch_text.substring(char_count2); + break; + case DIFF_DELETE: + patch.length1 += diff_text.length; + patch.diffs[patchDiffLength++] = diffs[x]; + postpatch_text = postpatch_text.substring(0, char_count2) + + postpatch_text.substring(char_count2 + + diff_text.length); + break; + case DIFF_EQUAL: + if (diff_text.length <= 2 * this.Patch_Margin && + patchDiffLength && diffs.length != x + 1) { + // Small equality inside a patch. + patch.diffs[patchDiffLength++] = diffs[x]; + patch.length1 += diff_text.length; + patch.length2 += diff_text.length; + } else if (diff_text.length >= 2 * this.Patch_Margin) { + // Time for a new patch. + if (patchDiffLength) { + this.patch_addContext_(patch, prepatch_text); + patches.push(patch); + patch = new diff_match_patch.patch_obj(); + patchDiffLength = 0; + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. 
+ prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (diff_type !== DIFF_INSERT) { + char_count1 += diff_text.length; + } + if (diff_type !== DIFF_DELETE) { + char_count2 += diff_text.length; + } + } + // Pick up the leftover patch if not empty. + if (patchDiffLength) { + this.patch_addContext_(patch, prepatch_text); + patches.push(patch); + } + + return patches; +}; + + +/** + * Given an array of patches, return another array that is identical. + * @param {!Array.} patches Array of Patch objects. + * @return {!Array.} Array of Patch objects. + */ +diff_match_patch.prototype.patch_deepCopy = function(patches) { + // Making deep copies is hard in JavaScript. + var patchesCopy = []; + for (var x = 0; x < patches.length; x++) { + var patch = patches[x]; + var patchCopy = new diff_match_patch.patch_obj(); + patchCopy.diffs = []; + for (var y = 0; y < patch.diffs.length; y++) { + patchCopy.diffs[y] = patch.diffs[y].slice(); + } + patchCopy.start1 = patch.start1; + patchCopy.start2 = patch.start2; + patchCopy.length1 = patch.length1; + patchCopy.length2 = patch.length2; + patchesCopy[x] = patchCopy; + } + return patchesCopy; +}; + + +/** + * Merge a set of patches onto the text. Return a patched text, as well + * as a list of true/false values indicating which patches were applied. + * @param {!Array.} patches Array of Patch objects. + * @param {string} text Old text. + * @return {!Array.>} Two element Array, containing the + * new text and an array of boolean values. + */ +diff_match_patch.prototype.patch_apply = function(patches, text) { + if (patches.length == 0) { + return [text, []]; + } + + // Deep copy the patches so that no changes are made to originals. 
+ patches = this.patch_deepCopy(patches); + + var nullPadding = this.patch_addPadding(patches); + text = nullPadding + text + nullPadding; + + this.patch_splitMax(patches); + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + var delta = 0; + var results = []; + for (var x = 0; x < patches.length; x++) { + var expected_loc = patches[x].start2 + delta; + var text1 = this.diff_text1(patches[x].diffs); + var start_loc; + var end_loc = -1; + if (text1.length > this.Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = this.match_main(text, text1.substring(0, this.Match_MaxBits), + expected_loc); + if (start_loc != -1) { + end_loc = this.match_main(text, + text1.substring(text1.length - this.Match_MaxBits), + expected_loc + text1.length - this.Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = this.match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= patches[x].length2 - patches[x].length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + var text2; + if (end_loc == -1) { + text2 = text.substring(start_loc, start_loc + text1.length); + } else { + text2 = text.substring(start_loc, end_loc + this.Match_MaxBits); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.substring(0, start_loc) + + this.diff_text2(patches[x].diffs) + + text.substring(start_loc + text1.length); + } else { + // Imperfect match. 
/**
 * Pad both ends of a patch list so that edits touching the very start or end
 * of the text still have something to match against.
 * Intended to be called only from within patch_apply.
 * The patches are modified in place: every start offset is shifted forward by
 * Patch_Margin, and the first and last patches gain (or have extended) an
 * equality made of padding characters.
 * @param {!Array.<!diff_match_patch.patch_obj>} patches Array of Patch objects.
 * @return {string} The padding string added to each side.
 */
diff_match_patch.prototype.patch_addPadding = function(patches) {
  var margin = this.Patch_Margin;
  var i;
  var shortfall;

  // The padding consists of the characters with codes 1..margin, in order.
  var nullPadding = '';
  for (i = 1; i <= margin; i++) {
    nullPadding += String.fromCharCode(i);
  }

  // Shift every patch forward by the padding length.
  for (i = 0; i < patches.length; i++) {
    patches[i].start1 += margin;
    patches[i].start2 += margin;
  }

  // Leading edge of the first patch.
  var first = patches[0];
  var firstDiffs = first.diffs;
  var startsWithEquality =
      firstDiffs.length != 0 && firstDiffs[0][0] == DIFF_EQUAL;
  if (!startsWithEquality) {
    // No leading equality: prepend the whole padding as one.
    firstDiffs.unshift([DIFF_EQUAL, nullPadding]);
    first.start1 -= margin;  // Should be 0.
    first.start2 -= margin;  // Should be 0.
    first.length1 += margin;
    first.length2 += margin;
  } else if (margin > firstDiffs[0][1].length) {
    // Leading equality is shorter than the margin: grow it on the left with
    // the tail end of the padding.
    shortfall = margin - firstDiffs[0][1].length;
    firstDiffs[0][1] =
        nullPadding.substring(firstDiffs[0][1].length) + firstDiffs[0][1];
    first.start1 -= shortfall;
    first.start2 -= shortfall;
    first.length1 += shortfall;
    first.length2 += shortfall;
  }

  // Trailing edge of the last patch (may be the same object as the first).
  var last = patches[patches.length - 1];
  var lastDiffs = last.diffs;
  var endsWithEquality = lastDiffs.length != 0 &&
      lastDiffs[lastDiffs.length - 1][0] == DIFF_EQUAL;
  if (!endsWithEquality) {
    // No trailing equality: append the whole padding as one.
    lastDiffs.push([DIFF_EQUAL, nullPadding]);
    last.length1 += margin;
    last.length2 += margin;
  } else {
    var tail = lastDiffs[lastDiffs.length - 1];
    if (margin > tail[1].length) {
      // Trailing equality is shorter than the margin: grow it on the right
      // with the front of the padding.
      shortfall = margin - tail[1].length;
      tail[1] += nullPadding.substring(0, shortfall);
      last.length1 += shortfall;
      last.length2 += shortfall;
    }
  }

  return nullPadding;
};
+ var patch = new diff_match_patch.patch_obj(); + var empty = true; + patch.start1 = start1 - precontext.length; + patch.start2 = start2 - precontext.length; + if (precontext !== '') { + patch.length1 = patch.length2 = precontext.length; + patch.diffs.push([DIFF_EQUAL, precontext]); + } + while (bigpatch.diffs.length !== 0 && + patch.length1 < patch_size - this.Patch_Margin) { + var diff_type = bigpatch.diffs[0][0]; + var diff_text = bigpatch.diffs[0][1]; + if (diff_type === DIFF_INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length; + start2 += diff_text.length; + patch.diffs.push(bigpatch.diffs.shift()); + empty = false; + } else if (diff_type === DIFF_DELETE && patch.diffs.length == 1 && + patch.diffs[0][0] == DIFF_EQUAL && + diff_text.length > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length; + start1 += diff_text.length; + empty = false; + patch.diffs.push([diff_type, diff_text]); + bigpatch.diffs.shift(); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substring(0, + patch_size - patch.length1 - this.Patch_Margin); + patch.length1 += diff_text.length; + start1 += diff_text.length; + if (diff_type === DIFF_EQUAL) { + patch.length2 += diff_text.length; + start2 += diff_text.length; + } else { + empty = false; + } + patch.diffs.push([diff_type, diff_text]); + if (diff_text == bigpatch.diffs[0][1]) { + bigpatch.diffs.shift(); + } else { + bigpatch.diffs[0][1] = + bigpatch.diffs[0][1].substring(diff_text.length); + } + } + } + // Compute the head context for the next patch. + precontext = this.diff_text2(patch.diffs); + precontext = + precontext.substring(precontext.length - this.Patch_Margin); + // Append the end context for this patch. 
/**
 * Serialize a list of patches to text by concatenating the string form of
 * each patch, in order, with no separator.
 * @param {!Array.<!diff_match_patch.patch_obj>} patches Array of Patch objects.
 * @return {string} Text representation of patches.
 */
diff_match_patch.prototype.patch_toText = function(patches) {
  var pieces = [];
  var count = patches.length;
  var i = 0;
  while (i < count) {
    // join() below performs the string conversion of each patch.
    pieces.push(patches[i]);
    i++;
  }
  return pieces.join('');
};
/**
 * Parse a textual representation of patches and return a list of Patch objects.
 *
 * The text is a sequence of patches, each introduced by a header line of the
 * form "@@ -start1,length1 +start2,length2 @@" (the ",length" part is
 * optional) and followed by body lines whose first character selects the
 * operation: '-' deletion, '+' insertion, ' ' equality. The rest of each body
 * line is URI-decoded. Empty lines are ignored.
 *
 * @param {string} textline Text representation of patches.
 * @return {!Array.<!diff_match_patch.patch_obj>} Array of Patch objects.
 * @throws {!Error} If invalid input.
 */
diff_match_patch.prototype.patch_fromText = function(textline) {
  var patches = [];
  if (!textline) {
    return patches;
  }
  var text = textline.split('\n');
  var textPointer = 0;
  var patchHeader = /^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/;
  while (textPointer < text.length) {
    var m = text[textPointer].match(patchHeader);
    if (!m) {
      throw new Error('Invalid patch string: ' + text[textPointer]);
    }
    var patch = new diff_match_patch.patch_obj();
    patches.push(patch);

    // Header start values are decremented to recover the stored offset,
    // except when the length is written as 0, where the value is kept as is.
    // A missing length means a length of 1.
    patch.start1 = parseInt(m[1], 10);
    if (m[2] === '') {
      patch.start1--;
      patch.length1 = 1;
    } else if (m[2] == '0') {
      patch.length1 = 0;
    } else {
      patch.start1--;
      patch.length1 = parseInt(m[2], 10);
    }

    patch.start2 = parseInt(m[3], 10);
    if (m[4] === '') {
      patch.start2--;
      patch.length2 = 1;
    } else if (m[4] == '0') {
      patch.length2 = 0;
    } else {
      patch.start2--;
      patch.length2 = parseInt(m[4], 10);
    }
    textPointer++;

    // Consume body lines until the next header or the end of input.
    while (textPointer < text.length) {
      var sign = text[textPointer].charAt(0);
      var encoded = text[textPointer].substring(1);
      var line;
      try {
        line = decodeURI(encoded);
      } catch (ex) {
        // Malformed URI sequence. Report the raw text: the decoded value was
        // never produced, so it cannot be used in the message.
        throw new Error('Illegal escape in patch_fromText: ' + encoded);
      }
      if (sign == '-') {
        // Deletion.
        patch.diffs.push([DIFF_DELETE, line]);
      } else if (sign == '+') {
        // Insertion.
        patch.diffs.push([DIFF_INSERT, line]);
      } else if (sign == ' ') {
        // Minor equality.
        patch.diffs.push([DIFF_EQUAL, line]);
      } else if (sign == '@') {
        // Start of next patch; leave textPointer on the header line.
        break;
      } else if (sign === '') {
        // Blank line: skip it.
      } else {
        // Unknown operation character.
        throw new Error('Invalid patch mode "' + sign + '" in: ' + line);
      }
      textPointer++;
    }
  }
  return patches;
};
/**
 * Class representing one patch operation.
 * @constructor
 */
diff_match_patch.patch_obj = function() {
  // List of [operation, text] diff tuples making up this patch.
  this.diffs = [];
  // Start offset of the patch in the first text (null until assigned).
  /** @type {?number} */
  this.start1 = null;
  // Start offset of the patch in the second text (null until assigned).
  /** @type {?number} */
  this.start2 = null;
  // Number of characters the patch covers in the first text.
  /** @type {number} */
  this.length1 = 0;
  // Number of characters the patch covers in the second text.
  /** @type {number} */
  this.length2 = 0;
};


/**
 * Emulate GNU diff's format.
 * Header: @@ -382,8 +481,9 @@
 * Indices are printed as 1-based, not 0-based.
 * The body lines are escaped with %xx notation, except that spaces are left
 * as literal spaces.
 * @return {string} The GNU diff string.
 */
diff_match_patch.patch_obj.prototype.toString = function() {
  // Render one side of the header: "start,0" for an empty range, a bare
  // 1-based start for a single character, otherwise "start,length".
  function formatCoords(start, length) {
    if (length === 0) {
      return start + ',0';
    }
    if (length == 1) {
      return start + 1;
    }
    return (start + 1) + ',' + length;
  }

  var lines = ['@@ -' + formatCoords(this.start1, this.length1) +
               ' +' + formatCoords(this.start2, this.length2) + ' @@\n'];

  // Operation prefix; deliberately declared outside the loop so that it is
  // only reassigned when the tuple's operation is a recognised one.
  var op;
  for (var i = 0; i < this.diffs.length; i++) {
    var kind = this.diffs[i][0];
    if (kind === DIFF_INSERT) {
      op = '+';
    } else if (kind === DIFF_DELETE) {
      op = '-';
    } else if (kind === DIFF_EQUAL) {
      op = ' ';
    }
    lines.push(op + encodeURI(this.diffs[i][1]) + '\n');
  }
  return lines.join('').replace(/%20/g, ' ');
};
+this['diff_match_patch'] = diff_match_patch; +this['DIFF_DELETE'] = DIFF_DELETE; +this['DIFF_INSERT'] = DIFF_INSERT; +this['DIFF_EQUAL'] = DIFF_EQUAL; diff --git a/lua/diff_match_patch.lua b/lua/diff_match_patch.lua new file mode 100644 index 0000000..ec56577 --- /dev/null +++ b/lua/diff_match_patch.lua @@ -0,0 +1,2196 @@ +--[[ +* Diff Match and Patch +* Copyright 2018 The diff-match-patch Authors. +* https://github.com/google/diff-match-patch +* +* Based on the JavaScript implementation by Neil Fraser. +* Ported to Lua by Duncan Cross. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +--]] + +--[[ +-- Lua 5.1 and earlier requires the external BitOp library. +-- This library is built-in from Lua 5.2 and later as 'bit32'. +require 'bit' -- +local band, bor, lshift + = bit.band, bit.bor, bit.lshift +--]] + +local band, bor, lshift + = bit32.band, bit32.bor, bit32.lshift +local type, setmetatable, ipairs, select + = type, setmetatable, ipairs, select +local unpack, tonumber, error + = unpack, tonumber, error +local strsub, strbyte, strchar, gmatch, gsub + = string.sub, string.byte, string.char, string.gmatch, string.gsub +local strmatch, strfind, strformat + = string.match, string.find, string.format +local tinsert, tremove, tconcat + = table.insert, table.remove, table.concat +local max, min, floor, ceil, abs + = math.max, math.min, math.floor, math.ceil, math.abs +local clock = os.clock + + +-- Utility functions. 
-- Matches any single character that is NOT in the listed set; such
-- characters are the ones handed to percentEncode_replace.
local percentEncode_pattern = '[^A-Za-z0-9%-=;\',./~!@#$%&*%(%)_%+ %?]'

-- Render the first byte of v as '%' followed by two uppercase hex digits.
local function percentEncode_replace(v)
  return strformat('%%%02X', strbyte(v))
end

-- In-place splice on an array-like table: remove `deletions` elements
-- starting at `idx`, then insert the extra arguments at `idx`, keeping
-- their original order.
local function tsplice(t, idx, deletions, ...)
  local extra_count = select('#', ...)
  local extras = {...}
  for _ = 1, deletions do
    tremove(t, idx)
  end
  -- Insert back to front so the first extra argument ends up at idx.
  for n = extra_count, 1, -1 do
    tinsert(t, idx, extras[n])
  end
end

-- Return the one-character substring of str at position i.
local function strelement(str, i)
  local element = strsub(str, i, i)
  return element
end

-- Plain-text (non-pattern) search for b inside a, beginning at `start`.
-- Returns nil for an empty needle; otherwise whatever string.find returns.
local function indexOf(a, b, start)
  if #b > 0 then
    return strfind(a, b, start, true)
  end
  return nil
end
    ' +} + +-- Public API Functions +-- (Exported at the end of the script) + +local diff_main, + diff_cleanupSemantic, + diff_cleanupEfficiency, + diff_levenshtein, + diff_prettyHtml + +local match_main + +local patch_make, + patch_toText, + patch_fromText, + patch_apply + +--[[ +* The data structure representing a diff is an array of tuples: +* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}} +* which means: delete 'Hello', add 'Goodbye' and keep ' world.' +--]] +local DIFF_DELETE = -1 +local DIFF_INSERT = 1 +local DIFF_EQUAL = 0 + +-- Number of seconds to map a diff before giving up (0 for infinity). +local Diff_Timeout = 1.0 +-- Cost of an empty edit operation in terms of edit characters. +local Diff_EditCost = 4 +-- At what point is no match declared (0.0 = perfection, 1.0 = very loose). +local Match_Threshold = 0.5 +-- How far to search for a match (0 = exact location, 1000+ = broad match). +-- A match this many characters away from the expected location will add +-- 1.0 to the score (0.0 is a perfect match). +local Match_Distance = 1000 +-- When deleting a large block of text (over ~64 characters), how close do +-- the contents have to be to match the expected contents. (0.0 = perfection, +-- 1.0 = very loose). Note that Match_Threshold controls how closely the +-- end points of a delete need to match. +local Patch_DeleteThreshold = 0.5 +-- Chunk size for context length. +local Patch_Margin = 4 +-- The number of bits in an int. 
--[[
* Find the differences between two texts.  Simplifies the problem by stripping
* any common prefix or suffix off the texts before diffing.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} opt_checklines Has no effect in Lua.
* @param {number} opt_deadline Optional time when the diff should be complete
*     by.  Used internally for recursive calls.  Users should set Diff_Timeout
*     instead.
* @return {Array.<Array.<number|string>>} Array of diff tuples.
* Raises an error if either text is nil.
--]]
function diff_main(text1, text2, opt_checklines, opt_deadline)
  -- Set a deadline by which time the diff must be complete.
  if opt_deadline == nil then
    if Diff_Timeout <= 0 then
      -- No timeout configured: use a deadline far in the future.
      opt_deadline = 2 ^ 31
    else
      opt_deadline = clock() + Diff_Timeout
    end
  end
  local deadline = opt_deadline

  -- Check for null inputs.  Both arguments must be tested; a nil text2 would
  -- otherwise slip through and fail later with an unrelated error.
  if text1 == nil or text2 == nil then
    error('Null inputs. (diff_main)')
  end

  -- Check for equality (speedup).
  if text1 == text2 then
    if #text1 > 0 then
      return {{DIFF_EQUAL, text1}}
    end
    return {}
  end

  -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of
  -- implementing the line-mode speedup.
  local checklines = false

  -- Trim off common prefix (speedup).
  local commonlength = _diff_commonPrefix(text1, text2)
  local commonprefix
  if commonlength > 0 then
    commonprefix = strsub(text1, 1, commonlength)
    text1 = strsub(text1, commonlength + 1)
    text2 = strsub(text2, commonlength + 1)
  end

  -- Trim off common suffix (speedup).
  commonlength = _diff_commonSuffix(text1, text2)
  local commonsuffix
  if commonlength > 0 then
    commonsuffix = strsub(text1, -commonlength)
    text1 = strsub(text1, 1, -commonlength - 1)
    text2 = strsub(text2, 1, -commonlength - 1)
  end

  -- Compute the diff on the middle block.
  local diffs = _diff_compute(text1, text2, checklines, deadline)

  -- Restore the prefix and suffix.
  if commonprefix then
    tinsert(diffs, 1, {DIFF_EQUAL, commonprefix})
  end
  if commonsuffix then
    diffs[#diffs + 1] = {DIFF_EQUAL, commonsuffix}
  end

  _diff_cleanupMerge(diffs)
  return diffs
end
+ -- Number of characters that changed prior to the equality. + local length_insertions1 = 0 + local length_deletions1 = 0 + -- Number of characters that changed after the equality. + local length_insertions2 = 0 + local length_deletions2 = 0 + + while diffs[pointer] do + if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. + equalitiesLength = equalitiesLength + 1 + equalities[equalitiesLength] = pointer + length_insertions1 = length_insertions2 + length_deletions1 = length_deletions2 + length_insertions2 = 0 + length_deletions2 = 0 + lastequality = diffs[pointer][2] + else -- An insertion or deletion. + if diffs[pointer][1] == DIFF_INSERT then + length_insertions2 = length_insertions2 + #(diffs[pointer][2]) + else + length_deletions2 = length_deletions2 + #(diffs[pointer][2]) + end + -- Eliminate an equality that is smaller or equal to the edits on both + -- sides of it. + if lastequality + and (#lastequality <= max(length_insertions1, length_deletions1)) + and (#lastequality <= max(length_insertions2, length_deletions2)) then + -- Duplicate record. + tinsert(diffs, equalities[equalitiesLength], + {DIFF_DELETE, lastequality}) + -- Change second copy to insert. + diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT + -- Throw away the equality we just deleted. + equalitiesLength = equalitiesLength - 1 + -- Throw away the previous equality (it needs to be reevaluated). + equalitiesLength = equalitiesLength - 1 + pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 + length_insertions1, length_deletions1 = 0, 0 -- Reset the counters. + length_insertions2, length_deletions2 = 0, 0 + lastequality = nil + changes = true + end + end + pointer = pointer + 1 + end + + -- Normalize the diff. + if changes then + _diff_cleanupMerge(diffs) + end + _diff_cleanupSemanticLossless(diffs) + + -- Find any overlaps between deletions and insertions. 
+ -- e.g: abcxxxxxxdef + -- -> abcxxxdef + -- e.g: xxxabcdefxxx + -- -> defxxxabc + -- Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 2 + while diffs[pointer] do + if (diffs[pointer - 1][1] == DIFF_DELETE and + diffs[pointer][1] == DIFF_INSERT) then + local deletion = diffs[pointer - 1][2] + local insertion = diffs[pointer][2] + local overlap_length1 = _diff_commonOverlap(deletion, insertion) + local overlap_length2 = _diff_commonOverlap(insertion, deletion) + if (overlap_length1 >= overlap_length2) then + if (overlap_length1 >= #deletion / 2 or + overlap_length1 >= #insertion / 2) then + -- Overlap found. Insert an equality and trim the surrounding edits. + tinsert(diffs, pointer, + {DIFF_EQUAL, strsub(insertion, 1, overlap_length1)}) + diffs[pointer - 1][2] = + strsub(deletion, 1, #deletion - overlap_length1) + diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1) + pointer = pointer + 1 + end + else + if (overlap_length2 >= #deletion / 2 or + overlap_length2 >= #insertion / 2) then + -- Reverse overlap found. + -- Insert an equality and swap and trim the surrounding edits. + tinsert(diffs, pointer, + {DIFF_EQUAL, strsub(deletion, 1, overlap_length2)}) + diffs[pointer - 1] = {DIFF_INSERT, + strsub(insertion, 1, #insertion - overlap_length2)} + diffs[pointer + 1] = {DIFF_DELETE, + strsub(deletion, overlap_length2 + 1)} + pointer = pointer + 1 + end + end + pointer = pointer + 1 + end + pointer = pointer + 1 + end +end + +--[[ +* Reduce the number of edits by eliminating operationally trivial equalities. +* @param {Array.>} diffs Array of diff tuples. +--]] +function diff_cleanupEfficiency(diffs) + local changes = false + -- Stack of indices where equalities are found. + local equalities = {} + -- Keeping our own length var is faster. + local equalitiesLength = 0 + -- Always equal to diffs[equalities[equalitiesLength]][2] + local lastequality = nil + -- Index of current position. 
+ local pointer = 1 + + -- The following four are really booleans but are stored as numbers because + -- they are used at one point like this: + -- + -- (pre_ins + pre_del + post_ins + post_del) == 3 + -- + -- ...i.e. checking that 3 of them are true and 1 of them is false. + + -- Is there an insertion operation before the last equality. + local pre_ins = 0 + -- Is there a deletion operation before the last equality. + local pre_del = 0 + -- Is there an insertion operation after the last equality. + local post_ins = 0 + -- Is there a deletion operation after the last equality. + local post_del = 0 + + while diffs[pointer] do + if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. + local diffText = diffs[pointer][2] + if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then + -- Candidate found. + equalitiesLength = equalitiesLength + 1 + equalities[equalitiesLength] = pointer + pre_ins, pre_del = post_ins, post_del + lastequality = diffText + else + -- Not a candidate, and can never become one. + equalitiesLength = 0 + lastequality = nil + end + post_ins, post_del = 0, 0 + else -- An insertion or deletion. + if diffs[pointer][1] == DIFF_DELETE then + post_del = 1 + else + post_ins = 1 + end + --[[ + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + --]] + if lastequality and ( + (pre_ins+pre_del+post_ins+post_del == 4) + or + ( + (#lastequality < Diff_EditCost / 2) + and + (pre_ins+pre_del+post_ins+post_del == 3) + )) then + -- Duplicate record. + tinsert(diffs, equalities[equalitiesLength], + {DIFF_DELETE, lastequality}) + -- Change second copy to insert. + diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT + -- Throw away the equality we just deleted. + equalitiesLength = equalitiesLength - 1 + lastequality = nil + if (pre_ins == 1) and (pre_del == 1) then + -- No changes made which could affect previous entry, keep going. 
--[[
* Compute the Levenshtein distance of a diff: the number of inserted, deleted
* or substituted characters.  Within each run of edits between equalities the
* cost is the larger of the inserted and deleted lengths, since a deletion
* paired with an insertion counts as one substitution.
* @param {Array.<Array.<number|string>>} diffs Array of diff tuples.
* @return {number} Number of changes.
--]]
function diff_levenshtein(diffs)
  local total = 0
  local pending_ins, pending_del = 0, 0
  for _, tuple in ipairs(diffs) do
    local kind, text = tuple[1], tuple[2]
    if kind == DIFF_EQUAL then
      -- An equality closes the current run of edits.
      total = total + max(pending_ins, pending_del)
      pending_ins, pending_del = 0, 0
    elseif kind == DIFF_INSERT then
      pending_ins = pending_ins + #text
    elseif kind == DIFF_DELETE then
      pending_del = pending_del + #text
    end
  end
  -- Account for a trailing run of edits with no closing equality.
  return total + max(pending_ins, pending_del)
end
--[[
* Find the differences between two texts.  Assumes that the texts do not
* have any common prefix or suffix.
* Tries, in order: the trivial empty-text cases, containment of the shorter
* text in the longer one, the single-character case, a half-match split, and
* finally bisection.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} checklines Has no effect in Lua.
* @param {number} deadline Time when the diff should be complete by.
* @return {Array.<Array.<number|string>>} Array of diff tuples.
* @private
--]]
function _diff_compute(text1, text2, checklines, deadline)
  -- Nothing to delete: the whole of text2 is an insertion (speedup).
  if #text1 == 0 then
    return {{DIFF_INSERT, text2}}
  end

  -- Nothing to keep: the whole of text1 is a deletion (speedup).
  if #text2 == 0 then
    return {{DIFF_DELETE, text1}}
  end

  local text1_is_longer = #text1 > #text2
  local longtext, shorttext
  if text1_is_longer then
    longtext, shorttext = text1, text2
  else
    longtext, shorttext = text2, text1
  end

  local i = indexOf(longtext, shorttext)
  if i ~= nil then
    -- Shorter text is inside the longer text (speedup).  The surrounding
    -- pieces are deletions when text1 is the longer one, insertions otherwise.
    local edge_op = text1_is_longer and DIFF_DELETE or DIFF_INSERT
    return {
      {edge_op, strsub(longtext, 1, i - 1)},
      {DIFF_EQUAL, shorttext},
      {edge_op, strsub(longtext, i + #shorttext)}
    }
  end

  if #shorttext == 1 then
    -- Single character string.
    -- After the previous speedup, the character can't be an equality.
    return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}}
  end

  -- Check to see if the problem can be split in two.
  local text1_a, text1_b, text2_a, text2_b, mid_common =
      _diff_halfMatch(text1, text2)
  if text1_a then
    -- A half-match was found: diff each side separately, then stitch the
    -- results together around the common middle.
    local merged = diff_main(text1_a, text2_a, checklines, deadline)
    local trailing = diff_main(text1_b, text2_b, checklines, deadline)
    merged[#merged + 1] = {DIFF_EQUAL, mid_common}
    for _, b_diff in ipairs(trailing) do
      merged[#merged + 1] = b_diff
    end
    return merged
  end

  return _diff_bisect(text1, text2, deadline)
end
+ for k1 = -d + k1start, d - k1end, 2 do + local k1_offset = v_offset + k1 + local x1 + if (k1 == -d) or ((k1 ~= d) and + (v1[k1_offset - 1] < v1[k1_offset + 1])) then + x1 = v1[k1_offset + 1] + else + x1 = v1[k1_offset - 1] + 1 + end + local y1 = x1 - k1 + while (x1 <= text1_length) and (y1 <= text2_length) + and (strelement(text1, x1) == strelement(text2, y1)) do + x1 = x1 + 1 + y1 = y1 + 1 + end + v1[k1_offset] = x1 + if x1 > text1_length + 1 then + -- Ran off the right of the graph. + k1end = k1end + 2 + elseif y1 > text2_length + 1 then + -- Ran off the bottom of the graph. + k1start = k1start + 2 + elseif front then + local k2_offset = v_offset + delta - k1 + if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then + -- Mirror x2 onto top-left coordinate system. + local x2 = text1_length - v2[k2_offset] + 1 + if x1 > x2 then + -- Overlap detected. + return _diff_bisectSplit(text1, text2, x1, y1, deadline) + end + end + end + end + + -- Walk the reverse path one step. + for k2 = -d + k2start, d - k2end, 2 do + local k2_offset = v_offset + k2 + local x2 + if (k2 == -d) or ((k2 ~= d) and + (v2[k2_offset - 1] < v2[k2_offset + 1])) then + x2 = v2[k2_offset + 1] + else + x2 = v2[k2_offset - 1] + 1 + end + local y2 = x2 - k2 + while (x2 <= text1_length) and (y2 <= text2_length) + and (strelement(text1, -x2) == strelement(text2, -y2)) do + x2 = x2 + 1 + y2 = y2 + 1 + end + v2[k2_offset] = x2 + if x2 > text1_length + 1 then + -- Ran off the left of the graph. + k2end = k2end + 2 + elseif y2 > text2_length + 1 then + -- Ran off the top of the graph. + k2start = k2start + 2 + elseif not front then + local k1_offset = v_offset + delta - k2 + if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then + local x1 = v1[k1_offset] + local y1 = v_offset + x1 - k1_offset + -- Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2 + 1 + if x1 > x2 then + -- Overlap detected. 
+ return _diff_bisectSplit(text1, text2, x1, y1, deadline) + end + end + end + end + end + -- Diff took too long and hit the deadline or + -- number of diffs equals number of characters, no commonality at all. + return {{DIFF_DELETE, text1}, {DIFF_INSERT, text2}} +end + +--[[ + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} x Index of split point in text1. + * @param {number} y Index of split point in text2. + * @param {number} deadline Time at which to bail if not yet complete. + * @return {Array.>} Array of diff tuples. + * @private +--]] +function _diff_bisectSplit(text1, text2, x, y, deadline) + local text1a = strsub(text1, 1, x - 1) + local text2a = strsub(text2, 1, y - 1) + local text1b = strsub(text1, x) + local text2b = strsub(text2, y) + + -- Compute both diffs serially. + local diffs = diff_main(text1a, text2a, false, deadline) + local diffsb = diff_main(text1b, text2b, false, deadline) + + local diffs_len = #diffs + for i, v in ipairs(diffsb) do + diffs[diffs_len + i] = v + end + return diffs +end + +--[[ +* Determine the common prefix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the start of each +* string. +--]] +function _diff_commonPrefix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1)) + then + return 0 + end + -- Binary search. 
+ -- Performance analysis: http://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerstart = 1 + while (pointermin < pointermid) do + if (strsub(text1, pointerstart, pointermid) + == strsub(text2, pointerstart, pointermid)) then + pointermin = pointermid + pointerstart = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine the common suffix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of each string. +--]] +function _diff_commonSuffix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) + or (strbyte(text1, -1) ~= strbyte(text2, -1)) then + return 0 + end + -- Binary search. + -- Performance analysis: http://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerend = 1 + while (pointermin < pointermid) do + if (strsub(text1, -pointermid, -pointerend) + == strsub(text2, -pointermid, -pointerend)) then + pointermin = pointermid + pointerend = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine if the suffix of one string is the prefix of another. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of the first +* string and the start of the second string. +* @private +--]] +function _diff_commonOverlap(text1, text2) + -- Cache the text lengths to prevent multiple calls. + local text1_length = #text1 + local text2_length = #text2 + -- Eliminate the null case. 
+ if text1_length == 0 or text2_length == 0 then + return 0 + end + -- Truncate the longer string. + if text1_length > text2_length then + text1 = strsub(text1, text1_length - text2_length + 1) + elseif text1_length < text2_length then + text2 = strsub(text2, 1, text1_length) + end + local text_length = min(text1_length, text2_length) + -- Quick check for the worst case. + if text1 == text2 then + return text_length + end + + -- Start by looking for a single character match + -- and increase length until no match is found. + -- Performance analysis: http://neil.fraser.name/news/2010/11/04/ + local best = 0 + local length = 1 + while true do + local pattern = strsub(text1, text_length - length + 1) + local found = strfind(text2, pattern, 1, true) + if found == nil then + return best + end + length = length + found - 1 + if found == 1 or strsub(text1, text_length - length + 1) == + strsub(text2, 1, length) then + best = length + length = length + 1 + end + end +end + +--[[ +* Does a substring of shorttext exist within longtext such that the substring +* is at least half the length of longtext? +* This speedup can produce non-minimal diffs. +* Closure, but does not reference any external variables. +* @param {string} longtext Longer string. +* @param {string} shorttext Shorter string. +* @param {number} i Start index of quarter length substring within longtext. +* @return {?Array.} Five element Array, containing the prefix of +* longtext, the suffix of longtext, the prefix of shorttext, the suffix +* of shorttext and the common middle. Or nil if there was no match. +* @private +--]] +function _diff_halfMatchI(longtext, shorttext, i) + -- Start with a 1/4 length substring at position i as a seed. 
+ local seed = strsub(longtext, i, i + floor(#longtext / 4)) + local j = 0 -- LUANOTE: do not change to 1, was originally -1 + local best_common = '' + local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b + while true do + j = indexOf(shorttext, seed, j + 1) + if (j == nil) then + break + end + local prefixLength = _diff_commonPrefix(strsub(longtext, i), + strsub(shorttext, j)) + local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1), + strsub(shorttext, 1, j - 1)) + if #best_common < suffixLength + prefixLength then + best_common = strsub(shorttext, j - suffixLength, j - 1) + .. strsub(shorttext, j, j + prefixLength - 1) + best_longtext_a = strsub(longtext, 1, i - suffixLength - 1) + best_longtext_b = strsub(longtext, i + prefixLength) + best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1) + best_shorttext_b = strsub(shorttext, j + prefixLength) + end + end + if #best_common * 2 >= #longtext then + return {best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common} + else + return nil + end +end + +--[[ +* Do the two texts share a substring which is at least half the length of the +* longer text? +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {?Array.} Five element Array, containing the prefix of +* text1, the suffix of text1, the prefix of text2, the suffix of +* text2 and the common middle. Or nil if there was no match. +* @private +--]] +function _diff_halfMatch(text1, text2) + if Diff_Timeout <= 0 then + -- Don't risk returning a non-optimal diff if we have unlimited time. + return nil + end + local longtext = (#text1 > #text2) and text1 or text2 + local shorttext = (#text1 > #text2) and text2 or text1 + if (#longtext < 4) or (#shorttext * 2 < #longtext) then + return nil -- Pointless. + end + + -- First check if the second quarter is the seed for a half-match. 
+ local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4)) + -- Check again based on the third quarter. + local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2)) + local hm + if not hm1 and not hm2 then + return nil + elseif not hm2 then + hm = hm1 + elseif not hm1 then + hm = hm2 + else + -- Both matched. Select the longest. + hm = (#hm1[5] > #hm2[5]) and hm1 or hm2 + end + + -- A half-match was found, sort out the return data. + local text1_a, text1_b, text2_a, text2_b + if (#text1 > #text2) then + text1_a, text1_b = hm[1], hm[2] + text2_a, text2_b = hm[3], hm[4] + else + text2_a, text2_b = hm[1], hm[2] + text1_a, text1_b = hm[3], hm[4] + end + local mid_common = hm[5] + return text1_a, text1_b, text2_a, text2_b, mid_common +end + +--[[ +* Given two strings, compute a score representing whether the internal +* boundary falls on logical boundaries. +* Scores range from 6 (best) to 0 (worst). +* @param {string} one First string. +* @param {string} two Second string. +* @return {number} The score. +* @private +--]] +function _diff_cleanupSemanticScore(one, two) + if (#one == 0) or (#two == 0) then + -- Edges are the best. + return 6 + end + + -- Each port of this function behaves slightly differently due to + -- subtle differences in each language's definition of things like + -- 'whitespace'. Since this function's purpose is largely cosmetic, + -- the choice has been made to use each language's native features + -- rather than force total conformity. 
+ local char1 = strsub(one, -1) + local char2 = strsub(two, 1, 1) + local nonAlphaNumeric1 = strmatch(char1, '%W') + local nonAlphaNumeric2 = strmatch(char2, '%W') + local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s') + local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s') + local lineBreak1 = whitespace1 and strmatch(char1, '%c') + local lineBreak2 = whitespace2 and strmatch(char2, '%c') + local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$') + local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n') + + if blankLine1 or blankLine2 then + -- Five points for blank lines. + return 5 + elseif lineBreak1 or lineBreak2 then + -- Four points for line breaks. + return 4 + elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then + -- Three points for end of sentences. + return 3 + elseif whitespace1 or whitespace2 then + -- Two points for whitespace. + return 2 + elseif nonAlphaNumeric1 or nonAlphaNumeric2 then + -- One point for non-alphanumeric. + return 1 + end + return 0 +end + +--[[ +* Look for single edits surrounded on both sides by equalities +* which can be shifted sideways to align the edit to a word boundary. +* e.g: The cat came. -> The cat came. +* @param {Array.>} diffs Array of diff tuples. +--]] +function _diff_cleanupSemanticLossless(diffs) + local pointer = 2 + -- Intentionally ignore the first and last element (don't need checking). + while diffs[pointer + 1] do + local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] + if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then + -- This is a single edit surrounded by equalities. + local diff = diffs[pointer] + + local equality1 = prevDiff[2] + local edit = diff[2] + local equality2 = nextDiff[2] + + -- First, shift the edit as far left as possible. 
+ local commonOffset = _diff_commonSuffix(equality1, edit) + if commonOffset > 0 then + local commonString = strsub(edit, -commonOffset) + equality1 = strsub(equality1, 1, -commonOffset - 1) + edit = commonString .. strsub(edit, 1, -commonOffset - 1) + equality2 = commonString .. equality2 + end + + -- Second, step character by character right, looking for the best fit. + local bestEquality1 = equality1 + local bestEdit = edit + local bestEquality2 = equality2 + local bestScore = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2) + + while strbyte(edit, 1) == strbyte(equality2, 1) do + equality1 = equality1 .. strsub(edit, 1, 1) + edit = strsub(edit, 2) .. strsub(equality2, 1, 1) + equality2 = strsub(equality2, 2) + local score = _diff_cleanupSemanticScore(equality1, edit) + + _diff_cleanupSemanticScore(edit, equality2) + -- The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore then + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + end + end + if prevDiff[2] ~= bestEquality1 then + -- We have an improvement, save it back to the diff. + if #bestEquality1 > 0 then + diffs[pointer - 1][2] = bestEquality1 + else + tremove(diffs, pointer - 1) + pointer = pointer - 1 + end + diffs[pointer][2] = bestEdit + if #bestEquality2 > 0 then + diffs[pointer + 1][2] = bestEquality2 + else + tremove(diffs, pointer + 1, 1) + pointer = pointer - 1 + end + end + end + pointer = pointer + 1 + end +end + +--[[ +* Reorder and merge like edit sections. Merge equalities. +* Any edit section can move as long as it doesn't cross an equality. +* @param {Array.>} diffs Array of diff tuples. +--]] +function _diff_cleanupMerge(diffs) + diffs[#diffs + 1] = {DIFF_EQUAL, ''} -- Add a dummy entry at the end. 
+ local pointer = 1 + local count_delete, count_insert = 0, 0 + local text_delete, text_insert = '', '' + local commonlength + while diffs[pointer] do + local diff_type = diffs[pointer][1] + if diff_type == DIFF_INSERT then + count_insert = count_insert + 1 + text_insert = text_insert .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_DELETE then + count_delete = count_delete + 1 + text_delete = text_delete .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_EQUAL then + -- Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1 then + if (count_delete > 0) and (count_insert > 0) then + -- Factor out any common prefixies. + commonlength = _diff_commonPrefix(text_insert, text_delete) + if commonlength > 0 then + local back_pointer = pointer - count_delete - count_insert + if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL) + then + diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2] + .. strsub(text_insert, 1, commonlength) + else + tinsert(diffs, 1, + {DIFF_EQUAL, strsub(text_insert, 1, commonlength)}) + pointer = pointer + 1 + end + text_insert = strsub(text_insert, commonlength + 1) + text_delete = strsub(text_delete, commonlength + 1) + end + -- Factor out any common suffixies. + commonlength = _diff_commonSuffix(text_insert, text_delete) + if commonlength ~= 0 then + diffs[pointer][2] = + strsub(text_insert, -commonlength) .. diffs[pointer][2] + text_insert = strsub(text_insert, 1, -commonlength - 1) + text_delete = strsub(text_delete, 1, -commonlength - 1) + end + end + -- Delete the offending records and add the merged ones. 
+ if count_delete == 0 then + tsplice(diffs, pointer - count_insert, + count_insert, {DIFF_INSERT, text_insert}) + elseif count_insert == 0 then + tsplice(diffs, pointer - count_delete, + count_delete, {DIFF_DELETE, text_delete}) + else + tsplice(diffs, pointer - count_delete - count_insert, + count_delete + count_insert, + {DIFF_DELETE, text_delete}, {DIFF_INSERT, text_insert}) + end + pointer = pointer - count_delete - count_insert + + (count_delete>0 and 1 or 0) + (count_insert>0 and 1 or 0) + 1 + elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then + -- Merge this equality with the previous one. + diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2] + tremove(diffs, pointer) + else + pointer = pointer + 1 + end + count_insert, count_delete = 0, 0 + text_delete, text_insert = '', '' + end + end + if diffs[#diffs][2] == '' then + diffs[#diffs] = nil -- Remove the dummy entry at the end. + end + + -- Second pass: look for single edits surrounded on both sides by equalities + -- which can be shifted sideways to eliminate an equality. + -- e.g: ABAC -> ABAC + local changes = false + pointer = 2 + -- Intentionally ignore the first and last element (don't need checking). + while pointer < #diffs do + local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] + if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then + -- This is a single edit surrounded by equalities. + local diff = diffs[pointer] + local currentText = diff[2] + local prevText = prevDiff[2] + local nextText = nextDiff[2] + if strsub(currentText, -#prevText) == prevText then + -- Shift the edit over the previous equality. + diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1) + nextDiff[2] = prevText .. nextDiff[2] + tremove(diffs, pointer - 1) + changes = true + elseif strsub(currentText, 1, #nextText) == nextText then + -- Shift the edit over the next equality. + prevDiff[2] = prevText .. nextText + diff[2] = strsub(currentText, #nextText + 1) .. 
nextText
        tremove(diffs, pointer + 1)
        changes = true
      end
    end
    pointer = pointer + 1
  end
  -- If shifts were made, the diff needs reordering and another shift sweep.
  if changes then
    -- LUANOTE: no return value, but necessary to use 'return' to get
    -- tail calls.
    return _diff_cleanupMerge(diffs)
  end
end

--[[
* loc is a location in text1, compute and return the equivalent location in
* text2.
* e.g. 'The cat' vs 'The big cat', 1->1, 5->8
* Locations are 1-based. A location that falls inside a deletion maps to the
* text2 position reached just before that deletion. A location beyond the
* last diff tuple (including the case of an empty diff list) is extrapolated
* one-for-one from the end of both texts.
* @param {table} diffs Array of diff tuples ({operation, text} pairs).
* @param {number} loc Location within text1.
* @return {number} Location within text2.
--]]
function _diff_xIndex(diffs, loc)
  local chars1 = 1
  local chars2 = 1
  local last_chars1 = 1
  local last_chars2 = 1
  -- The tuple that contains loc, or nil when loc lies past every tuple.
  -- Tracking the tuple itself (rather than probing diffs[x + 1]) keeps the
  -- deletion check correct when the overshooting tuple is the last one and
  -- avoids arithmetic on a nil index when diffs is empty.
  local overshot
  for _, diff in ipairs(diffs) do
    if diff[1] ~= DIFF_INSERT then -- Equality or deletion.
      chars1 = chars1 + #diff[2]
    end
    if diff[1] ~= DIFF_DELETE then -- Equality or insertion.
      chars2 = chars2 + #diff[2]
    end
    if chars1 > loc then -- Overshot the location.
      overshot = diff
      break
    end
    last_chars1 = chars1
    last_chars2 = chars2
  end
  -- Was the location deleted?
  if overshot and (overshot[1] == DIFF_DELETE) then
    return last_chars2
  end
  -- Add the remaining character length.
  return last_chars2 + (loc - last_chars1)
end

--[[
* Compute and return the source text (all equalities and deletions).
* @param {table} diffs Array of diff tuples ({operation, text} pairs).
* @return {string} Source text.
--]]
function _diff_text1(diffs)
  local text = {}
  for x, diff in ipairs(diffs) do
    if diff[1] ~= DIFF_INSERT then
      text[#text + 1] = diff[2]
    end
  end
  return tconcat(text)
end

--[[
* Compute and return the destination text (all equalities and insertions).
* @param {Array.>} diffs Array of diff tuples.
* @return {string} Destination text.
--]]
function _diff_text2(diffs)
  -- Collect the text of every tuple that is not a deletion, in order.
  local text = {}
  for x, diff in ipairs(diffs) do
    if diff[1] ~= DIFF_DELETE then
      text[#text + 1] = diff[2]
    end
  end
  return tconcat(text)
end

--[[
* Crush the diff into an encoded string which describes the operations
* required to transform text1 into text2.
* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Operations are tab-separated. Inserted text is escaped using %xx notation.
* @param {table} diffs Array of diff tuples ({operation, text} pairs).
* @return {string} Delta text.
--]]
function _diff_toDelta(diffs)
  local text = {}
  for x, diff in ipairs(diffs) do
    local op, data = diff[1], diff[2]
    if op == DIFF_INSERT then
      -- Inside a binary expression only the first result of gsub (the
      -- substituted string) is used; the match count is discarded.
      text[x] = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace)
    elseif op == DIFF_DELETE then
      -- Deletions and equalities carry only a length, never the text itself.
      text[x] = '-' .. #data
    elseif op == DIFF_EQUAL then
      text[x] = '=' .. #data
    end
  end
  return tconcat(text, '\t')
end

--[[
* Given the original text1, and an encoded string which describes the
* operations required to transform text1 into text2, compute the full diff.
* @param {string} text1 Source string for the diff.
* @param {string} delta Delta text.
* @return {table} Array of diff tuples ({operation, text} pairs).
* @throws {Error} If invalid input.
--]]
function _diff_fromDelta(text1, delta)
  local diffs = {}
  local diffsLength = 0 -- Keeping our own length var is faster
  local pointer = 1 -- Cursor in text1
  -- The pattern matches runs of non-tab characters, so empty tokens (for
  -- example from a trailing tab) are never produced.
  for token in gmatch(delta, '[^\t]+') do
    -- Each token begins with a one character parameter which specifies the
    -- operation of this token (delete, insert, equality).
    local tokenchar, param = strsub(token, 1, 1), strsub(token, 2)
    if (tokenchar == '+') then
      -- Decode %xx escapes. The capture takes up to two characters after
      -- each '%'; anything that is not exactly two hex digits is flagged.
      local invalidDecode = false
      local decoded = gsub(param, '%%(.?.?)',
          function(c)
            local n = tonumber(c, 16)
            if (#c ~= 2) or (n == nil) then
              invalidDecode = true
              return ''
            end
            return strchar(n)
          end)
      if invalidDecode then
        -- Malformed URI sequence.
        error('Illegal escape in _diff_fromDelta: ' .. param)
      end
      diffsLength = diffsLength + 1
      diffs[diffsLength] = {DIFF_INSERT, decoded}
    elseif (tokenchar == '-') or (tokenchar == '=') then
      local n = tonumber(param)
      if (n == nil) or (n < 0) then
        error('Invalid number in _diff_fromDelta: ' .. param)
      end
      -- Consume the next n characters of text1 for this operation.
      local text = strsub(text1, pointer, pointer + n - 1)
      pointer = pointer + n
      if (tokenchar == '=') then
        diffsLength = diffsLength + 1
        diffs[diffsLength] = {DIFF_EQUAL, text}
      else
        diffsLength = diffsLength + 1
        diffs[diffsLength] = {DIFF_DELETE, text}
      end
    else
      error('Invalid diff operation in _diff_fromDelta: ' .. token)
    end
  end
  -- The delete/equal lengths in the delta must account for all of text1.
  if (pointer ~= #text1 + 1) then
    error('Delta length (' .. (pointer - 1)
        .. ') does not equal source text length (' .. #text1 .. ').')
  end
  return diffs
end

-- ---------------------------------------------------------------------------
-- MATCH API
-- ---------------------------------------------------------------------------

local _match_bitap, _match_alphabet

--[[
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
--]]
function match_main(text, pattern, loc)
  -- Check for null inputs.
  if text == nil or pattern == nil or loc == nil then
    error('Null inputs. (match_main)')
  end

  if text == pattern then
    -- Shortcut (potentially not guaranteed by the algorithm)
    return 1
  elseif #text == 0 then
    -- Nothing to match.
    return -1
  end
  -- Clamp loc into the valid index range of text.
  loc = max(1, min(loc, #text))
  if strsub(text, loc, loc + #pattern - 1) == pattern then
    -- Perfect match at the perfect spot! (Includes case of null pattern)
    return loc
  else
    -- Do a fuzzy compare.
return _match_bitap(text, pattern, loc)
  end
end

-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
-- ---------------------------------------------------------------------------

--[[
* Initialise the alphabet for the Bitap algorithm.
* @param {string} pattern The text to encode.
* @return {table} Map from each character of pattern to a bitmask with one
*     bit set per position at which that character occurs; the first
*     character of pattern owns the highest bit (1 << (#pattern - 1)).
* @private
--]]
function _match_alphabet(pattern)
  local s = {}
  local i = 0
  for c in gmatch(pattern, '.') do
    s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1))
    i = i + 1
  end
  return s
end

--[[
* Locate the best instance of 'pattern' in 'text' near 'loc' using the
* Bitap algorithm.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @throws {Error} If pattern is longer than Match_MaxBits.
* @private
--]]
function _match_bitap(text, pattern, loc)
  if #pattern > Match_MaxBits then
    error('Pattern too long.')
  end

  -- Initialise the alphabet.
  local s = _match_alphabet(pattern)

  --[[
  * Compute and return the score for a match with e errors and x location.
  * Accesses loc and pattern through being a closure.
  * @param {number} e Number of errors in match.
  * @param {number} x Location of match.
  * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
  * @private
  --]]
  local function _match_bitapScore(e, x)
    local accuracy = e / #pattern
    local proximity = abs(loc - x)
    if (Match_Distance == 0) then
      -- Dodge divide by zero error. With no distance tolerance, only a match
      -- exactly at loc is scored on its accuracy; any other location gets
      -- the worst score. (accuracy is always a number, hence truthy, so the
      -- and/or idiom is safe here.)
      return (proximity == 0) and accuracy or 1
    end
    return accuracy + (proximity / Match_Distance)
  end

  -- Highest score beyond which we give up.
  local score_threshold = Match_Threshold
  -- Is there a nearby exact match? (speedup)
  local best_loc = indexOf(text, pattern, loc)
  if best_loc then
    score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
    -- LUANOTE: Ideally we'd also check from the other direction, but Lua
    -- doesn't have an efficient lastIndexOf function.
  end

  -- Initialise the bit arrays.
  local matchmask = lshift(1, #pattern - 1)
  best_loc = -1

  local bin_min, bin_mid
  local bin_max = #pattern + #text
  local last_rd
  for d = 0, #pattern - 1, 1 do
    -- Scan for the best match; each iteration allows for one more error.
    -- Run a binary search to determine how far from 'loc' we can stray at this
    -- error level.
    bin_min = 0
    bin_mid = bin_max
    while (bin_min < bin_mid) do
      if (_match_bitapScore(d, loc + bin_mid) <= score_threshold) then
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
    end
    -- Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    local start = max(1, loc - bin_mid + 1)
    local finish = min(loc + bin_mid, #text) + #pattern

    local rd = {}
    for j = start, finish do
      rd[j] = 0
    end
    rd[finish + 1] = lshift(1, d) - 1
    -- Sweep right-to-left; rd[j] depends on rd[j + 1] of this pass and on
    -- last_rd (the previous, lower-error pass).
    for j = finish, start, -1 do
      local charMatch = s[strsub(text, j - 1, j - 1)] or 0
      if (d == 0) then -- First pass: exact match.
        rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
      else
        -- Subsequent passes: fuzzy match.
        -- Functions instead of operators make this hella messy.
        rd[j] = bor(
                band(
                  bor(
                    lshift(rd[j + 1], 1),
                    1
                  ),
                  charMatch
                ),
                bor(
                  bor(
                    lshift(bor(last_rd[j + 1], last_rd[j]), 1),
                    1
                  ),
                  last_rd[j + 1]
                )
              )
      end
      if (band(rd[j], matchmask) ~= 0) then
        local score = _match_bitapScore(d, j - 1)
        -- This match will almost certainly be better than any existing match.
        -- But check anyway.
        if (score <= score_threshold) then
          -- Told you so.
          score_threshold = score
          best_loc = j - 1
          if (best_loc > loc) then
            -- When passing loc, don't exceed our current distance from loc.
            start = max(1, loc * 2 - best_loc)
          else
            -- Already passed loc, downhill from here on in.
            break
          end
        end
      end
    end
    -- No hope for a (better) match at greater error levels.
    if (_match_bitapScore(d + 1, loc) > score_threshold) then
      break
    end
    last_rd = rd
  end
  return best_loc
end

-- -----------------------------------------------------------------------------
-- PATCH API
-- -----------------------------------------------------------------------------

local _patch_addContext,
      _patch_deepCopy,
      _patch_addPadding,
      _patch_splitMax,
      _patch_appendText,
      _new_patch_obj

--[[
* Compute a list of patches to turn text1 into text2.
* Use diffs if provided, otherwise compute it ourselves.
* There are four ways to call this function, depending on what data is
* available to the caller:
* Method 1:
* a = text1, b = text2
* Method 2:
* a = diffs
* Method 3 (optimal):
* a = text1, b = diffs
* Method 4 (deprecated, use method 3):
* a = text1, b = text2, c = diffs
*
* @param {string|table} a text1 (methods 1,3,4) or
* Array of diff tuples for text1 to text2 (method 2).
* @param {string|table} opt_b text2 (methods 1,4) or
* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
* @param {string|table} opt_c Array of diff tuples for
* text1 to text2 (method 4) or undefined (methods 1,2,3).
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function patch_make(a, opt_b, opt_c)
  local text1, diffs
  local type_a, type_b, type_c = type(a), type(opt_b), type(opt_c)
  if (type_a == 'string') and (type_b == 'string') and (type_c == 'nil') then
    -- Method 1: text1, text2
    -- Compute diffs from text1 and text2.
text1 = a
    diffs = diff_main(text1, opt_b, true)
    if (#diffs > 2) then
      diff_cleanupSemantic(diffs)
      diff_cleanupEfficiency(diffs)
    end
  elseif (type_a == 'table') and (type_b == 'nil') and (type_c == 'nil') then
    -- Method 2: diffs
    -- Compute text1 from diffs.
    diffs = a
    text1 = _diff_text1(diffs)
  elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then
    -- Method 3: text1, diffs
    text1 = a
    diffs = opt_b
  elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table')
  then
    -- Method 4: text1, text2, diffs
    -- text2 is not used.
    text1 = a
    diffs = opt_c
  else
    error('Unknown call format to patch_make.')
  end

  if (diffs[1] == nil) then
    return {} -- Get rid of the null case.
  end

  local patches = {}
  local patch = _new_patch_obj()
  local patchDiffLength = 0 -- Keeping our own length var is faster.
  local char_count1 = 0 -- Number of characters into the text1 string.
  local char_count2 = 0 -- Number of characters into the text2 string.
  -- Start with text1 (prepatch_text) and apply the diffs until we arrive at
  -- text2 (postpatch_text). We recreate the patches one by one to determine
  -- context info.
  local prepatch_text, postpatch_text = text1, text1
  for x, diff in ipairs(diffs) do
    local diff_type, diff_text = diff[1], diff[2]

    if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then
      -- A new patch starts here.
      -- The counters are 0-based character counts; patch positions are
      -- stored 1-based, hence the + 1.
      patch.start1 = char_count1 + 1
      patch.start2 = char_count2 + 1
    end

    if (diff_type == DIFF_INSERT) then
      patchDiffLength = patchDiffLength + 1
      patch.diffs[patchDiffLength] = diff
      patch.length2 = patch.length2 + #diff_text
      -- Splice the inserted text into the running post-patch text.
      postpatch_text = strsub(postpatch_text, 1, char_count2)
          .. diff_text .. strsub(postpatch_text, char_count2 + 1)
    elseif (diff_type == DIFF_DELETE) then
      patch.length1 = patch.length1 + #diff_text
      patchDiffLength = patchDiffLength + 1
      patch.diffs[patchDiffLength] = diff
      -- Cut the deleted text out of the running post-patch text.
      postpatch_text = strsub(postpatch_text, 1, char_count2)
          .. strsub(postpatch_text, char_count2 + #diff_text + 1)
    elseif (diff_type == DIFF_EQUAL) then
      if (#diff_text <= Patch_Margin * 2)
          and (patchDiffLength ~= 0) and (#diffs ~= x) then
        -- Small equality inside a patch.
        patchDiffLength = patchDiffLength + 1
        patch.diffs[patchDiffLength] = diff
        patch.length1 = patch.length1 + #diff_text
        patch.length2 = patch.length2 + #diff_text
      elseif (#diff_text >= Patch_Margin * 2) then
        -- Time for a new patch.
        if (patchDiffLength ~= 0) then
          _patch_addContext(patch, prepatch_text)
          patches[#patches + 1] = patch
          patch = _new_patch_obj()
          patchDiffLength = 0
          -- Unlike Unidiff, our patch lists have a rolling context.
          -- http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
          -- Update prepatch text & pos to reflect the application of the
          -- just completed patch.
          prepatch_text = postpatch_text
          char_count1 = char_count2
        end
      end
    end

    -- Update the current character count.
    if (diff_type ~= DIFF_INSERT) then
      char_count1 = char_count1 + #diff_text
    end
    if (diff_type ~= DIFF_DELETE) then
      char_count2 = char_count2 + #diff_text
    end
  end

  -- Pick up the leftover patch if not empty.
  if (patchDiffLength > 0) then
    _patch_addContext(patch, prepatch_text)
    patches[#patches + 1] = patch
  end

  return patches
end

--[[
* Merge a set of patches onto the text. Return a patched text, as well
* as a list of true/false values indicating which patches were applied.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @param {string} text Old text.
* @return {string, table} Two return values, the
* new text and an array of boolean values.
--]]
function patch_apply(patches, text)
  -- Nothing to apply: return the text untouched with an empty result list.
  if patches[1] == nil then
    return text, {}
  end

  -- Deep copy the patches so that no changes are made to originals.
  patches = _patch_deepCopy(patches)

  -- Pad both ends of the text with the string returned by _patch_addPadding;
  -- the same padding is stripped again before returning.
  local nullPadding = _patch_addPadding(patches)
  text = nullPadding .. text .. nullPadding

  _patch_splitMax(patches)
  -- delta keeps track of the offset between the expected and actual location
  -- of the previous patch. If there are patches expected at positions 10 and
  -- 20, but the first patch was found at 12, delta is 2 and the second patch
  -- has an effective expected position of 22.
  local delta = 0
  local results = {}
  for x, patch in ipairs(patches) do
    local expected_loc = patch.start2 + delta
    local text1 = _diff_text1(patch.diffs)
    local start_loc
    local end_loc = -1
    if #text1 > Match_MaxBits then
      -- _patch_splitMax will only provide an oversized pattern in
      -- the case of a monster delete.
      -- Match the first and last Match_MaxBits characters separately.
      start_loc = match_main(text,
          strsub(text1, 1, Match_MaxBits), expected_loc)
      if start_loc ~= -1 then
        end_loc = match_main(text, strsub(text1, -Match_MaxBits),
            expected_loc + #text1 - Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc then
          -- Can't find valid trailing context. Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end
    if start_loc == -1 then
      -- No match found. :(
      results[x] = false
      -- Subtract the delta for this failed patch from subsequent patches.
      -- The patch would have changed the text length by
      -- (length2 - length1); since it was not applied, later expected
      -- positions must be shifted back by exactly that net amount.
      delta = delta - (patch.length2 - patch.length1)
    else
      -- Found a match. :)
      results[x] = true
      delta = start_loc - expected_loc
      local text2
      if end_loc == -1 then
        text2 = strsub(text, start_loc, start_loc + #text1 - 1)
      else
        text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
      end
      if text1 == text2 then
        -- Perfect match, just shove the replacement text in.
        text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs)
            .. strsub(text, start_loc + #text1)
      else
        -- Imperfect match. Run a diff to get a framework of equivalent
        -- indices.
        local diffs = diff_main(text1, text2, false)
        if (#text1 > Match_MaxBits)
            and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
          -- The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          _diff_cleanupSemanticLossless(diffs)
          -- index1 walks the patch's own source text; index2 is the
          -- equivalent position in the text actually found (text2).
          local index1 = 1
          local index2
          for y, mod in ipairs(patch.diffs) do
            if mod[1] ~= DIFF_EQUAL then
              index2 = _diff_xIndex(diffs, index1)
            end
            if mod[1] == DIFF_INSERT then
              text = strsub(text, 1, start_loc + index2 - 2)
                  .. mod[2] .. strsub(text, start_loc + index2 - 1)
            elseif mod[1] == DIFF_DELETE then
              text = strsub(text, 1, start_loc + index2 - 2) .. strsub(text,
                  start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
            end
            if mod[1] ~= DIFF_DELETE then
              index1 = index1 + #mod[2]
            end
          end
        end
      end
    end
  end
  -- Strip the padding off.
  text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
  return text, results
end

--[[
* Take a list of patches and return a textual representation.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} Text representation of patches.
--]]
function patch_toText(patches)
  -- _patch_appendText is handed the shared buffer table for each patch;
  -- the buffer is joined once at the end.
  local text = {}
  for x, patch in ipairs(patches) do
    _patch_appendText(patch, text)
  end
  return tconcat(text)
end

--[[
* Parse a textual representation of patches and return a list of patch objects.
* @param {string} textline Text representation of patches.
* @return {Array.<_new_patch_obj>} Array of patch objects.
* @throws {Error} If invalid input.
--]]
function patch_fromText(textline)
  local result = {}
  if (#textline == 0) then
    return result
  end

  -- Split the input on newlines so that lines can be addressed by index.
  local lines = {}
  for piece in gmatch(textline, '([^\n]*)') do
    lines[#lines + 1] = piece
  end

  -- Leading character of a body line -> diff operation.
  local opBySign = {
    ['-'] = DIFF_DELETE,  -- Deletion.
    ['+'] = DIFF_INSERT,  -- Insertion.
    [' '] = DIFF_EQUAL,   -- Minor equality.
  }

  -- Convert the captured start/length strings of one header side to numbers.
  -- An omitted length means 1; a zero length moves the start forward by one.
  local function coords(startStr, lengthStr)
    local start = tonumber(startStr)
    local length = tonumber(lengthStr) or 1
    if (length == 0) then
      start = start + 1
    end
    return start, length
  end

  local cursor = 1
  while (cursor <= #lines) do
    -- Every patch must open with a "@@ -a,b +c,d @@" header line.
    local header = lines[cursor]
    local rawStart1, rawLength1, rawStart2, rawLength2
        = strmatch(header, '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$')
    if (rawStart1 == nil) then
      error('Invalid patch string: "' .. header .. '"')
    end

    local patch = _new_patch_obj()
    result[#result + 1] = patch
    patch.start1, patch.length1 = coords(rawStart1, rawLength1)
    patch.start2, patch.length2 = coords(rawStart2, rawLength2)
    cursor = cursor + 1

    -- Consume body lines until the next header or the end of the input.
    while true do
      local raw = lines[cursor]
      if (raw == nil) then
        break
      end
      local sign = strsub(raw, 1, 1)
      local body = strsub(raw, 2)

      -- Decode %xx escapes. A '%' that is not followed by exactly two
      -- characters parsing as a base-16 number marks the line as malformed.
      local malformed = false
      local decoded = gsub(body, '%%(.?.?)', function(hex)
        local code = tonumber(hex, 16)
        if (#hex ~= 2) or (code == nil) then
          malformed = true
          return ''
        end
        return strchar(code)
      end)
      if malformed then
        -- Malformed URI sequence.
        error('Illegal escape in patch_fromText: ' .. body)
      end

      local op = opBySign[sign]
      if (op ~= nil) then
        patch.diffs[#patch.diffs + 1] = {op, decoded}
      elseif (sign == '@') then
        -- Start of next patch.
        break
      elseif (sign ~= '') then
        -- Unknown line prefix.
        error('Invalid patch mode "' .. sign .. '" in: ' .. decoded)
      end
      -- A blank line falls through to here and is simply skipped.
      cursor = cursor + 1
    end
  end
  return result
end

-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
-- ---------------------------------------------------------------------------

-- Metatable shared by all patch objects: tostring(patch) yields the text that
-- _patch_appendText generates for that patch.
local patch_meta = {}

function patch_meta.__tostring(patch)
  local pieces = {}
  _patch_appendText(patch, pieces)
  return tconcat(pieces)
end

--[[
* Create a fresh, empty patch object.
* Both start positions default to 1, both lengths to 0, and the diff list
* begins empty.
* @constructor
--]]
function _new_patch_obj()
  local obj = {
    diffs = {},   -- list of {operation, text} pairs
    start1 = 1,   -- start position in the source text
    start2 = 1,   -- start position in the destination text
    length1 = 0,  -- length covered in the source text
    length2 = 0,  -- length covered in the destination text
  }
  return setmetatable(obj, patch_meta)
end

--[[
* Grow the context around a patch until its pattern occurs only once in the
* text, but don't let the pattern expand beyond Match_MaxBits.
* The patch is modified in place; nothing is returned.
* @param {_new_patch_obj} patch The patch to grow.
* @param {string} text Source text.
* @private
--]]
function _patch_addContext(patch, text)
  if (#text == 0) then
    return
  end

  local head = patch.start2
  local tail = head + patch.length1 - 1
  local pattern = strsub(text, head, tail)
  local padding = 0
  local ceiling = Match_MaxBits - Patch_Margin - Patch_Margin

  -- LUANOTE: Lua has no lastIndexOf, so instead of comparing first and last
  -- occurrence (as other ports do) we look for the first two occurrences.
  local function occursTwice(candidate)
    local first = indexOf(text, candidate)
    if (first == nil) then
      return false
    end
    return indexOf(text, candidate, first + 1) ~= nil
  end

  -- Widen the pattern by Patch_Margin on each side while it is empty or
  -- still ambiguous, as long as it stays under the size ceiling.
  local ambiguous = occursTwice(pattern)
  while (#pattern == 0 or ambiguous) and (#pattern < ceiling) do
    padding = padding + Patch_Margin
    pattern = strsub(text, max(1, head - padding), tail + padding)
    ambiguous = occursTwice(pattern)
  end
  -- Add one chunk for good luck.
  padding = padding + Patch_Margin

  -- Leading context becomes a new first equality.
  local prefix = strsub(text, max(1, head - padding), head - 1)
  if (#prefix > 0) then
    tinsert(patch.diffs, 1, {DIFF_EQUAL, prefix})
  end
  -- Trailing context becomes a new last equality.
  local suffix = strsub(text, tail + 1, tail + padding)
  if (#suffix > 0) then
    patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, suffix}
  end

  -- Roll back the start points and extend the lengths to cover the context.
  local grown = #prefix + #suffix
  patch.start1 = patch.start1 - #prefix
  patch.start2 = head - #prefix
  patch.length1 = patch.length1 + grown
  patch.length2 = patch.length2 + grown
end

--[[
* Given an array of patches, return another array that is identical.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function _patch_deepCopy(patches)
  local patchesCopy = {}
  for x, patch in ipairs(patches) do
    local patchCopy = _new_patch_obj()
    -- Each diff is rebuilt as a new {operation, text} pair so the copy shares
    -- no tables with the original.
    local diffsCopy = {}
    for i, diff in ipairs(patch.diffs) do
      diffsCopy[i] = {diff[1], diff[2]}
    end
    patchCopy.diffs = diffsCopy
    patchCopy.start1 = patch.start1
    patchCopy.start2 = patch.start2
    patchCopy.length1 = patch.length1
    patchCopy.length2 = patch.length2
    patchesCopy[x] = patchCopy
  end
  return patchesCopy
end

--[[
* Add some padding on text start and end so that edges can match something.
* Intended to be called only from within patch_apply.
* The patches are modified in place: every start is shifted, and the first
* and last patch get (or grow) an equality made of padding characters.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {string} The padding string added to each side.
--]]
function _patch_addPadding(patches)
  local paddingLength = Patch_Margin
  -- The padding is the characters with codes 1 .. paddingLength.
  local nullPadding = ''
  for x = 1, paddingLength do
    nullPadding = nullPadding .. strchar(x)
  end

  -- Bump all the patches forward.
  for x, patch in ipairs(patches) do
    patch.start1 = patch.start1 + paddingLength
    patch.start2 = patch.start2 + paddingLength
  end

  -- Add some padding on start of first diff.
  local patch = patches[1]
  local diffs = patch.diffs
  local firstDiff = diffs[1]
  if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    tinsert(diffs, 1, {DIFF_EQUAL, nullPadding})
    patch.start1 = patch.start1 - paddingLength -- Should be 0.
    patch.start2 = patch.start2 - paddingLength -- Should be 0.
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #firstDiff[2]) then
    -- Grow first equality.
    -- Only the tail of the padding is prepended, so the equality ends up
    -- exactly paddingLength characters longer than its non-padding part
    -- would need.
    local extraLength = paddingLength - #firstDiff[2]
    firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2]
    patch.start1 = patch.start1 - extraLength
    patch.start2 = patch.start2 - extraLength
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  -- Add some padding on end of last diff.
  patch = patches[#patches]
  diffs = patch.diffs
  local lastDiff = diffs[#diffs]
  if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then
    -- Add nullPadding equality.
    diffs[#diffs + 1] = {DIFF_EQUAL, nullPadding}
    patch.length1 = patch.length1 + paddingLength
    patch.length2 = patch.length2 + paddingLength
  elseif (paddingLength > #lastDiff[2]) then
    -- Grow last equality.
    local extraLength = paddingLength - #lastDiff[2]
    lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength)
    patch.length1 = patch.length1 + extraLength
    patch.length2 = patch.length2 + extraLength
  end

  return nullPadding
end

--[[
* Look through the patches and break up any which are longer than the maximum
* limit of the match algorithm.
* Intended to be called only from within patch_apply.
* The list is edited in place: each oversized patch is removed and replaced
* by the smaller patches cut from it. Nothing is returned.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
--]]
function _patch_splitMax(patches)
  local patch_size = Match_MaxBits
  local x = 1
  while true do
    local patch = patches[x]
    if patch == nil then
      -- Walked off the end of the list: done.
      return
    end
    if patch.length1 > patch_size then
      local bigpatch = patch
      -- Remove the big old patch.
      tremove(patches, x)
      -- Step back so that the x = x + 1 before each tinsert below puts the
      -- first replacement into the slot just vacated.
      x = x - 1
      local start1 = bigpatch.start1
      local start2 = bigpatch.start2
      local precontext = ''
      -- bigpatch.diffs is consumed from the front until nothing is left.
      while bigpatch.diffs[1] do
        -- Create one of several smaller patches.
        local patch = _new_patch_obj()
        -- empty stays true while the new patch holds only equalities.
        local empty = true
        patch.start1 = start1 - #precontext
        patch.start2 = start2 - #precontext
        if precontext ~= '' then
          patch.length1, patch.length2 = #precontext, #precontext
          patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, precontext}
        end
        while bigpatch.diffs[1] and (patch.length1 < patch_size-Patch_Margin) do
          local diff_type = bigpatch.diffs[1][1]
          local diff_text = bigpatch.diffs[1][2]
          if (diff_type == DIFF_INSERT) then
            -- Insertions are harmless.
            patch.length2 = patch.length2 + #diff_text
            start2 = start2 + #diff_text
            patch.diffs[#(patch.diffs) + 1] = bigpatch.diffs[1]
            tremove(bigpatch.diffs, 1)
            empty = false
          elseif (diff_type == DIFF_DELETE) and (#patch.diffs == 1)
              and (patch.diffs[1][1] == DIFF_EQUAL)
              and (#diff_text > 2 * patch_size) then
            -- This is a large deletion. Let it pass in one chunk.
            patch.length1 = patch.length1 + #diff_text
            start1 = start1 + #diff_text
            empty = false
            patch.diffs[#patch.diffs + 1] = {diff_type, diff_text}
            tremove(bigpatch.diffs, 1)
          else
            -- Deletion or equality.
            -- Only take as much as we can stomach.
            diff_text = strsub(diff_text, 1,
                patch_size - patch.length1 - Patch_Margin)
            patch.length1 = patch.length1 + #diff_text
            start1 = start1 + #diff_text
            if (diff_type == DIFF_EQUAL) then
              patch.length2 = patch.length2 + #diff_text
              start2 = start2 + #diff_text
            else
              empty = false
            end
            patch.diffs[#patch.diffs + 1] = {diff_type, diff_text}
            if (diff_text == bigpatch.diffs[1][2]) then
              -- The whole diff was taken.
              tremove(bigpatch.diffs, 1)
            else
              -- Only a head slice was taken; leave the remainder in place.
              bigpatch.diffs[1][2]
                  = strsub(bigpatch.diffs[1][2], #diff_text + 1)
            end
          end
        end
        -- Compute the head context for the next patch.
        precontext = _diff_text2(patch.diffs)
        precontext = strsub(precontext, -Patch_Margin)
        -- Append the end context for this patch.
        local postcontext = strsub(_diff_text1(bigpatch.diffs), 1, Patch_Margin)
        if postcontext ~= '' then
          patch.length1 = patch.length1 + #postcontext
          patch.length2 = patch.length2 + #postcontext
          if patch.diffs[1]
              and (patch.diffs[#patch.diffs][1] == DIFF_EQUAL) then
            -- Merge into the trailing equality rather than adding another.
            patch.diffs[#patch.diffs][2] = patch.diffs[#patch.diffs][2]
                .. postcontext
          else
            patch.diffs[#patch.diffs + 1] = {DIFF_EQUAL, postcontext}
          end
        end
        -- Patches that ended up containing only equalities are dropped.
        if not empty then
          x = x + 1
          tinsert(patches, x, patch)
        end
      end
    end
    x = x + 1
  end
end

--[[
* Emulate GNU diff's format.
* Header: @@ -382,8 +481,9 @@
* Appends the header line and one line per diff to the given list of
* string pieces.
* @param {_new_patch_obj} patch The patch to render.
* @param {Array.<string>} text List that the generated pieces are appended to.
* @return {Array.<string>} The same list that was passed in as text.
--]]
function _patch_appendText(patch, text)
  local coords1, coords2
  local length1, length2 = patch.length1, patch.length2
  local start1, start2 = patch.start1, patch.start2
  local diffs = patch.diffs

  -- A length of 1 is written as the bare start; a length of 0 is written
  -- with the start moved back by one.
  if length1 == 1 then
    coords1 = start1
  else
    coords1 = ((length1 == 0) and (start1 - 1) or start1) .. ',' .. length1
  end

  if length2 == 1 then
    coords2 = start2
  else
    coords2 = ((length2 == 0) and (start2 - 1) or start2) .. ',' .. length2
  end
  text[#text + 1] = '@@ -' .. coords1 .. ' +' .. coords2 .. ' @@\n'

  -- NOTE(review): op is declared outside the loop, so a diff whose type is
  -- none of the three constants would reuse the previous op (or nil on the
  -- first iteration) -- confirm that cannot happen.
  local op
  -- Escape the body of the patch with %xx notation.
  for x, diff in ipairs(patch.diffs) do
    local diff_type = diff[1]
    if diff_type == DIFF_INSERT then
      op = '+'
    elseif diff_type == DIFF_DELETE then
      op = '-'
    elseif diff_type == DIFF_EQUAL then
      op = ' '
    end
    text[#text + 1] = op
        .. gsub(diffs[x][2], percentEncode_pattern, percentEncode_replace)
        .. '\n'
  end

  return text
end

-- Expose the API
local _M = {}

_M.DIFF_DELETE = DIFF_DELETE
_M.DIFF_INSERT = DIFF_INSERT
_M.DIFF_EQUAL = DIFF_EQUAL

_M.diff_main = diff_main
_M.diff_cleanupSemantic = diff_cleanupSemantic
_M.diff_cleanupEfficiency = diff_cleanupEfficiency
_M.diff_levenshtein = diff_levenshtein
_M.diff_prettyHtml = diff_prettyHtml

_M.match_main = match_main

_M.patch_make = patch_make
_M.patch_toText = patch_toText
_M.patch_fromText = patch_fromText
_M.patch_apply = patch_apply

-- Expose some non-API functions as well, for testing purposes etc.
_M.diff_commonPrefix = _diff_commonPrefix
_M.diff_commonSuffix = _diff_commonSuffix
_M.diff_commonOverlap = _diff_commonOverlap
_M.diff_halfMatch = _diff_halfMatch
_M.diff_bisect = _diff_bisect
_M.diff_cleanupMerge = _diff_cleanupMerge
_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless
_M.diff_text1 = _diff_text1
_M.diff_text2 = _diff_text2
_M.diff_toDelta = _diff_toDelta
_M.diff_fromDelta = _diff_fromDelta
_M.diff_xIndex = _diff_xIndex
_M.match_alphabet = _match_alphabet
_M.match_bitap = _match_bitap
_M.new_patch_obj = _new_patch_obj
_M.patch_addContext = _patch_addContext
_M.patch_splitMax = _patch_splitMax
_M.patch_addPadding = _patch_addPadding
_M.settings = settings

return _M
diff --git a/lua/diff_match_patch_test.lua b/lua/diff_match_patch_test.lua
new file mode 100644
index 0000000..943fced
--- /dev/null
+++ b/lua/diff_match_patch_test.lua
@@ -0,0 +1,1201 @@
--[[
* Diff Match and Patch -- Test Harness
* Copyright 2018 The diff-match-patch Authors.
* https://github.com/google/diff-match-patch
*
* Based on the JavaScript implementation by Neil Fraser
* Ported to Lua by Duncan Cross
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
--]]

local dmp = require 'diff_match_patch'

local DIFF_INSERT = dmp.DIFF_INSERT
local DIFF_DELETE = dmp.DIFF_DELETE
local DIFF_EQUAL = dmp.DIFF_EQUAL

-- Utility functions.

-- Render a value as a readable string for failure messages. Strings are
-- quoted with %q, tables are rendered recursively as {...}, and everything
-- else goes through tostring.
local function pretty(v)
  if (type(v) == 'string') then
    -- The parentheses drop gsub's second result (the substitution count) so
    -- that exactly one value is returned, like the other branches.
    return (string.format('%q', v):gsub('\\\n', '\\n'))
  elseif (type(v) == 'table') then
    local str = {}
    -- Keys that continue the 1, 2, 3, ... sequence in iteration order are
    -- printed without an explicit [key]= prefix.
    local next_i = 1
    for i, v in pairs(v) do
      if (i == next_i) then
        next_i = next_i + 1
        str[#str + 1] = pretty(v)
      else
        str[#str + 1] = '[' .. pretty(i) .. ']=' .. pretty(v)
      end
    end
    return '{' .. table.concat(str, ',') .. '}'
  else
    return tostring(v)
  end
end

-- Assert that expected == actual.
-- Called as assertEquals(expected, actual) a message is generated;
-- otherwise the arguments are taken as (msg, expected, actual).
function assertEquals(...)
  local msg, expected, actual
  if (select('#', ...) == 2) then
    expected, actual = ...
    msg = 'Expected: \'' .. pretty(expected)
        .. '\' Actual: \'' .. pretty(actual) .. '\''
  else
    msg, expected, actual = ...
  end
  assert(expected == actual, msg)
end

-- Assert that a value is exactly true.
-- Called as assertTrue(actual) or assertTrue(msg, actual).
function assertTrue(...)
  local msg, actual
  if (select('#', ...) == 1) then
    actual = ...
    assertEquals(true, actual)
  else
    msg, actual = ...
    assertEquals(msg, true, actual)
  end
end

-- Assert that a value is exactly false.
-- Called as assertFalse(actual) or assertFalse(msg, actual).
function assertFalse(...)
  local msg, actual
  if (select('#', ...) == 1) then
    actual = ...
    -- Compare against the literal false. The previous spelling "flase" was
    -- an undefined global (nil), so this form passed for nil and failed for
    -- an actual false.
    assertEquals(false, actual)
  else
    msg, actual = ...
    assertEquals(msg, false, actual)
  end
end

-- If expected and actual are the equivalent, pass the test.
-- Equivalent values are compared through their pretty() rendering; anything
-- else is compared directly so that the assertion fails with the message.
function assertEquivalent(...)
  local msg, expected, actual
  expected, actual = ...
  msg = 'Expected: \'' .. pretty(expected)
      .. '\' Actual: \'' .. pretty(actual) .. '\''
  if (_equivalent(expected, actual)) then
    assertEquals(msg, pretty(expected), pretty(actual))
  else
    assertEquals(msg, expected, actual)
  end
end

-- Are a and b the equivalent? -- Recursive.
-- Two tables are equivalent when every key of either one maps to an
-- equivalent value in the other; non-tables must be equal under ==.
function _equivalent(a, b)
  if (a == b) then
    return true
  end
  if (type(a) == 'table') and (type(b) == 'table') then
    for k, v in pairs(a) do
      if not _equivalent(v, b[k]) then
        return false
      end
    end
    -- Second pass catches keys that exist only in b.
    for k, v in pairs(b) do
      if not _equivalent(v, a[k]) then
        return false
      end
    end
    return true
  end
  return false
end

-- Rebuild both texts from a list of {operation, text} diffs.
-- Returns two strings: the source text, then the destination text.
function diff_rebuildtexts(diffs)
  -- Construct the two texts which made up the diff originally.
  local text1, text2 = {}, {}
  for x, diff in ipairs(diffs) do
    local op, data = diff[1], diff[2]
    if (op ~= DIFF_INSERT) then
      text1[#text1 + 1] = data
    end
    if (op ~= DIFF_DELETE) then
      text2[#text2 + 1] = data
    end
  end
  return table.concat(text1), table.concat(text2)
end


-- DIFF TEST FUNCTIONS


function testDiffCommonPrefix()
  -- Detect any common prefix.

  -- Null case.
  assertEquals(0, dmp.diff_commonPrefix('abc', 'xyz'))
  -- Non-null case.
  assertEquals(4, dmp.diff_commonPrefix('1234abcdef', '1234xyz'))
  -- Whole case.
  assertEquals(4, dmp.diff_commonPrefix('1234', '1234xyz'))
end

function testDiffCommonSuffix()
  -- Detect any common suffix.

  -- Null case.
  assertEquals(0, dmp.diff_commonSuffix('abc', 'xyz'))
  -- Non-null case.
  assertEquals(4, dmp.diff_commonSuffix('abcdef1234', 'xyz1234'))
  -- Whole case.
  assertEquals(4, dmp.diff_commonSuffix('1234', 'xyz1234'))
end

function testDiffCommonOverlap()
  -- Detect any suffix/prefix overlap.

  -- Null case.
  assertEquals(0, dmp.diff_commonOverlap('', 'abcd'));
  -- Whole case.
  assertEquals(3, dmp.diff_commonOverlap('abc', 'abcd'));
  -- No overlap.
  assertEquals(0, dmp.diff_commonOverlap('123456', 'abcd'));
  -- Overlap.
  assertEquals(3, dmp.diff_commonOverlap('123456xxx', 'xxxabcd'));
  --[[
  -- Unicode.
-- Some overly clever languages (C#) may treat ligatures as equal to their
  -- component letters. E.g. U+FB01 == 'fi'
  -- LUANOTE: No ability to handle Unicode.
  assertEquals(0, dmp.diff_commonOverlap('fi', '\ufb01i'));
  --]]
end

-- Exercises dmp.diff_halfMatch. Results are captured in a table so that a
-- nil result (no half-match) and the five-string result can both be compared
-- with assertEquivalent.
function testDiffHalfMatch()
  -- Detect a halfmatch.
  dmp.settings{Diff_Timeout = 1}

  -- No match.
  assertEquivalent({nil}, {dmp.diff_halfMatch('1234567890', 'abcdef')})
  assertEquivalent({nil}, {dmp.diff_halfMatch('12345', '23')})
  -- Single Match.
  assertEquivalent({'12', '90', 'a', 'z', '345678'},
      {dmp.diff_halfMatch('1234567890', 'a345678z')})
  assertEquivalent({'a', 'z', '12', '90', '345678'},
      {dmp.diff_halfMatch('a345678z', '1234567890')})
  assertEquivalent({'abc', 'z', '1234', '0', '56789'},
      {dmp.diff_halfMatch('abc56789z', '1234567890')})
  assertEquivalent({'a', 'xyz', '1', '7890', '23456'},
      {dmp.diff_halfMatch('a23456xyz', '1234567890')})
  -- Multiple Matches.
  assertEquivalent({'12123', '123121', 'a', 'z', '1234123451234'},
      {dmp.diff_halfMatch('121231234123451234123121', 'a1234123451234z')})
  assertEquivalent({'', '-=-=-=-=-=', 'x', '', 'x-=-=-=-=-=-=-='},
      {dmp.diff_halfMatch('x-=-=-=-=-=-=-=-=-=-=-=-=', 'xx-=-=-=-=-=-=-=')})
  assertEquivalent({'-=-=-=-=-=', '', '', 'y', '-=-=-=-=-=-=-=y'},
      {dmp.diff_halfMatch('-=-=-=-=-=-=-=-=-=-=-=-=y', '-=-=-=-=-=-=-=yy')})

  -- Non-optimal halfmatch.
  -- Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
  assertEquivalent({'qHillo', 'w', 'x', 'Hulloy', 'HelloHe'},
      {dmp.diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy')})
  -- Optimal no halfmatch.
  dmp.settings{Diff_Timeout = 0}
  -- Use the literal nil. The previous spelling "nill" only worked because
  -- an undefined global happens to evaluate to nil.
  assertEquivalent({nil}, {dmp.diff_halfMatch('qHilloHelloHew', 'xHelloHeHulloy')})
end

function testDiffCleanupMerge()
  -- Cleanup a messy diff.

  -- Null case.
  local diffs = {}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({}, diffs)
  -- No change case.
diffs = {{DIFF_EQUAL, 'a'}, {DIFF_DELETE, 'b'}, {DIFF_INSERT, 'c'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'a'}, {DIFF_DELETE, 'b'}, {DIFF_INSERT, 'c'}},
      diffs)
  -- Merge equalities.
  diffs = {{DIFF_EQUAL, 'a'}, {DIFF_EQUAL, 'b'}, {DIFF_EQUAL, 'c'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'abc'}}, diffs)
  -- Merge deletions.
  diffs = {{DIFF_DELETE, 'a'}, {DIFF_DELETE, 'b'}, {DIFF_DELETE, 'c'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_DELETE, 'abc'}}, diffs)
  -- Merge insertions.
  diffs = {{DIFF_INSERT, 'a'}, {DIFF_INSERT, 'b'}, {DIFF_INSERT, 'c'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_INSERT, 'abc'}}, diffs)
  -- Merge interweave.
  diffs = {{DIFF_DELETE, 'a'}, {DIFF_INSERT, 'b'}, {DIFF_DELETE, 'c'},
      {DIFF_INSERT, 'd'}, {DIFF_EQUAL, 'e'}, {DIFF_EQUAL, 'f'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_DELETE, 'ac'}, {DIFF_INSERT, 'bd'}, {DIFF_EQUAL, 'ef'}},
      diffs)
  -- Prefix and suffix detection.
  diffs = {{DIFF_DELETE, 'a'}, {DIFF_INSERT, 'abc'}, {DIFF_DELETE, 'dc'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'a'}, {DIFF_DELETE, 'd'},
      {DIFF_INSERT, 'b'}, {DIFF_EQUAL, 'c'}}, diffs)
  -- Prefix and suffix detection with equalities.
  diffs = {{DIFF_EQUAL, 'x'}, {DIFF_DELETE, 'a'}, {DIFF_INSERT, 'abc'},
      {DIFF_DELETE, 'dc'}, {DIFF_EQUAL, 'y'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'xa'}, {DIFF_DELETE, 'd'},
      {DIFF_INSERT, 'b'}, {DIFF_EQUAL, 'cy'}}, diffs)
  -- Slide edit left.
  diffs = {{DIFF_EQUAL, 'a'}, {DIFF_INSERT, 'ba'}, {DIFF_EQUAL, 'c'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_INSERT, 'ab'}, {DIFF_EQUAL, 'ac'}}, diffs)
  -- Slide edit right.
  diffs = {{DIFF_EQUAL, 'c'}, {DIFF_INSERT, 'ab'}, {DIFF_EQUAL, 'a'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'ca'}, {DIFF_INSERT, 'ba'}}, diffs)
  -- Slide edit left recursive.
  diffs = {{DIFF_EQUAL, 'a'}, {DIFF_DELETE, 'b'}, {DIFF_EQUAL, 'c'},
      {DIFF_DELETE, 'ac'}, {DIFF_EQUAL, 'x'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_DELETE, 'abc'}, {DIFF_EQUAL, 'acx'}}, diffs)
  -- Slide edit right recursive.
  diffs = {{DIFF_EQUAL, 'x'}, {DIFF_DELETE, 'ca'}, {DIFF_EQUAL, 'c'},
      {DIFF_DELETE, 'b'}, {DIFF_EQUAL, 'a'}}
  dmp.diff_cleanupMerge(diffs)
  assertEquivalent({{DIFF_EQUAL, 'xca'}, {DIFF_DELETE, 'cba'}}, diffs)
end

-- Exercises dmp.diff_cleanupSemanticLossless. Each case builds a diff list,
-- runs the cleanup on it in place, and compares the list with the expected
-- result.
function testDiffCleanupSemanticLossless()
  -- Slide diffs to match logical boundaries.

  -- Null case.
  local diffs = {}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({}, diffs)
  -- Blank lines.
  diffs = {{DIFF_EQUAL, 'AAA\r\n\r\nBBB'}, {DIFF_INSERT, '\r\nDDD\r\n\r\nBBB'},
      {DIFF_EQUAL, '\r\nEEE'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'AAA\r\n\r\n'},
      {DIFF_INSERT, 'BBB\r\nDDD\r\n\r\n'}, {DIFF_EQUAL, 'BBB\r\nEEE'}}, diffs)
  -- Line boundaries.
  diffs = {{DIFF_EQUAL, 'AAA\r\nBBB'}, {DIFF_INSERT, ' DDD\r\nBBB'},
      {DIFF_EQUAL, ' EEE'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'AAA\r\n'}, {DIFF_INSERT, 'BBB DDD\r\n'},
      {DIFF_EQUAL, 'BBB EEE'}}, diffs)
  -- Word boundaries.
  diffs = {{DIFF_EQUAL, 'The c'}, {DIFF_INSERT, 'ow and the c'},
      {DIFF_EQUAL, 'at.'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'The '}, {DIFF_INSERT, 'cow and the '},
      {DIFF_EQUAL, 'cat.'}}, diffs)
  -- Alphanumeric boundaries.
  diffs = {{DIFF_EQUAL, 'The-c'}, {DIFF_INSERT, 'ow-and-the-c'},
      {DIFF_EQUAL, 'at.'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'The-'}, {DIFF_INSERT, 'cow-and-the-'},
      {DIFF_EQUAL, 'cat.'}}, diffs)
  -- Hitting the start.
  diffs = {{DIFF_EQUAL, 'a'}, {DIFF_DELETE, 'a'}, {DIFF_EQUAL, 'ax'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_DELETE, 'a'}, {DIFF_EQUAL, 'aax'}}, diffs)
  -- Hitting the end.
  diffs = {{DIFF_EQUAL, 'xa'}, {DIFF_DELETE, 'a'}, {DIFF_EQUAL, 'a'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'xaa'}, {DIFF_DELETE, 'a'}}, diffs)
  -- Sentence boundaries.
  diffs = {{DIFF_EQUAL, 'The xxx. The '}, {DIFF_INSERT, 'zzz. The '},
      {DIFF_EQUAL, 'yyy.'}}
  dmp.diff_cleanupSemanticLossless(diffs)
  assertEquivalent({{DIFF_EQUAL, 'The xxx.'}, {DIFF_INSERT, ' The zzz.'},
      {DIFF_EQUAL, ' The yyy.'}}, diffs)
end

-- Exercises dmp.diff_cleanupSemantic with the same build / clean up in
-- place / compare pattern.
function testDiffCleanupSemantic()
  -- Cleanup semantically trivial equalities.

  -- Null case.
  local diffs = {}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({}, diffs)
  -- No elimination #1.
  diffs = {{DIFF_DELETE, 'ab'}, {DIFF_INSERT, 'cd'}, {DIFF_EQUAL, '12'},
      {DIFF_DELETE, 'e'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'ab'}, {DIFF_INSERT, 'cd'}, {DIFF_EQUAL, '12'},
      {DIFF_DELETE, 'e'}}, diffs)
  -- No elimination #2.
  diffs = {{DIFF_DELETE, 'abc'}, {DIFF_INSERT, 'ABC'}, {DIFF_EQUAL, '1234'},
      {DIFF_DELETE, 'wxyz'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abc'}, {DIFF_INSERT, 'ABC'}, {DIFF_EQUAL, '1234'},
      {DIFF_DELETE, 'wxyz'}}, diffs)
  -- Simple elimination.
  diffs = {{DIFF_DELETE, 'a'}, {DIFF_EQUAL, 'b'}, {DIFF_DELETE, 'c'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abc'}, {DIFF_INSERT, 'b'}}, diffs)
  -- Backpass elimination.
  diffs = {{DIFF_DELETE, 'ab'}, {DIFF_EQUAL, 'cd'}, {DIFF_DELETE, 'e'},
      {DIFF_EQUAL, 'f'}, {DIFF_INSERT, 'g'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abcdef'}, {DIFF_INSERT, 'cdfg'}}, diffs)
  -- Multiple eliminations.
  diffs = {{DIFF_INSERT, '1'}, {DIFF_EQUAL, 'A'}, {DIFF_DELETE, 'B'},
      {DIFF_INSERT, '2'}, {DIFF_EQUAL, '_'}, {DIFF_INSERT, '1'},
      {DIFF_EQUAL, 'A'}, {DIFF_DELETE, 'B'}, {DIFF_INSERT, '2'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'AB_AB'}, {DIFF_INSERT, '1A2_1A2'}}, diffs)
  -- Word boundaries.
  diffs = {{DIFF_EQUAL, 'The c'}, {DIFF_DELETE, 'ow and the c'},
      {DIFF_EQUAL, 'at.'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_EQUAL, 'The '}, {DIFF_DELETE, 'cow and the '},
      {DIFF_EQUAL, 'cat.'}}, diffs)
  -- No overlap elimination.
  diffs = {{DIFF_DELETE, 'abcxx'}, {DIFF_INSERT, 'xxdef'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abcxx'}, {DIFF_INSERT, 'xxdef'}}, diffs)
  -- Overlap elimination.
  diffs = {{DIFF_DELETE, 'abcxxx'}, {DIFF_INSERT, 'xxxdef'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abc'}, {DIFF_EQUAL, 'xxx'}, {DIFF_INSERT, 'def'}}, diffs)
  -- Reverse overlap elimination.
  diffs = {{DIFF_DELETE, 'xxxabc'}, {DIFF_INSERT, 'defxxx'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_INSERT, 'def'}, {DIFF_EQUAL, 'xxx'}, {DIFF_DELETE, 'abc'}}, diffs)
  -- Two overlap eliminations.
  diffs = {{DIFF_DELETE, 'abcd1212'}, {DIFF_INSERT, '1212efghi'}, {DIFF_EQUAL, '----'}, {DIFF_DELETE, 'A3'}, {DIFF_INSERT, '3BC'}}
  dmp.diff_cleanupSemantic(diffs)
  assertEquivalent({{DIFF_DELETE, 'abcd'}, {DIFF_EQUAL, '1212'}, {DIFF_INSERT, 'efghi'}, {DIFF_EQUAL, '----'}, {DIFF_DELETE, 'A'}, {DIFF_EQUAL, '3'}, {DIFF_INSERT, 'BC'}}, diffs)
end

-- Exercises dmp.diff_cleanupEfficiency. Diff_EditCost is set explicitly
-- because the cases below depend on it.
function testDiffCleanupEfficiency()
  -- Cleanup operationally trivial equalities.
  local diffs
  dmp.settings{Diff_EditCost = 4}

  -- Null case.
  diffs = {}
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({}, diffs)
  -- No elimination.
  diffs = {{DIFF_DELETE, 'ab'}, {DIFF_INSERT, '12'}, {DIFF_EQUAL, 'wxyz'},
      {DIFF_DELETE, 'cd'}, {DIFF_INSERT, '34'}}
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({{DIFF_DELETE, 'ab'}, {DIFF_INSERT, '12'},
      {DIFF_EQUAL, 'wxyz'}, {DIFF_DELETE, 'cd'}, {DIFF_INSERT, '34'}}, diffs)
  -- Four-edit elimination.
diffs = {{DIFF_DELETE, 'ab'}, {DIFF_INSERT, '12'}, {DIFF_EQUAL, 'xyz'},
      {DIFF_DELETE, 'cd'}, {DIFF_INSERT, '34'}}
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({
    {DIFF_DELETE, 'abxyzcd'},
    {DIFF_INSERT, '12xyz34'}
  }, diffs)

  -- Three-edit elimination.
  diffs = {
    {DIFF_INSERT, '12'},
    {DIFF_EQUAL, 'x'},
    {DIFF_DELETE, 'cd'},
    {DIFF_INSERT, '34'}
  }
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({
    {DIFF_DELETE, 'xcd'},
    {DIFF_INSERT, '12x34'}
  }, diffs)

  -- Backpass elimination.
  diffs = {
    {DIFF_DELETE, 'ab'},
    {DIFF_INSERT, '12'},
    {DIFF_EQUAL, 'xy'},
    {DIFF_INSERT, '34'},
    {DIFF_EQUAL, 'z'},
    {DIFF_DELETE, 'cd'},
    {DIFF_INSERT, '56'}
  }
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({
    {DIFF_DELETE, 'abxyzcd'},
    {DIFF_INSERT, '12xy34z56'}
  }, diffs)

  -- High cost elimination.
  dmp.settings{Diff_EditCost = 5}
  diffs = {
    {DIFF_DELETE, 'ab'},
    {DIFF_INSERT, '12'},
    {DIFF_EQUAL, 'wxyz'},
    {DIFF_DELETE, 'cd'},
    {DIFF_INSERT, '34'}
  }
  dmp.diff_cleanupEfficiency(diffs)
  assertEquivalent({
    {DIFF_DELETE, 'abwxyzcd'},
    {DIFF_INSERT, '12wxyz34'}
  }, diffs)

  -- Put the edit cost back to the value used at the top of this test.
  dmp.settings{Diff_EditCost = 4}
end

function testDiffPrettyHtml()
  -- Pretty print.
  local diffs = {
    {DIFF_EQUAL, 'a\n'},
    {DIFF_DELETE, 'b'},
    {DIFF_INSERT, 'c&d'}
  }
  -- NOTE(review): the expected-HTML literal below (and probably the DELETE
  -- text above) looks damaged by markup stripping -- the first string even
  -- contains a raw line break. It is reproduced as found; restore it from a
  -- pristine copy of the test file before relying on this test.
  assertEquals(
    '
    '
    .. '<B>b</B>'
    .. 'c&d',
    dmp.diff_prettyHtml(diffs)
  )
end

function testDiffText()
  -- Compute the source and destination texts.
  local diffs = {
    {DIFF_EQUAL, 'jump'},
    {DIFF_DELETE, 's'},
    {DIFF_INSERT, 'ed'},
    {DIFF_EQUAL, ' over '},
    {DIFF_DELETE, 'the'},
    {DIFF_INSERT, 'a'},
    {DIFF_EQUAL, ' lazy'}
  }
  assertEquals('jumps over the lazy', dmp.diff_text1(diffs))
  assertEquals('jumped over a lazy', dmp.diff_text2(diffs))
end

-- Exercises the diff_toDelta / diff_fromDelta round trip, including inputs
-- that diff_fromDelta is expected to reject with an error.
function testDiffDelta()
  -- Convert a diff into delta string.
  local diffs = {
    {DIFF_EQUAL, 'jump'},
    {DIFF_DELETE, 's'},
    {DIFF_INSERT, 'ed'},
    {DIFF_EQUAL, ' over '},
    {DIFF_DELETE, 'the'},
    {DIFF_INSERT, 'a'},
    {DIFF_EQUAL, ' lazy'},
    {DIFF_INSERT, 'old dog'}
  }
  local text1 = dmp.diff_text1(diffs)
  assertEquals('jumps over the lazy', text1)

  local delta = dmp.diff_toDelta(diffs)
  assertEquals('=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog', delta)

  -- Convert delta string into a diff.
  assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta))

  -- pcall results are kept local so the test does not leak globals.
  local success, result

  -- Generates error (19 ~= 20).
  success, result = pcall(dmp.diff_fromDelta, text1 .. 'x', delta)
  assertEquals(false, success)

  -- Generates error (19 ~= 18).
  success, result = pcall(dmp.diff_fromDelta, string.sub(text1, 2), delta)
  assertEquals(false, success)

  -- Generates error (%c3%xy invalid Unicode).
  -- This must call diff_fromDelta: the module exports no patch_fromDelta, so
  -- the old call handed pcall a nil function and "failed" without ever
  -- exercising the escape check.
  success, result = pcall(dmp.diff_fromDelta, '', '+%c3%xy')
  assertEquals(false, success)

  --[[
  -- Test deltas with special characters.
  -- LUANOTE: No ability to handle Unicode.
  diffs = {{DIFF_EQUAL, '\u0680 \000 \t %'}, {DIFF_DELETE, '\u0681 \x01 \n ^'}, {DIFF_INSERT, '\u0682 \x02 \\ |'}}
  text1 = dmp.diff_text1(diffs)
  assertEquals('\u0680 \x00 \t %\u0681 \x01 \n ^', text1)

  delta = dmp.diff_toDelta(diffs)
  assertEquals('=7\t-7\t+%DA%82 %02 %5C %7C', delta)
  --]]

  -- Convert delta string into a diff.
  assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta))

  -- Verify pool of unchanged characters.
+ diffs = { + {DIFF_INSERT, 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? = @ & = + $ , # '} + } + local text2 = dmp.diff_text2(diffs) + assertEquals( + 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? = @ & = + $ , # ', + text2 + ) + + delta = dmp.diff_toDelta(diffs) + assertEquals( + '+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? = @ & = + $ , # ', + delta + ) + + -- Convert delta string into a diff. + assertEquivalent(diffs, dmp.diff_fromDelta('', delta)) +end + +function testDiffXIndex() + -- Translate a location in text1 to text2. + + -- Translation on equality. + assertEquals(6, dmp.diff_xIndex({ + {DIFF_DELETE, 'a'}, + {DIFF_INSERT, '1234'}, + {DIFF_EQUAL, 'xyz'} + }, 3)) + + -- Translation on deletion. + assertEquals(2, dmp.diff_xIndex({ + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, '1234'}, + {DIFF_EQUAL, 'xyz'} + }, 4)) +end + +function testDiffLevenshtein() + -- Levenshtein with trailing equality. + assertEquals(4, dmp.diff_levenshtein({ + {DIFF_DELETE, 'abc'}, + {DIFF_INSERT, '1234'}, + {DIFF_EQUAL, 'xyz'} + })) + -- Levenshtein with leading equality. + assertEquals(4, dmp.diff_levenshtein({ + {DIFF_EQUAL, 'xyz'}, + {DIFF_DELETE, 'abc'}, + {DIFF_INSERT, '1234'} + })) + -- Levenshtein with middle equality. + assertEquals(7, dmp.diff_levenshtein({ + {DIFF_DELETE, 'abc'}, + {DIFF_EQUAL, 'xyz'}, + {DIFF_INSERT, '1234'} + })) +end + +function testDiffBisect() + -- Normal. + local a = 'cat' + local b = 'map' + -- Since the resulting diff hasn't been normalized, it would be ok if + -- the insertion and deletion pairs are swapped. + -- If the order changes, tweak this test as required. + assertEquivalent({ + {DIFF_DELETE, 'c'}, + {DIFF_INSERT, 'm'}, + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, 't'}, + {DIFF_INSERT, 'p'} + }, dmp.diff_bisect(a, b, 2 ^ 31)) + + -- Timeout. + assertEquivalent({ + {DIFF_DELETE, 'cat'}, + {DIFF_INSERT, 'map'} + }, dmp.diff_bisect(a, b, 0)) +end + +function testDiffMain() + -- Perform a trivial diff. + local a,b + + -- Null case. 
+ assertEquivalent({}, dmp.diff_main('', '', false)) + + -- Equality. + assertEquivalent({ + {DIFF_EQUAL, 'abc'} + }, dmp.diff_main('abc', 'abc', false)) + + -- Simple insertion. + assertEquivalent({ + {DIFF_EQUAL, 'ab'}, + {DIFF_INSERT, '123'}, + {DIFF_EQUAL, 'c'} + }, dmp.diff_main('abc', 'ab123c', false)) + + -- Simple deletion. + assertEquivalent({ + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, '123'}, + {DIFF_EQUAL, 'bc'} + }, dmp.diff_main('a123bc', 'abc', false)) + + -- Two insertions. + assertEquivalent({ + {DIFF_EQUAL, 'a'}, + {DIFF_INSERT, '123'}, + {DIFF_EQUAL, 'b'}, + {DIFF_INSERT, '456'}, + {DIFF_EQUAL, 'c'} + }, dmp.diff_main('abc', 'a123b456c', false)) + + -- Two deletions. + assertEquivalent({ + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, '123'}, + {DIFF_EQUAL, 'b'}, + {DIFF_DELETE, '456'}, + {DIFF_EQUAL, 'c'} + }, dmp.diff_main('a123b456c', 'abc', false)) + + -- Perform a real diff. + -- Switch off the timeout. + dmp.settings{ Diff_Timeout=0 } + + -- Simple cases. + assertEquivalent({ + {DIFF_DELETE, 'a'}, + {DIFF_INSERT, 'b'} + }, dmp.diff_main('a', 'b', false)) + + assertEquivalent({ + {DIFF_DELETE, 'Apple'}, + {DIFF_INSERT, 'Banana'}, + {DIFF_EQUAL, 's are a'}, + {DIFF_INSERT, 'lso'}, + {DIFF_EQUAL, ' fruit.'} + }, dmp.diff_main('Apples are a fruit.', 'Bananas are also fruit.', false)) + + --[[ + -- LUANOTE: No ability to handle Unicode. + assertEquivalent({ + {DIFF_DELETE, 'a'}, + {DIFF_INSERT, '\u0680'}, + {DIFF_EQUAL, 'x'}, + {DIFF_DELETE, '\t'}, + {DIFF_INSERT, '\0'} + }, dmp.diff_main('ax\t', '\u0680x\0', false)) + ]]-- + + -- Overlaps. 
+ assertEquivalent({ + {DIFF_DELETE, '1'}, + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, 'y'}, + {DIFF_EQUAL, 'b'}, + {DIFF_DELETE, '2'}, + {DIFF_INSERT, 'xab'} + }, dmp.diff_main('1ayb2', 'abxab', false)) + + assertEquivalent({ + {DIFF_INSERT, 'xaxcx'}, + {DIFF_EQUAL, 'abc'}, + {DIFF_DELETE, 'y'} + }, dmp.diff_main('abcy', 'xaxcxabc', false)) + + assertEquivalent({ + {DIFF_DELETE, 'ABCD'}, + {DIFF_EQUAL, 'a'}, + {DIFF_DELETE, '='}, + {DIFF_INSERT, '-'}, + {DIFF_EQUAL, 'bcd'}, + {DIFF_DELETE, '='}, + {DIFF_INSERT, '-'}, + {DIFF_EQUAL, 'efghijklmnopqrs'}, + {DIFF_DELETE, 'EFGHIJKLMNOefg'} + }, dmp.diff_main('ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg', + 'a-bcd-efghijklmnopqrs', false)) + + -- Large equality. + assertEquivalent({ + {DIFF_INSERT, ' '}, + {DIFF_EQUAL, 'a'}, + {DIFF_INSERT, 'nd'}, + {DIFF_EQUAL, ' [[Pennsylvania]]'}, + {DIFF_DELETE, ' and [[New'} + }, dmp.diff_main('a [[Pennsylvania]] and [[New', + ' and [[Pennsylvania]]', false)) + + -- Timeout. + dmp.settings{Diff_Timeout = 0.1} -- 100ms + -- Increase the text lengths by 1024 times to ensure a timeout. + a = string.rep([[ +`Twas brillig, and the slithy toves +Did gyre and gimble in the wabe: +All mimsy were the borogoves, +And the mome raths outgrabe. +]], 1024) + b = string.rep([[ +I am the very model of a modern major general, +I've information vegetable, animal, and mineral, +I know the kings of England, and I quote the fights historical, +From Marathon to Waterloo, in order categorical. +]], 1024) + local startTime = os.clock() + dmp.diff_main(a, b) + local endTime = os.clock() + -- Test that we took at least the timeout period. + assertTrue(0.1 <= endTime - startTime) + -- Test that we didn't take forever (be forgiving). + -- Theoretically this test could fail very occasionally if the + -- OS task swaps or locks up for a second at the wrong moment. + assertTrue(0.1 * 2 > endTime - startTime) + dmp.settings{Diff_Timeout = 0} + + -- Test the linemode speedup. + -- Must be long to pass the 100 char cutoff. 
+ -- Simple line-mode. + a = string.rep('1234567890\n', 13) + b = string.rep('abcdefghij\n', 13) + assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)) + + -- Single line-mode. + a = string.rep('1234567890', 13) + b = string.rep('abcdefghij', 13) + assertEquivalent(dmp.diff_main(a, b, false), dmp.diff_main(a, b, true)) + + -- Overlap line-mode. + a = string.rep('1234567890\n', 13) + b = [[ +abcdefghij +1234567890 +1234567890 +1234567890 +abcdefghij +1234567890 +1234567890 +1234567890 +abcdefghij +1234567890 +1234567890 +1234567890 +abcdefghij +]] + local texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)) + local texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)) + assertEquivalent(texts_textmode, texts_linemode) + + -- Test null inputs. + success, result = pcall(dmp.diff_main, nil, nil) + assertEquals(false, success) +end + + +-- MATCH TEST FUNCTIONS + + +function testMatchAlphabet() + -- Initialise the bitmasks for Bitap. + -- Unique. + assertEquivalent({a=4, b=2, c=1}, dmp.match_alphabet('abc')) + + -- Duplicates. + assertEquivalent({a=37, b=18, c=8}, dmp.match_alphabet('abcaba')) +end + +function testMatchBitap() + -- Bitap algorithm. + dmp.settings{Match_Distance=100, Match_Threshold=0.5} + + -- Exact matches. + assertEquals(6, dmp.match_bitap('abcdefghijk', 'fgh', 6)) + + assertEquals(6, dmp.match_bitap('abcdefghijk', 'fgh', 1)) + + -- Fuzzy matches. + assertEquals(5, dmp.match_bitap('abcdefghijk', 'efxhi', 1)) + + assertEquals(3, dmp.match_bitap('abcdefghijk', 'cdefxyhijk', 6)) + + assertEquals(-1, dmp.match_bitap('abcdefghijk', 'bxy', 2)) + + -- Overflow. + assertEquals(3, dmp.match_bitap('123456789xx0', '3456789x0', 3)) + + -- Threshold test. 
+ dmp.settings{Match_Threshold = 0.4} + assertEquals(5, dmp.match_bitap('abcdefghijk', 'efxyhi', 2)) + + dmp.settings{Match_Threshold = 0.3} + assertEquals(-1, dmp.match_bitap('abcdefghijk', 'efxyhi', 2)) + + dmp.settings{Match_Threshold = 0.0} + assertEquals(2, dmp.match_bitap('abcdefghijk', 'bcdef', 2)) + dmp.settings{Match_Threshold = 0.5} + + -- Multiple select. + assertEquals(1, dmp.match_bitap('abcdexyzabcde', 'abccde', 4)) + + assertEquals(9, dmp.match_bitap('abcdexyzabcde', 'abccde', 6)) + + -- Distance test. + + dmp.settings{Match_Distance = 10} -- Strict location. + + assertEquals(-1, + dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 25)) + + assertEquals(1, + dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdxxefg', 2)) + + dmp.settings{Match_Distance = 1000} -- Loose location. + + assertEquals(1, + dmp.match_bitap('abcdefghijklmnopqrstuvwxyz', 'abcdefg', 25)) +end + +function testMatchMain() + -- Full match. + -- Shortcut matches. + assertEquals(1, dmp.match_main('abcdef', 'abcdef', 1000)) + + assertEquals(-1, dmp.match_main('', 'abcdef', 2)) + + assertEquals(4, dmp.match_main('abcdef', '', 4)) + + assertEquals(4, dmp.match_main('abcdef', 'de', 4)) + + -- Beyond end match. + assertEquals(4, dmp.match_main("abcdef", "defy", 5)) + + -- Oversized pattern. + assertEquals(1, dmp.match_main("abcdef", "abcdefy", 1)) + + -- Complex match. + assertEquals(5, dmp.match_main( + 'I am the very model of a modern major general.', + ' that berry ', + 6 + )) + + -- Test null inputs. + success, result = pcall(dmp.match_main, nil, nil, 0) + assertEquals(false, success) +end + + +-- PATCH TEST FUNCTIONS + + +function testPatchObj() + -- Patch Object. 
+ local p = dmp.new_patch_obj() + p.start1 = 21 + p.start2 = 22 + p.length1 = 18 + p.length2 = 17 + p.diffs = { + {DIFF_EQUAL, 'jump'}, + {DIFF_DELETE, 's'}, + {DIFF_INSERT, 'ed'}, + {DIFF_EQUAL, ' over '}, + {DIFF_DELETE, 'the'}, + {DIFF_INSERT, 'a'}, + {DIFF_EQUAL, '\nlaz'} + } + local strp = tostring(p) + assertEquals( + '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n', + strp + ) +end + +function testPatchFromText() + local strp + + strp = '' + assertEquivalent({}, dmp.patch_fromText(strp)) + + strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n' + assertEquals(strp, tostring(dmp.patch_fromText(strp)[1])) + + assertEquals( + '@@ -1 +1 @@\n-a\n+b\n', + tostring(dmp.patch_fromText('@@ -1 +1 @@\n-a\n+b\n')[1]) + ) + + assertEquals( + '@@ -1,3 +0,0 @@\n-abc\n', + tostring(dmp.patch_fromText('@@ -1,3 +0,0 @@\n-abc\n')[1]) + ) + + assertEquals( + '@@ -0,0 +1,3 @@\n+abc\n', + tostring(dmp.patch_fromText('@@ -0,0 +1,3 @@\n+abc\n')[1]) + ) + + -- Generates error. + success, result = pcall(dmp.patch_fromText, 'Bad\nPatch\n') + assertEquals(false, success) +end + +function testPatchToText() + local strp, p + + strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n' + p = dmp.patch_fromText(strp) + assertEquals(strp, dmp.patch_toText(p)) + + strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n' + .. '@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n' + p = dmp.patch_fromText(strp) + assertEquals(strp, dmp.patch_toText(p)) +end + +function testPatchAddContext() + local p + dmp.settings{Patch_Margin = 4} + + p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[1] + + dmp.patch_addContext(p, 'The quick brown fox jumps over the lazy dog.') + + assertEquals( + '@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n', + tostring(p) + ) + + -- Same, but not enough trailing context. 
+ p = dmp.patch_fromText('@@ -21,4 +21,10 @@\n-jump\n+somersault\n')[1] + dmp.patch_addContext(p, 'The quick brown fox jumps.') + assertEquals( + '@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n', + tostring(p) + ) + + -- Same, but not enough leading context. + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[1] + dmp.patch_addContext(p, 'The quick brown fox jumps.') + assertEquals('@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n', tostring(p)) + + -- Same, but with ambiguity. + p = dmp.patch_fromText('@@ -3 +3,2 @@\n-e\n+at\n')[1] + dmp.patch_addContext(p, 'The quick brown fox jumps. The quick brown fox crashes.') + assertEquals('@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n', tostring(p)) +end + +function testPatchMake() + -- Null case. + local patches = dmp.patch_make('', '') + assertEquals('', dmp.patch_toText(patches)) + + local text1 = 'The quick brown fox jumps over the lazy dog.' + local text2 = 'That quick brown fox jumped over a lazy dog.' + -- Text2+Text1 inputs. + local expectedPatch = '@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n' + .. '@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n' + -- The second patch must be "-21,17 +21,18", + -- not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make(text2, text1) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Text1+Text2 inputs. + expectedPatch = '@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n' + .. '@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n' + patches = dmp.patch_make(text1, text2) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Diff input. + local diffs = dmp.diff_main(text1, text2, false) + patches = dmp.patch_make(diffs) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Text1+Diff inputs. + patches = dmp.patch_make(text1, diffs) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Text1+Text2+Diff inputs (deprecated). 
+ patches = dmp.patch_make(text1, text2, diffs) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Character encoding. + patches = dmp.patch_make('`1234567890-=[]\\;\',./', '~!@#$%^&*()_+{}|="<>?') + assertEquals('@@ -1,21 +1,21 @@\n' + .. '-%601234567890-=%5B%5D%5C;\',./\n' + .. '+~!@#$%25%5E&*()_+%7B%7D%7C=%22%3C%3E?\n', dmp.patch_toText(patches)) + + -- Character decoding. + diffs = { + {DIFF_DELETE, '`1234567890-=[]\\;\',./'}, + {DIFF_INSERT, '~!@#$%^&*()_+{}|="<>?'} + } + assertEquivalent(diffs, dmp.patch_fromText( + '@@ -1,21 +1,21 @@' + .. '\n-%601234567890-=%5B%5D%5C;\',./' + .. '\n+~!@#$%25%5E&*()_+%7B%7D%7C=%22%3C%3E?\n' + )[1].diffs) + + -- Long string with repeats. + text1 = string.rep('abcdef', 100) + text2 = text1 .. '123' + expectedPatch = '@@ -573,28 +573,31 @@\n' + .. ' cdefabcdefabcdefabcdefabcdef\n+123\n' + patches = dmp.patch_make(text1, text2) + assertEquals(expectedPatch, dmp.patch_toText(patches)) + + -- Test null inputs. + success, result = pcall(dmp.patch_make, nil, nil) + assertEquals(false, success) +end + +function testPatchSplitMax() + -- Assumes that dmp.Match_MaxBits is 32. 
+ local patches = dmp.patch_make('abcdefghijklmnopqrstuvwxyz01234567890', + 'XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0') + dmp.patch_splitMax(patches) + assertEquals('@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n', dmp.patch_toText(patches)) + + patches = dmp.patch_make('abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz', 'abcdefuvwxyz') + local oldToText = dmp.patch_toText(patches) + dmp.patch_splitMax(patches) + assertEquals(oldToText, dmp.patch_toText(patches)) + + patches = dmp.patch_make('1234567890123456789012345678901234567890123456789012345678901234567890', 'abc') + dmp.patch_splitMax(patches) + assertEquals('@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n', dmp.patch_toText(patches)) + + patches = dmp.patch_make('abcdefghij , h = 0 , t = 1 abcdefghij , h = 0 , t = 1 abcdefghij , h = 0 , t = 1', 'abcdefghij , h = 1 , t = 1 abcdefghij , h = 1 , t = 1 abcdefghij , h = 0 , t = 1') + dmp.patch_splitMax(patches) + assertEquals('@@ -2,32 +2,32 @@\n bcdefghij , h = \n-0\n+1\n , t = 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h = \n-0\n+1\n , t = 1 abcdef\n', dmp.patch_toText(patches)) +end + +function testPatchAddPadding() + -- Both edges full. + local patches = dmp.patch_make('', 'test') + assertEquals('@@ -0,0 +1,4 @@\n+test\n', dmp.patch_toText(patches)) + dmp.patch_addPadding(patches) + assertEquals('@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n', dmp.patch_toText(patches)) + + -- Both edges partial. 
+ patches = dmp.patch_make('XY', 'XtestY') + assertEquals('@@ -1,2 +1,6 @@\n X\n+test\n Y\n', dmp.patch_toText(patches)) + dmp.patch_addPadding(patches) + assertEquals('@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n', dmp.patch_toText(patches)) + + -- Both edges none. + patches = dmp.patch_make('XXXXYYYY', 'XXXXtestYYYY') + assertEquals('@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches)) + dmp.patch_addPadding(patches) + assertEquals('@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n', dmp.patch_toText(patches)) +end + +function testPatchApply() + local patches + + dmp.settings{Match_Distance = 1000} + dmp.settings{Match_Threshold = 0.5} + dmp.settings{Patch_DeleteThreshold = 0.5} + -- Null case. + patches = dmp.patch_make('', '') + assertEquivalent({'Hello world.', {}}, + {dmp.patch_apply(patches, 'Hello world.')}) + + -- Exact match. + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', + 'That quick brown fox jumped over a lazy dog.') + assertEquivalent( + {'That quick brown fox jumped over a lazy dog.', {true, true}}, + {dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.')}) + -- Partial match. + assertEquivalent( + {'That quick red rabbit jumped over a tired tiger.', {true, true}}, + {dmp.patch_apply(patches, 'The quick red rabbit jumps over the tired tiger.')}) + -- Failed match. + assertEquivalent( + {'I am the very model of a modern major general.', {false, false}}, + {dmp.patch_apply(patches, 'I am the very model of a modern major general.')}) + -- Big delete, small change. + patches = dmp.patch_make( + 'x1234567890123456789012345678901234567890123456789012345678901234567890y', + 'xabcy') + assertEquivalent({'xabcy', {true, true}}, {dmp.patch_apply(patches, + 'x123456789012345678901234567890-----++++++++++-----' + .. '123456789012345678901234567890y')}) + -- Big delete, big change 1. + patches = dmp.patch_make('x1234567890123456789012345678901234567890123456789' + .. 
'012345678901234567890y', 'xabcy') + assertEquivalent({'xabc12345678901234567890' + .. '---------------++++++++++---------------' + .. '12345678901234567890y', {false, true}}, + {dmp.patch_apply(patches, 'x12345678901234567890' + .. '---------------++++++++++---------------' + .. '12345678901234567890y' + )}) + -- Big delete, big change 2. + dmp.settings{Patch_DeleteThreshold = 0.6} + patches = dmp.patch_make( + 'x1234567890123456789012345678901234567890123456789' + .. '012345678901234567890y', + 'xabcy' + ) + assertEquivalent({'xabcy', {true, true}}, {dmp.patch_apply( + patches, + 'x12345678901234567890---------------++++++++++---------------' + .. '12345678901234567890y' + )} +) + dmp.settings{Patch_DeleteThreshold = 0.5} + + -- Compensate for failed patch. + dmp.settings{Match_Threshold = 0, Match_Distance = 0} + patches = dmp.patch_make( + 'abcdefghijklmnopqrstuvwxyz--------------------1234567890', + 'abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------' + .. '1234567YYYYYYYYYY890' + ) + assertEquivalent({ + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890', + {false, true} + }, {dmp.patch_apply( + patches, + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890' + )}) + + dmp.settings{Match_Threshold = 0.5} + dmp.settings{Match_Distance = 1000} + + -- No side effects. + patches = dmp.patch_make('', 'test') + local patchstr = dmp.patch_toText(patches) + dmp.patch_apply(patches, '') + assertEquals(patchstr, dmp.patch_toText(patches)) + -- No side effects with major delete. + patches = dmp.patch_make('The quick brown fox jumps over the lazy dog.', + 'Woof') + patchstr = dmp.patch_toText(patches) + dmp.patch_apply(patches, 'The quick brown fox jumps over the lazy dog.') + assertEquals(patchstr, dmp.patch_toText(patches)) + -- Edge exact match. + patches = dmp.patch_make('', 'test') + assertEquivalent({'test', {true}}, {dmp.patch_apply(patches, '')}) + -- Near edge exact match. 
+ patches = dmp.patch_make('XY', 'XtestY') + assertEquivalent({'XtestY', {true}}, {dmp.patch_apply(patches, 'XY')}) + -- Edge partial match. + patches = dmp.patch_make('y', 'y123') + assertEquivalent({'x123', {true}}, {dmp.patch_apply(patches, 'x')}) +end + +function runTests() + local passed = 0 + local failed = 0 + for name, func in pairs(_G) do + if (type(func) == 'function') and tostring(name):match("^test") then + local success, message = pcall(func) + if success then + print(name .. ' Ok.') + passed = passed + 1 + else + print('** ' .. name .. ' FAILED: ' .. tostring(message)) + failed = failed + 1 + end + end + end + print('Tests passed: ' .. passed) + print('Tests failed: ' .. failed) + if failed ~= 0 then + os.exit(1) + end +end + +runTests() diff --git a/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current-src.jar b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current-src.jar new file mode 100644 index 0000000..a92f711 Binary files /dev/null and b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current-src.jar differ diff --git a/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.jar b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.jar new file mode 100755 index 0000000..a4be2d1 Binary files /dev/null and b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.jar differ diff --git a/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.pom b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.pom new file mode 100644 index 0000000..23c0cd7 --- /dev/null +++ b/maven/diff_match_patch/diff_match_patch/current/diff_match_patch-current.pom @@ -0,0 +1 @@ +4.0.0diff_match_patchdiff_match_patchcurrentGoogleDiffMatchPatchhttp://code.google.com/p/google-diff-match-patchThe Diff Match and Patch libraries offer robust algorithms to perform the operations required for synchronizing plain text. 
Googlehttp://www.google.comApache 2.0http://www.apache.org/licenses/LICENSE-2.0.html diff --git a/maven/diff_match_patch/diff_match_patch/maven-metadata.xml b/maven/diff_match_patch/diff_match_patch/maven-metadata.xml new file mode 100644 index 0000000..bb4e93b --- /dev/null +++ b/maven/diff_match_patch/diff_match_patch/maven-metadata.xml @@ -0,0 +1 @@ +diff_match_patchdiff_match_patchcurrentcurrent diff --git a/objectivec/Configurations/Base+SnowLeopard.xcconfig b/objectivec/Configurations/Base+SnowLeopard.xcconfig new file mode 100755 index 0000000..2226123 --- /dev/null +++ b/objectivec/Configurations/Base+SnowLeopard.xcconfig @@ -0,0 +1,3 @@ +#include "Base.xcconfig" + +SDKROOT = macosx10.6 diff --git a/objectivec/Configurations/Base.xcconfig b/objectivec/Configurations/Base.xcconfig new file mode 100755 index 0000000..a491c46 --- /dev/null +++ b/objectivec/Configurations/Base.xcconfig @@ -0,0 +1,35 @@ +#include "Version.xcconfig" + +SDKROOT = macosx10.5 +PREBINDING = NO +ARCHS = i386 x86_64 +//ARCHS = ppc i386 x86_64 // PPC works, but fails the timing tests under Rosetta + +GCC_VERSION = com.apple.compilers.llvm.clang.1_0 +GCC_C_LANGUAGE_STANDARD = gnu99 + +GCC_WARN_CHECK_SWITCH_STATEMENTS = YES +GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO +GCC_WARN_SHADOW = YES +GCC_TREAT_WARNINGS_AS_ERRORS = YES +GCC_WARN_64_TO_32_BIT_CONVERSION = YES +GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS = YES +GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES +GCC_WARN_ABOUT_RETURN_TYPE = YES +GCC_WARN_MISSING_PARENTHESES = YES +GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES +GCC_WARN_ABOUT_MISSING_NEWLINE = YES +GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES +GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES +GCC_WARN_SIGN_COMPARE = YES +GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES +GCC_WARN_UNDECLARED_SELECTOR = YES +GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES +GCC_WARN_UNINITIALIZED_AUTOS = YES +GCC_WARN_UNKNOWN_PRAGMAS = YES +GCC_WARN_UNUSED_FUNCTION = YES +GCC_WARN_UNUSED_LABEL = YES 
+GCC_WARN_UNUSED_PARAMETER = NO +GCC_WARN_UNUSED_VALUE = YES +GCC_WARN_UNUSED_VARIABLE = YES + diff --git a/objectivec/Configurations/Version.xcconfig b/objectivec/Configurations/Version.xcconfig new file mode 100755 index 0000000..03512e7 --- /dev/null +++ b/objectivec/Configurations/Version.xcconfig @@ -0,0 +1,2 @@ +MARKETING_VERSION = 1.0.3 +PROJECT_VERSION = 1000 diff --git a/objectivec/DiffMatchPatch.h b/objectivec/DiffMatchPatch.h new file mode 100755 index 0000000..b2dc083 --- /dev/null +++ b/objectivec/DiffMatchPatch.h @@ -0,0 +1,174 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <Foundation/Foundation.h> + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + */ + +/* + * The data structure representing a diff is an NSMutableArray of Diff objects: + * {Diff(Operation.DIFF_DELETE, "Hello"), + * Diff(Operation.DIFF_INSERT, "Goodbye"), + * Diff(Operation.DIFF_EQUAL, " world.")} + * which means: delete "Hello", add "Goodbye" and keep " world." + */ + +typedef enum { + DIFF_DELETE = 1, + DIFF_INSERT = 2, + DIFF_EQUAL = 3 +} Operation; + + +/* + * Class representing one diff operation.
+ */ +@interface Diff : NSObject { + Operation operation; // One of: DIFF_INSERT, DIFF_DELETE or DIFF_EQUAL. + NSString *text; // The text associated with this diff operation. +} + +@property (nonatomic, assign) Operation operation; +@property (nonatomic, copy) NSString *text; + ++ (id)diffWithOperation:(Operation)anOperation andText:(NSString *)aText; + +- (id)initWithOperation:(Operation)anOperation andText:(NSString *)aText; + +@end + +/* + * Class representing one patch operation. + */ +@interface Patch : NSObject { + NSMutableArray *diffs; + NSUInteger start1; + NSUInteger start2; + NSUInteger length1; + NSUInteger length2; +} + +@property (nonatomic, retain) NSMutableArray *diffs; +@property (nonatomic, assign) NSUInteger start1; +@property (nonatomic, assign) NSUInteger start2; +@property (nonatomic, assign) NSUInteger length1; +@property (nonatomic, assign) NSUInteger length2; + +@end + + +/* + * Class containing the diff, match and patch methods. + * Also Contains the behaviour settings. + */ +@interface DiffMatchPatch : NSObject { + // Number of seconds to map a diff before giving up (0 for infinity). + NSTimeInterval Diff_Timeout; + + // Cost of an empty edit operation in terms of edit characters. + NSUInteger Diff_EditCost; + + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + double Match_Threshold; + + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + NSInteger Match_Distance; + + // When deleting a large block of text (over ~64 characters), how close + // do the contents have to be to match the expected contents. (0.0 = + // perfection, 1.0 = very loose). Note that Match_Threshold controls + // how closely the end points of a delete need to match. + float Patch_DeleteThreshold; + + // Chunk size for context length. 
+ uint16_t Patch_Margin; + + // The number of bits in an int. + NSUInteger Match_MaxBits; +} + +@property (nonatomic, assign) NSTimeInterval Diff_Timeout; +@property (nonatomic, assign) NSUInteger Diff_EditCost; +@property (nonatomic, assign) double Match_Threshold; +@property (nonatomic, assign) NSInteger Match_Distance; +@property (nonatomic, assign) float Patch_DeleteThreshold; +@property (nonatomic, assign) uint16_t Patch_Margin; + +- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1 andNewString:(NSString *)text2; +- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1 andNewString:(NSString *)text2 checkLines:(BOOL)checklines; +- (NSUInteger)diff_commonPrefixOfFirstString:(NSString *)text1 andSecondString:(NSString *)text2; +- (NSUInteger)diff_commonSuffixOfFirstString:(NSString *)text1 andSecondString:(NSString *)text2; +- (void)diff_cleanupSemantic:(NSMutableArray *)diffs; +- (void)diff_cleanupSemanticLossless:(NSMutableArray *)diffs; +- (void)diff_cleanupEfficiency:(NSMutableArray *)diffs; +- (void)diff_cleanupMerge:(NSMutableArray *)diffs; +- (NSUInteger)diff_xIndexIn:(NSMutableArray *)diffs location:(NSUInteger) loc; +- (NSString *)diff_prettyHtml:(NSMutableArray *)diffs; +- (NSString *)diff_text1:(NSMutableArray *)diffs; +- (NSString *)diff_text2:(NSMutableArray *)diffs; +- (NSUInteger)diff_levenshtein:(NSMutableArray *)diffs; +- (NSString *)diff_toDelta:(NSMutableArray *)diffs; +- (NSMutableArray *)diff_fromDeltaWithText:(NSString *)text1 andDelta:(NSString *)delta error:(NSError **)error; + +- (NSUInteger)match_mainForText:(NSString *)text pattern:(NSString *)pattern near:(NSUInteger)loc; +- (NSMutableDictionary *)match_alphabet:(NSString *)pattern; + +- (NSMutableArray *)patch_makeFromOldString:(NSString *)text1 andNewString:(NSString *)text2; +- (NSMutableArray *)patch_makeFromDiffs:(NSMutableArray *)diffs; +- (NSMutableArray *)patch_makeFromOldString:(NSString *)text1 newString:(NSString *)text2 diffs:(NSMutableArray *)diffs; +- 
(NSMutableArray *)patch_makeFromOldString:(NSString *)text1 andDiffs:(NSMutableArray *)diffs; +- (NSMutableArray *)patch_deepCopy:(NSArray *)patches; // Copy rule applies! +- (NSArray *)patch_apply:(NSArray *)sourcePatches toString:(NSString *)text; +- (NSString *)patch_addPadding:(NSMutableArray *)patches; +- (void)patch_splitMax:(NSMutableArray *)patches; +- (NSString *)patch_toText:(NSMutableArray *)patches; +- (NSMutableArray *)patch_fromText:(NSString *)textline error:(NSError **)error; + +@end + + +@interface DiffMatchPatch (PrivateMethods) + +- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1 andNewString:(NSString *)text2 checkLines:(BOOL)checklines deadline:(NSTimeInterval)deadline; +- (NSMutableArray *)diff_computeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 checkLines:(BOOL)checklines deadline:(NSTimeInterval)deadline; +- (NSMutableArray *)diff_lineModeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline; +- (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1 andSecondString:(NSString *)text2; +- (NSString *)diff_linesToCharsMungeOfText:(NSString *)text lineArray:(NSMutableArray *)lineArray lineHash:(NSMutableDictionary *)lineHash; +- (void)diff_chars:(NSArray *)diffs toLines:(NSMutableArray *)lineArray; +- (NSMutableArray *)diff_bisectOfOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline; +- (NSMutableArray *)diff_bisectSplitOfOldString:(NSString *)text1 andNewString:(NSString *)text2 x:(NSUInteger)x y:(NSUInteger)y deadline:(NSTimeInterval)deadline; +- (NSUInteger)diff_commonOverlapOfFirstString:(NSString *)text1 andSecondString:(NSString *)text2; +- (NSArray *)diff_halfMatchOfFirstString:(NSString *)text1 andSecondString:(NSString *)text2; +- (NSArray *)diff_halfMatchIOfLongString:(NSString *)longtext andShortString:(NSString *)shorttext; +- (NSInteger)diff_cleanupSemanticScoreOfFirstString:(NSString *)one andSecondString:(NSString
*)two; + +- (NSUInteger)match_bitapOfText:(NSString *)text andPattern:(NSString *)pattern near:(NSUInteger)loc; +- (double)match_bitapScoreForErrorCount:(NSUInteger)e location:(NSUInteger)x near:(NSUInteger)loc pattern:(NSString *)pattern; + +- (void)patch_addContextToPatch:(Patch *)patch sourceText:(NSString *)text; + +@end diff --git a/objectivec/DiffMatchPatch.m b/objectivec/DiffMatchPatch.m new file mode 100755 index 0000000..cfe0c66 --- /dev/null +++ b/objectivec/DiffMatchPatch.m @@ -0,0 +1,2559 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "DiffMatchPatch.h" + +#import "NSString+JavaSubstring.h" +#import "NSString+UriCompatibility.h" +#import "NSMutableDictionary+DMPExtensions.h" +#import "DiffMatchPatchCFUtilities.h" + + +#if !defined(MAX_OF_CONST_AND_DIFF) + // Determines the maximum of two expressions: + // The first is a constant (first parameter) while the second expression is + // the difference between the second and third parameter. The way this is + // calculated prevents integer overflow in the result of the difference. + #define MAX_OF_CONST_AND_DIFF(A,B,C) ((B) <= (C) ? 
(A) : (B) - (C) + (A)) +#endif + + +// JavaScript-style splice function +void splice(NSMutableArray *input, NSUInteger start, NSUInteger count, NSArray *objects); + +/* NSMutableArray * */ void splice(NSMutableArray *input, NSUInteger start, NSUInteger count, NSArray *objects) { + NSRange deletionRange = NSMakeRange(start, count); + if (objects == nil) { + [input removeObjectsInRange:deletionRange]; + } else { + [input replaceObjectsInRange:deletionRange withObjectsFromArray:objects]; + } +} + +@implementation Diff + +@synthesize operation; +@synthesize text; + +/** + * Constructor. Initializes the diff with the provided values. + * @param operation One of DIFF_INSERT, DIFF_DELETE or DIFF_EQUAL. + * @param text The text being applied. + */ ++ (id)diffWithOperation:(Operation)anOperation + andText:(NSString *)aText; +{ + return [[[self alloc] initWithOperation:anOperation andText:aText] autorelease]; +} + +- (id)initWithOperation:(Operation)anOperation + andText:(NSString *)aText; +{ + self = [super init]; + if (self) { + self.operation = anOperation; + self.text = aText; + } + return self; + +} + +- (void)dealloc +{ + self.text = nil; + + [super dealloc]; +} + +- (id)copyWithZone:(NSZone *)zone +{ + id newDiff = [[[self class] allocWithZone:zone] + initWithOperation:self.operation + andText:self.text]; + + return newDiff; +} + + +/** + * Display a human-readable version of this Diff. + * @return text version. + */ +- (NSString *)description +{ + NSString *prettyText = [self.text stringByReplacingOccurrencesOfString:@"\n" withString:@"\u00b6"]; + NSString *operationName = nil; + switch (self.operation) { + case DIFF_DELETE: + operationName = @"DIFF_DELETE"; + break; + case DIFF_INSERT: + operationName = @"DIFF_INSERT"; + break; + case DIFF_EQUAL: + operationName = @"DIFF_EQUAL"; + break; + default: + break; + } + + return [NSString stringWithFormat:@"Diff(%@,\"%@\")", operationName, prettyText]; +} + +/** + * Is this Diff equivalent to another Diff? 
+ * @param obj Another Diff to compare against.
+ * @return YES or NO.  The result is NO when either text is nil.
+ */
+- (BOOL)isEqual:(id)obj
+{
+  // If parameter is nil return NO.
+  if (obj == nil) {
+    return NO;
+  }
+
+  // If parameter cannot be cast to Diff return NO.
+  if (![obj isKindOfClass:[Diff class]]) {
+    return NO;
+  }
+
+  // Return YES if the fields match.
+  Diff *p = (Diff *)obj;
+  return p.operation == self.operation && [p.text isEqualToString:self.text];
+}
+
+/**
+ * Typed variant of -isEqual: for callers that already hold a Diff.
+ * @param obj Another Diff to compare against.
+ * @return YES if operation and text both match, NO otherwise.
+ */
+- (BOOL)isEqualToDiff:(Diff *)obj
+{
+  // If parameter is nil return NO.
+  if (obj == nil) {
+    return NO;
+  }
+
+  // Return YES if the fields match.
+  return obj.operation == self.operation && [obj.text isEqualToString:self.text];
+}
+
+/**
+ * Hash built from the two fields that -isEqual: compares.
+ */
+- (NSUInteger)hash
+{
+  return ([text hash] ^ (NSUInteger)operation);
+}
+
+@end
+
+
+@implementation Patch
+
+@synthesize diffs;
+@synthesize start1;
+@synthesize start2;
+@synthesize length1;
+@synthesize length2;
+
+/**
+ * Creates a Patch with an empty diffs array.
+ */
+- (id)init
+{
+  self = [super init];
+
+  if (self) {
+    self.diffs = [NSMutableArray array];
+  }
+
+  return self;
+}
+
+- (void)dealloc
+{
+  // Clear the diffs array through its setter before the object goes away.
+  self.diffs = nil;
+
+  [super dealloc];
+}
+
+/**
+ * NSCopying.  Every Diff in the array is copied as well
+ * (initWithArray:copyItems:YES).  The returned Patch comes from alloc/init
+ * and is not autoreleased, so the caller owns it.
+ */
+- (id)copyWithZone:(NSZone *)zone
+{
+  Patch *newPatch = [[[self class] allocWithZone:zone] init];
+
+  // The setter takes its own reference (init and dealloc rely on that), so
+  // the +1 reference from alloc/init has to be balanced here; without the
+  // autorelease the copied array was leaked.
+  newPatch.diffs = [[[NSMutableArray alloc] initWithArray:self.diffs copyItems:YES] autorelease];
+  newPatch.start1 = self.start1;
+  newPatch.start2 = self.start2;
+  newPatch.length1 = self.length1;
+  newPatch.length2 = self.length2;
+
+  return newPatch;
+}
+
+
+/**
+ * Emulate GNU diff's format.
+ * Header: @@ -382,8 +481,9 @@
+ * Indices are printed as 1-based, not 0-based.
+ * @return The GNU diff NSString.
+ */
+- (NSString *)description
+{
+  // Widen once so the values match the %lu conversions used below.
+  unsigned long from1 = (unsigned long)self.start1;
+  unsigned long span1 = (unsigned long)self.length1;
+  unsigned long from2 = (unsigned long)self.start2;
+  unsigned long span2 = (unsigned long)self.length2;
+
+  // Coordinates of the text1 side of the header.
+  NSString *coords1;
+  if (span1 == 0) {
+    coords1 = [NSString stringWithFormat:@"%lu,0", from1];
+  } else if (span1 == 1) {
+    coords1 = [NSString stringWithFormat:@"%lu", from1 + 1];
+  } else {
+    coords1 = [NSString stringWithFormat:@"%lu,%lu", from1 + 1, span1];
+  }
+
+  // Coordinates of the text2 side of the header.
+  NSString *coords2;
+  if (span2 == 0) {
+    coords2 = [NSString stringWithFormat:@"%lu,0", from2];
+  } else if (span2 == 1) {
+    coords2 = [NSString stringWithFormat:@"%lu", from2 + 1];
+  } else {
+    coords2 = [NSString stringWithFormat:@"%lu,%lu", from2 + 1, span2];
+  }
+
+  NSMutableString *text = [NSMutableString stringWithFormat:@"@@ -%@ +%@ @@\n",
+                           coords1, coords2];
+
+  // One line per diff: an operation marker followed by the %xx-escaped text.
+  for (Diff *aDiff in self.diffs) {
+    NSString *marker = @"";
+    switch (aDiff.operation) {
+      case DIFF_INSERT:
+        marker = @"+";
+        break;
+      case DIFF_DELETE:
+        marker = @"-";
+        break;
+      case DIFF_EQUAL:
+        marker = @" ";
+        break;
+    }
+    [text appendString:marker];
+    [text appendString:[aDiff.text diff_stringByAddingPercentEscapesForEncodeUriCompatibility]];
+    [text appendString:@"\n"];
+  }
+
+  return text;
+}
+
+@end
+
+
+@implementation DiffMatchPatch
+
+@synthesize Diff_Timeout;
+@synthesize Diff_EditCost;
+@synthesize Match_Threshold;
+@synthesize Match_Distance;
+@synthesize Patch_DeleteThreshold;
+@synthesize Patch_Margin;
+
+/**
+ * Creates an instance with the default tuning values assigned below.
+ */
+- (id)init
+{
+  self = [super init];
+  if (self == nil) {
+    return nil;
+  }
+
+  Diff_Timeout = 1.0f;
+  Diff_EditCost = 4;
+  Match_Threshold = 0.5f;
+  Match_Distance = 1000;
+  Patch_DeleteThreshold = 0.5f;
+  Patch_Margin = 4;
+
+  Match_MaxBits = 32;
+
+  return self;
+}
+
+- (void)dealloc
+{
+  // Nothing of our own to release here.
+  [super dealloc];
+}
+
+
+#pragma mark Diff Functions
+// DIFF FUNCTIONS
+
+
+/**
+ * Find the differences between two texts.
+ * Run a faster, slightly less optimal diff.
+ * This method allows the 'checklines' of diff_main() to be optional.
+ * Most of the time checklines is wanted, so default to YES.
+ * @param text1 Old NSString to be diffed.
+ * @param text2 New NSString to be diffed.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1
+                            andNewString:(NSString *)text2;
+{
+  return [self diff_mainOfOldString:text1 andNewString:text2 checkLines:YES];
+}
+
+/**
+ * Find the differences between two texts.
+ * The deadline for the whole diff is derived here from Diff_Timeout; a
+ * value <= 0 means no time limit.
+ * @param text1 Old string to be diffed.
+ * @param text2 New string to be diffed.
+ * @param checklines Speedup flag.  If NO, then don't run a
+ *     line-level diff first to identify the changed areas.
+ *     If YES, then run a faster slightly less optimal diff.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1
+                            andNewString:(NSString *)text2
+                              checkLines:(BOOL)checklines;
+{
+  // Set a deadline by which time the diff must be complete.
+  NSTimeInterval deadline;
+  if (Diff_Timeout <= 0) {
+    deadline = [[NSDate distantFuture] timeIntervalSinceReferenceDate];
+  } else {
+    deadline = [[NSDate dateWithTimeIntervalSinceNow:Diff_Timeout] timeIntervalSinceReferenceDate];
+  }
+  // Forward the caller's flag.  This used to pass a hard-coded YES, so
+  // checkLines:NO was silently ignored.
+  return [self diff_mainOfOldString:text1 andNewString:text2 checkLines:checklines deadline:deadline];
+}
+
+/**
+ * Find the differences between two texts.  Simplifies the problem by
+ * stripping any common prefix or suffix off the texts before diffing.
+ * @param text1 Old NSString to be diffed.
+ * @param text2 New NSString to be diffed.
+ * @param checklines Speedup flag.  If NO, then don't run a
+ *     line-level diff first to identify the changed areas.
+ *     If YES, then run a faster slightly less optimal diff
+ * @param deadline Time when the diff should be complete by.  Used
+ *     internally for recursive calls.  Users should set Diff_Timeout
+ *     instead.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_mainOfOldString:(NSString *)text1
+                            andNewString:(NSString *)text2
+                              checkLines:(BOOL)checklines
+                                deadline:(NSTimeInterval)deadline;
+{
+  // Nil inputs cannot be diffed: log and give up.
+  if (text1 == nil || text2 == nil) {
+    NSLog(@"Null inputs. (diff_main)");
+    return nil;
+  }
+
+  // Identical texts yield at most a single equality (speedup).
+  if ([text1 isEqualToString:text2]) {
+    if (text1.length == 0) {
+      return [NSMutableArray array];
+    }
+    return [NSMutableArray arrayWithObject:[Diff diffWithOperation:DIFF_EQUAL andText:text1]];
+  }
+
+  // Split off the common prefix (speedup).
+  NSUInteger prefixLength = (NSUInteger)diff_commonPrefix((CFStringRef)text1, (CFStringRef)text2);
+  NSString *commonprefix = [text1 substringToIndex:prefixLength];
+  NSString *rest1 = [text1 substringFromIndex:prefixLength];
+  NSString *rest2 = [text2 substringFromIndex:prefixLength];
+
+  // Split off the common suffix (speedup).
+  NSUInteger suffixLength = (NSUInteger)diff_commonSuffix((CFStringRef)rest1, (CFStringRef)rest2);
+  NSString *commonsuffix = [rest1 substringFromIndex:rest1.length - suffixLength];
+  NSString *middle1 = [rest1 substringToIndex:rest1.length - suffixLength];
+  NSString *middle2 = [rest2 substringToIndex:rest2.length - suffixLength];
+
+  // Diff what is left in the middle.
+  NSMutableArray *diffs = [self diff_computeFromOldString:middle1 andNewString:middle2 checkLines:checklines deadline:deadline];
+
+  // Put the prefix and suffix back as equalities.
+  if (commonprefix.length != 0) {
+    [diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL andText:commonprefix] atIndex:0];
+  }
+  if (commonsuffix.length != 0) {
+    [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:commonsuffix]];
+  }
+
+  [self diff_cleanupMerge:diffs];
+  return diffs;
+}
+
+/**
+ * Determine the common prefix of two strings.
+ * @param text1 First string.
+ * @param text2 Second string.
+ * @return The number of characters common to the start of each string.
+ */
+- (NSUInteger)diff_commonPrefixOfFirstString:(NSString *)text1
+                             andSecondString:(NSString *)text2;
+{
+  CFStringRef first = (CFStringRef)text1;
+  CFStringRef second = (CFStringRef)text2;
+  return (NSUInteger)diff_commonPrefix(first, second);
+}
+
+/**
+ * Determine the common suffix of two strings.
+ * @param text1 First string.
+ * @param text2 Second string.
+ * @return The number of characters common to the end of each string.
+ */
+- (NSUInteger)diff_commonSuffixOfFirstString:(NSString *)text1
+                             andSecondString:(NSString *)text2;
+{
+  CFStringRef first = (CFStringRef)text1;
+  CFStringRef second = (CFStringRef)text2;
+  return (NSUInteger)diff_commonSuffix(first, second);
+}
+
+/**
+ * Determine if the suffix of one string is the prefix of another.
+ * @param text1 First NSString.
+ * @param text2 Second NSString.
+ * @return The number of characters common to the end of the first
+ *     string and the start of the second string.
+ */
+- (NSUInteger)diff_commonOverlapOfFirstString:(NSString *)text1
+                              andSecondString:(NSString *)text2;
+{
+  CFStringRef first = (CFStringRef)text1;
+  CFStringRef second = (CFStringRef)text2;
+  return (NSUInteger)diff_commonOverlap(first, second);
+}
+
+/**
+ * Do the two texts share a substring which is at least half the length of
+ * the longer text?
+ * This speedup can produce non-minimal diffs.
+ * @param text1 First NSString.
+ * @param text2 Second NSString.
+ * @return Five element NSArray, containing the prefix of text1, the
+ *     suffix of text1, the prefix of text2, the suffix of text2 and the
+ *     common middle.  Or nil if there was no match.
+ */
+- (NSArray *)diff_halfMatchOfFirstString:(NSString *)text1
+                         andSecondString:(NSString *)text2;
+{
+  // The helper hands back an owned object; pass it on autoreleased.
+  NSArray *halves = (NSArray *)diff_halfMatchCreate((CFStringRef)text1, (CFStringRef)text2, Diff_Timeout);
+  return [halves autorelease];
+}
+
+/**
+ * Does a substring of shorttext exist within longtext such that the
+ * substring is at least half the length of longtext?
+ * @param longtext Longer NSString.
+ * @param shorttext Shorter NSString.
+ * @param index Start index of quarter length substring within longtext.
+ * @return Five element NSArray, containing the prefix of longtext, the
+ *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
+ *     and the common middle.  Or nil if there was no match.
+ */
+- (NSArray *)diff_halfMatchIOfLongString:(NSString *)longtext
+                          andShortString:(NSString *)shorttext
+                                   index:(NSInteger)index;
+{
+  // The result of the ...Create helper is handed back autoreleased.
+  return [((NSArray *)diff_halfMatchICreate((CFStringRef)longtext, (CFStringRef)shorttext, (CFIndex)index)) autorelease];
+}
+
+/**
+ * Find the differences between two texts.  Assumes that the texts do not
+ * have any common prefix or suffix.
+ * Tries, in order: the trivial cases (one text empty, one text contained
+ * in the other, a single-character text), a half-match split, a
+ * line-level diff (only if checklines is set and both texts are longer
+ * than 100 characters) and finally bisection.
+ * @param text1 Old NSString to be diffed.
+ * @param text2 New NSString to be diffed.
+ * @param checklines Speedup flag.  If NO, then don't run a
+ *     line-level diff first to identify the changed areas.
+ *     If YES, then run a faster slightly less optimal diff.
+ * @param deadline Time the diff should be complete by.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_computeFromOldString:(NSString *)text1
+                                 andNewString:(NSString *)text2
+                                   checkLines:(BOOL)checklines
+                                     deadline:(NSTimeInterval)deadline;
+{
+  NSMutableArray *diffs = [NSMutableArray array];
+
+  if (text1.length == 0) {
+    // Just add some text (speedup).
+    [diffs addObject:[Diff diffWithOperation:DIFF_INSERT andText:text2]];
+    return diffs;
+  }
+
+  if (text2.length == 0) {
+    // Just delete some text (speedup).
+    [diffs addObject:[Diff diffWithOperation:DIFF_DELETE andText:text1]];
+    return diffs;
+  }
+
+  NSString *longtext = text1.length > text2.length ? text1 : text2;
+  NSString *shorttext = text1.length > text2.length ? text2 : text1;
+  NSUInteger i = [longtext rangeOfString:shorttext].location;
+  if (i != NSNotFound) {
+    // Shorter text is inside the longer text (speedup).
+    // The surrounding parts are deletions when text1 is the longer text,
+    // insertions otherwise.
+    Operation op = (text1.length > text2.length) ? DIFF_DELETE : DIFF_INSERT;
+    [diffs addObject:[Diff diffWithOperation:op andText:[longtext substringWithRange:NSMakeRange(0, i)]]];
+    [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:shorttext]];
+    [diffs addObject:[Diff diffWithOperation:op andText:[longtext substringFromIndex:(i + shorttext.length)]]];
+    return diffs;
+  }
+
+  if (shorttext.length == 1) {
+    // Single character string.
+    // After the previous speedup, the character can't be an equality.
+    [diffs addObject:[Diff diffWithOperation:DIFF_DELETE andText:text1]];
+    [diffs addObject:[Diff diffWithOperation:DIFF_INSERT andText:text2]];
+    return diffs;
+  }
+
+  // Check to see if the problem can be split in two.
+  NSArray *hm = [(NSArray *)diff_halfMatchCreate((CFStringRef)text1, (CFStringRef)text2, Diff_Timeout) autorelease];
+  if (hm != nil) {
+    // Temporaries of the two recursive calls are collected in a local pool.
+    NSAutoreleasePool *splitPool = [NSAutoreleasePool new];
+    // A half-match was found, sort out the return data.
+    NSString *text1_a = [hm objectAtIndex:0];
+    NSString *text1_b = [hm objectAtIndex:1];
+    NSString *text2_a = [hm objectAtIndex:2];
+    NSString *text2_b = [hm objectAtIndex:3];
+    NSString *mid_common = [hm objectAtIndex:4];
+    // Send both pairs off for separate processing.
+    NSMutableArray *diffs_a = [self diff_mainOfOldString:text1_a andNewString:text2_a checkLines:checklines deadline:deadline];
+    NSMutableArray *diffs_b = [self diff_mainOfOldString:text1_b andNewString:text2_b checkLines:checklines deadline:deadline];
+    // Merge the results.
+    // The result is retained so that it survives the drain of the local
+    // pool; it is handed back autoreleased afterwards.
+    diffs = [diffs_a retain];
+    [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:mid_common]];
+    [diffs addObjectsFromArray:diffs_b];
+    [splitPool drain];
+    return [diffs autorelease];
+  }
+
+  if (checklines && text1.length > 100 && text2.length > 100) {
+    return [self diff_lineModeFromOldString:text1 andNewString:text2 deadline:deadline];
+  }
+
+  // Same retain / drain / autorelease pattern as in the half-match branch.
+  NSAutoreleasePool *bisectPool = [NSAutoreleasePool new];
+  diffs = [self diff_bisectOfOldString:text1 andNewString:text2 deadline:deadline];
+  [diffs retain];
+  [bisectPool drain];
+
+  return [diffs autorelease];
+}
+
+/**
+ * Do a quick line-level diff on both strings, then rediff the parts for
+ * greater accuracy.
+ * This speedup can produce non-minimal diffs.
+ * @param text1 Old NSString to be diffed.
+ * @param text2 New NSString to be diffed.
+ * @param deadline Time when the diff should be complete by.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_lineModeFromOldString:(NSString *)text1
+                                  andNewString:(NSString *)text2
+                                      deadline:(NSTimeInterval)deadline;
+{
+  // Scan the text on a line-by-line basis first.
+  // b holds: encoded text1, encoded text2, array of unique lines.
+  NSArray *b = [self diff_linesToCharsForFirstString:text1 andSecondString:text2];
+  text1 = (NSString *)[b objectAtIndex:0];
+  text2 = (NSString *)[b objectAtIndex:1];
+  NSMutableArray *linearray = (NSMutableArray *)[b objectAtIndex:2];
+
+  // Diff the encoded texts inside a local pool; the result is retained
+  // across the drain and autoreleased again right after it.
+  NSAutoreleasePool *recursePool = [NSAutoreleasePool new];
+  NSMutableArray *diffs = [self diff_mainOfOldString:text1 andNewString:text2 checkLines:NO deadline:deadline];
+  [diffs retain];
+  [recursePool drain];
+
+  [diffs autorelease];
+
+  // Convert the diff back to original text.
+  [self diff_chars:diffs toLines:linearray];
+  // Eliminate freak matches (e.g. blank lines)
+  [self diff_cleanupSemantic:diffs];
+
+  // Rediff any Replacement blocks, this time character-by-character.
+  // Add a dummy entry at the end.
+  [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:@""]];
+  NSUInteger thisPointer = 0;
+  NSUInteger count_delete = 0;
+  NSUInteger count_insert = 0;
+  NSString *text_delete = @"";
+  NSString *text_insert = @"";
+  while (thisPointer < diffs.count) {
+    switch (((Diff *)[diffs objectAtIndex:thisPointer]).operation) {
+      case DIFF_INSERT:
+        count_insert++;
+        text_insert = [text_insert stringByAppendingString:((Diff *)[diffs objectAtIndex:thisPointer]).text];
+        break;
+      case DIFF_DELETE:
+        count_delete++;
+        text_delete = [text_delete stringByAppendingString:((Diff *)[diffs objectAtIndex:thisPointer]).text];
+        break;
+      case DIFF_EQUAL:
+        // Upon reaching an equality, check for prior redundancies.
+        if (count_delete >= 1 && count_insert >= 1) {
+          // Delete the offending records and add the merged ones.
+          NSMutableArray *a = [self diff_mainOfOldString:text_delete andNewString:text_insert checkLines:NO deadline:deadline];
+          [diffs removeObjectsInRange:NSMakeRange(thisPointer - count_delete - count_insert,
+                                                  count_delete + count_insert)];
+          thisPointer = thisPointer - count_delete - count_insert;
+          NSUInteger insertionIndex = thisPointer;
+          for (Diff *thisDiff in a) {
+            [diffs insertObject:thisDiff atIndex:insertionIndex];
+            insertionIndex++;
+          }
+          // Continue at the equality that followed the replaced run.
+          thisPointer = thisPointer + a.count;
+        }
+        count_insert = 0;
+        count_delete = 0;
+        text_delete = @"";
+        text_insert = @"";
+        break;
+    }
+    thisPointer++;
+  }
+  [diffs removeLastObject];  // Remove the dummy entry at the end.
+
+  return diffs;
+}
+
+/**
+ * Split a text into a list of strings.  Reduce the texts to a string of
+ * hashes where each Unicode character represents one line.
+ * @param text NSString to encode.
+ * @param lineArray NSMutableArray of unique strings.
+ * @param lineHash Map of strings to indices.
+ * @return Encoded string.
+ */
+- (NSString *)diff_linesToCharsMungeOfText:(NSString *)text
+                                 lineArray:(NSMutableArray *)lineArray
+                                  lineHash:(NSMutableDictionary *)lineHash;
+{
+  // The result of the ...Create helper is handed back autoreleased.
+  return [((NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text,
+                                                            (CFMutableArrayRef)lineArray,
+                                                            (CFMutableDictionaryRef)lineHash)) autorelease];
+}
+
+/**
+ * Find the 'middle snake' of a diff, split the problem in two
+ * and return the recursively constructed diff.
+ * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
+ * If the deadline passes, no overlap is found or the work arrays cannot be
+ * allocated, the result is a deletion of text1 followed by an insertion
+ * of text2.
+ * @param text1 Old string to be diffed.
+ * @param text2 New string to be diffed.
+ * @param deadline Time at which to bail if not yet complete.
+ * @return NSMutableArray of Diff objects.
+ */
+- (NSMutableArray *)diff_bisectOfOldString:(NSString *)_text1
+                              andNewString:(NSString *)_text2
+                                  deadline:(NSTimeInterval)deadline;
+{
+#define text1CharacterAtIndex(A)  text1_chars[(A)]
+#define text2CharacterAtIndex(A)  text2_chars[(A)]
+// Releases every heap buffer owned by this method (text buffers and the
+// v1/v2 work arrays).  Must run on every exit path.
+#define freeTextBuffers()  if (text1_buffer != NULL) free(text1_buffer);\
+                           if (text2_buffer != NULL) free(text2_buffer);\
+                           free(v1);\
+                           free(v2);
+
+  CFStringRef text1 = (CFStringRef)_text1;
+  CFStringRef text2 = (CFStringRef)_text2;
+
+  // Cache the text lengths to prevent multiple calls.
+  CFIndex text1_length = CFStringGetLength(text1);
+  CFIndex text2_length = CFStringGetLength(text2);
+  CFIndex max_d = (text1_length + text2_length + 1) / 2;
+  CFIndex v_offset = max_d;
+  CFIndex v_length = 2 * max_d;
+  // The work arrays grow with the combined text length, so they live on the
+  // heap: as variable-length stack arrays they could overflow the stack for
+  // long inputs.  Two padding slots keep the v[v_offset + 1] seed below in
+  // bounds even when both texts have at most one character; all bounds
+  // checks further down still use v_length.
+  CFIndex *v1 = (CFIndex *)malloc((size_t)(v_length + 2) * sizeof(CFIndex));
+  CFIndex *v2 = (CFIndex *)malloc((size_t)(v_length + 2) * sizeof(CFIndex));
+  if (v1 == NULL || v2 == NULL) {
+    // Out of memory: give the same trivial answer as on timeout.
+    free(v1);
+    free(v2);
+    return [NSMutableArray arrayWithObjects:
+            [Diff diffWithOperation:DIFF_DELETE andText:_text1],
+            [Diff diffWithOperation:DIFF_INSERT andText:_text2], nil];
+  }
+  for (CFIndex x = 0; x < v_length + 2; x++) {
+    v1[x] = -1;
+    v2[x] = -1;
+  }
+  v1[v_offset + 1] = 0;
+  v2[v_offset + 1] = 0;
+  CFIndex delta = text1_length - text2_length;
+
+  // Prepare access to chars arrays for text1 (massive speedup).
+  const UniChar *text1_chars;
+  UniChar *text1_buffer = NULL;
+  diff_CFStringPrepareUniCharBuffer(text1, &text1_chars, &text1_buffer, CFRangeMake(0, text1_length));
+
+  // Prepare access to chars arrays for text 2 (massive speedup).
+  const UniChar *text2_chars;
+  UniChar *text2_buffer = NULL;
+  diff_CFStringPrepareUniCharBuffer(text2, &text2_chars, &text2_buffer, CFRangeMake(0, text2_length));
+
+  // If the total number of characters is odd, then the front path will
+  // collide with the reverse path.
+  BOOL front = (delta % 2 != 0);
+  // Offsets for start and end of k loop.
+  // Prevents mapping of space beyond the grid.
+  CFIndex k1start = 0;
+  CFIndex k1end = 0;
+  CFIndex k2start = 0;
+  CFIndex k2end = 0;
+  NSMutableArray *diffs;
+  for (CFIndex d = 0; d < max_d; d++) {
+    // Bail out if deadline is reached.
+    if ([NSDate timeIntervalSinceReferenceDate] > deadline) {
+      break;
+    }
+
+    // Walk the front path one step.
+    for (CFIndex k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
+      CFIndex k1_offset = v_offset + k1;
+      CFIndex x1;
+      if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {
+        x1 = v1[k1_offset + 1];
+      } else {
+        x1 = v1[k1_offset - 1] + 1;
+      }
+      CFIndex y1 = x1 - k1;
+      // Follow the diagonal while the characters match.
+      while (x1 < text1_length && y1 < text2_length
+             && text1CharacterAtIndex(x1) == text2CharacterAtIndex(y1)) {
+        x1++;
+        y1++;
+      }
+      v1[k1_offset] = x1;
+      if (x1 > text1_length) {
+        // Ran off the right of the graph.
+        k1end += 2;
+      } else if (y1 > text2_length) {
+        // Ran off the bottom of the graph.
+        k1start += 2;
+      } else if (front) {
+        CFIndex k2_offset = v_offset + delta - k1;
+        if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
+          // Mirror x2 onto top-left coordinate system.
+          CFIndex x2 = text1_length - v2[k2_offset];
+          if (x1 >= x2) {
+            freeTextBuffers();
+
+            // Overlap detected.
+            return [self diff_bisectSplitOfOldString:_text1
+                                        andNewString:_text2
+                                                   x:x1
+                                                   y:y1
+                                            deadline:deadline];
+          }
+        }
+      }
+    }
+
+    // Walk the reverse path one step.
+    for (CFIndex k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
+      CFIndex k2_offset = v_offset + k2;
+      CFIndex x2;
+      if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) {
+        x2 = v2[k2_offset + 1];
+      } else {
+        x2 = v2[k2_offset - 1] + 1;
+      }
+      CFIndex y2 = x2 - k2;
+      // Same as above, but comparing from the ends of the texts.
+      while (x2 < text1_length && y2 < text2_length
+             && text1CharacterAtIndex(text1_length - x2 - 1)
+             == text2CharacterAtIndex(text2_length - y2 - 1)) {
+        x2++;
+        y2++;
+      }
+      v2[k2_offset] = x2;
+      if (x2 > text1_length) {
+        // Ran off the left of the graph.
+        k2end += 2;
+      } else if (y2 > text2_length) {
+        // Ran off the top of the graph.
+        k2start += 2;
+      } else if (!front) {
+        CFIndex k1_offset = v_offset + delta - k2;
+        if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
+          CFIndex x1 = v1[k1_offset];
+          CFIndex y1 = v_offset + x1 - k1_offset;
+          // Mirror x2 onto top-left coordinate system.
+          x2 = text1_length - x2;
+          if (x1 >= x2) {
+            // Overlap detected.
+            freeTextBuffers();
+
+            return [self diff_bisectSplitOfOldString:_text1
+                                        andNewString:_text2
+                                                   x:x1
+                                                   y:y1
+                                            deadline:deadline];
+          }
+        }
+      }
+    }
+  }
+
+  freeTextBuffers();
+
+  // Diff took too long and hit the deadline or
+  // number of diffs equals number of characters, no commonality at all.
+  diffs = [NSMutableArray array];
+  [diffs addObject:[Diff diffWithOperation:DIFF_DELETE andText:_text1]];
+  [diffs addObject:[Diff diffWithOperation:DIFF_INSERT andText:_text2]];
+  return diffs;
+
+#undef freeTextBuffers
+#undef text1CharacterAtIndex
+#undef text2CharacterAtIndex
+}
+
+/**
+ * Given the location of the 'middle snake', split the diff in two parts
+ * and recurse.
+ * @param text1 Old string to be diffed.
+ * @param text2 New string to be diffed.
+ * @param x Index of split point in text1.
+ * @param y Index of split point in text2.
+ * @param deadline Time at which to bail if not yet complete.
+ * @return NSMutableArray of Diff objects.
+ */ +- (NSMutableArray *)diff_bisectSplitOfOldString:(NSString *)text1 + andNewString:(NSString *)text2 + x:(NSUInteger)x + y:(NSUInteger)y + deadline:(NSTimeInterval)deadline; +{ + NSString *text1a = [text1 substringToIndex:x]; + NSString *text2a = [text2 substringToIndex:y]; + NSString *text1b = [text1 substringFromIndex:x]; + NSString *text2b = [text2 substringFromIndex:y]; + + // Compute both diffs serially. + NSMutableArray *diffs = [self diff_mainOfOldString:text1a + andNewString:text2a + checkLines:NO + deadline:deadline]; + NSMutableArray *diffsb = [self diff_mainOfOldString:text1b + andNewString:text2b + checkLines:NO + deadline:deadline]; + + [diffs addObjectsFromArray: diffsb]; + return diffs; +} + +/** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First NSString. + * @param text2 Second NSString. + * @return Three element NSArray, containing the encoded text1, the + * encoded text2 and the NSMutableArray of unique strings. The zeroth element + * of the NSArray of unique strings is intentionally blank. + */ +- (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1 + andSecondString:(NSString *)text2; +{ + NSMutableArray *lineArray = [NSMutableArray array]; // NSString objects + NSMutableDictionary *lineHash = [NSMutableDictionary dictionary]; // keys: NSString, values:NSNumber + // e.g. [lineArray objectAtIndex:4] == "Hello\n" + // e.g. [lineHash objectForKey:"Hello\n"] == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nil character. 
+ [lineArray addObject:@""]; + + NSString *chars1 = (NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text1, + (CFMutableArrayRef)lineArray, + (CFMutableDictionaryRef)lineHash); + NSString *chars2 = (NSString *)diff_linesToCharsMungeCFStringCreate((CFStringRef)text2, + (CFMutableArrayRef)lineArray, + (CFMutableDictionaryRef)lineHash); + + NSArray *result = [NSArray arrayWithObjects:chars1, chars2, lineArray, nil]; + + [chars1 release]; + [chars2 release]; + + return result; +} + +/** + * Rehydrate the text in a diff from an NSString of line hashes to real lines + * of text. + * @param NSArray of Diff objects. + * @param NSMutableArray of unique strings. + */ +- (void)diff_chars:(NSArray *)diffs toLines:(NSMutableArray *)lineArray; +{ + NSMutableString *text; + NSUInteger lineHash; + for (Diff *diff in diffs) { + text = [NSMutableString string]; + for (NSUInteger y = 0; y < [diff.text length]; y++) { + lineHash = (NSUInteger)[diff.text characterAtIndex:y]; + [text appendString:[lineArray objectAtIndex:lineHash]]; + } + diff.text = text; + } +} + +/** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs NSMutableArray of Diff objects. + */ +- (void)diff_cleanupMerge:(NSMutableArray *)diffs; +{ +#define prevDiff ((Diff *)[diffs objectAtIndex:(thisPointer - 1)]) +#define thisDiff ((Diff *)[diffs objectAtIndex:thisPointer]) +#define nextDiff ((Diff *)[diffs objectAtIndex:(thisPointer + 1)]) + + if (diffs.count == 0) { + return; + } + + // Add a dummy entry at the end. 
+ [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:@""]]; + NSUInteger thisPointer = 0; + NSUInteger count_delete = 0; + NSUInteger count_insert = 0; + NSString *text_delete = @""; + NSString *text_insert = @""; + NSUInteger commonlength; + while (thisPointer < diffs.count) { + switch (thisDiff.operation) { + case DIFF_INSERT: + count_insert++; + text_insert = [text_insert stringByAppendingString:thisDiff.text]; + thisPointer++; + break; + case DIFF_DELETE: + count_delete++; + text_delete = [text_delete stringByAppendingString:thisDiff.text]; + thisPointer++; + break; + case DIFF_EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete + count_insert > 1) { + if (count_delete != 0 && count_insert != 0) { + // Factor out any common prefixes. + commonlength = (NSUInteger)diff_commonPrefix((CFStringRef)text_insert, (CFStringRef)text_delete); + if (commonlength != 0) { + if ((thisPointer - count_delete - count_insert) > 0 && + ((Diff *)[diffs objectAtIndex:(thisPointer - count_delete - count_insert - 1)]).operation + == DIFF_EQUAL) { + ((Diff *)[diffs objectAtIndex:(thisPointer - count_delete - count_insert - 1)]).text + = [((Diff *)[diffs objectAtIndex:(thisPointer - count_delete - count_insert - 1)]).text + stringByAppendingString:[text_insert substringWithRange:NSMakeRange(0, commonlength)]]; + } else { + [diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL + andText:[text_insert substringWithRange:NSMakeRange(0, commonlength)]] + atIndex:0]; + thisPointer++; + } + text_insert = [text_insert substringFromIndex:commonlength]; + text_delete = [text_delete substringFromIndex:commonlength]; + } + // Factor out any common suffixes. 
+ commonlength = (NSUInteger)diff_commonSuffix((CFStringRef)text_insert, (CFStringRef)text_delete); + if (commonlength != 0) { + thisDiff.text = [[text_insert substringFromIndex:(text_insert.length + - commonlength)] stringByAppendingString:thisDiff.text]; + text_insert = [text_insert substringWithRange:NSMakeRange(0, + text_insert.length - commonlength)]; + text_delete = [text_delete substringWithRange:NSMakeRange(0, + text_delete.length - commonlength)]; + } + } + // Delete the offending records and add the merged ones. + if (count_delete == 0) { + splice(diffs, thisPointer - count_insert, + count_delete + count_insert, + [NSMutableArray arrayWithObject:[Diff diffWithOperation:DIFF_INSERT andText:text_insert]]); + } else if (count_insert == 0) { + splice(diffs, thisPointer - count_delete, + count_delete + count_insert, + [NSMutableArray arrayWithObject:[Diff diffWithOperation:DIFF_DELETE andText:text_delete]]); + } else { + splice(diffs, thisPointer - count_delete - count_insert, + count_delete + count_insert, + [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:text_delete], + [Diff diffWithOperation:DIFF_INSERT andText:text_insert], nil]); + } + thisPointer = thisPointer - count_delete - count_insert + + (count_delete != 0 ? 1 : 0) + (count_insert != 0 ? 1 : 0) + 1; + } else if (thisPointer != 0 && prevDiff.operation == DIFF_EQUAL) { + // Merge this equality with the previous one. + prevDiff.text = [prevDiff.text stringByAppendingString:thisDiff.text]; + [diffs removeObjectAtIndex:thisPointer]; + } else { + thisPointer++; + } + count_insert = 0; + count_delete = 0; + text_delete = @""; + text_insert = @""; + break; + } + } + if (((Diff *)diffs.lastObject).text.length == 0) { + [diffs removeLastObject]; // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. 
+ // e.g: ABAC -> ABAC + BOOL changes = NO; + thisPointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (thisPointer < (diffs.count - 1)) { + if (prevDiff.operation == DIFF_EQUAL && + nextDiff.operation == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + if ([thisDiff.text hasSuffix:prevDiff.text]) { + // Shift the edit over the previous equality. + thisDiff.text = [prevDiff.text stringByAppendingString: + [thisDiff.text substringWithRange:NSMakeRange(0, thisDiff.text.length - prevDiff.text.length)]]; + nextDiff.text = [prevDiff.text stringByAppendingString:nextDiff.text]; + splice(diffs, thisPointer - 1, 1, nil); + changes = YES; + } else if ([thisDiff.text hasPrefix:nextDiff.text]) { + // Shift the edit over the next equality. + prevDiff.text = [prevDiff.text stringByAppendingString:nextDiff.text]; + thisDiff.text = [[thisDiff.text substringFromIndex:nextDiff.text.length] stringByAppendingString:nextDiff.text]; + splice(diffs, thisPointer + 1, 1, nil); + changes = YES; + } + } + thisPointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + [self diff_cleanupMerge:diffs]; + } + +#undef prevDiff +#undef thisDiff +#undef nextDiff +} + + +/** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param diffs NSMutableArray of Diff objects. + */ +- (void)diff_cleanupSemanticLossless:(NSMutableArray *)diffs; +{ +#define prevDiff ((Diff *)[diffs objectAtIndex:(thisPointer - 1)]) +#define thisDiff ((Diff *)[diffs objectAtIndex:thisPointer]) +#define nextDiff ((Diff *)[diffs objectAtIndex:(thisPointer + 1)]) + + if (diffs.count == 0) { + return; + } + + NSUInteger thisPointer = 1; + // Intentionally ignore the first and last element (don't need checking). 
+ while (thisPointer < (diffs.count - 1)) { + if (prevDiff.operation == DIFF_EQUAL && nextDiff.operation == DIFF_EQUAL) { + // This is a single edit surrounded by equalities. + NSString *equality1 = prevDiff.text; + NSString *edit = thisDiff.text; + NSString *equality2 = nextDiff.text; + + // First, shift the edit as far left as possible. + NSUInteger commonOffset = (NSUInteger)diff_commonSuffix((CFStringRef)equality1, (CFStringRef)edit); + + if (commonOffset > 0) { + NSString *commonString = [edit substringFromIndex:(edit.length - commonOffset)]; + equality1 = [equality1 substringWithRange:NSMakeRange(0, (equality1.length - commonOffset))]; + edit = [commonString stringByAppendingString:[edit substringWithRange:NSMakeRange(0, (edit.length - commonOffset))]]; + equality2 = [commonString stringByAppendingString:equality2]; + } + + // Second, step right character by character, + // looking for the best fit. + NSString *bestEquality1 = equality1; + NSString *bestEdit = edit; + NSString *bestEquality2 = equality2; + CFIndex bestScore = diff_cleanupSemanticScore((CFStringRef)equality1, (CFStringRef)edit) + + diff_cleanupSemanticScore((CFStringRef)edit, (CFStringRef)equality2); + while (edit.length != 0 && equality2.length != 0 + && [edit characterAtIndex:0] == [equality2 characterAtIndex:0]) { + equality1 = [equality1 stringByAppendingString:[edit substringWithRange:NSMakeRange(0, 1)]]; + edit = [[edit substringFromIndex:1] stringByAppendingString:[equality2 substringWithRange:NSMakeRange(0, 1)]]; + equality2 = [equality2 substringFromIndex:1]; + CFIndex score = diff_cleanupSemanticScore((CFStringRef)equality1, (CFStringRef)edit) + + diff_cleanupSemanticScore((CFStringRef)edit, (CFStringRef)equality2); + // The >= encourages trailing rather than leading whitespace on edits. 
+ if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (prevDiff.text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (bestEquality1.length != 0) { + prevDiff.text = bestEquality1; + } else { + [diffs removeObjectAtIndex:thisPointer - 1]; + thisPointer--; + } + thisDiff.text = bestEdit; + if (bestEquality2.length != 0) { + nextDiff.text = bestEquality2; + } else { + [diffs removeObjectAtIndex:thisPointer + 1]; + thisPointer--; + } + } + } + thisPointer++; + } + +#undef prevDiff +#undef thisDiff +#undef nextDiff +} + +/** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 5 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ +- (NSInteger)diff_cleanupSemanticScoreOfFirstString:(NSString *)one + andSecondString:(NSString *)two; +{ + return diff_cleanupSemanticScore((CFStringRef)one, (CFStringRef)two); +} + +/** + * Reduce the number of edits by eliminating operationally trivial + * equalities. + * @param diffs NSMutableArray of Diff objects. + */ +- (void)diff_cleanupEfficiency:(NSMutableArray *)diffs; +{ +#define thisDiff ((Diff *)[diffs objectAtIndex:thisPointer]) +#define equalitiesLastItem ((NSNumber *)equalities.lastObject) +#define equalitiesLastValue ((NSNumber *)equalities.lastObject).integerValue + if (diffs.count == 0) { + return; + } + + BOOL changes = NO; + // Stack of indices where equalities are found. + NSMutableArray *equalities = [NSMutableArray array]; + // Always equal to equalities.lastObject.text + NSString *lastequality = nil; + NSInteger thisPointer = 0; // Index of current position. + // Is there an insertion operation before the last equality. + BOOL pre_ins = NO; + // Is there a deletion operation before the last equality.
+ BOOL pre_del = NO; + // Is there an insertion operation after the last equality. + BOOL post_ins = NO; + // Is there a deletion operation after the last equality. + BOOL post_del = NO; + + NSUInteger indexToChange; + Diff *diffToChange; + + while (thisPointer < (NSInteger)diffs.count) { + if (thisDiff.operation == DIFF_EQUAL) { // Equality found. + if (thisDiff.text.length < Diff_EditCost && (post_ins || post_del)) { + // Candidate found. + [equalities addObject:[NSNumber numberWithInteger:thisPointer]]; + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff.text; + } else { + // Not a candidate, and can never become one. + [equalities removeAllObjects]; + lastequality = nil; + } + post_ins = post_del = NO; + } else { // An insertion or deletion. + if (thisDiff.operation == DIFF_DELETE) { + post_del = YES; + } else { + post_ins = YES; + } + /* + * Five types to be split: + * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> + * <ins>A</ins>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<ins>C</ins> + * <del>A</del>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<del>C</del> + */ + if (lastequality != nil + && ((pre_ins && pre_del && post_ins && post_del) + || ((lastequality.length < Diff_EditCost / 2) + && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + + (post_del ? 1 : 0)) == 3))) { + // Duplicate record. + [diffs insertObject:[Diff diffWithOperation:DIFF_DELETE andText:lastequality] + atIndex:equalitiesLastValue]; + // Change second copy to insert. + // Hash values for objects must not change while in a collection + indexToChange = equalitiesLastValue + 1; + diffToChange = [[diffs objectAtIndex:indexToChange] retain]; + [diffs replaceObjectAtIndex:indexToChange withObject:[NSNull null]]; + diffToChange.operation = DIFF_INSERT; + [diffs replaceObjectAtIndex:indexToChange withObject:diffToChange]; + [diffToChange release]; + + [equalities removeLastObject]; // Throw away the equality we just deleted. + lastequality = nil; + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going.
+ post_ins = post_del = YES; + [equalities removeAllObjects]; + } else { + if (equalities.count > 0) { + [equalities removeLastObject]; + } + + thisPointer = equalities.count > 0 ? equalitiesLastValue : -1; + post_ins = post_del = NO; + } + changes = YES; + } + } + thisPointer++; + } + + if (changes) { + [self diff_cleanupMerge:diffs]; + } + +#undef thisDiff +#undef equalitiesLastItem +#undef equalitiesLastValue +} + +/** + * Convert a Diff list into a pretty HTML report: diff text is HTML-escaped, newlines are shown as a pilcrow followed by a line break, and insertions, deletions and equalities are wrapped in ins, del and span elements. + * @param diffs NSMutableArray of Diff objects. + * @return HTML representation. + */ +- (NSString *)diff_prettyHtml:(NSMutableArray *)diffs; +{ + NSMutableString *html = [NSMutableString string]; + for (Diff *aDiff in diffs) { + NSMutableString *text = [[aDiff.text mutableCopy] autorelease]; + [text replaceOccurrencesOfString:@"&" withString:@"&amp;" options:NSLiteralSearch range:NSMakeRange(0, text.length)]; // Escape & first so the entities added below are not escaped again. + [text replaceOccurrencesOfString:@"<" withString:@"&lt;" options:NSLiteralSearch range:NSMakeRange(0, text.length)]; + [text replaceOccurrencesOfString:@">" withString:@"&gt;" options:NSLiteralSearch range:NSMakeRange(0, text.length)]; + [text replaceOccurrencesOfString:@"\n" withString:@"&para;<br>" options:NSLiteralSearch range:NSMakeRange(0, text.length)];
+ + switch (aDiff.operation) { + case DIFF_INSERT: + [html appendFormat:@"<ins style=\"background:#e6ffe6;\">%@</ins>", text]; + break; + case DIFF_DELETE: + [html appendFormat:@"<del style=\"background:#ffe6e6;\">%@</del>", text]; + break; + case DIFF_EQUAL: + [html appendFormat:@"<span>%@</span>", text]; + break; + } + } + return html; +} + +/** + * Compute and return the source text (all equalities and deletions). + * @param diffs NSMutableArray of Diff objects. + * @return Source text. + */ +- (NSString *)diff_text1:(NSMutableArray *)diffs; +{ + NSMutableString *text = [NSMutableString string]; + for (Diff *aDiff in diffs) { + if (aDiff.operation != DIFF_INSERT) { + [text appendString:aDiff.text]; + } + } + return text; +} + +/** + * Compute and return the destination text (all equalities and insertions). + * @param diffs NSMutableArray of Diff objects. + * @return Destination text. + */ +- (NSString *)diff_text2:(NSMutableArray *)diffs; +{ + NSMutableString *text = [NSMutableString string]; + for (Diff *aDiff in diffs) { + if (aDiff.operation != DIFF_DELETE) { + [text appendString:aDiff.text]; + } + } + return text; +} + +/** + * Crush the diff into an encoded NSString which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx + * notation. + * @param diffs NSMutableArray of Diff objects. + * @return Delta text.
+ */ +- (NSString *)diff_toDelta:(NSMutableArray *)diffs; +{ + NSMutableString *delta = [NSMutableString string]; + for (Diff *aDiff in diffs) { + switch (aDiff.operation) { + case DIFF_INSERT: + [delta appendFormat:@"+%@\t", [[aDiff.text diff_stringByAddingPercentEscapesForEncodeUriCompatibility] + stringByReplacingOccurrencesOfString:@"%20" withString:@" "]]; + break; + case DIFF_DELETE: + [delta appendFormat:@"-%" PRId32 "\t", (int32_t)aDiff.text.length]; + break; + case DIFF_EQUAL: + [delta appendFormat:@"=%" PRId32 "\t", (int32_t)aDiff.text.length]; + break; + } + } + + if (delta.length != 0) { + // Strip off trailing tab character. + return [delta substringWithRange:NSMakeRange(0, delta.length-1)]; + } + return delta; +} + +/** + * Given the original text1, and an encoded NSString which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source NSString for the diff. + * @param delta Delta text. + * @param error NSError if invalid input. + * @return NSMutableArray of Diff objects or nil if invalid. + */ +- (NSMutableArray *)diff_fromDeltaWithText:(NSString *)text1 + andDelta:(NSString *)delta + error:(NSError **)error; +{ + NSMutableArray *diffs = [NSMutableArray array]; + NSUInteger thisPointer = 0; // Cursor in text1 + NSArray *tokens = [delta componentsSeparatedByString:@"\t"]; + NSInteger n; + NSDictionary *errorDetail = nil; + for (NSString *token in tokens) { + if (token.length == 0) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). 
+ NSString *param = [token substringFromIndex:1]; + switch ([token characterAtIndex:0]) { + case '+': + param = [param diff_stringByReplacingPercentEscapesForEncodeUriCompatibility]; + if (param == nil) { + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Invalid character in diff_fromDelta: %@", @"Error"), param], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:99 userInfo:errorDetail]; + } + return nil; + } + [diffs addObject:[Diff diffWithOperation:DIFF_INSERT andText:param]]; + break; + case '-': + // Fall through. + case '=': + n = [param integerValue]; + if (n == 0) { + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Invalid number in diff_fromDelta: %@", @"Error"), param], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:100 userInfo:errorDetail]; + } + return nil; + } else if (n < 0) { + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Negative number in diff_fromDelta: %@", @"Error"), param], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:101 userInfo:errorDetail]; + } + return nil; + } + NSString *text; + @try { + text = [text1 substringWithRange:NSMakeRange(thisPointer, (NSUInteger)n)]; + thisPointer += (NSUInteger)n; + } + @catch (NSException *e) { + if (error != NULL) { + // CHANGME: Pass on the information contained in e + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Delta length (%lu) larger than source text length (%lu).", @"Error"), + (unsigned long)thisPointer, (unsigned long)text1.length], + NSLocalizedDescriptionKey, nil]; + *error = [NSError 
errorWithDomain:@"DiffMatchPatchErrorDomain" code:102 userInfo:errorDetail]; + } + return nil; + } + if ([token characterAtIndex:0] == '=') { + [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:text]]; + } else { + [diffs addObject:[Diff diffWithOperation:DIFF_DELETE andText:text]]; + } + break; + default: + // Anything else is an error. + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Invalid diff operation in diff_fromDelta: %C", @"Error"), + [token characterAtIndex:0]], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:102 userInfo:errorDetail]; + } + return nil; + } + } + if (thisPointer != text1.length) { + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Delta length (%lu) smaller than source text length (%lu).", @"Error"), + (unsigned long)thisPointer, (unsigned long)text1.length], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:103 userInfo:errorDetail]; + } + return nil; + } + return diffs; +} + +/** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs NSMutableArray of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ +- (NSUInteger)diff_xIndexIn:(NSMutableArray *)diffs + location:(NSUInteger) loc; +{ + NSUInteger chars1 = 0; + NSUInteger chars2 = 0; + NSUInteger last_chars1 = 0; + NSUInteger last_chars2 = 0; + Diff *lastDiff = nil; + for (Diff *aDiff in diffs) { + if (aDiff.operation != DIFF_INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length; + } + if (aDiff.operation != DIFF_DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length; + } + if (chars1 > loc) { + // Overshot the location. 
+ lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff != nil && lastDiff.operation == DIFF_DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +} + +/** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs NSMutableArray of Diff objects. + * @return Number of changes. + */ +- (NSUInteger)diff_levenshtein:(NSMutableArray *)diffs; +{ + NSUInteger levenshtein = 0; + NSUInteger insertions = 0; + NSUInteger deletions = 0; + for (Diff *aDiff in diffs) { + switch (aDiff.operation) { + case DIFF_INSERT: + insertions += aDiff.text.length; + break; + case DIFF_DELETE: + deletions += aDiff.text.length; + break; + case DIFF_EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += MAX(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += MAX(insertions, deletions); + return levenshtein; +} + +/** + * Reduce the number of edits by eliminating semantically trivial + * equalities. + * @param diffs NSMutableArray of Diff objects. + */ +- (void)diff_cleanupSemantic:(NSMutableArray *)diffs; +{ +#define prevDiff ((Diff *)[diffs objectAtIndex:(thisPointer - 1)]) +#define thisDiff ((Diff *)[diffs objectAtIndex:thisPointer]) +#define nextDiff ((Diff *)[diffs objectAtIndex:(thisPointer + 1)]) +#define equalitiesLastItem ((NSNumber *)equalities.lastObject) +#define equalitiesLastValue ((NSNumber *)equalities.lastObject).integerValue + + if (diffs == nil || diffs.count == 0) { + return; + } + + BOOL changes = NO; + // Stack of indices where equalities are found. + NSMutableArray *equalities = [NSMutableArray array]; + // Always equal to equalities.lastObject.text + NSString *lastequality = nil; + NSUInteger thisPointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. 
+ NSUInteger length_insertions1 = 0; + NSUInteger length_deletions1 = 0; + // Number of characters that changed after the equality. + NSUInteger length_insertions2 = 0; + NSUInteger length_deletions2 = 0; + + NSUInteger indexToChange; + Diff *diffToChange; + + while (thisPointer < diffs.count) { + if (thisDiff.operation == DIFF_EQUAL) { // Equality found. + [equalities addObject:[NSNumber numberWithInteger:thisPointer]]; + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff.text; + } else { // an insertion or deletion + if (thisDiff.operation == DIFF_INSERT) { + length_insertions2 += thisDiff.text.length; + } else { + length_deletions2 += thisDiff.text.length; + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (lastequality != nil + && (lastequality.length <= MAX(length_insertions1, length_deletions1)) + && (lastequality.length <= MAX(length_insertions2, length_deletions2))) { + // Duplicate record. + [diffs insertObject:[Diff diffWithOperation:DIFF_DELETE andText:lastequality] atIndex:equalitiesLastValue]; + // Change second copy to insert. + // Hash values for objects must not change while in a collection. + indexToChange = equalitiesLastValue + 1; + diffToChange = [[diffs objectAtIndex:indexToChange] retain]; + [diffs replaceObjectAtIndex:indexToChange withObject:[NSNull null]]; + diffToChange.operation = DIFF_INSERT; + [diffs replaceObjectAtIndex:indexToChange withObject:diffToChange]; + [diffToChange release]; + + // Throw away the equality we just deleted. + [equalities removeLastObject]; + if (equalities.count > 0) { + [equalities removeLastObject]; + } + // Setting an unsigned value to -1 may seem weird to some, + // but we will pass thru a ++ below: + // => overflow => 0 + thisPointer = equalities.count > 0 ? equalitiesLastValue : -1; + length_insertions1 = 0; // Reset the counters. 
+ length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = nil; + changes = YES; + } + } + thisPointer++; + } + + // Normalize the diff. + if (changes) { + [self diff_cleanupMerge:diffs]; + } + [self diff_cleanupSemanticLossless:diffs]; + + // Find any overlaps between deletions and insertions. + // e.g: <del>abcxxx</del><ins>xxxdef</ins> + // -> <del>abc</del>xxx<ins>def</ins> + // e.g: <del>xxxabc</del><ins>defxxx</ins> + // -> <ins>def</ins>xxx<del>abc</del> + // Only extract an overlap if it is at least half as long as the deletion or the insertion. + thisPointer = 1; + while (thisPointer < diffs.count) { + if (prevDiff.operation == DIFF_DELETE && thisDiff.operation == DIFF_INSERT) { + NSString *deletion = prevDiff.text; + NSString *insertion = thisDiff.text; + NSUInteger overlap_length1 = (NSUInteger)diff_commonOverlap((CFStringRef)deletion, (CFStringRef)insertion); + NSUInteger overlap_length2 = (NSUInteger)diff_commonOverlap((CFStringRef)insertion, (CFStringRef)deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length / 2.0 || + overlap_length1 >= insertion.length / 2.0) { + // Overlap found. + // Insert an equality and trim the surrounding edits. + [diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL + andText:[insertion substringWithRange:NSMakeRange(0, overlap_length1)]] + atIndex:thisPointer]; + prevDiff.text = [deletion substringWithRange:NSMakeRange(0, deletion.length - overlap_length1)]; + nextDiff.text = [insertion substringFromIndex:overlap_length1]; + thisPointer++; + } + } else { + if (overlap_length2 >= deletion.length / 2.0 || + overlap_length2 >= insertion.length / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits.
+ [diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL + andText:[deletion substringWithRange:NSMakeRange(0, overlap_length2)]] + atIndex:thisPointer]; + prevDiff.operation = DIFF_INSERT; + prevDiff.text = [insertion substringWithRange:NSMakeRange(0, insertion.length - overlap_length2)]; + nextDiff.operation = DIFF_DELETE; + nextDiff.text = [deletion substringFromIndex:overlap_length2]; + thisPointer++; + } + } + thisPointer++; + } + thisPointer++; + } + +#undef prevDiff +#undef thisDiff +#undef nextDiff +#undef equalitiesLastItem +#undef equalitiesLastValue +} + +#pragma mark Match Functions +// MATCH FUNCTIONS + + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns NSNotFound if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or NSNotFound. + */ +- (NSUInteger)match_mainForText:(NSString *)text + pattern:(NSString *)pattern + near:(NSUInteger)loc; +{ + // Check for null inputs. + if (text == nil || pattern == nil) { + NSLog(@"Null inputs. (match_main)"); + return NSNotFound; + } + if (text.length == 0) { + NSLog(@"Empty text. (match_main)"); + return NSNotFound; + } + + NSUInteger new_loc; + new_loc = MIN(loc, text.length); + new_loc = MAX((NSUInteger)0, new_loc); + + if ([text isEqualToString:pattern]) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.length == 0) { + // Nothing to match. + return NSNotFound; + } else if (new_loc + pattern.length <= text.length + && [[text substringWithRange:NSMakeRange(new_loc, pattern.length)] isEqualToString:pattern]) { + // Perfect match at the perfect spot! (Includes case of empty pattern) + return new_loc; + } else { + // Do a fuzzy compare. + return [self match_bitapOfText:text andPattern:pattern near:new_loc]; + } +} + +/** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. 
Returns NSNotFound if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or NSNotFound. + */ +- (NSUInteger)match_bitapOfText:(NSString *)text + andPattern:(NSString *)pattern + near:(NSUInteger)loc; +{ + NSAssert((Match_MaxBits == 0 || pattern.length <= Match_MaxBits), + @"Pattern too long for this application."); + + // Initialise the alphabet. + NSMutableDictionary *s = [self match_alphabet:pattern]; + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + NSUInteger best_loc = [text rangeOfString:pattern options:NSLiteralSearch range:NSMakeRange(loc, text.length - loc)].location; + if (best_loc != NSNotFound) { + score_threshold = MIN([self match_bitapScoreForErrorCount:0 location:best_loc near:loc pattern:pattern], score_threshold); + // What about in the other direction? (speedup) + NSUInteger searchRangeLoc = MIN(loc + pattern.length, text.length); + NSRange searchRange = NSMakeRange(0, searchRangeLoc); + best_loc = [text rangeOfString:pattern options:(NSLiteralSearch | NSBackwardsSearch) range:searchRange].location; + if (best_loc != NSNotFound) { + score_threshold = MIN([self match_bitapScoreForErrorCount:0 location:best_loc near:loc pattern:pattern], score_threshold); + } + } + + // Initialise the bit arrays. + NSUInteger matchmask = 1 << (pattern.length - 1); + best_loc = NSNotFound; + + NSUInteger bin_min, bin_mid; + NSUInteger bin_max = pattern.length + text.length; + NSUInteger *rd = NULL; + NSUInteger *last_rd = NULL; + for (NSUInteger d = 0; d < pattern.length; d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. 
+ bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + double score = [self match_bitapScoreForErrorCount:d location:(loc + bin_mid) near:loc pattern:pattern]; + if (score <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + NSUInteger start = MAX_OF_CONST_AND_DIFF(1, loc, bin_mid); + NSUInteger finish = MIN(loc + bin_mid, text.length) + pattern.length; + + rd = (NSUInteger *)calloc((finish + 2), sizeof(NSUInteger)); + rd[finish + 1] = (1 << d) - 1; + + for (NSUInteger j = finish; j >= start; j--) { + NSUInteger charMatch; + if (text.length <= j - 1 || ![s diff_containsObjectForUnicharKey:[text characterAtIndex:(j - 1)]]) { + // Out of range. + charMatch = 0; + } else { + charMatch = [s diff_unsignedIntegerForUnicharKey:[text characterAtIndex:(j - 1)]]; + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = [self match_bitapScoreForErrorCount:d location:(j - 1) near:loc pattern:pattern]; + // This match will almost certainly be better than any existing match. + // But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = MAX_OF_CONST_AND_DIFF(1, 2 * loc, best_loc); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if ([self match_bitapScoreForErrorCount:(d + 1) location:loc near:loc pattern:pattern] > score_threshold) { + // No hope for a (better) match at greater error levels. 
+ break; + } + + if (last_rd != NULL) { + free(last_rd); + } + last_rd = rd; + } + + if (rd != NULL && last_rd != rd) { + free(rd); + } + if (last_rd != NULL) { + free(last_rd); + } + + return best_loc; +} + +/** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ +- (double)match_bitapScoreForErrorCount:(NSUInteger)e + location:(NSUInteger)x + near:(NSUInteger)loc + pattern:(NSString *)pattern; +{ + double score; + + double accuracy = (double)e / pattern.length; + NSUInteger proximity = (NSUInteger)ABS((long long)loc - (long long)x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + score = accuracy + (proximity / (double) Match_Distance); + + return score; +} + +/** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations + * (NSMutableDictionary: keys:NSString/unichar, values:NSNumber/NSUInteger). 
+ */ +- (NSMutableDictionary *)match_alphabet:(NSString *)pattern; +{ + NSMutableDictionary *s = [NSMutableDictionary dictionary]; + CFStringRef str = (CFStringRef)pattern; + CFStringInlineBuffer inlineBuffer; + CFIndex length; + CFIndex cnt; + + length = CFStringGetLength(str); + CFStringInitInlineBuffer(str, &inlineBuffer, CFRangeMake(0, length)); + + UniChar ch; + CFStringRef c; + for (cnt = 0; cnt < length; cnt++) { + ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt); + c = diff_CFStringCreateFromUnichar(ch); + if (![s diff_containsObjectForKey:(NSString *)c]) { + [s diff_setUnsignedIntegerValue:0 forKey:(NSString *)c]; + } + CFRelease(c); + } + + NSUInteger i = 0; + for (cnt = 0; cnt < length; cnt++) { + ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt); + c = diff_CFStringCreateFromUnichar(ch); + NSUInteger value = [s diff_unsignedIntegerForKey:(NSString *)c] | (1 << (pattern.length - i - 1)); + [s diff_setUnsignedIntegerValue:value forKey:(NSString *)c]; + i++; + CFRelease(c); + } + return s; +} + + +#pragma mark Patch Functions +// PATCH FUNCTIONS + + +/** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ +- (void)patch_addContextToPatch:(Patch *)patch + sourceText:(NSString *)text; +{ + if (text.length == 0) { + return; + } + NSString *pattern = [text substringWithRange:NSMakeRange(patch.start2, patch.length1)]; + NSUInteger padding = 0; + + // Look for the first and last matches of pattern in text. If two + // different matches are found, increase the pattern length. 
+ while ([text rangeOfString:pattern options:NSLiteralSearch].location + != [text rangeOfString:pattern options:(NSLiteralSearch | NSBackwardsSearch)].location + && pattern.length < (Match_MaxBits - Patch_Margin - Patch_Margin)) { + padding += Patch_Margin; + pattern = [text diff_javaSubstringFromStart:MAX_OF_CONST_AND_DIFF(0, patch.start2, padding) + toEnd:MIN(text.length, patch.start2 + patch.length1 + padding)]; + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + NSString *prefix = [text diff_javaSubstringFromStart:MAX_OF_CONST_AND_DIFF(0, patch.start2, padding) + toEnd:patch.start2]; + if (prefix.length != 0) { + [patch.diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL andText:prefix] atIndex:0]; + } + // Add the suffix. + NSString *suffix = [text diff_javaSubstringFromStart:(patch.start2 + patch.length1) + toEnd:MIN(text.length, patch.start2 + patch.length1 + padding)]; + if (suffix.length != 0) { + [patch.diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:suffix]]; + } + + // Roll back the start points. + patch.start1 -= prefix.length; + patch.start2 -= prefix.length; + // Extend the lengths. + patch.length1 += prefix.length + suffix.length; + patch.length2 += prefix.length + suffix.length; +} + +/** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return NSMutableArray of Patch objects. + */ +- (NSMutableArray *)patch_makeFromOldString:(NSString *)text1 + andNewString:(NSString *)text2; +{ + // Check for null inputs. + if (text1 == nil || text2 == nil) { + NSLog(@"Null inputs. (patch_make)"); + return nil; + } + + // No diffs provided, compute our own. 
+ NSMutableArray *diffs = [self diff_mainOfOldString:text1 andNewString:text2 checkLines:YES]; + if (diffs.count > 2) { + [self diff_cleanupSemantic:diffs]; + [self diff_cleanupEfficiency:diffs]; + } + + return [self patch_makeFromOldString:text1 andDiffs:diffs]; +} + +/** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs NSMutableArray of Diff objects for text1 to text2. + * @return NSMutableArray of Patch objects. + */ +- (NSMutableArray *)patch_makeFromDiffs:(NSMutableArray *)diffs; +{ + // No nil check needed: a nil diffs array yields an empty text1 and an empty patch list. + // No origin NSString provided, compute our own. + NSString *text1 = [self diff_text1:diffs]; + return [self patch_makeFromOldString:text1 andDiffs:diffs]; +} + +/** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text + * @param text2 New text + * @param diffs NSMutableArray of Diff objects for text1 to text2. + * @return NSMutableArray of Patch objects. + * @deprecated Prefer -patch_makeFromOldString:andDiffs:. + */ +- (NSMutableArray *)patch_makeFromOldString:(NSString *)text1 + newString:(NSString *)text2 + diffs:(NSMutableArray *)diffs; +{ + // Check for null inputs. + if (text1 == nil || text2 == nil) { + NSLog(@"Null inputs. (patch_make)"); + return nil; + } + + return [self patch_makeFromOldString:text1 andDiffs:diffs]; +} + +/** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs NSMutableArray of Diff objects for text1 to text2. + * @return NSMutableArray of Patch objects. + */ +- (NSMutableArray *)patch_makeFromOldString:(NSString *)text1 + andDiffs:(NSMutableArray *)diffs; +{ + // Check for null inputs. + if (text1 == nil) { + NSLog(@"Null inputs. 
(patch_make)"); + return nil; + } + + NSMutableArray *patches = [NSMutableArray array]; + if (diffs.count == 0) { + return patches; // Get rid of the nil case. + } + Patch *patch = [[Patch new] autorelease]; + NSUInteger char_count1 = 0; // Number of characters into the text1 NSString. + NSUInteger char_count2 = 0; // Number of characters into the text2 NSString. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + NSString *prepatch_text = [text1 retain]; + NSMutableString *postpatch_text = [text1 mutableCopy]; + for (Diff *aDiff in diffs) { + if (patch.diffs.count == 0 && aDiff.operation != DIFF_EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case DIFF_INSERT: + [patch.diffs addObject:aDiff]; + patch.length2 += aDiff.text.length; + [postpatch_text insertString:aDiff.text atIndex:char_count2]; + break; + case DIFF_DELETE: + patch.length1 += aDiff.text.length; + [patch.diffs addObject:aDiff]; + [postpatch_text deleteCharactersInRange:NSMakeRange(char_count2, aDiff.text.length)]; + break; + case DIFF_EQUAL: + if (aDiff.text.length <= 2 * Patch_Margin + && [patch.diffs count] != 0 && aDiff != diffs.lastObject) { + // Small equality inside a patch. + [patch.diffs addObject:aDiff]; + patch.length1 += aDiff.text.length; + patch.length2 += aDiff.text.length; + } + + if (aDiff.text.length >= 2 * Patch_Margin) { + // Time for a new patch. + if (patch.diffs.count != 0) { + [self patch_addContextToPatch:patch sourceText:prepatch_text]; + [patches addObject:patch]; + patch = [[Patch new] autorelease]; + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. 
+ [prepatch_text release]; + prepatch_text = [postpatch_text copy]; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != DIFF_INSERT) { + char_count1 += aDiff.text.length; + } + if (aDiff.operation != DIFF_DELETE) { + char_count2 += aDiff.text.length; + } + } + // Pick up the leftover patch if not empty. + if (patch.diffs.count != 0) { + [self patch_addContextToPatch:patch sourceText:prepatch_text]; + [patches addObject:patch]; + } + + [prepatch_text release]; + [postpatch_text release]; + + return patches; +} + +/** + * Given an array of patches, return another array that is identical. + * @param patches NSArray of Patch objects. + * @return NSMutableArray of Patch objects. + */ +- (NSMutableArray *)patch_deepCopy:(NSArray *)patches; +{ + NSMutableArray *patchesCopy = [[NSMutableArray alloc] initWithArray:patches copyItems:YES]; + return patchesCopy; +} + +/** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of YES/NO values indicating which patches were applied. + * @param patches NSMutableArray of Patch objects + * @param text Old text. + * @return Two element NSArray, containing the new text and an array of + * BOOL values. + */ +- (NSArray *)patch_apply:(NSArray *)sourcePatches + toString:(NSString *)text; +{ + if (sourcePatches.count == 0) { + return [NSArray arrayWithObjects:text, [NSMutableArray array], nil]; + } + + // Deep copy the patches so that no changes are made to originals. + NSMutableArray *patches = [self patch_deepCopy:sourcePatches]; + + NSMutableString *textMutable = [[text mutableCopy] autorelease]; + + NSString *nullPadding = [self patch_addPadding:patches]; + [textMutable insertString:nullPadding atIndex:0]; + [textMutable appendString:nullPadding]; + [self patch_splitMax:patches]; + + NSUInteger x = 0; + // delta keeps track of the offset between the expected and actual + // location of the previous patch. 
If there are patches expected at + // positions 10 and 20, but the first patch was found at 12, delta is 2 + // and the second patch has an effective expected position of 22. + NSUInteger delta = 0; + BOOL *results = (BOOL *)calloc(patches.count, sizeof(BOOL)); + for (Patch *aPatch in patches) { + NSUInteger expected_loc = aPatch.start2 + delta; + NSString *text1 = [self diff_text1:aPatch.diffs]; + NSUInteger start_loc; + NSUInteger end_loc = NSNotFound; + if (text1.length > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern + // in the case of a monster delete. + start_loc = [self match_mainForText:textMutable + pattern:[text1 substringWithRange:NSMakeRange(0, Match_MaxBits)] + near:expected_loc]; + if (start_loc != NSNotFound) { + end_loc = [self match_mainForText:textMutable + pattern:[text1 substringFromIndex:text1.length - Match_MaxBits] + near:(expected_loc + text1.length - Match_MaxBits)]; + if (end_loc == NSNotFound || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = NSNotFound; + } + } + } else { + start_loc = [self match_mainForText:textMutable pattern:text1 near:expected_loc]; + } + if (start_loc == NSNotFound) { + // No match found. :( + results[x] = NO; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = YES; + delta = start_loc - expected_loc; + NSString *text2; + if (end_loc == NSNotFound) { + text2 = [textMutable diff_javaSubstringFromStart:start_loc + toEnd:MIN(start_loc + text1.length, textMutable.length)]; + } else { + text2 = [textMutable diff_javaSubstringFromStart:start_loc + toEnd:MIN(end_loc + Match_MaxBits, textMutable.length)]; + } + if (text1 == text2) { + // Perfect match, just shove the Replacement text in. 
+ [textMutable replaceCharactersInRange:NSMakeRange(start_loc, text1.length) withString:[self diff_text2:aPatch.diffs]]; + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + NSMutableArray *diffs = [self diff_mainOfOldString:text1 andNewString:text2 checkLines:NO]; + if (text1.length > Match_MaxBits + && ([self diff_levenshtein:diffs] / (float)text1.length) + > Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. + results[x] = NO; + } else { + [self diff_cleanupSemanticLossless:diffs]; + NSUInteger index1 = 0; + for (Diff *aDiff in aPatch.diffs) { + if (aDiff.operation != DIFF_EQUAL) { + NSUInteger index2 = [self diff_xIndexIn:diffs location:index1]; + if (aDiff.operation == DIFF_INSERT) { + // Insertion + [textMutable insertString:aDiff.text atIndex:(start_loc + index2)]; + } else if (aDiff.operation == DIFF_DELETE) { + // Deletion + [textMutable deleteCharactersInRange:NSMakeRange(start_loc + index2, + ([self diff_xIndexIn:diffs + location:(index1 + aDiff.text.length)] - index2))]; + } + } + if (aDiff.operation != DIFF_DELETE) { + index1 += aDiff.text.length; + } + } + } + } + } + x++; + } + + NSMutableArray *resultsArray = [NSMutableArray arrayWithCapacity:patches.count]; + for (NSUInteger i = 0; i < patches.count; i++) { + [resultsArray addObject:[NSNumber numberWithBool:(results[i])]]; + } + + if (results != NULL) { + free(results); + } + + // Strip the padding off. + text = [textMutable substringWithRange:NSMakeRange(nullPadding.length, + textMutable.length - 2 * nullPadding.length)]; + [patches release]; + return [NSArray arrayWithObjects:text, resultsArray, nil]; +} + +/** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches NSMutableArray of Patch objects. + * @return The padding NSString added to each side. 
+ */ +- (NSString *)patch_addPadding:(NSMutableArray *)patches; +{ + uint16_t paddingLength = Patch_Margin; + NSMutableString *nullPadding = [NSMutableString string]; + for (UniChar x = 1; x <= paddingLength; x++) { + CFStringAppendCharacters((CFMutableStringRef)nullPadding, &x, 1); + } + + // Bump all the patches forward. + for (Patch *aPatch in patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch *patch = [patches objectAtIndex:0]; + NSMutableArray *diffs = patch.diffs; + if (diffs.count == 0 || ((Diff *)[diffs objectAtIndex:0]).operation != DIFF_EQUAL) { + // Add nullPadding equality. + [diffs insertObject:[Diff diffWithOperation:DIFF_EQUAL andText:nullPadding] atIndex:0]; + patch.start1 -= paddingLength; // Should be 0. + patch.start2 -= paddingLength; // Should be 0. + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > ((Diff *)[diffs objectAtIndex:0]).text.length) { + // Grow first equality. + Diff *firstDiff = [diffs objectAtIndex:0]; + NSUInteger extraLength = paddingLength - firstDiff.text.length; + firstDiff.text = [[nullPadding substringFromIndex:(firstDiff.text.length)] + stringByAppendingString:firstDiff.text]; + patch.start1 -= extraLength; + patch.start2 -= extraLength; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + // Add some padding on end of last diff. + patch = patches.lastObject; + diffs = patch.diffs; + if (diffs.count == 0 || ((Diff *)diffs.lastObject).operation != DIFF_EQUAL) { + // Add nullPadding equality. + [diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:nullPadding]]; + patch.length1 += paddingLength; + patch.length2 += paddingLength; + } else if (paddingLength > ((Diff *)diffs.lastObject).text.length) { + // Grow last equality. 
+ Diff *lastDiff = diffs.lastObject; + NSUInteger extraLength = paddingLength - lastDiff.text.length; + lastDiff.text = [lastDiff.text stringByAppendingString:[nullPadding substringWithRange:NSMakeRange(0, extraLength)]]; + patch.length1 += extraLength; + patch.length2 += extraLength; + } + + return nullPadding; +} + +/** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches NSMutableArray of Patch objects. + */ +- (void)patch_splitMax:(NSMutableArray *)patches; +{ + NSUInteger patch_size = Match_MaxBits; + for (NSUInteger x = 0; x < patches.count; x++) { + if (((Patch *)[patches objectAtIndex:x]).length1 <= patch_size) { + continue; + } + Patch *bigpatch = [[patches objectAtIndex:x] retain]; + // Remove the big old patch. + splice(patches, x--, 1, nil); + NSUInteger start1 = bigpatch.start1; + NSUInteger start2 = bigpatch.start2; + NSString *precontext = @""; + while (bigpatch.diffs.count != 0) { + // Create one of several smaller patches. + Patch *patch = [[Patch new] autorelease]; + BOOL empty = YES; + patch.start1 = start1 - precontext.length; + patch.start2 = start2 - precontext.length; + if (precontext.length != 0) { + patch.length1 = patch.length2 = precontext.length; + [patch.diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:precontext]]; + } + while (bigpatch.diffs.count != 0 + && patch.length1 < patch_size - self.Patch_Margin) { + Operation diff_type = ((Diff *)[bigpatch.diffs objectAtIndex:0]).operation; + NSString *diff_text = ((Diff *)[bigpatch.diffs objectAtIndex:0]).text; + if (diff_type == DIFF_INSERT) { + // Insertions are harmless. 
+ patch.length2 += diff_text.length; + start2 += diff_text.length; + [patch.diffs addObject:[bigpatch.diffs objectAtIndex:0]]; + [bigpatch.diffs removeObjectAtIndex:0]; + empty = NO; + } else if (diff_type == DIFF_DELETE && patch.diffs.count == 1 + && ((Diff *)[patch.diffs objectAtIndex:0]).operation == DIFF_EQUAL + && diff_text.length > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length; + start1 += diff_text.length; + empty = NO; + [patch.diffs addObject:[Diff diffWithOperation:diff_type andText:diff_text]]; + [bigpatch.diffs removeObjectAtIndex:0]; + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = [diff_text substringWithRange:NSMakeRange(0, + MIN(diff_text.length, + (patch_size - patch.length1 - Patch_Margin)))]; + patch.length1 += diff_text.length; + start1 += diff_text.length; + if (diff_type == DIFF_EQUAL) { + patch.length2 += diff_text.length; + start2 += diff_text.length; + } else { + empty = NO; + } + [patch.diffs addObject:[Diff diffWithOperation:diff_type andText:diff_text]]; + if (diff_text == ((Diff *)[bigpatch.diffs objectAtIndex:0]).text) { + [bigpatch.diffs removeObjectAtIndex:0]; + } else { + Diff *firstDiff = [bigpatch.diffs objectAtIndex:0]; + firstDiff.text = [firstDiff.text substringFromIndex:diff_text.length]; + } + } + } + // Compute the head context for the next patch. + precontext = [self diff_text2:patch.diffs]; + precontext = [precontext substringFromIndex:MAX_OF_CONST_AND_DIFF(0, precontext.length, Patch_Margin)]; + + NSString *postcontext = nil; + // Append the end context for this patch. 
+ if ([self diff_text1:bigpatch.diffs].length > Patch_Margin) { + postcontext = [[self diff_text1:bigpatch.diffs] + substringWithRange:NSMakeRange(0, Patch_Margin)]; + } else { + postcontext = [self diff_text1:bigpatch.diffs]; + } + + if (postcontext.length != 0) { + patch.length1 += postcontext.length; + patch.length2 += postcontext.length; + if (patch.diffs.count != 0 + && ((Diff *)[patch.diffs objectAtIndex:(patch.diffs.count - 1)]).operation + == DIFF_EQUAL) { + Diff *lastDiff = [patch.diffs lastObject]; + lastDiff.text = [lastDiff.text stringByAppendingString:postcontext]; + } else { + [patch.diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:postcontext]]; + } + } + if (!empty) { + splice(patches, ++x, 0, [NSMutableArray arrayWithObject:patch]); + } + } + + [bigpatch release]; + + } +} + +/** + * Take a list of patches and return a textual representation. + * @param patches NSMutableArray of Patch objects. + * @return Text representation of patches. + */ +- (NSString *)patch_toText:(NSMutableArray *)patches; +{ + NSMutableString *text = [NSMutableString string]; + for (Patch *aPatch in patches) { + [text appendString:[aPatch description]]; + } + return text; +} + +/** + * Parse a textual representation of patches and return a NSMutableArray of + * Patch objects. + * @param textline Text representation of patches. + * @param error NSError if invalid input. + * @return NSMutableArray of Patch objects. 
+ */ +- (NSMutableArray *)patch_fromText:(NSString *)textline + error:(NSError **)error; +{ + NSMutableArray *patches = [NSMutableArray array]; + if (textline.length == 0) { + return patches; + } + NSArray *text = [textline componentsSeparatedByString:@"\n"]; + NSUInteger textPointer = 0; + Patch *patch; + //NSString *patchHeader = @"^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"; + NSString *patchHeaderStart = @"@@ -"; + NSString *patchHeaderMid = @"+"; + NSString *patchHeaderEnd = @"@@"; + NSString *optionalValueDelimiter = @","; + BOOL scanSuccess, hasOptional; + NSInteger scannedValue, optionalValue; + NSDictionary *errorDetail = nil; + + unichar sign; + NSString *line; + while (textPointer < text.count) { + NSString *thisLine = [text objectAtIndex:textPointer]; + NSScanner *theScanner = [NSScanner scannerWithString:thisLine]; + patch = [[Patch new] autorelease]; + + scanSuccess = ([theScanner scanString:patchHeaderStart intoString:NULL] + && [theScanner scanInteger:&scannedValue]); + + if (scanSuccess) { + patch.start1 = scannedValue; + + hasOptional = [theScanner scanString:optionalValueDelimiter intoString:NULL]; + + if (hasOptional) { + // First set has an optional value. + scanSuccess = [theScanner scanInteger:&optionalValue]; + if (scanSuccess) { + if (optionalValue == 0) { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = optionalValue; + } + } + } else { + patch.start1--; + patch.length1 = 1; + } + + if (scanSuccess) { + scanSuccess = ([theScanner scanString:patchHeaderMid intoString:NULL] + && [theScanner scanInteger:&scannedValue]); + + if (scanSuccess) { + patch.start2 = scannedValue; + + hasOptional = [theScanner scanString:optionalValueDelimiter intoString:NULL]; + + if (hasOptional) { + // Second set has an optional value. 
+ scanSuccess = [theScanner scanInteger:&optionalValue]; + if (scanSuccess) { + if (optionalValue == 0) { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = optionalValue; + } + } + } else { + patch.start2--; + patch.length2 = 1; + } + + if (scanSuccess) { + scanSuccess = ([theScanner scanString:patchHeaderEnd intoString:NULL] + && [theScanner isAtEnd] == YES); + } + } + } + } + + if (!scanSuccess) { + if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Invalid patch string: %@", @"Error"), + [text objectAtIndex:textPointer]], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:104 userInfo:errorDetail]; + } + return nil; + } + + [patches addObject:patch]; + + textPointer++; + + while (textPointer < text.count) { + @try { + sign = [[text objectAtIndex:textPointer] characterAtIndex:0]; + } + @catch (NSException *e) { + // Blank line? Whatever. + textPointer++; + continue; + } + line = [[[text objectAtIndex:textPointer] substringFromIndex:1] + diff_stringByReplacingPercentEscapesForEncodeUriCompatibility]; + if (sign == '-') { + // Deletion. + [patch.diffs addObject:[Diff diffWithOperation:DIFF_DELETE andText:line]]; + } else if (sign == '+') { + // Insertion. + [patch.diffs addObject:[Diff diffWithOperation:DIFF_INSERT andText:line]]; + } else if (sign == ' ') { + // Minor equality. + [patch.diffs addObject:[Diff diffWithOperation:DIFF_EQUAL andText:line]]; + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ if (error != NULL) { + errorDetail = [NSDictionary dictionaryWithObjectsAndKeys: + [NSString stringWithFormat:NSLocalizedString(@"Invalid patch mode '%C' in: %@", @"Error"), sign, line], + NSLocalizedDescriptionKey, nil]; + *error = [NSError errorWithDomain:@"DiffMatchPatchErrorDomain" code:104 userInfo:errorDetail]; + } + return nil; + } + textPointer++; + } + } + return patches; +} + +@end diff --git a/objectivec/DiffMatchPatch.xcodeproj/project.pbxproj b/objectivec/DiffMatchPatch.xcodeproj/project.pbxproj new file mode 100755 index 0000000..f2f7637 --- /dev/null +++ b/objectivec/DiffMatchPatch.xcodeproj/project.pbxproj @@ -0,0 +1,771 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 3D08D25412A71B9C007A5316 /* NSString+UnicharUtilities.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D08D25212A71B9C007A5316 /* NSString+UnicharUtilities.m */; }; + 3D0D3FAB128CBD350093B0C7 /* DiffMatchPatch.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8DC2EF5B0486A6940098B216 /* DiffMatchPatch.framework */; }; + 3D5BBFDF128C416900B8F5FF /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0867D69BFE84028FC02AAC07 /* Foundation.framework */; }; + 3D5BC080128C44A700B8F5FF /* DiffMatchPatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 3D5BC07E128C44A700B8F5FF /* DiffMatchPatch.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 3D5BC081128C44A700B8F5FF /* DiffMatchPatch.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D5BC07F128C44A700B8F5FF /* DiffMatchPatch.m */; }; + 3D70BCC9128EDAF80078D1A6 /* NSString+UriCompatibility.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D70BCC7128EDAF80078D1A6 /* NSString+UriCompatibility.m */; }; + 3D96F3BC12AFC6D800C3E5C0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 0867D69BFE84028FC02AAC07 /* Foundation.framework */; }; + 3D96F44A12AFC82F00C3E5C0 /* speedtest.m in Sources */ = {isa = 
PBXBuildFile; fileRef = 3D96F44912AFC82F00C3E5C0 /* speedtest.m */; }; + 3D96F44F12AFC96A00C3E5C0 /* DiffMatchPatch.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8DC2EF5B0486A6940098B216 /* DiffMatchPatch.framework */; }; + 3DA641D1128DE21D00B33CE9 /* DiffMatchPatchCFUtilities.c in Sources */ = {isa = PBXBuildFile; fileRef = 3DA64170128DDA3400B33CE9 /* DiffMatchPatchCFUtilities.c */; }; + 3DA64278128DE8C900B33CE9 /* DiffMatchPatchTest.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D5BC0DE128CAAE400B8F5FF /* DiffMatchPatchTest.m */; }; + 3DC87026129FF4B6001F602B /* NSString+JavaSubstring.m in Sources */ = {isa = PBXBuildFile; fileRef = 3DC87024129FF4B6001F602B /* NSString+JavaSubstring.m */; }; + 3DEE4C73129D484D00885485 /* NSMutableDictionary+DMPExtensions.m in Sources */ = {isa = PBXBuildFile; fileRef = 3DEE4C71129D484D00885485 /* NSMutableDictionary+DMPExtensions.m */; }; + 3DFDB2FA12AEC7700084EFE3 /* libstdc++.6.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 3DFDB2F912AEC7700084EFE3 /* libstdc++.6.dylib */; }; + 3DFDB2FB12AEC77E0084EFE3 /* libstdc++.6.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 3DFDB2F912AEC7700084EFE3 /* libstdc++.6.dylib */; }; + 8DC2EF530486A6940098B216 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 089C1666FE841158C02AAC07 /* InfoPlist.strings */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 3D96F3B812AFC6CD00C3E5C0 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 0867D690FE84028FC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 8DC2EF4F0486A6940098B216; + remoteInfo = DiffMatchPatch; + }; + 3DA64279128DE8CD00B33CE9 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 0867D690FE84028FC02AAC07 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 8DC2EF4F0486A6940098B216; + remoteInfo = DiffMatchPatch; + }; +/* End PBXContainerItemProxy section */ + 
+/* Begin PBXFileReference section */ + 0867D69BFE84028FC02AAC07 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = /System/Library/Frameworks/Foundation.framework; sourceTree = ""; }; + 0867D6A5FE840307C02AAC07 /* AppKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AppKit.framework; path = /System/Library/Frameworks/AppKit.framework; sourceTree = ""; }; + 089C1667FE841158C02AAC07 /* English */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.strings; name = English; path = English.lproj/InfoPlist.strings; sourceTree = ""; }; + 1058C7B1FEA5585E11CA2CBB /* Cocoa.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Cocoa.framework; path = /System/Library/Frameworks/Cocoa.framework; sourceTree = ""; }; + 32DBCF5E0370ADEE00C91783 /* DiffMatchPatch_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiffMatchPatch_Prefix.pch; sourceTree = ""; }; + 3D08D25112A71B9C007A5316 /* NSString+UnicharUtilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+UnicharUtilities.h"; sourceTree = ""; }; + 3D08D25212A71B9C007A5316 /* NSString+UnicharUtilities.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+UnicharUtilities.m"; sourceTree = ""; }; + 3D105A0912DA5F9D002111E1 /* Base+SnowLeopard.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = "Base+SnowLeopard.xcconfig"; sourceTree = ""; }; + 3D510E1012BEBA44008C7CE7 /* Base.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Base.xcconfig; sourceTree = ""; }; + 3D510E1112BEBA44008C7CE7 /* Version.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = 
Version.xcconfig; sourceTree = ""; }; + 3D5BC07E128C44A700B8F5FF /* DiffMatchPatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiffMatchPatch.h; sourceTree = ""; }; + 3D5BC07F128C44A700B8F5FF /* DiffMatchPatch.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DiffMatchPatch.m; sourceTree = ""; }; + 3D5BC0AF128CA8E200B8F5FF /* DiffMatchPatchTest.octest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = DiffMatchPatchTest.octest; sourceTree = BUILT_PRODUCTS_DIR; }; + 3D5BC0B0128CA8E200B8F5FF /* DiffMatchPatchTest-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "DiffMatchPatchTest-Info.plist"; sourceTree = ""; }; + 3D5BC0DD128CAAE400B8F5FF /* DiffMatchPatchTest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiffMatchPatchTest.h; sourceTree = ""; }; + 3D5BC0DE128CAAE400B8F5FF /* DiffMatchPatchTest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DiffMatchPatchTest.m; sourceTree = ""; }; + 3D5BC104128CAD6900B8F5FF /* SenTestingKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = SenTestingKit.framework; path = Library/Frameworks/SenTestingKit.framework; sourceTree = DEVELOPER_DIR; }; + 3D70BCC6128EDAF80078D1A6 /* NSString+UriCompatibility.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+UriCompatibility.h"; sourceTree = ""; }; + 3D70BCC7128EDAF80078D1A6 /* NSString+UriCompatibility.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+UriCompatibility.m"; sourceTree = ""; }; + 3D96F3A212AFC68900C3E5C0 /* speedtest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = speedtest; sourceTree = BUILT_PRODUCTS_DIR; }; + 
3D96F43E12AFC76600C3E5C0 /* speedtest_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = speedtest_Prefix.pch; path = ../speedtest_Prefix.pch; sourceTree = ""; }; + 3D96F44912AFC82F00C3E5C0 /* speedtest.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = speedtest.m; sourceTree = ""; }; + 3DA6416F128DDA3400B33CE9 /* DiffMatchPatchCFUtilities.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DiffMatchPatchCFUtilities.h; sourceTree = ""; }; + 3DA64170128DDA3400B33CE9 /* DiffMatchPatchCFUtilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = DiffMatchPatchCFUtilities.c; sourceTree = ""; }; + 3DA642BC128DEF4900B33CE9 /* MinMaxMacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MinMaxMacros.h; sourceTree = ""; }; + 3DC87023129FF4B6001F602B /* NSString+JavaSubstring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+JavaSubstring.h"; sourceTree = ""; }; + 3DC87024129FF4B6001F602B /* NSString+JavaSubstring.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+JavaSubstring.m"; sourceTree = ""; }; + 3DEE4C70129D484D00885485 /* NSMutableDictionary+DMPExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSMutableDictionary+DMPExtensions.h"; sourceTree = ""; }; + 3DEE4C71129D484D00885485 /* NSMutableDictionary+DMPExtensions.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSMutableDictionary+DMPExtensions.m"; sourceTree = ""; }; + 3DFDB2F912AEC7700084EFE3 /* libstdc++.6.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = "libstdc++.6.dylib"; path = "usr/lib/libstdc++.6.dylib"; sourceTree = SDKROOT; }; + 
8DC2EF5A0486A6940098B216 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 8DC2EF5B0486A6940098B216 /* DiffMatchPatch.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = DiffMatchPatch.framework; sourceTree = BUILT_PRODUCTS_DIR; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 3D5BC0AC128CA8E200B8F5FF /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D0D3FAB128CBD350093B0C7 /* DiffMatchPatch.framework in Frameworks */, + 3DFDB2FA12AEC7700084EFE3 /* libstdc++.6.dylib in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 3D96F3A012AFC68900C3E5C0 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D96F44F12AFC96A00C3E5C0 /* DiffMatchPatch.framework in Frameworks */, + 3D96F3BC12AFC6D800C3E5C0 /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 8DC2EF560486A6940098B216 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D5BBFDF128C416900B8F5FF /* Foundation.framework in Frameworks */, + 3DFDB2FB12AEC77E0084EFE3 /* libstdc++.6.dylib in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 034768DFFF38A50411DB9C8B /* Products */ = { + isa = PBXGroup; + children = ( + 8DC2EF5B0486A6940098B216 /* DiffMatchPatch.framework */, + 3D5BC0AF128CA8E200B8F5FF /* DiffMatchPatchTest.octest */, + 3D96F3A212AFC68900C3E5C0 /* speedtest */, + ); + name = Products; + sourceTree = ""; + }; + 0867D691FE84028FC02AAC07 /* DiffMatchPatch */ = { + isa = PBXGroup; + children = ( + 08FB77AEFE84172EC02AAC07 /* Classes */, + 32C88DFF0371C24200C91783 /* Other Sources */, + 089C1665FE841158C02AAC07 /* Resources */, + 
3D510E0F12BEBA44008C7CE7 /* Configurations */, + 0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */, + 3D5BC0D9128CAA7600B8F5FF /* Tests */, + 034768DFFF38A50411DB9C8B /* Products */, + ); + name = DiffMatchPatch; + sourceTree = ""; + }; + 0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */ = { + isa = PBXGroup; + children = ( + 1058C7B0FEA5585E11CA2CBB /* Linked Frameworks */, + 1058C7B2FEA5585E11CA2CBB /* Other Frameworks */, + ); + name = "External Frameworks and Libraries"; + sourceTree = ""; + }; + 089C1665FE841158C02AAC07 /* Resources */ = { + isa = PBXGroup; + children = ( + 8DC2EF5A0486A6940098B216 /* Info.plist */, + 089C1666FE841158C02AAC07 /* InfoPlist.strings */, + ); + name = Resources; + sourceTree = ""; + }; + 08FB77AEFE84172EC02AAC07 /* Classes */ = { + isa = PBXGroup; + children = ( + 3D5BC07E128C44A700B8F5FF /* DiffMatchPatch.h */, + 3D5BC07F128C44A700B8F5FF /* DiffMatchPatch.m */, + 3DC87023129FF4B6001F602B /* NSString+JavaSubstring.h */, + 3DC87024129FF4B6001F602B /* NSString+JavaSubstring.m */, + 3D08D25112A71B9C007A5316 /* NSString+UnicharUtilities.h */, + 3D08D25212A71B9C007A5316 /* NSString+UnicharUtilities.m */, + 3D70BCC6128EDAF80078D1A6 /* NSString+UriCompatibility.h */, + 3D70BCC7128EDAF80078D1A6 /* NSString+UriCompatibility.m */, + 3DEE4C70129D484D00885485 /* NSMutableDictionary+DMPExtensions.h */, + 3DEE4C71129D484D00885485 /* NSMutableDictionary+DMPExtensions.m */, + ); + name = Classes; + sourceTree = ""; + }; + 1058C7B0FEA5585E11CA2CBB /* Linked Frameworks */ = { + isa = PBXGroup; + children = ( + 1058C7B1FEA5585E11CA2CBB /* Cocoa.framework */, + ); + name = "Linked Frameworks"; + sourceTree = ""; + }; + 1058C7B2FEA5585E11CA2CBB /* Other Frameworks */ = { + isa = PBXGroup; + children = ( + 0867D6A5FE840307C02AAC07 /* AppKit.framework */, + 0867D69BFE84028FC02AAC07 /* Foundation.framework */, + 3DFDB2F912AEC7700084EFE3 /* libstdc++.6.dylib */, + ); + name = "Other Frameworks"; + sourceTree = ""; + }; + 
32C88DFF0371C24200C91783 /* Other Sources */ = { + isa = PBXGroup; + children = ( + 32DBCF5E0370ADEE00C91783 /* DiffMatchPatch_Prefix.pch */, + 3DA642BC128DEF4900B33CE9 /* MinMaxMacros.h */, + 3DA6416F128DDA3400B33CE9 /* DiffMatchPatchCFUtilities.h */, + 3DA64170128DDA3400B33CE9 /* DiffMatchPatchCFUtilities.c */, + ); + name = "Other Sources"; + sourceTree = ""; + }; + 3D510E0F12BEBA44008C7CE7 /* Configurations */ = { + isa = PBXGroup; + children = ( + 3D510E1112BEBA44008C7CE7 /* Version.xcconfig */, + 3D510E1012BEBA44008C7CE7 /* Base.xcconfig */, + 3D105A0912DA5F9D002111E1 /* Base+SnowLeopard.xcconfig */, + ); + path = Configurations; + sourceTree = ""; + }; + 3D5BC0D9128CAA7600B8F5FF /* Tests */ = { + isa = PBXGroup; + children = ( + 3D5BC104128CAD6900B8F5FF /* SenTestingKit.framework */, + 3D5BC0B0128CA8E200B8F5FF /* DiffMatchPatchTest-Info.plist */, + 3D5BC0DD128CAAE400B8F5FF /* DiffMatchPatchTest.h */, + 3D5BC0DE128CAAE400B8F5FF /* DiffMatchPatchTest.m */, + 3D96F43E12AFC76600C3E5C0 /* speedtest_Prefix.pch */, + 3D96F44912AFC82F00C3E5C0 /* speedtest.m */, + ); + path = Tests; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 8DC2EF500486A6940098B216 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D5BC080128C44A700B8F5FF /* DiffMatchPatch.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 3D5BC0AE128CA8E200B8F5FF /* DiffMatchPatchTest */ = { + isa = PBXNativeTarget; + buildConfigurationList = 3D5BC0B3128CA8E500B8F5FF /* Build configuration list for PBXNativeTarget "DiffMatchPatchTest" */; + buildPhases = ( + 3D5BC0AA128CA8E200B8F5FF /* Resources */, + 3D5BC0AB128CA8E200B8F5FF /* Sources */, + 3D5BC0AC128CA8E200B8F5FF /* Frameworks */, + 3D5BC0AD128CA8E200B8F5FF /* ShellScript */, + ); + buildRules = ( + ); + dependencies = ( + 3DA6427A128DE8CD00B33CE9 /* 
PBXTargetDependency */, + ); + name = DiffMatchPatchTest; + productName = DiffMatchPatchTest; + productReference = 3D5BC0AF128CA8E200B8F5FF /* DiffMatchPatchTest.octest */; + productType = "com.apple.product-type.bundle"; + }; + 3D96F3A112AFC68900C3E5C0 /* speedtest */ = { + isa = PBXNativeTarget; + buildConfigurationList = 3D96F3B512AFC68E00C3E5C0 /* Build configuration list for PBXNativeTarget "speedtest" */; + buildPhases = ( + 3D96F39F12AFC68900C3E5C0 /* Sources */, + 3D96F3A012AFC68900C3E5C0 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + 3D96F3B912AFC6CD00C3E5C0 /* PBXTargetDependency */, + ); + name = speedtest; + productName = speedtest; + productReference = 3D96F3A212AFC68900C3E5C0 /* speedtest */; + productType = "com.apple.product-type.tool"; + }; + 8DC2EF4F0486A6940098B216 /* DiffMatchPatch */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB91AD08733DA50010E9CD /* Build configuration list for PBXNativeTarget "DiffMatchPatch" */; + buildPhases = ( + 8DC2EF500486A6940098B216 /* Headers */, + 8DC2EF520486A6940098B216 /* Resources */, + 8DC2EF540486A6940098B216 /* Sources */, + 8DC2EF560486A6940098B216 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = DiffMatchPatch; + productInstallPath = "$(HOME)/Library/Frameworks"; + productName = DiffMatchPatch; + productReference = 8DC2EF5B0486A6940098B216 /* DiffMatchPatch.framework */; + productType = "com.apple.product-type.framework"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 0867D690FE84028FC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB91B108733DA50010E9CD /* Build configuration list for PBXProject "DiffMatchPatch" */; + compatibilityVersion = "Xcode 3.1"; + developmentRegion = English; + hasScannedForEncodings = 1; + knownRegions = ( + English, + Japanese, + French, + German, + ); + mainGroup = 0867D691FE84028FC02AAC07 /* DiffMatchPatch */; + productRefGroup = 
034768DFFF38A50411DB9C8B /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8DC2EF4F0486A6940098B216 /* DiffMatchPatch */, + 3D5BC0AE128CA8E200B8F5FF /* DiffMatchPatchTest */, + 3D96F3A112AFC68900C3E5C0 /* speedtest */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 3D5BC0AA128CA8E200B8F5FF /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 8DC2EF520486A6940098B216 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 8DC2EF530486A6940098B216 /* InfoPlist.strings in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXShellScriptBuildPhase section */ + 3D5BC0AD128CA8E200B8F5FF /* ShellScript */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputPaths = ( + ); + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "# Run the unit tests in this test bundle.\n\"${SYSTEM_DEVELOPER_DIR}/Tools/RunUnitTests\"\n"; + }; +/* End PBXShellScriptBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 3D5BC0AB128CA8E200B8F5FF /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 3DA64278128DE8C900B33CE9 /* DiffMatchPatchTest.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 3D96F39F12AFC68900C3E5C0 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D96F44A12AFC82F00C3E5C0 /* speedtest.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 8DC2EF540486A6940098B216 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 3D5BC081128C44A700B8F5FF /* DiffMatchPatch.m in Sources */, + 3DA641D1128DE21D00B33CE9 /* DiffMatchPatchCFUtilities.c in Sources */, 
+ 3D70BCC9128EDAF80078D1A6 /* NSString+UriCompatibility.m in Sources */, + 3DEE4C73129D484D00885485 /* NSMutableDictionary+DMPExtensions.m in Sources */, + 3DC87026129FF4B6001F602B /* NSString+JavaSubstring.m in Sources */, + 3D08D25412A71B9C007A5316 /* NSString+UnicharUtilities.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 3D96F3B912AFC6CD00C3E5C0 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 8DC2EF4F0486A6940098B216 /* DiffMatchPatch */; + targetProxy = 3D96F3B812AFC6CD00C3E5C0 /* PBXContainerItemProxy */; + }; + 3DA6427A128DE8CD00B33CE9 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 8DC2EF4F0486A6940098B216 /* DiffMatchPatch */; + targetProxy = 3DA64279128DE8CD00B33CE9 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin PBXVariantGroup section */ + 089C1666FE841158C02AAC07 /* InfoPlist.strings */ = { + isa = PBXVariantGroup; + children = ( + 089C1667FE841158C02AAC07 /* English */, + ); + name = InfoPlist.strings; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 1DEB91AE08733DA50010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + FRAMEWORK_VERSION = A; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = DiffMatchPatch_Prefix.pch; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "@executable_path/../Frameworks"; + PRODUCT_NAME = DiffMatchPatch; + WRAPPER_EXTENSION = framework; + }; + name = Debug; + }; + 1DEB91AF08733DA50010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = 
"dwarf-with-dsym"; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + FRAMEWORK_VERSION = A; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = DiffMatchPatch_Prefix.pch; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "@executable_path/../Frameworks"; + PRODUCT_NAME = DiffMatchPatch; + WRAPPER_EXTENSION = framework; + }; + name = Release; + }; + 1DEB91B208733DA50010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3D510E1012BEBA44008C7CE7 /* Base.xcconfig */; + buildSettings = { + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_UNINITIALIZED_AUTOS = NO; + ONLY_ACTIVE_ARCH = YES; + }; + name = Debug; + }; + 1DEB91B308733DA50010E9CD /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3D510E1012BEBA44008C7CE7 /* Base.xcconfig */; + buildSettings = { + }; + name = Release; + }; + 3D105A0C12DA601C002111E1 /* Debug 10.6 */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3D105A0912DA5F9D002111E1 /* Base+SnowLeopard.xcconfig */; + buildSettings = { + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_UNINITIALIZED_AUTOS = NO; + ONLY_ACTIVE_ARCH = YES; + }; + name = "Debug 10.6"; + }; + 3D105A0D12DA601C002111E1 /* Debug 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + FRAMEWORK_VERSION = A; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = DiffMatchPatch_Prefix.pch; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "@executable_path/../Frameworks"; + PRODUCT_NAME = DiffMatchPatch; + WRAPPER_EXTENSION = framework; + }; + name = "Debug 10.6"; + }; + 3D105A0E12DA601C002111E1 /* Debug 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + 
FRAMEWORK_SEARCH_PATHS = "$(DEVELOPER_LIBRARY_DIR)/Frameworks"; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(SYSTEM_LIBRARY_DIR)/Frameworks/Cocoa.framework/Headers/Cocoa.h"; + INFOPLIST_FILE = "Tests/DiffMatchPatchTest-Info.plist"; + INSTALL_PATH = "$(USER_LIBRARY_DIR)/Bundles"; + OTHER_LDFLAGS = ( + "-framework", + Cocoa, + "-framework", + SenTestingKit, + ); + PREBINDING = NO; + PRODUCT_NAME = DiffMatchPatchTest; + WRAPPER_EXTENSION = octest; + }; + name = "Debug 10.6"; + }; + 3D105A0F12DA601C002111E1 /* Debug 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = speedtest_Prefix.pch; + INSTALL_PATH = /usr/local/bin; + OTHER_LDFLAGS = ( + "-framework", + Foundation, + "-framework", + AppKit, + ); + PREBINDING = NO; + PRODUCT_NAME = speedtest; + }; + name = "Debug 10.6"; + }; + 3D105A1012DA6024002111E1 /* Release 10.6 */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 3D105A0912DA5F9D002111E1 /* Base+SnowLeopard.xcconfig */; + buildSettings = { + }; + name = "Release 10.6"; + }; + 3D105A1112DA6024002111E1 /* Release 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + FRAMEWORK_VERSION = A; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = DiffMatchPatch_Prefix.pch; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "@executable_path/../Frameworks"; + PRODUCT_NAME = DiffMatchPatch; + WRAPPER_EXTENSION = framework; + }; + name = "Release 10.6"; + }; + 
3D105A1212DA6024002111E1 /* Release 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + FRAMEWORK_SEARCH_PATHS = "$(DEVELOPER_LIBRARY_DIR)/Frameworks"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(SYSTEM_LIBRARY_DIR)/Frameworks/Cocoa.framework/Headers/Cocoa.h"; + INFOPLIST_FILE = "Tests/DiffMatchPatchTest-Info.plist"; + INSTALL_PATH = "$(USER_LIBRARY_DIR)/Bundles"; + OTHER_LDFLAGS = ( + "-framework", + Cocoa, + "-framework", + SenTestingKit, + ); + PREBINDING = NO; + PRODUCT_NAME = DiffMatchPatchTest; + WRAPPER_EXTENSION = octest; + ZERO_LINK = NO; + }; + name = "Release 10.6"; + }; + 3D105A1312DA6024002111E1 /* Release 10.6 */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = speedtest_Prefix.pch; + INSTALL_PATH = /usr/local/bin; + OTHER_LDFLAGS = ( + "-framework", + Foundation, + "-framework", + AppKit, + ); + PREBINDING = NO; + PRODUCT_NAME = speedtest; + ZERO_LINK = NO; + }; + name = "Release 10.6"; + }; + 3D5BC0B1128CA8E500B8F5FF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + FRAMEWORK_SEARCH_PATHS = "$(DEVELOPER_LIBRARY_DIR)/Frameworks"; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(SYSTEM_LIBRARY_DIR)/Frameworks/Cocoa.framework/Headers/Cocoa.h"; + INFOPLIST_FILE = "Tests/DiffMatchPatchTest-Info.plist"; + INSTALL_PATH = "$(USER_LIBRARY_DIR)/Bundles"; + 
OTHER_LDFLAGS = ( + "-framework", + Cocoa, + "-framework", + SenTestingKit, + ); + PREBINDING = NO; + PRODUCT_NAME = DiffMatchPatchTest; + WRAPPER_EXTENSION = octest; + }; + name = Debug; + }; + 3D5BC0B2128CA8E500B8F5FF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + FRAMEWORK_SEARCH_PATHS = "$(DEVELOPER_LIBRARY_DIR)/Frameworks"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_OBJC_EXCEPTIONS = YES; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(SYSTEM_LIBRARY_DIR)/Frameworks/Cocoa.framework/Headers/Cocoa.h"; + INFOPLIST_FILE = "Tests/DiffMatchPatchTest-Info.plist"; + INSTALL_PATH = "$(USER_LIBRARY_DIR)/Bundles"; + OTHER_LDFLAGS = ( + "-framework", + Cocoa, + "-framework", + SenTestingKit, + ); + PREBINDING = NO; + PRODUCT_NAME = DiffMatchPatchTest; + WRAPPER_EXTENSION = octest; + ZERO_LINK = NO; + }; + name = Release; + }; + 3D96F3A412AFC68B00C3E5C0 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = speedtest_Prefix.pch; + INSTALL_PATH = /usr/local/bin; + OTHER_LDFLAGS = ( + "-framework", + Foundation, + "-framework", + AppKit, + ); + PREBINDING = NO; + PRODUCT_NAME = speedtest; + }; + name = Debug; + }; + 3D96F3A512AFC68B00C3E5C0 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_MODEL_TUNING = G5; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = speedtest_Prefix.pch; + INSTALL_PATH = /usr/local/bin; + OTHER_LDFLAGS = ( + "-framework", + Foundation, + "-framework", + AppKit, 
+ ); + PREBINDING = NO; + PRODUCT_NAME = speedtest; + ZERO_LINK = NO; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB91AD08733DA50010E9CD /* Build configuration list for PBXNativeTarget "DiffMatchPatch" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91AE08733DA50010E9CD /* Debug */, + 3D105A0D12DA601C002111E1 /* Debug 10.6 */, + 1DEB91AF08733DA50010E9CD /* Release */, + 3D105A1112DA6024002111E1 /* Release 10.6 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB91B108733DA50010E9CD /* Build configuration list for PBXProject "DiffMatchPatch" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91B208733DA50010E9CD /* Debug */, + 3D105A0C12DA601C002111E1 /* Debug 10.6 */, + 1DEB91B308733DA50010E9CD /* Release */, + 3D105A1012DA6024002111E1 /* Release 10.6 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 3D5BC0B3128CA8E500B8F5FF /* Build configuration list for PBXNativeTarget "DiffMatchPatchTest" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 3D5BC0B1128CA8E500B8F5FF /* Debug */, + 3D105A0E12DA601C002111E1 /* Debug 10.6 */, + 3D5BC0B2128CA8E500B8F5FF /* Release */, + 3D105A1212DA6024002111E1 /* Release 10.6 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 3D96F3B512AFC68E00C3E5C0 /* Build configuration list for PBXNativeTarget "speedtest" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 3D96F3A412AFC68B00C3E5C0 /* Debug */, + 3D105A0F12DA601C002111E1 /* Debug 10.6 */, + 3D96F3A512AFC68B00C3E5C0 /* Release */, + 3D105A1312DA6024002111E1 /* Release 10.6 */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 0867D690FE84028FC02AAC07 /* Project object */; +} diff --git a/objectivec/DiffMatchPatchCFUtilities.c 
b/objectivec/DiffMatchPatchCFUtilities.c new file mode 100755 index 0000000..d21a4e6 --- /dev/null +++ b/objectivec/DiffMatchPatchCFUtilities.c @@ -0,0 +1,586 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#include + +#include "DiffMatchPatchCFUtilities.h" + +#include "MinMaxMacros.h" +#include +#include +#include + +CFStringRef diff_CFStringCreateSubstring(CFStringRef text, CFIndex start_index, CFIndex length); +CFRange diff_RightSubstringRange(CFIndex text_length, CFIndex new_length); +CFStringRef diff_CFStringCreateRightSubstring(CFStringRef text, CFIndex text_length, CFIndex new_length); +CFRange diff_LeftSubstringRange(CFIndex new_length); +CFStringRef diff_CFStringCreateLeftSubstring(CFStringRef text, CFIndex new_length); +CFStringRef diff_CFStringCreateSubstringWithStartIndex(CFStringRef text, CFIndex start_index); +CFStringRef diff_CFStringCreateJavaSubstring(CFStringRef s, CFIndex begin, CFIndex end); +CFStringRef diff_CFStringCreateByCombiningTwoStrings(CFStringRef best_common_part1, CFStringRef best_common_part2); +Boolean diff_regExMatch(CFStringRef text, const regex_t *re); + +CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i); + +// Utility functions +CFStringRef 
diff_CFStringCreateFromUnichar(UniChar ch) { + CFStringRef c = CFStringCreateWithCharacters(kCFAllocatorDefault, &ch, 1); + CFMakeCollectable(c); + return c; +} + +CFStringRef diff_CFStringCreateSubstring(CFStringRef text, CFIndex start_index, CFIndex length) { + CFRange substringRange; + substringRange.length = length; + substringRange.location = start_index; + + CFStringRef substring = CFStringCreateWithSubstring(kCFAllocatorDefault, text, substringRange); + CFMakeCollectable(substring); + + return substring; +} + +CFRange diff_RightSubstringRange(CFIndex text_length, CFIndex new_length) { + CFRange substringRange; + substringRange.length = new_length; + substringRange.location = text_length - new_length; + return substringRange; +} + +CFStringRef diff_CFStringCreateRightSubstring(CFStringRef text, CFIndex text_length, CFIndex new_length) { + return diff_CFStringCreateSubstring(text, text_length - new_length, new_length); +} + +CFRange diff_LeftSubstringRange(CFIndex new_length) { + CFRange substringRange; + substringRange.length = new_length; + substringRange.location = 0; + return substringRange; +} + +CFStringRef diff_CFStringCreateLeftSubstring(CFStringRef text, CFIndex new_length) { + return diff_CFStringCreateSubstring(text, 0, new_length); +} + +CFStringRef diff_CFStringCreateSubstringWithStartIndex(CFStringRef text, CFIndex start_index) { + return diff_CFStringCreateSubstring(text, start_index, (CFStringGetLength(text) - start_index)); +} + +CFStringRef diff_CFStringCreateJavaSubstring(CFStringRef s, CFIndex begin, CFIndex end) { + return diff_CFStringCreateSubstring(s, begin, end - begin); +} + +CFStringRef diff_CFStringCreateByCombiningTwoStrings(CFStringRef best_common_part1, CFStringRef best_common_part2) { + CFIndex best_common_length; + CFMutableStringRef best_common_mutable; + best_common_length = CFStringGetLength(best_common_part1) + CFStringGetLength(best_common_part2); + best_common_mutable = CFStringCreateMutableCopy(kCFAllocatorDefault, 
best_common_length, best_common_part1); + CFMakeCollectable(best_common_mutable); + CFStringAppend(best_common_mutable, best_common_part2); + return best_common_mutable; +} + +Boolean diff_regExMatch(CFStringRef text, const regex_t *re) { + //TODO(jan): Using regex.h is far from optimal. Find an alternative. + Boolean isMatch; + const char *bytes; + char *localBuffer = NULL; + char *textCString = NULL; + // We are only interested in line endings anyway so ASCII is fine. + CFStringEncoding encoding = kCFStringEncodingASCII; + + bytes = CFStringGetCStringPtr(text, encoding); + + if (bytes == NULL) { + Boolean success; + CFIndex length; + CFIndex usedBufferLength; + CFIndex textLength = CFStringGetLength(text); + CFRange rangeToProcess = CFRangeMake(0, textLength); + + success = (CFStringGetBytes(text, rangeToProcess, encoding, '?', false, NULL, LONG_MAX, &usedBufferLength) > 0); + if (success) { + length = usedBufferLength + 1; + + localBuffer = calloc(length, sizeof(char)); + success = (CFStringGetBytes(text, rangeToProcess, encoding, '?', false, (UInt8 *)localBuffer, length, NULL) > 0); + + if (success) { + textCString = localBuffer; + } + } + } else { + textCString = (char *)bytes; + } + + if (textCString != NULL) { + isMatch = (regexec(re, textCString, 0, NULL, 0) == 0); + } else { + isMatch = false; + //assert(0); + } + + if (localBuffer != NULL) { + free(localBuffer); + } + + return isMatch; +} + + +/** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. 
+ */ +CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + CFIndex text1_length = CFStringGetLength(text1); + CFIndex text2_length = CFStringGetLength(text2); + + CFStringInlineBuffer text1_inlineBuffer, text2_inlineBuffer; + CFStringInitInlineBuffer(text1, &text1_inlineBuffer, CFRangeMake(0, text1_length)); + CFStringInitInlineBuffer(text2, &text2_inlineBuffer, CFRangeMake(0, text2_length)); + + UniChar char1, char2; + CFIndex n = MIN(text1_length, text2_length); + + for (CFIndex i = 0; i < n; i++) { + char1 = CFStringGetCharacterFromInlineBuffer(&text1_inlineBuffer, i); + char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, i); + + if (char1 != char2) { + return i; + } + } + + return n; +} + +/** + * Determine the common suffix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ +CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + CFIndex text1_length = CFStringGetLength(text1); + CFIndex text2_length = CFStringGetLength(text2); + + CFStringInlineBuffer text1_inlineBuffer, text2_inlineBuffer; + CFStringInitInlineBuffer(text1, &text1_inlineBuffer, CFRangeMake(0, text1_length)); + CFStringInitInlineBuffer(text2, &text2_inlineBuffer, CFRangeMake(0, text2_length)); + + UniChar char1, char2; + CFIndex n = MIN(text1_length, text2_length); + + for (CFIndex i = 1; i <= n; i++) { + char1 = CFStringGetCharacterFromInlineBuffer(&text1_inlineBuffer, (text1_length - i)); + char2 = CFStringGetCharacterFromInlineBuffer(&text2_inlineBuffer, (text2_length - i)); + + if (char1 != char2) { + return i - 1; + } + } + return n; +} + +/** + * Determine if the suffix of one CFStringRef is the prefix of another. + * @param text1 First CFStringRef. + * @param text2 Second CFStringRef. 
+ * @return The number of characters common to the end of the first + * CFStringRef and the start of the second CFStringRef. + */ +CFIndex diff_commonOverlap(CFStringRef text1, CFStringRef text2) { + CFIndex common_overlap = 0; + + // Cache the text lengths to prevent multiple calls. + CFIndex text1_length = CFStringGetLength(text1); + CFIndex text2_length = CFStringGetLength(text2); + + // Eliminate the nil case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + + // Truncate the longer CFStringRef. + CFStringRef text1_trunc; + CFStringRef text2_trunc; + CFIndex text1_trunc_length; + if (text1_length > text2_length) { + text1_trunc_length = text2_length; + text1_trunc = diff_CFStringCreateRightSubstring(text1, text1_length, text1_trunc_length); + + text2_trunc = CFRetain(text2); + } else if (text1_length < text2_length) { + text1_trunc_length = text1_length; + text1_trunc = CFRetain(text1); + + CFIndex text2_trunc_length = text1_length; + text2_trunc = diff_CFStringCreateLeftSubstring(text2, text2_trunc_length); + } else { + text1_trunc_length = text1_length; + text1_trunc = CFRetain(text1); + + text2_trunc = CFRetain(text2); + } + + CFIndex text_length = MIN(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + common_overlap = text_length; + } else { + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + CFIndex best = 0; + CFIndex length = 1; + while (true) { + CFStringRef pattern = diff_CFStringCreateRightSubstring(text1_trunc, text1_trunc_length, length); + CFRange foundRange = CFStringFind(text2_trunc, pattern, 0); + CFRelease(pattern); + + CFIndex found = foundRange.location; + if (found == kCFNotFound) { + common_overlap = best; + break; + } + length += found; + + CFStringRef text1_sub = diff_CFStringCreateRightSubstring(text1_trunc, text1_trunc_length, length); + CFStringRef text2_sub = diff_CFStringCreateLeftSubstring(text2_trunc, length); + + if (found == 0 || (CFStringCompare(text1_sub, text2_sub, 0) == kCFCompareEqualTo)) { + best = length; + length++; + } + + CFRelease(text1_sub); + CFRelease(text2_sub); + } + } + + CFRelease(text1_trunc); + CFRelease(text2_trunc); + return common_overlap; +} + +/** + * Do the two texts share a Substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First CFStringRef. + * @param text2 Second CFStringRef. + * @param diffTimeout Time limit for diff. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or NULL if there was no match. + */ +CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const float diffTimeout) { + if (diffTimeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return NULL; + } + CFStringRef longtext = CFStringGetLength(text1) > CFStringGetLength(text2) ? text1 : text2; + CFStringRef shorttext = CFStringGetLength(text1) > CFStringGetLength(text2) ? text2 : text1; + if (CFStringGetLength(longtext) < 4 || CFStringGetLength(shorttext) * 2 < CFStringGetLength(longtext)) { + return NULL; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. 
+ CFArrayRef hm1 = diff_halfMatchICreate(longtext, shorttext, + (CFStringGetLength(longtext) + 3) / 4); + // Check again based on the third quarter. + CFArrayRef hm2 = diff_halfMatchICreate(longtext, shorttext, + (CFStringGetLength(longtext) + 1) / 2); + CFArrayRef hm; + if (hm1 == NULL && hm2 == NULL) { + return NULL; + } else if (hm2 == NULL) { + hm = CFRetain(hm1); + } else if (hm1 == NULL) { + hm = CFRetain(hm2); + } else { + // Both matched. Select the longest. + hm = CFStringGetLength(CFArrayGetValueAtIndex(hm1, 4)) > CFStringGetLength(CFArrayGetValueAtIndex(hm2, 4)) ? CFRetain(hm1) : CFRetain(hm2); + } + + if (hm1 != NULL) { + CFRelease(hm1); + } + if (hm2 != NULL) { + CFRelease(hm2); + } + + // A half-match was found, sort out the return data. + if (CFStringGetLength(text1) > CFStringGetLength(text2)) { + return hm; + //return new CFStringRef[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; + } else { + // { hm[0], hm[1], hm[2], hm[3], hm[4] } + // => { hm[2], hm[3], hm[0], hm[1], hm[4] } + + CFMutableArrayRef hm_mutable = CFArrayCreateMutableCopy(kCFAllocatorDefault, CFArrayGetCount(hm), hm); + CFMakeCollectable(hm_mutable); + + CFRelease(hm); + + CFArrayExchangeValuesAtIndices(hm_mutable, 0, 2); + CFArrayExchangeValuesAtIndices(hm_mutable, 1, 3); + return hm_mutable; + } +} + +/** + * Does a Substring of shorttext exist within longtext such that the + * Substring is at least half the length of longtext? + * @param longtext Longer CFStringRef. + * @param shorttext Shorter CFStringRef. + * @param i Start index of quarter length Substring within longtext. + * @return Five element CFStringRef array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or NULL if there was no match. + */ +CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i) { + // Start with a 1/4 length Substring at position i as a seed. 
+ CFStringRef seed = diff_CFStringCreateSubstring(longtext, i, CFStringGetLength(longtext) / 4); + CFIndex j = -1; + CFStringRef best_common = CFSTR(""); + CFStringRef best_longtext_a = CFSTR(""), best_longtext_b = CFSTR(""); + CFStringRef best_shorttext_a = CFSTR(""), best_shorttext_b = CFSTR(""); + + CFStringRef best_common_part1, best_common_part2; + + CFStringRef longtext_substring, shorttext_substring; + CFIndex shorttext_length = CFStringGetLength(shorttext); + CFRange resultRange; + CFRange rangeToSearch; + rangeToSearch.length = shorttext_length - (j + 1); + rangeToSearch.location = j + 1; + + while (j < CFStringGetLength(shorttext) + && (CFStringFindWithOptions(shorttext, seed, rangeToSearch, 0, &resultRange) == true)) { + j = resultRange.location; + rangeToSearch.length = shorttext_length - (j + 1); + rangeToSearch.location = j + 1; + + longtext_substring = diff_CFStringCreateSubstringWithStartIndex(longtext, i); + shorttext_substring = diff_CFStringCreateSubstringWithStartIndex(shorttext, j); + + CFIndex prefixLength = diff_commonPrefix(longtext_substring, shorttext_substring); + + CFRelease(longtext_substring); + CFRelease(shorttext_substring); + + longtext_substring = diff_CFStringCreateLeftSubstring(longtext, i); + shorttext_substring = diff_CFStringCreateLeftSubstring(shorttext, j); + + CFIndex suffixLength = diff_commonSuffix(longtext_substring, shorttext_substring); + + CFRelease(longtext_substring); + CFRelease(shorttext_substring); + + if (CFStringGetLength(best_common) < suffixLength + prefixLength) { + CFRelease(best_common); + CFRelease(best_longtext_a); + CFRelease(best_longtext_b); + CFRelease(best_shorttext_a); + CFRelease(best_shorttext_b); + + best_common_part1 = diff_CFStringCreateSubstring(shorttext, j - suffixLength, suffixLength); + best_common_part2 = diff_CFStringCreateSubstring(shorttext, j, prefixLength); + + best_common = diff_CFStringCreateByCombiningTwoStrings(best_common_part1, best_common_part2); + + 
CFRelease(best_common_part1); + CFRelease(best_common_part2); + + best_longtext_a = diff_CFStringCreateLeftSubstring(longtext, i - suffixLength); + best_longtext_b = diff_CFStringCreateSubstringWithStartIndex(longtext, i + prefixLength); + best_shorttext_a = diff_CFStringCreateLeftSubstring(shorttext, j - suffixLength); + best_shorttext_b = diff_CFStringCreateSubstringWithStartIndex(shorttext, j + prefixLength); + } + } + + CFRelease(seed); + + CFArrayRef halfMatchIArray; + if (CFStringGetLength(best_common) * 2 >= CFStringGetLength(longtext)) { + const CFStringRef values[] = { best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common }; + halfMatchIArray = CFArrayCreate(kCFAllocatorDefault, (const void **)values, (sizeof(values) / sizeof(values[0])), &kCFTypeArrayCallBacks); + CFMakeCollectable(halfMatchIArray); + } else { + halfMatchIArray = NULL; + } + + CFRelease(best_common); + CFRelease(best_longtext_a); + CFRelease(best_longtext_b); + CFRelease(best_shorttext_a); + CFRelease(best_shorttext_b); + + return halfMatchIArray; +} + +/** + * Split a text into a list of strings. Reduce the texts to a CFStringRef of + * hashes where each Unicode character represents one line. + * @param text CFString to encode. + * @param lineArray CFMutableArray of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded CFStringRef. + */ +CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash) { + #define lineStart lineStartRange.location + #define lineEnd lineEndRange.location + + CFRange lineStartRange; + CFRange lineEndRange; + lineStart = 0; + lineEnd = -1; + CFStringRef line; + CFMutableStringRef chars = CFStringCreateMutable(kCFAllocatorDefault, 0); + + CFIndex textLength = CFStringGetLength(text); + CFIndex hash; + CFNumberRef hashNumber; + + // Walk the text, pulling out a Substring for each line. 
+ // CFStringCreateArrayBySeparatingStrings(kCFAllocatorDefault, text, CFSTR("\n")) would temporarily double our memory footprint. + // Modifying text would create many large strings. + while (lineEnd < textLength - 1) { + lineStartRange.length = textLength - lineStart; + + if (CFStringFindWithOptions(text, CFSTR("\n"), lineStartRange, 0, &lineEndRange) == false) { + lineEnd = textLength - 1; + } /* else { + lineEnd = lineEndRange.location; + }*/ + + line = diff_CFStringCreateJavaSubstring(text, lineStart, lineEnd + 1); + lineStart = lineEnd + 1; + + if (CFDictionaryContainsKey(lineHash, line)) { + CFDictionaryGetValueIfPresent(lineHash, line, (const void **)&hashNumber); + CFNumberGetValue(hashNumber, kCFNumberCFIndexType, &hash); + const UniChar hashChar = (UniChar)hash; + CFStringAppendCharacters(chars, &hashChar, 1); + } else { + CFArrayAppendValue(lineArray, line); + hash = CFArrayGetCount(lineArray) - 1; + hashNumber = CFNumberCreate(kCFAllocatorDefault, kCFNumberCFIndexType, &hash); + CFMakeCollectable(hashNumber); + CFDictionaryAddValue(lineHash, line, hashNumber); + CFRelease(hashNumber); + const UniChar hashChar = (UniChar)hash; + CFStringAppendCharacters(chars, &hashChar, 1); + } + + CFRelease(line); + } + return chars; + + #undef lineStart + #undef lineEnd +} + +/** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First CFStringRef. + * @param two Second CFStringRef. + * @return The score. + */ +CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two) { + static Boolean firstRun = true; + static CFCharacterSetRef alphaNumericSet = NULL; + static CFCharacterSetRef whiteSpaceSet = NULL; + static CFCharacterSetRef controlSet = NULL; + static regex_t blankLineEndRegEx; + static regex_t blankLineStartRegEx; + + if (firstRun) { + // Define some regex patterns for matching boundaries. 
+ alphaNumericSet = CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric); + whiteSpaceSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline); + controlSet = CFCharacterSetGetPredefined(kCFCharacterSetControl); + int status; + status = regcomp(&blankLineEndRegEx, "\n\r?\n$", REG_EXTENDED | REG_NOSUB); + assert(status == 0); + status = regcomp(&blankLineStartRegEx, "^\r?\n\r?\n", REG_EXTENDED | REG_NOSUB); + assert(status == 0); + firstRun = false; + } + + if (CFStringGetLength(one) == 0 || CFStringGetLength(two) == 0) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + UniChar char1 = + CFStringGetCharacterAtIndex(one, (CFStringGetLength(one) - 1)); + UniChar char2 = + CFStringGetCharacterAtIndex(two, 0); + Boolean nonAlphaNumeric1 = + !CFCharacterSetIsCharacterMember(alphaNumericSet, char1); + Boolean nonAlphaNumeric2 = + !CFCharacterSetIsCharacterMember(alphaNumericSet, char2); + Boolean whitespace1 = + nonAlphaNumeric1 && CFCharacterSetIsCharacterMember(whiteSpaceSet, char1); + Boolean whitespace2 = + nonAlphaNumeric2 && CFCharacterSetIsCharacterMember(whiteSpaceSet, char2); + Boolean lineBreak1 = + whitespace1 && CFCharacterSetIsCharacterMember(controlSet, char1); + Boolean lineBreak2 = + whitespace2 && CFCharacterSetIsCharacterMember(controlSet, char2); + Boolean blankLine1 = + lineBreak1 && diff_regExMatch(one, &blankLineEndRegEx); + Boolean blankLine2 = + lineBreak2 && diff_regExMatch(two, &blankLineStartRegEx); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. 
+ return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; +} diff --git a/objectivec/DiffMatchPatchCFUtilities.h b/objectivec/DiffMatchPatchCFUtilities.h new file mode 100755 index 0000000..af75506 --- /dev/null +++ b/objectivec/DiffMatchPatchCFUtilities.h @@ -0,0 +1,48 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#ifndef _DIFFMATCHPATCHCFUTILITIES_H +#define _DIFFMATCHPATCHCFUTILITIES_H + +CFStringRef diff_CFStringCreateFromUnichar(UniChar ch); +CFStringRef diff_CFStringCreateJavaSubstring(CFStringRef s, CFIndex begin, CFIndex end); + +CFIndex diff_commonPrefix(CFStringRef text1, CFStringRef text2); +CFIndex diff_commonSuffix(CFStringRef text1, CFStringRef text2); +CFIndex diff_commonOverlap(CFStringRef text1, CFStringRef text2); +CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const float diffTimeout); +CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i); + +CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash); + +CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two); + +CF_INLINE void diff_CFStringPrepareUniCharBuffer(CFStringRef string, const UniChar **string_chars, UniChar **string_buffer, CFRange string_range) { + *string_chars = CFStringGetCharactersPtr(string); + if (*string_chars == NULL) { + // Fallback in case CFStringGetCharactersPtr() didn’t work. + *string_buffer = malloc(string_range.length * sizeof(UniChar)); + CFStringGetCharacters(string, string_range, *string_buffer); + *string_chars = *string_buffer; + } +} + +#endif //ifndef _DIFFMATCHPATCHCFUTILITIES_H diff --git a/objectivec/DiffMatchPatch_Prefix.pch b/objectivec/DiffMatchPatch_Prefix.pch new file mode 100755 index 0000000..2077572 --- /dev/null +++ b/objectivec/DiffMatchPatch_Prefix.pch @@ -0,0 +1,7 @@ +// +// Prefix header for all source files of the 'DiffMatchPatch' target in the 'DiffMatchPatch' project. 
+// + +#ifdef __OBJC__ + #import +#endif diff --git a/objectivec/English.lproj/InfoPlist.strings b/objectivec/English.lproj/InfoPlist.strings new file mode 100755 index 0000000..88f65cf --- /dev/null +++ b/objectivec/English.lproj/InfoPlist.strings @@ -0,0 +1,2 @@ +/* Localized versions of Info.plist keys */ + diff --git a/objectivec/Info.plist b/objectivec/Info.plist new file mode 100755 index 0000000..5d7c307 --- /dev/null +++ b/objectivec/Info.plist @@ -0,0 +1,28 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleIconFile + + CFBundleIdentifier + de.geheimwerk.${PRODUCT_NAME:rfc1034Identifier} + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + ${PRODUCT_NAME} + CFBundlePackageType + FMWK + CFBundleShortVersionString + ${MARKETING_VERSION} + CFBundleSignature + ???? + CFBundleVersion + ${PROJECT_VERSION} + NSPrincipalClass + + + diff --git a/objectivec/MinMaxMacros.h b/objectivec/MinMaxMacros.h new file mode 100755 index 0000000..2765e0f --- /dev/null +++ b/objectivec/MinMaxMacros.h @@ -0,0 +1,40 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#if !defined(MIN) + #define MIN(A,B) \ + ({__typeof__(A) a = (A); \ + __typeof__(B) b = (B); \ + (a < b) ? 
a : b; }) +#endif + +#if !defined(MAX) + #define MAX(A,B) \ + ({__typeof__(A) a = (A); \ + __typeof__(B) b = (B); \ + (a > b) ? a : b; }) +#endif + +#if !defined(ABS) + #define ABS(A) \ + ({__typeof__(A) a = (A); \ + (a > 0) ? a : -a; }) +#endif diff --git a/objectivec/NSMutableDictionary+DMPExtensions.h b/objectivec/NSMutableDictionary+DMPExtensions.h new file mode 100755 index 0000000..6230424 --- /dev/null +++ b/objectivec/NSMutableDictionary+DMPExtensions.h @@ -0,0 +1,46 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <Foundation/Foundation.h> + + +@interface NSMutableDictionary (DMPExtensions) + +- (id)diff_objectForIntegerKey:(NSInteger)keyInteger; +- (id)diff_objectForUnsignedIntegerKey:(NSUInteger)keyUInteger; +- (id)diff_objectForUnicharKey:(unichar)aUnicharKey; + +- (NSInteger)diff_integerForKey:(id)aKey; +- (NSUInteger)diff_unsignedIntegerForKey:(id)aKey; +- (NSInteger)diff_integerForIntegerKey:(NSInteger)keyInteger; +- (NSUInteger)diff_unsignedIntegerForUnicharKey:(unichar)aUnicharKey; + +- (BOOL)diff_containsObjectForKey:(id)aKey; +- (BOOL)diff_containsObjectForUnicharKey:(unichar)aUnicharKey; + +- (void)diff_setIntegerValue:(NSInteger)anInteger forKey:(id)aKey; +- (void)diff_setIntegerValue:(NSInteger)anInteger forIntegerKey:(NSInteger)keyInteger; + +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forKey:(id)aKey; +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forUnsignedIntegerKey:(NSUInteger)keyUInteger; +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forUnicharKey:(unichar)aUnicharKey; + +@end diff --git a/objectivec/NSMutableDictionary+DMPExtensions.m b/objectivec/NSMutableDictionary+DMPExtensions.m new file mode 100755 index 0000000..dc092be --- /dev/null +++ b/objectivec/NSMutableDictionary+DMPExtensions.m @@ -0,0 +1,108 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "NSMutableDictionary+DMPExtensions.h" + +#import "NSString+UnicharUtilities.h" + + +@implementation NSMutableDictionary (DMPExtensions) + +- (id)diff_objectForIntegerKey:(NSInteger)keyInteger; +{ + return [self objectForKey:[NSNumber numberWithInteger:keyInteger]]; +} + +- (id)diff_objectForUnsignedIntegerKey:(NSUInteger)keyUInteger; +{ + return [self objectForKey:[NSNumber numberWithUnsignedInteger:keyUInteger]]; +} + +- (id)diff_objectForUnicharKey:(unichar)aUnicharKey; +{ + return [self objectForKey:[NSString diff_stringFromUnichar:aUnicharKey]]; +} + + +- (NSInteger)diff_integerForKey:(id)aKey; +{ + return [((NSNumber *)[self objectForKey:aKey]) integerValue]; +} + +- (NSUInteger)diff_unsignedIntegerForKey:(id)aKey; +{ + return [((NSNumber *)[self objectForKey:aKey]) unsignedIntegerValue]; +} + +- (NSInteger)diff_integerForIntegerKey:(NSInteger)keyInteger; +{ + return [((NSNumber *)[self objectForKey:[NSNumber numberWithInteger:keyInteger]]) integerValue]; +} + +- (NSUInteger)diff_unsignedIntegerForUnicharKey:(unichar)aUnicharKey; +{ + return [((NSNumber *)[self diff_objectForUnicharKey:aUnicharKey]) unsignedIntegerValue]; +} + + +- (BOOL)diff_containsObjectForKey:(id)aKey; +{ + return ([self objectForKey:aKey] != nil); +} + +- (BOOL)containsObjectForIntegerKey:(NSInteger)keyInteger; +{ + return ([self objectForKey:[NSNumber numberWithInteger:keyInteger]] != nil); +} + +- (BOOL)diff_containsObjectForUnicharKey:(unichar)aUnicharKey; +{ + return ([self diff_objectForUnicharKey:aUnicharKey] != nil); +} + + +- (void)diff_setIntegerValue:(NSInteger)anInteger forKey:(id)aKey; +{ + [self setObject:[NSNumber numberWithInteger:anInteger] forKey:aKey]; +} + +- (void)diff_setIntegerValue:(NSInteger)anInteger forIntegerKey:(NSInteger)keyInteger; +{ + [self 
setObject:[NSNumber numberWithInteger:anInteger] forKey:[NSNumber numberWithInteger:keyInteger]]; +} + + +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forKey:(id)aKey; +{ + [self setObject:[NSNumber numberWithUnsignedInteger:anUInteger] forKey:aKey]; +} + +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forUnsignedIntegerKey:(NSUInteger)keyUInteger; +{ + [self setObject:[NSNumber numberWithUnsignedInteger:anUInteger] forKey:[NSNumber numberWithUnsignedInteger:keyUInteger]]; +} + +- (void)diff_setUnsignedIntegerValue:(NSUInteger)anUInteger forUnicharKey:(unichar)aUnicharKey; +{ + [self setObject:[NSNumber numberWithUnsignedInteger:anUInteger] forKey:[NSString diff_stringFromUnichar:aUnicharKey]]; +} + +@end diff --git a/objectivec/NSString+JavaSubstring.h b/objectivec/NSString+JavaSubstring.h new file mode 100755 index 0000000..3f6485c --- /dev/null +++ b/objectivec/NSString+JavaSubstring.h @@ -0,0 +1,29 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <Foundation/Foundation.h> + + +@interface NSString (JavaSubstring) + +- (NSString *)diff_javaSubstringFromStart:(NSUInteger)start toEnd:(NSUInteger)end; + +@end diff --git a/objectivec/NSString+JavaSubstring.m b/objectivec/NSString+JavaSubstring.m new file mode 100755 index 0000000..8c17dbd --- /dev/null +++ b/objectivec/NSString+JavaSubstring.m @@ -0,0 +1,35 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "NSString+JavaSubstring.h" + +#import "DiffMatchPatchCFUtilities.h" + +@implementation NSString (JavaSubstring) + +- (NSString *)diff_javaSubstringFromStart:(NSUInteger)start toEnd:(NSUInteger)end; +{ + CFStringRef c = diff_CFStringCreateJavaSubstring((CFStringRef)self, (CFIndex)start, (CFIndex)end); + CFMakeCollectable(c); + return [(NSString *)c autorelease]; +} + +@end diff --git a/objectivec/NSString+UnicharUtilities.h b/objectivec/NSString+UnicharUtilities.h new file mode 100755 index 0000000..867196e --- /dev/null +++ b/objectivec/NSString+UnicharUtilities.h @@ -0,0 +1,30 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <Foundation/Foundation.h> + + +@interface NSString (UnicharUtilities) + ++ (NSString *)diff_stringFromUnichar:(unichar)ch; +- (NSString *)diff_substringWithCharacterAtIndex:(NSUInteger)anIndex; + +@end diff --git a/objectivec/NSString+UnicharUtilities.m b/objectivec/NSString+UnicharUtilities.m new file mode 100755 index 0000000..a45d366 --- /dev/null +++ b/objectivec/NSString+UnicharUtilities.m @@ -0,0 +1,39 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "NSString+UnicharUtilities.h" + + +@implementation NSString (UnicharUtilities) + ++ (NSString *)diff_stringFromUnichar:(unichar)ch; +{ + CFStringRef c = CFStringCreateWithCharacters(kCFAllocatorDefault, &ch, 1); + CFMakeCollectable(c); + return [(NSString *)c autorelease]; +} + +- (NSString *)diff_substringWithCharacterAtIndex:(NSUInteger)anIndex; +{ + return [self substringWithRange:NSMakeRange(anIndex, 1)]; +} + +@end diff --git a/objectivec/NSString+UriCompatibility.h b/objectivec/NSString+UriCompatibility.h new file mode 100755 index 0000000..75c7873 --- /dev/null +++ b/objectivec/NSString+UriCompatibility.h @@ -0,0 +1,30 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <Foundation/Foundation.h> + + +@interface NSString (UriCompatibility) + +- (NSString *)diff_stringByAddingPercentEscapesForEncodeUriCompatibility; +- (NSString *)diff_stringByReplacingPercentEscapesForEncodeUriCompatibility; + +@end diff --git a/objectivec/NSString+UriCompatibility.m b/objectivec/NSString+UriCompatibility.m new file mode 100755 index 0000000..44d4a78 --- /dev/null +++ b/objectivec/NSString+UriCompatibility.m @@ -0,0 +1,62 @@ +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "NSString+UriCompatibility.h" + + +@implementation NSString (UriCompatibility) + +/** + * Escape excluding selected chars for compatibility with JavaScript's encodeURI. + * This method produces uppercase hex. + * + * @param str The CFStringRef to escape. + * @return The escaped NSString. + */ +- (NSString *)diff_stringByAddingPercentEscapesForEncodeUriCompatibility; +{ + CFStringRef urlString = CFURLCreateStringByAddingPercentEscapes(NULL, + (CFStringRef)self, + CFSTR(" !~*'();/?:@&=+$,#"), + NULL, + kCFStringEncodingUTF8); + CFMakeCollectable(urlString); + return [(NSString *)urlString autorelease]; +} + +/** + * Unescape all percent escapes. 
+ * + * Example: "%3f" -> "?", "%24" -> "$", etc. + * + * @return The unescaped NSString. + */ +- (NSString *)diff_stringByReplacingPercentEscapesForEncodeUriCompatibility; +{ + CFStringRef decodedString = CFURLCreateStringByReplacingPercentEscapesUsingEncoding(NULL, + (CFStringRef)self, + CFSTR(""), + kCFStringEncodingUTF8); + CFMakeCollectable(decodedString); + return [(NSString *)decodedString autorelease]; +} + +@end diff --git a/objectivec/Speedtest1.txt b/objectivec/Speedtest1.txt new file mode 100644 index 0000000..54b438f --- /dev/null +++ b/objectivec/Speedtest1.txt @@ -0,0 +1,230 @@ +This is a '''list of newspapers published by [[Journal Register Company]]'''. + +The company owns daily and weekly newspapers, other print media properties and newspaper-affiliated local Websites in the [[U.S.]] states of [[Connecticut]], [[Michigan]], [[New York]], [[Ohio]] and [[Pennsylvania]], organized in six geographic "clusters":[http://www.journalregister.com/newspapers.html Journal Register Company: Our Newspapers], accessed February 10, 2008. + +== Capital-Saratoga == +Three dailies, associated weeklies and [[pennysaver]]s in greater [[Albany, New York]]; also [http://www.capitalcentral.com capitalcentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. + +* ''The Oneida Daily Dispatch'' {{WS|oneidadispatch.com}} of [[Oneida, New York]] +* ''[[The Record (Troy)|The Record]]'' {{WS|troyrecord.com}} of [[Troy, New York]] +* ''[[The Saratogian]]'' {{WS|saratogian.com}} of [[Saratoga Springs, New York]] +* Weeklies: +** ''Community News'' {{WS|cnweekly.com}} weekly of [[Clifton Park, New York]] +** ''Rome Observer'' of [[Rome, New York]] +** ''Life & Times of Utica'' of [[Utica, New York]] + +== Connecticut == +Five dailies, associated weeklies and [[pennysaver]]s in the state of [[Connecticut]]; also [http://www.ctcentral.com CTcentral.com], [http://www.ctcarsandtrucks.com CTCarsAndTrucks.com] and [http://www.jobsinct.com JobsInCT.com]. 
+ +* ''The Middletown Press'' {{WS|middletownpress.com}} of [[Middletown, Connecticut|Middletown]] +* ''[[New Haven Register]]'' {{WS|newhavenregister.com}} of [[New Haven, Connecticut|New Haven]] +* ''The Register Citizen'' {{WS|registercitizen.com}} of [[Torrington, Connecticut|Torrington]] + +* [[New Haven Register#Competitors|Elm City Newspapers]] {{WS|ctcentral.com}} +** ''The Advertiser'' of [[East Haven, Connecticut|East Haven]] +** ''Hamden Chronicle'' of [[Hamden, Connecticut|Hamden]] +** ''Milford Weekly'' of [[Milford, Connecticut|Milford]] +** ''The Orange Bulletin'' of [[Orange, Connecticut|Orange]] +** ''The Post'' of [[North Haven, Connecticut|North Haven]] +** ''Shelton Weekly'' of [[Shelton, Connecticut|Shelton]] +** ''The Stratford Bard'' of [[Stratford, Connecticut|Stratford]] +** ''Wallingford Voice'' of [[Wallingford, Connecticut|Wallingford]] +** ''West Haven News'' of [[West Haven, Connecticut|West Haven]] +* Housatonic Publications +** ''The New Milford Times'' {{WS|newmilfordtimes.com}} of [[New Milford, Connecticut|New Milford]] +** ''The Brookfield Journal'' of [[Brookfield, Connecticut|Brookfield]] +** ''The Kent Good Times Dispatch'' of [[Kent, Connecticut|Kent]] +** ''The Bethel Beacon'' of [[Bethel, Connecticut|Bethel]] +** ''The Litchfield Enquirer'' of [[Litchfield, Connecticut|Litchfield]] +** ''Litchfield County Times'' of [[Litchfield, Connecticut|Litchfield]] +* Imprint Newspapers {{WS|imprintnewspapers.com}} +** ''West Hartford News'' of [[West Hartford, Connecticut|West Hartford]] +** ''Windsor Journal'' of [[Windsor, Connecticut|Windsor]] +** ''Windsor Locks Journal'' of [[Windsor Locks, Connecticut|Windsor Locks]] +** ''Avon Post'' of [[Avon, Connecticut|Avon]] +** ''Farmington Post'' of [[Farmington, Connecticut|Farmington]] +** ''Simsbury Post'' of [[Simsbury, Connecticut|Simsbury]] +** ''Tri-Town Post'' of [[Burlington, Connecticut|Burlington]], [[Canton, Connecticut|Canton]] and [[Harwinton, Connecticut|Harwinton]] +* 
Minuteman Publications +** ''[[Fairfield Minuteman]]'' of [[Fairfield, Connecticut|Fairfield]] +** ''The Westport Minuteman'' {{WS|westportminuteman.com}} of [[Westport, Connecticut|Westport]] +* Shoreline Newspapers weeklies: +** ''Branford Review'' of [[Branford, Connecticut|Branford]] +** ''Clinton Recorder'' of [[Clinton, Connecticut|Clinton]] +** ''The Dolphin'' of [[Naval Submarine Base New London]] in [[New London, Connecticut|New London]] +** ''Main Street News'' {{WS|ctmainstreetnews.com}} of [[Essex, Connecticut|Essex]] +** ''Pictorial Gazette'' of [[Old Saybrook, Connecticut|Old Saybrook]] +** ''Regional Express'' of [[Colchester, Connecticut|Colchester]] +** ''Regional Standard'' of [[Colchester, Connecticut|Colchester]] +** ''Shoreline Times'' {{WS|shorelinetimes.com}} of [[Guilford, Connecticut|Guilford]] +** ''Shore View East'' of [[Madison, Connecticut|Madison]] +** ''Shore View West'' of [[Guilford, Connecticut|Guilford]] +* Other weeklies: +** ''Registro'' {{WS|registroct.com}} of [[New Haven, Connecticut|New Haven]] +** ''Thomaston Express'' {{WS|thomastownexpress.com}} of [[Thomaston, Connecticut|Thomaston]] +** ''Foothills Traders'' {{WS|foothillstrader.com}} of Torrington, Bristol, Canton + +== Michigan == +Four dailies, associated weeklies and [[pennysaver]]s in the state of [[Michigan]]; also [http://www.micentralhomes.com MIcentralhomes.com] and [http://www.micentralautos.com MIcentralautos.com] +* ''[[Oakland Press]]'' {{WS|theoaklandpress.com}} of [[Oakland, Michigan|Oakland]] +* ''Daily Tribune'' {{WS|dailytribune.com}} of [[Royal Oak, Michigan|Royal Oak]] +* ''Macomb Daily'' {{WS|macombdaily.com}} of [[Mt. Clemens, Michigan|Mt. 
Clemens]] +* ''[[Morning Sun]]'' {{WS|themorningsun.com}} of [[Mount Pleasant, Michigan|Mount Pleasant]] +* Heritage Newspapers {{WS|heritage.com}} +** ''Belleville View'' +** ''Ile Camera'' +** ''Monroe Guardian'' +** ''Ypsilanti Courier'' +** ''News-Herald'' +** ''Press & Guide'' +** ''Chelsea Standard & Dexter Leader'' +** ''Manchester Enterprise'' +** ''Milan News-Leader'' +** ''Saline Reporter'' +* Independent Newspapers {{WS|sourcenewspapers.com}} +** ''Advisor'' +** ''Source'' +* Morning Star {{WS|morningstarpublishing.com}} +** ''Alma Reminder'' +** ''Alpena Star'' +** ''Antrim County News'' +** ''Carson City Reminder'' +** ''The Leader & Kalkaskian'' +** ''Ogemaw/Oscoda County Star'' +** ''Petoskey/Charlevoix Star'' +** ''Presque Isle Star'' +** ''Preview Community Weekly'' +** ''Roscommon County Star'' +** ''St. Johns Reminder'' +** ''Straits Area Star'' +** ''The (Edmore) Advertiser'' +* Voice Newspapers {{WS|voicenews.com}} +** ''Armada Times'' +** ''Bay Voice'' +** ''Blue Water Voice'' +** ''Downriver Voice'' +** ''Macomb Township Voice'' +** ''North Macomb Voice'' +** ''Weekend Voice'' +** ''Suburban Lifestyles'' {{WS|suburbanlifestyles.com}} + +== Mid-Hudson == +One daily, associated magazines in the [[Hudson River Valley]] of [[New York]]; also [http://www.midhudsoncentral.com MidHudsonCentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. + +* ''[[Daily Freeman]]'' {{WS|dailyfreeman.com}} of [[Kingston, New York]] + +== Ohio == +Two dailies, associated magazines and three shared Websites, all in the state of [[Ohio]]: [http://www.allaroundcleveland.com AllAroundCleveland.com], [http://www.allaroundclevelandcars.com AllAroundClevelandCars.com] and [http://www.allaroundclevelandjobs.com AllAroundClevelandJobs.com]. 
+ +* ''[[The News-Herald (Ohio)|The News-Herald]]'' {{WS|news-herald.com}} of [[Willoughby, Ohio|Willoughby]] +* ''[[The Morning Journal]]'' {{WS|morningjournal.com}} of [[Lorain, Ohio|Lorain]] + +== Philadelphia area == +Seven dailies and associated weeklies and magazines in [[Pennsylvania]] and [[New Jersey]], and associated Websites: [http://www.allaroundphilly.com AllAroundPhilly.com], [http://www.jobsinnj.com JobsInNJ.com], [http://www.jobsinpa.com JobsInPA.com], and [http://www.phillycarsearch.com PhillyCarSearch.com]. + +* ''The Daily Local'' {{WS|dailylocal.com}} of [[West Chester, Pennsylvania|West Chester]] +* ''[[Delaware County Daily and Sunday Times]] {{WS|delcotimes.com}} of Primos +* ''[[The Mercury (Pennsylvania)|The Mercury]]'' {{WS|pottstownmercury.com}} of [[Pottstown, Pennsylvania|Pottstown]] +* ''The Phoenix'' {{WS|phoenixvillenews.com}} of [[Phoenixville, Pennsylvania|Phoenixville]] +* ''[[The Reporter (Lansdale)|The Reporter]]'' {{WS|thereporteronline.com}} of [[Lansdale, Pennsylvania|Lansdale]] +* ''The Times Herald'' {{WS|timesherald.com}} of [[Norristown, Pennsylvania|Norristown]] +* ''[[The Trentonian]]'' {{WS|trentonian.com}} of [[Trenton, New Jersey]] + +* Weeklies +** ''El Latino Expreso'' of [[Trenton, New Jersey]] +** ''La Voz'' of [[Norristown, Pennsylvania]] +** ''The Village News'' of [[Downingtown, Pennsylvania]] +** ''The Times Record'' of [[Kennett Square, Pennsylvania]] +** ''The Tri-County Record'' {{WS|tricountyrecord.com}} of [[Morgantown, Pennsylvania]] +** ''News of Delaware County'' {{WS|newsofdelawarecounty.com}}of [[Havertown, Pennsylvania]] +** ''Main Line Times'' {{WS|mainlinetimes.com}}of [[Ardmore, Pennsylvania]] +** ''Penny Pincher'' of [[Pottstown, Pennsylvania]] +** ''Town Talk'' {{WS|towntalknews.com}} of [[Ridley, Pennsylvania]] +* Chesapeake Publishing {{WS|pa8newsgroup.com}} +** ''Solanco Sun Ledger'' of [[Quarryville, Pennsylvania]] +** ''Columbia Ledger'' of [[Columbia, Pennsylvania]] +** ''Coatesville 
Ledger'' of [[Downingtown, Pennsylvania]] +** ''Parkesburg Post Ledger'' of [[Quarryville, Pennsylvania]] +** ''Downingtown Ledger'' of [[Downingtown, Pennsylvania]] +** ''The Kennett Paper'' of [[Kennett Square, Pennsylvania]] +** ''Avon Grove Sun'' of [[West Grove, Pennsylvania]] +** ''Oxford Tribune'' of [[Oxford, Pennsylvania]] +** ''Elizabethtown Chronicle'' of [[Elizabethtown, Pennsylvania]] +** ''Donegal Ledger'' of [[Donegal, Pennsylvania]] +** ''Chadds Ford Post'' of [[Chadds Ford, Pennsylvania]] +** ''The Central Record'' of [[Medford, New Jersey]] +** ''Maple Shade Progress'' of [[Maple Shade, New Jersey]] +* Intercounty Newspapers {{WS|buckslocalnews.com}} +** ''The Review'' of Roxborough, Pennsylvania +** ''The Recorder'' of [[Conshohocken, Pennsylvania]] +** ''The Leader'' of [[Mount Airy, Pennsylvania|Mount Airy]] and West Oak Lake, Pennsylvania +** ''The Pennington Post'' of [[Pennington, New Jersey]] +** ''The Bristol Pilot'' of [[Bristol, Pennsylvania]] +** ''Yardley News'' of [[Yardley, Pennsylvania]] +** ''New Hope Gazette'' of [[New Hope, Pennsylvania]] +** ''Doylestown Patriot'' of [[Doylestown, Pennsylvania]] +** ''Newtown Advance'' of [[Newtown, Pennsylvania]] +** ''The Plain Dealer'' of [[Williamstown, New Jersey]] +** ''News Report'' of [[Sewell, New Jersey]] +** ''Record Breeze'' of [[Berlin, New Jersey]] +** ''Newsweekly'' of [[Moorestown, New Jersey]] +** ''Haddon Herald'' of [[Haddonfield, New Jersey]] +** ''New Egypt Press'' of [[New Egypt, New Jersey]] +** ''Community News'' of [[Pemberton, New Jersey]] +** ''Plymouth Meeting Journal'' of [[Plymouth Meeting, Pennsylvania]] +** ''Lafayette Hill Journal'' of [[Lafayette Hill, Pennsylvania]] +* Montgomery Newspapers {{WS|montgomerynews.com}} +** ''Ambler Gazette'' of [[Ambler, Pennsylvania]] +** ''Central Bucks Life'' of [[Bucks County, Pennsylvania]] +** ''The Colonial'' of [[Plymouth Meeting, Pennsylvania]] +** ''Glenside News'' of [[Glenside, Pennsylvania]] +** ''The Globe'' of 
[[Lower Moreland Township, Pennsylvania]] +** ''Main Line Life'' of [[Ardmore, Pennsylvania]] +** ''Montgomery Life'' of [[Fort Washington, Pennsylvania]] +** ''North Penn Life'' of [[Lansdale, Pennsylvania]] +** ''Perkasie News Herald'' of [[Perkasie, Pennsylvania]] +** ''Public Spirit'' of [[Hatboro, Pennsylvania]] +** ''Souderton Independent'' of [[Souderton, Pennsylvania]] +** ''Springfield Sun'' of [[Springfield, Pennsylvania]] +** ''Spring-Ford Reporter'' of [[Royersford, Pennsylvania]] +** ''Times Chronicle'' of [[Jenkintown, Pennsylvania]] +** ''Valley Item'' of [[Perkiomenville, Pennsylvania]] +** ''Willow Grove Guide'' of [[Willow Grove, Pennsylvania]] +* News Gleaner Publications (closed December 2008) {{WS|newsgleaner.com}} +** ''Life Newspapers'' of [[Philadelphia, Pennsylvania]] +* Suburban Publications +** ''The Suburban & Wayne Times'' {{WS|waynesuburban.com}} of [[Wayne, Pennsylvania]] +** ''The Suburban Advertiser'' of [[Exton, Pennsylvania]] +** ''The King of Prussia Courier'' of [[King of Prussia, Pennsylvania]] +* Press Newspapers {{WS|countypressonline.com}} +** ''County Press'' of [[Newtown Square, Pennsylvania]] +** ''Garnet Valley Press'' of [[Glen Mills, Pennsylvania]] +** ''Haverford Press'' of [[Newtown Square, Pennsylvania]] (closed January 2009) +** ''Hometown Press'' of [[Glen Mills, Pennsylvania]] (closed January 2009) +** ''Media Press'' of [[Newtown Square, Pennsylvania]] (closed January 2009) +** ''Springfield Press'' of [[Springfield, Pennsylvania]] +* Berks-Mont Newspapers {{WS|berksmontnews.com}} +** ''The Boyertown Area Times'' of [[Boyertown, Pennsylvania]] +** ''The Kutztown Area Patriot'' of [[Kutztown, Pennsylvania]] +** ''The Hamburg Area Item'' of [[Hamburg, Pennsylvania]] +** ''The Southern Berks News'' of [[Exeter Township, Berks County, Pennsylvania]] +** ''The Free Press'' of [[Quakertown, Pennsylvania]] +** ''The Saucon News'' of [[Quakertown, Pennsylvania]] +** ''Westside Weekly'' of [[Reading, Pennsylvania]] + +* 
Magazines +** ''Bucks Co. Town & Country Living'' +** ''Chester Co. Town & Country Living'' +** ''Montomgery Co. Town & Country Living'' +** ''Garden State Town & Country Living'' +** ''Montgomery Homes'' +** ''Philadelphia Golfer'' +** ''Parents Express'' +** ''Art Matters'' + +{{JRC}} + +==References== + + +[[Category:Journal Register publications|*]] diff --git a/objectivec/Speedtest2.txt b/objectivec/Speedtest2.txt new file mode 100644 index 0000000..8f25a80 --- /dev/null +++ b/objectivec/Speedtest2.txt @@ -0,0 +1,188 @@ +This is a '''list of newspapers published by [[Journal Register Company]]'''. + +The company owns daily and weekly newspapers, other print media properties and newspaper-affiliated local Websites in the [[U.S.]] states of [[Connecticut]], [[Michigan]], [[New York]], [[Ohio]], [[Pennsylvania]] and [[New Jersey]], organized in six geographic "clusters":[http://www.journalregister.com/publications.html Journal Register Company: Our Publications], accessed April 21, 2010. + +== Capital-Saratoga == +Three dailies, associated weeklies and [[pennysaver]]s in greater [[Albany, New York]]; also [http://www.capitalcentral.com capitalcentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. 
+ +* ''The Oneida Daily Dispatch'' {{WS|oneidadispatch.com}} of [[Oneida, New York]] +* ''[[The Record (Troy)|The Record]]'' {{WS|troyrecord.com}} of [[Troy, New York]] +* ''[[The Saratogian]]'' {{WS|saratogian.com}} of [[Saratoga Springs, New York]] +* Weeklies: +** ''Community News'' {{WS|cnweekly.com}} weekly of [[Clifton Park, New York]] +** ''Rome Observer'' {{WS|romeobserver.com}} of [[Rome, New York]] +** ''WG Life '' {{WS|saratogian.com/wglife/}} of [[Wilton, New York]] +** ''Ballston Spa Life '' {{WS|saratogian.com/bspalife}} of [[Ballston Spa, New York]] +** ''Greenbush Life'' {{WS|troyrecord.com/greenbush}} of [[Troy, New York]] +** ''Latham Life'' {{WS|troyrecord.com/latham}} of [[Latham, New York]] +** ''River Life'' {{WS|troyrecord.com/river}} of [[Troy, New York]] + +== Connecticut == +Three dailies, associated weeklies and [[pennysaver]]s in the state of [[Connecticut]]; also [http://www.ctcentral.com CTcentral.com], [http://www.ctcarsandtrucks.com CTCarsAndTrucks.com] and [http://www.jobsinct.com JobsInCT.com]. 
+ +* ''The Middletown Press'' {{WS|middletownpress.com}} of [[Middletown, Connecticut|Middletown]] +* ''[[New Haven Register]]'' {{WS|newhavenregister.com}} of [[New Haven, Connecticut|New Haven]] +* ''The Register Citizen'' {{WS|registercitizen.com}} of [[Torrington, Connecticut|Torrington]] + +* Housatonic Publications +** ''The Housatonic Times'' {{WS|housatonictimes.com}} of [[New Milford, Connecticut|New Milford]] +** ''Litchfield County Times'' {{WS|countytimes.com}} of [[Litchfield, Connecticut|Litchfield]] + +* Minuteman Publications +** ''[[Fairfield Minuteman]]'' {{WS|fairfieldminuteman.com}}of [[Fairfield, Connecticut|Fairfield]] +** ''The Westport Minuteman'' {{WS|westportminuteman.com}} of [[Westport, Connecticut|Westport]] + +* Shoreline Newspapers +** ''The Dolphin'' {{WS|dolphin-news.com}} of [[Naval Submarine Base New London]] in [[New London, Connecticut|New London]] +** ''Shoreline Times'' {{WS|shorelinetimes.com}} of [[Guilford, Connecticut|Guilford]] + +* Foothills Media Group {{WS|foothillsmediagroup.com}} +** ''Thomaston Express'' {{WS|thomastonexpress.com}} of [[Thomaston, Connecticut|Thomaston]] +** ''Good News About Torrington'' {{WS|goodnewsabouttorrington.com}} of [[Torrington, Connecticut|Torrington]] +** ''Granby News'' {{WS|foothillsmediagroup.com/granby}} of [[Granby, Connecticut|Granby]] +** ''Canton News'' {{WS|foothillsmediagroup.com/canton}} of [[Canton, Connecticut|Canton]] +** ''Avon News'' {{WS|foothillsmediagroup.com/avon}} of [[Avon, Connecticut|Avon]] +** ''Simsbury News'' {{WS|foothillsmediagroup.com/simsbury}} of [[Simsbury, Connecticut|Simsbury]] +** ''Litchfield News'' {{WS|foothillsmediagroup.com/litchfield}} of [[Litchfield, Connecticut|Litchfield]] +** ''Foothills Trader'' {{WS|foothillstrader.com}} of Torrington, Bristol, Canton + +* Other weeklies +** ''The Milford-Orange Bulletin'' {{WS|ctbulletin.com}} of [[Orange, Connecticut|Orange]] +** ''The Post-Chronicle'' {{WS|ctpostchronicle.com}} of [[North Haven, 
Connecticut|North Haven]] +** ''West Hartford News'' {{WS|westhartfordnews.com}} of [[West Hartford, Connecticut|West Hartford]] + +* Magazines +** ''The Connecticut Bride'' {{WS|connecticutmag.com}} +** ''Connecticut Magazine'' {{WS|theconnecticutbride.com}} +** ''Passport Magazine'' {{WS|passport-mag.com}} + +== Michigan == +Four dailies, associated weeklies and [[pennysaver]]s in the state of [[Michigan]]; also [http://www.micentralhomes.com MIcentralhomes.com] and [http://www.micentralautos.com MIcentralautos.com] +* ''[[Oakland Press]]'' {{WS|theoaklandpress.com}} of [[Oakland, Michigan|Oakland]] +* ''Daily Tribune'' {{WS|dailytribune.com}} of [[Royal Oak, Michigan|Royal Oak]] +* ''Macomb Daily'' {{WS|macombdaily.com}} of [[Mt. Clemens, Michigan|Mt. Clemens]] +* ''[[Morning Sun]]'' {{WS|themorningsun.com}} of [[Mount Pleasant, Michigan|Mount Pleasant]] + +* Heritage Newspapers {{WS|heritage.com}} +** ''Belleville View'' {{WS|bellevilleview.com}} +** ''Ile Camera'' {{WS|thenewsherald.com/ile_camera}} +** ''Monroe Guardian'' {{WS|monreguardian.com}} +** ''Ypsilanti Courier'' {{WS|ypsilanticourier.com}} +** ''News-Herald'' {{WS|thenewsherald.com}} +** ''Press & Guide'' {{WS|pressandguide.com}} +** ''Chelsea Standard & Dexter Leader'' {{WS|chelseastandard.com}} +** ''Manchester Enterprise'' {{WS|manchesterguardian.com}} +** ''Milan News-Leader'' {{WS|milannews.com}} +** ''Saline Reporter'' {{WS|salinereporter.com}} +* Independent Newspapers +** ''Advisor'' {{WS|sourcenewspapers.com}} +** ''Source'' {{WS|sourcenewspapers.com}} +* Morning Star {{WS|morningstarpublishing.com}} +** ''The Leader & Kalkaskian'' {{WS|leaderandkalkaskian.com}} +** ''Grand Traverse Insider'' {{WS|grandtraverseinsider.com}} +** ''Alma Reminder'' +** ''Alpena Star'' +** ''Ogemaw/Oscoda County Star'' +** ''Presque Isle Star'' +** ''St. 
Johns Reminder'' + +* Voice Newspapers {{WS|voicenews.com}} +** ''Armada Times'' +** ''Bay Voice'' +** ''Blue Water Voice'' +** ''Downriver Voice'' +** ''Macomb Township Voice'' +** ''North Macomb Voice'' +** ''Weekend Voice'' + +== Mid-Hudson == +One daily, associated magazines in the [[Hudson River Valley]] of [[New York]]; also [http://www.midhudsoncentral.com MidHudsonCentral.com] and [http://www.jobsinnewyork.com JobsInNewYork.com]. + +* ''[[Daily Freeman]]'' {{WS|dailyfreeman.com}} of [[Kingston, New York]] +* ''Las Noticias'' {{WS|lasnoticiasny.com}} of [[Kingston, New York]] + +== Ohio == +Two dailies, associated magazines and three shared Websites, all in the state of [[Ohio]]: [http://www.allaroundcleveland.com AllAroundCleveland.com], [http://www.allaroundclevelandcars.com AllAroundClevelandCars.com] and [http://www.allaroundclevelandjobs.com AllAroundClevelandJobs.com]. + +* ''[[The News-Herald (Ohio)|The News-Herald]]'' {{WS|news-herald.com}} of [[Willoughby, Ohio|Willoughby]] +* ''[[The Morning Journal]]'' {{WS|morningjournal.com}} of [[Lorain, Ohio|Lorain]] +* ''El Latino Expreso'' {{WS|lorainlatino.com}} of [[Lorain, Ohio|Lorain]] + +== Philadelphia area == +Seven dailies and associated weeklies and magazines in [[Pennsylvania]] and [[New Jersey]], and associated Websites: [http://www.allaroundphilly.com AllAroundPhilly.com], [http://www.jobsinnj.com JobsInNJ.com], [http://www.jobsinpa.com JobsInPA.com], and [http://www.phillycarsearch.com PhillyCarSearch.com]. 
+ +* ''[[The Daily Local News]]'' {{WS|dailylocal.com}} of [[West Chester, Pennsylvania|West Chester]] +* ''[[Delaware County Daily and Sunday Times]] {{WS|delcotimes.com}} of Primos [[Upper Darby Township, Pennsylvania]] +* ''[[The Mercury (Pennsylvania)|The Mercury]]'' {{WS|pottstownmercury.com}} of [[Pottstown, Pennsylvania|Pottstown]] +* ''[[The Reporter (Lansdale)|The Reporter]]'' {{WS|thereporteronline.com}} of [[Lansdale, Pennsylvania|Lansdale]] +* ''The Times Herald'' {{WS|timesherald.com}} of [[Norristown, Pennsylvania|Norristown]] +* ''[[The Trentonian]]'' {{WS|trentonian.com}} of [[Trenton, New Jersey]] + +* Weeklies +* ''The Phoenix'' {{WS|phoenixvillenews.com}} of [[Phoenixville, Pennsylvania]] +** ''El Latino Expreso'' {{WS|njexpreso.com}} of [[Trenton, New Jersey]] +** ''La Voz'' {{WS|lavozpa.com}} of [[Norristown, Pennsylvania]] +** ''The Tri County Record'' {{WS|tricountyrecord.com}} of [[Morgantown, Pennsylvania]] +** ''Penny Pincher'' {{WS|pennypincherpa.com}}of [[Pottstown, Pennsylvania]] + +* Chesapeake Publishing {{WS|southernchestercountyweeklies.com}} +** ''The Kennett Paper'' {{WS|kennettpaper.com}} of [[Kennett Square, Pennsylvania]] +** ''Avon Grove Sun'' {{WS|avongrovesun.com}} of [[West Grove, Pennsylvania]] +** ''The Central Record'' {{WS|medfordcentralrecord.com}} of [[Medford, New Jersey]] +** ''Maple Shade Progress'' {{WS|mapleshadeprogress.com}} of [[Maple Shade, New Jersey]] + +* Intercounty Newspapers {{WS|buckslocalnews.com}} {{WS|southjerseylocalnews.com}} +** ''The Pennington Post'' {{WS|penningtonpost.com}} of [[Pennington, New Jersey]] +** ''The Bristol Pilot'' {{WS|bristolpilot.com}} of [[Bristol, Pennsylvania]] +** ''Yardley News'' {{WS|yardleynews.com}} of [[Yardley, Pennsylvania]] +** ''Advance of Bucks County'' {{WS|advanceofbucks.com}} of [[Newtown, Pennsylvania]] +** ''Record Breeze'' {{WS|recordbreeze.com}} of [[Berlin, New Jersey]] +** ''Community News'' {{WS|sjcommunitynews.com}} of [[Pemberton, New Jersey]] + +* 
Montgomery Newspapers {{WS|montgomerynews.com}} +** ''Ambler Gazette'' {{WS|amblergazette.com}} of [[Ambler, Pennsylvania]] +** ''The Colonial'' {{WS|colonialnews.com}} of [[Plymouth Meeting, Pennsylvania]] +** ''Glenside News'' {{WS|glensidenews.com}} of [[Glenside, Pennsylvania]] +** ''The Globe'' {{WS|globenewspaper.com}} of [[Lower Moreland Township, Pennsylvania]] +** ''Montgomery Life'' {{WS|montgomerylife.com}} of [[Fort Washington, Pennsylvania]] +** ''North Penn Life'' {{WS|northpennlife.com}} of [[Lansdale, Pennsylvania]] +** ''Perkasie News Herald'' {{WS|perkasienewsherald.com}} of [[Perkasie, Pennsylvania]] +** ''Public Spirit'' {{WS|thepublicspirit.com}} of [[Hatboro, Pennsylvania]] +** ''Souderton Independent'' {{WS|soudertonindependent.com}} of [[Souderton, Pennsylvania]] +** ''Springfield Sun'' {{WS|springfieldsun.com}} of [[Springfield, Pennsylvania]] +** ''Spring-Ford Reporter'' {{WS|springfordreporter.com}} of [[Royersford, Pennsylvania]] +** ''Times Chronicle'' {{WS|thetimeschronicle.com}} of [[Jenkintown, Pennsylvania]] +** ''Valley Item'' {{WS|valleyitem.com}} of [[Perkiomenville, Pennsylvania]] +** ''Willow Grove Guide'' {{WS|willowgroveguide.com}} of [[Willow Grove, Pennsylvania]] +** ''The Review'' {{WS|roxreview.com}} of [[Roxborough, Philadelphia, Pennsylvania]] + +* Main Line Media News {{WS|mainlinemedianews.com}} +** ''Main Line Times'' {{WS|mainlinetimes.com}} of [[Ardmore, Pennsylvania]] +** ''Main Line Life'' {{WS|mainlinelife.com}} of [[Ardmore, Pennsylvania]] +** ''The King of Prussia Courier'' {{WS|kingofprussiacourier.com}} of [[King of Prussia, Pennsylvania]] + +* Delaware County News Network {{WS|delconewsnetwork.com}} +** ''News of Delaware County'' {{WS|newsofdelawarecounty.com}} of [[Havertown, Pennsylvania]] +** ''County Press'' {{WS|countypressonline.com}} of [[Newtown Square, Pennsylvania]] +** ''Garnet Valley Press'' {{WS|countypressonline.com}} of [[Glen Mills, Pennsylvania]] +** ''Springfield Press'' 
{{WS|countypressonline.com}} of [[Springfield, Pennsylvania]] +** ''Town Talk'' {{WS|towntalknews.com}} of [[Ridley, Pennsylvania]] + +* Berks-Mont Newspapers {{WS|berksmontnews.com}} +** ''The Boyertown Area Times'' {{WS|berksmontnews.com/boyertown_area_times}} of [[Boyertown, Pennsylvania]] +** ''The Kutztown Area Patriot'' {{WS|berksmontnews.com/kutztown_area_patriot}} of [[Kutztown, Pennsylvania]] +** ''The Hamburg Area Item'' {{WS|berksmontnews.com/hamburg_area_item}} of [[Hamburg, Pennsylvania]] +** ''The Southern Berks News'' {{WS|berksmontnews.com/southern_berks_news}} of [[Exeter Township, Berks County, Pennsylvania]] +** ''Community Connection'' {{WS|berksmontnews.com/community_connection}} of [[Boyertown, Pennsylvania]] + +* Magazines +** ''Bucks Co. Town & Country Living'' {{WS|buckscountymagazine.com}} +** ''Parents Express'' {{WS|parents-express.com}} +** ''Real Men, Rednecks'' {{WS|realmenredneck.com}} + +{{JRC}} + +==References== + + +[[Category:Journal Register publications|*]] diff --git a/objectivec/Tests/DiffMatchPatchTest-Info.plist b/objectivec/Tests/DiffMatchPatchTest-Info.plist new file mode 100755 index 0000000..c285a47 --- /dev/null +++ b/objectivec/Tests/DiffMatchPatchTest-Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleIdentifier + com.yourcompany.${PRODUCT_NAME:rfc1034identifier} + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + BNDL + CFBundleShortVersionString + 1.0 + CFBundleSignature + ???? + CFBundleVersion + 1 + + diff --git a/objectivec/Tests/DiffMatchPatchTest.h b/objectivec/Tests/DiffMatchPatchTest.h new file mode 100755 index 0000000..1cb3753 --- /dev/null +++ b/objectivec/Tests/DiffMatchPatchTest.h @@ -0,0 +1,31 @@ +/* + * Diff Match and Patch -- Test harness + * Copyright 2018 The diff-match-patch Authors. 
+ * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import <SenTestingKit/SenTestingKit.h> + +#import "DiffMatchPatch.h" + + +@interface DiffMatchPatchTest : SenTestCase { + +} + +@end diff --git a/objectivec/Tests/DiffMatchPatchTest.m b/objectivec/Tests/DiffMatchPatchTest.m new file mode 100755 index 0000000..85bb9e3 --- /dev/null +++ b/objectivec/Tests/DiffMatchPatchTest.m @@ -0,0 +1,1315 @@ +/* + * Diff Match and Patch -- Test harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import "DiffMatchPatchTest.h" + +#import "DiffMatchPatch.h" +#import "NSMutableDictionary+DMPExtensions.h" + +#define stringForBOOL(A) ([((NSNumber *)A) boolValue] ? @"true" : @"false") + +@interface DiffMatchPatchTest (PrivatMethods) +- (NSArray *)diff_rebuildtexts:(NSMutableArray *)diffs; +@end + +@implementation DiffMatchPatchTest + +- (void)test_diff_commonPrefixTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Detect any common prefix. + // Null case. + STAssertEquals((NSUInteger)0, [dmp diff_commonPrefixOfFirstString:@"abc" andSecondString:@"xyz"], @"Common suffix null case failed."); + + // Non-null case. + STAssertEquals((NSUInteger)4, [dmp diff_commonPrefixOfFirstString:@"1234abcdef" andSecondString:@"1234xyz"], @"Common suffix non-null case failed."); + + // Whole case. + STAssertEquals((NSUInteger)4, [dmp diff_commonPrefixOfFirstString:@"1234" andSecondString:@"1234xyz"], @"Common suffix whole case failed."); + + [dmp release]; +} + +- (void)test_diff_commonSuffixTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Detect any common suffix. + // Null case. + STAssertEquals((NSUInteger)0, [dmp diff_commonSuffixOfFirstString:@"abc" andSecondString:@"xyz"], @"Detect any common suffix. Null case."); + + // Non-null case. + STAssertEquals((NSUInteger)4, [dmp diff_commonSuffixOfFirstString:@"abcdef1234" andSecondString:@"xyz1234"], @"Detect any common suffix. Non-null case."); + + // Whole case. + STAssertEquals((NSUInteger)4, [dmp diff_commonSuffixOfFirstString:@"1234" andSecondString:@"xyz1234"], @"Detect any common suffix. Whole case."); + + [dmp release]; +} + +- (void)test_diff_commonOverlapTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Detect any suffix/prefix overlap. + // Null case. + STAssertEquals((NSUInteger)0, [dmp diff_commonOverlapOfFirstString:@"" andSecondString:@"abcd"], @"Detect any suffix/prefix overlap. 
Null case."); + + // Whole case. + STAssertEquals((NSUInteger)3, [dmp diff_commonOverlapOfFirstString:@"abc" andSecondString:@"abcd"], @"Detect any suffix/prefix overlap. Whole case."); + + // No overlap. + STAssertEquals((NSUInteger)0, [dmp diff_commonOverlapOfFirstString:@"123456" andSecondString:@"abcd"], @"Detect any suffix/prefix overlap. No overlap."); + + // Overlap. + STAssertEquals((NSUInteger)3, [dmp diff_commonOverlapOfFirstString:@"123456xxx" andSecondString:@"xxxabcd"], @"Detect any suffix/prefix overlap. Overlap."); + + // Unicode. + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + STAssertEquals((NSUInteger)0, [dmp diff_commonOverlapOfFirstString:@"fi" andSecondString:@"\U0000fb01i"], @"Detect any suffix/prefix overlap. Unicode."); + + [dmp release]; +} + +- (void)test_diff_halfmatchTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + dmp.Diff_Timeout = 1; + NSArray *expectedResult = nil; + + // No match. + STAssertNil([dmp diff_halfMatchOfFirstString:@"1234567890" andSecondString:@"abcdef"], @"No match #1."); + + STAssertNil([dmp diff_halfMatchOfFirstString:@"12345" andSecondString:@"23"], @"No match #2."); + + // Single Match. 
+ expectedResult = [NSArray arrayWithObjects:@"12", @"90", @"a", @"z", @"345678", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"1234567890" andSecondString:@"a345678z"], @"Single Match #1."); + + expectedResult = [NSArray arrayWithObjects:@"a", @"z", @"12", @"90", @"345678", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"a345678z" andSecondString:@"1234567890"], @"Single Match #2."); + + expectedResult = [NSArray arrayWithObjects:@"abc", @"z", @"1234", @"0", @"56789", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"abc56789z" andSecondString:@"1234567890"], @"Single Match #3."); + + expectedResult = [NSArray arrayWithObjects:@"a", @"xyz", @"1", @"7890", @"23456", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"a23456xyz" andSecondString:@"1234567890"], @"Single Match #4."); + + // Multiple Matches. + expectedResult = [NSArray arrayWithObjects:@"12123", @"123121", @"a", @"z", @"1234123451234", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"121231234123451234123121" andSecondString:@"a1234123451234z"], @"Multiple Matches #1."); + + expectedResult = [NSArray arrayWithObjects:@"", @"-=-=-=-=-=", @"x", @"", @"x-=-=-=-=-=-=-=", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"x-=-=-=-=-=-=-=-=-=-=-=-=" andSecondString:@"xx-=-=-=-=-=-=-="], @"Multiple Matches #2."); + + expectedResult = [NSArray arrayWithObjects:@"-=-=-=-=-=", @"", @"", @"y", @"-=-=-=-=-=-=-=y", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"-=-=-=-=-=-=-=-=-=-=-=-=y" andSecondString:@"-=-=-=-=-=-=-=yy"], @"Multiple Matches #3."); + + // Non-optimal halfmatch. 
+ // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + expectedResult = [NSArray arrayWithObjects:@"qHillo", @"w", @"x", @"Hulloy", @"HelloHe", nil]; + STAssertEqualObjects(expectedResult, [dmp diff_halfMatchOfFirstString:@"qHilloHelloHew" andSecondString:@"xHelloHeHulloy"], @"Non-optimal halfmatch."); + + // Optimal no halfmatch. + dmp.Diff_Timeout = 0; + STAssertNil([dmp diff_halfMatchOfFirstString:@"qHilloHelloHew" andSecondString:@"xHelloHeHulloy"], @"Optimal no halfmatch."); + + [dmp release]; +} + +- (void)test_diff_linesToCharsTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSArray *result; + + // Convert lines down to characters. + NSMutableArray *tmpVector = [NSMutableArray array]; // Array of NSString objects. + [tmpVector addObject:@""]; + [tmpVector addObject:@"alpha\n"]; + [tmpVector addObject:@"beta\n"]; + result = [dmp diff_linesToCharsForFirstString:@"alpha\nbeta\nalpha\n" andSecondString:@"beta\nalpha\nbeta\n"]; + STAssertEqualObjects(@"\001\002\001", [result objectAtIndex:0], @"Shared lines #1."); + STAssertEqualObjects(@"\002\001\002", [result objectAtIndex:1], @"Shared lines #2."); + STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Shared lines #3."); + + [tmpVector removeAllObjects]; + [tmpVector addObject:@""]; + [tmpVector addObject:@"alpha\r\n"]; + [tmpVector addObject:@"beta\r\n"]; + [tmpVector addObject:@"\r\n"]; + result = [dmp diff_linesToCharsForFirstString:@"" andSecondString:@"alpha\r\nbeta\r\n\r\n\r\n"]; + STAssertEqualObjects(@"", [result objectAtIndex:0], @"Empty string and blank lines #1."); + STAssertEqualObjects(@"\001\002\003\003", [result objectAtIndex:1], @"Empty string and blank lines #2."); + STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Empty string and blank lines #3."); + + [tmpVector removeAllObjects]; + [tmpVector addObject:@""]; + [tmpVector addObject:@"a"]; + [tmpVector addObject:@"b"]; + result = [dmp 
diff_linesToCharsForFirstString:@"a" andSecondString:@"b"]; + STAssertEqualObjects(@"\001", [result objectAtIndex:0], @"No linebreaks #1."); + STAssertEqualObjects(@"\002", [result objectAtIndex:1], @"No linebreaks #2."); + STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"No linebreaks #3."); + + // More than 256 to reveal any 8-bit limitations. + unichar n = 300; + [tmpVector removeAllObjects]; + NSMutableString *lines = [NSMutableString string]; + NSMutableString *chars = [NSMutableString string]; + NSString *currentLine; + for (unichar x = 1; x < n + 1; x++) { + currentLine = [NSString stringWithFormat:@"%d\n", (int)x]; + [tmpVector addObject:currentLine]; + [lines appendString:currentLine]; + [chars appendString:[NSString stringWithFormat:@"%C", x]]; + } + STAssertEquals((NSUInteger)n, tmpVector.count, @"More than 256 #1."); + STAssertEquals((NSUInteger)n, chars.length, @"More than 256 #2."); + [tmpVector insertObject:@"" atIndex:0]; + result = [dmp diff_linesToCharsForFirstString:lines andSecondString:@""]; + STAssertEqualObjects(chars, [result objectAtIndex:0], @"More than 256 #3."); + STAssertEqualObjects(@"", [result objectAtIndex:1], @"More than 256 #4."); + STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"More than 256 #5."); + + [dmp release]; +} + +- (void)test_diff_charsToLinesTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Convert chars up to lines. + NSArray *diffs = [NSArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"\001\002\001"], + [Diff diffWithOperation:DIFF_INSERT andText:@"\002\001\002"], nil]; + NSMutableArray *tmpVector = [NSMutableArray array]; // Array of NSString objects. 
+ [tmpVector addObject:@""]; + [tmpVector addObject:@"alpha\n"]; + [tmpVector addObject:@"beta\n"]; + [dmp diff_chars:diffs toLines:tmpVector]; + NSArray *expectedResult = [NSArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"alpha\nbeta\nalpha\n"], + [Diff diffWithOperation:DIFF_INSERT andText:@"beta\nalpha\nbeta\n"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Shared lines."); + + // More than 256 to reveal any 8-bit limitations. + unichar n = 300; + [tmpVector removeAllObjects]; + NSMutableString *lines = [NSMutableString string]; + NSMutableString *chars = [NSMutableString string]; + NSString *currentLine; + for (unichar x = 1; x < n + 1; x++) { + currentLine = [NSString stringWithFormat:@"%d\n", (int)x]; + [tmpVector addObject:currentLine]; + [lines appendString:currentLine]; + [chars appendString:[NSString stringWithFormat:@"%C", x]]; + } + STAssertEquals((NSUInteger)n, tmpVector.count, @"More than 256 #1."); + STAssertEquals((NSUInteger)n, chars.length, @"More than 256 #2."); + [tmpVector insertObject:@"" atIndex:0]; + diffs = [NSArray arrayWithObject:[Diff diffWithOperation:DIFF_DELETE andText:chars]]; + [dmp diff_chars:diffs toLines:tmpVector]; + STAssertEqualObjects([NSArray arrayWithObject:[Diff diffWithOperation:DIFF_DELETE andText:lines]], diffs, @"More than 256 #3."); + + [dmp release]; +} + +- (void)test_diff_cleanupMergeTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *expectedResult = nil; + + // Cleanup a messy diff. + // Null case. + NSMutableArray *diffs = [NSMutableArray array]; + [dmp diff_cleanupMerge:diffs]; + STAssertEqualObjects([NSMutableArray array], diffs, @"Null case."); + + // No change case. 
+ diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"b"], [Diff diffWithOperation:DIFF_INSERT andText:@"c"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"b"], [Diff diffWithOperation:DIFF_INSERT andText:@"c"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"No change case."); + + // Merge equalities. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_EQUAL andText:@"b"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"abc"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Merge equalities."); + + // Merge deletions. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"b"], [Diff diffWithOperation:DIFF_DELETE andText:@"c"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"abc"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Merge deletions."); + + // Merge insertions. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_INSERT andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"b"], [Diff diffWithOperation:DIFF_INSERT andText:@"c"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_INSERT andText:@"abc"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Merge insertions."); + + // Merge interweave. 
+ diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"b"], [Diff diffWithOperation:DIFF_DELETE andText:@"c"], [Diff diffWithOperation:DIFF_INSERT andText:@"d"], [Diff diffWithOperation:DIFF_EQUAL andText:@"e"], [Diff diffWithOperation:DIFF_EQUAL andText:@"f"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"ac"], [Diff diffWithOperation:DIFF_INSERT andText:@"bd"], [Diff diffWithOperation:DIFF_EQUAL andText:@"ef"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Merge interweave."); + + // Prefix and suffix detection. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"abc"], [Diff diffWithOperation:DIFF_DELETE andText:@"dc"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"d"], [Diff diffWithOperation:DIFF_INSERT andText:@"b"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Prefix and suffix detection."); + + // Prefix and suffix detection with equalities. 
+ diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"x"], [Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"abc"], [Diff diffWithOperation:DIFF_DELETE andText:@"dc"], [Diff diffWithOperation:DIFF_EQUAL andText:@"y"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"xa"], [Diff diffWithOperation:DIFF_DELETE andText:@"d"], [Diff diffWithOperation:DIFF_INSERT andText:@"b"], [Diff diffWithOperation:DIFF_EQUAL andText:@"cy"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Prefix and suffix detection with equalities."); + + // Slide edit left. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"ba"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_INSERT andText:@"ab"], [Diff diffWithOperation:DIFF_EQUAL andText:@"ac"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Slide edit left."); + + // Slide edit right. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"c"], [Diff diffWithOperation:DIFF_INSERT andText:@"ab"], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"ca"], [Diff diffWithOperation:DIFF_INSERT andText:@"ba"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Slide edit right."); + + // Slide edit left recursive. 
+ diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"b"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], [Diff diffWithOperation:DIFF_DELETE andText:@"ac"], [Diff diffWithOperation:DIFF_EQUAL andText:@"x"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"abc"], [Diff diffWithOperation:DIFF_EQUAL andText:@"acx"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Slide edit left recursive."); + + // Slide edit right recursive. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"x"], [Diff diffWithOperation:DIFF_DELETE andText:@"ca"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], [Diff diffWithOperation:DIFF_DELETE andText:@"b"], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], nil]; + [dmp diff_cleanupMerge:diffs]; + expectedResult = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"xca"], [Diff diffWithOperation:DIFF_DELETE andText:@"cba"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Slide edit right recursive."); + + [dmp release]; +} + +- (void)test_diff_cleanupSemanticLosslessTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *expectedResult = nil; + + // Slide diffs to match logical boundaries. + // Null case. + NSMutableArray *diffs = [NSMutableArray array]; + [dmp diff_cleanupSemanticLossless:diffs]; + STAssertEqualObjects([NSMutableArray array], diffs, @"Null case."); + + // Blank lines. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"AAA\r\n\r\nBBB"], + [Diff diffWithOperation:DIFF_INSERT andText:@"\r\nDDD\r\n\r\nBBB"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"\r\nEEE"], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"AAA\r\n\r\n"], + [Diff diffWithOperation:DIFF_INSERT andText:@"BBB\r\nDDD\r\n\r\n"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"BBB\r\nEEE"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Blank lines."); + + // Line boundaries. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"AAA\r\nBBB"], + [Diff diffWithOperation:DIFF_INSERT andText:@" DDD\r\nBBB"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" EEE"], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"AAA\r\n"], + [Diff diffWithOperation:DIFF_INSERT andText:@"BBB DDD\r\n"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"BBB EEE"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Line boundaries."); + + // Word boundaries. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The c"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ow and the c"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"at."], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The "], + [Diff diffWithOperation:DIFF_INSERT andText:@"cow and the "], + [Diff diffWithOperation:DIFF_EQUAL andText:@"cat."], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Word boundaries."); + + // Alphanumeric boundaries. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The-c"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ow-and-the-c"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"at."], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The-"], + [Diff diffWithOperation:DIFF_INSERT andText:@"cow-and-the-"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"cat."], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Alphanumeric boundaries."); + + // Hitting the start. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"ax"], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"aax"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Hitting the start."); + + // Hitting the end. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"xa"], + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"xaa"], + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Hitting the end."); + + // Sentence boundaries. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The xxx. The "], + [Diff diffWithOperation:DIFF_INSERT andText:@"zzz. 
The "], + [Diff diffWithOperation:DIFF_EQUAL andText:@"yyy."], nil]; + [dmp diff_cleanupSemanticLossless:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The xxx."], + [Diff diffWithOperation:DIFF_INSERT andText:@" The zzz."], + [Diff diffWithOperation:DIFF_EQUAL andText:@" The yyy."], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Sentence boundaries."); + + [dmp release]; +} + +- (void)test_diff_cleanupSemanticTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *expectedResult = nil; + + // Cleanup semantically trivial equalities. + // Null case. + NSMutableArray *diffs = [NSMutableArray array]; + [dmp diff_cleanupSemantic:diffs]; + STAssertEqualObjects([NSMutableArray array], diffs, @"Null case."); + + // No elimination #1. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"cd"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"12"], + [Diff diffWithOperation:DIFF_DELETE andText:@"e"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"cd"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"12"], + [Diff diffWithOperation:DIFF_DELETE andText:@"e"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"No elimination #1."); + + // No elimination #2. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ABC"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"1234"], + [Diff diffWithOperation:DIFF_DELETE andText:@"wxyz"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ABC"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"1234"], + [Diff diffWithOperation:DIFF_DELETE andText:@"wxyz"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"No elimination #2."); + + // Simple elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"b"], + [Diff diffWithOperation:DIFF_DELETE andText:@"c"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"b"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Simple elimination."); + + // Backpass elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"cd"], + [Diff diffWithOperation:DIFF_DELETE andText:@"e"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"f"], + [Diff diffWithOperation:DIFF_INSERT andText:@"g"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcdef"], + [Diff diffWithOperation:DIFF_INSERT andText:@"cdfg"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Backpass elimination."); + + // Multiple eliminations. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_INSERT andText:@"1"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"A"], + [Diff diffWithOperation:DIFF_DELETE andText:@"B"], + [Diff diffWithOperation:DIFF_INSERT andText:@"2"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"_"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"A"], + [Diff diffWithOperation:DIFF_DELETE andText:@"B"], + [Diff diffWithOperation:DIFF_INSERT andText:@"2"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"AB_AB"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1A2_1A2"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Multiple eliminations."); + + // Word boundaries. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The c"], + [Diff diffWithOperation:DIFF_DELETE andText:@"ow and the c"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"at."], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"The "], + [Diff diffWithOperation:DIFF_DELETE andText:@"cow and the "], + [Diff diffWithOperation:DIFF_EQUAL andText:@"cat."], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Word boundaries."); + + // No overlap elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcxx"], + [Diff diffWithOperation:DIFF_INSERT andText:@"xxdef"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcxx"], + [Diff diffWithOperation:DIFF_INSERT andText:@"xxdef"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"No overlap elimination."); + + // Overlap elimination. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcxxx"], + [Diff diffWithOperation:DIFF_INSERT andText:@"xxxdef"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xxx"], + [Diff diffWithOperation:DIFF_INSERT andText:@"def"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Overlap elimination."); + + // Reverse overlap elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"xxxabc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"defxxx"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_INSERT andText:@"def"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xxx"], + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Reverse overlap elimination."); + + // Two overlap eliminations. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcd1212"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1212efghi"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"----"], + [Diff diffWithOperation:DIFF_DELETE andText:@"A3"], + [Diff diffWithOperation:DIFF_INSERT andText:@"3BC"], nil]; + [dmp diff_cleanupSemantic:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abcd"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"1212"], + [Diff diffWithOperation:DIFF_INSERT andText:@"efghi"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"----"], + [Diff diffWithOperation:DIFF_DELETE andText:@"A"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"3"], + [Diff diffWithOperation:DIFF_INSERT andText:@"BC"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Two overlap eliminations."); + + [dmp release]; +} + +- (void)test_diff_cleanupEfficiencyTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *expectedResult = nil; + + // Cleanup operationally trivial equalities. + dmp.Diff_EditCost = 4; + // Null case. + NSMutableArray *diffs = [NSMutableArray array]; + [dmp diff_cleanupEfficiency:diffs]; + STAssertEqualObjects([NSMutableArray array], diffs, @"Null case."); + + // No elimination. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"wxyz"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], nil]; + [dmp diff_cleanupEfficiency:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"wxyz"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"No elimination."); + + // Four-edit elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], nil]; + [dmp diff_cleanupEfficiency:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abxyzcd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12xyz34"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Four-edit elimination."); + + // Three-edit elimination. 
+ diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"x"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], nil]; + [dmp diff_cleanupEfficiency:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"xcd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12x34"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Three-edit elimination."); + + // Backpass elimination. + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xy"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"z"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"56"], nil]; + [dmp diff_cleanupEfficiency:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abxyzcd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12xy34z56"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"Backpass elimination."); + + // High cost elimination. 
+ dmp.Diff_EditCost = 5; + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"ab"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"wxyz"], + [Diff diffWithOperation:DIFF_DELETE andText:@"cd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"34"], nil]; + [dmp diff_cleanupEfficiency:diffs]; + expectedResult = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abwxyzcd"], + [Diff diffWithOperation:DIFF_INSERT andText:@"12wxyz34"], nil]; + STAssertEqualObjects(expectedResult, diffs, @"High cost elimination."); + dmp.Diff_EditCost = 4; + + [dmp release]; +} + +- (void)test_diff_prettyHtmlTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Pretty print. + NSMutableArray *diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"a\n"], + [Diff diffWithOperation:DIFF_DELETE andText:@"b"], + [Diff diffWithOperation:DIFF_INSERT andText:@"c&d"], nil]; + NSString *expectedResult = @"
    <B>b</B>c&d"; + STAssertEqualObjects(expectedResult, [dmp diff_prettyHtml:diffs], @"Pretty print."); + + [dmp release]; +} + +- (void)test_diff_textTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Compute the source and destination texts. + NSMutableArray *diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"jump"], + [Diff diffWithOperation:DIFF_DELETE andText:@"s"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ed"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" over "], + [Diff diffWithOperation:DIFF_DELETE andText:@"the"], + [Diff diffWithOperation:DIFF_INSERT andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" lazy"], nil]; + STAssertEqualObjects(@"jumps over the lazy", [dmp diff_text1:diffs], @"Compute the source and destination texts #1"); + + STAssertEqualObjects(@"jumped over a lazy", [dmp diff_text2:diffs], @"Compute the source and destination texts #2"); + + [dmp release]; +} + +- (void)test_diff_deltaTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *expectedResult = nil; + NSError *error = nil; + + // Convert a diff into delta string. + NSMutableArray *diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"jump"], + [Diff diffWithOperation:DIFF_DELETE andText:@"s"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ed"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" over "], + [Diff diffWithOperation:DIFF_DELETE andText:@"the"], + [Diff diffWithOperation:DIFF_INSERT andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" lazy"], + [Diff diffWithOperation:DIFF_INSERT andText:@"old dog"], nil]; + NSString *text1 = [dmp diff_text1:diffs]; + STAssertEqualObjects(@"jumps over the lazy", text1, @"Convert a diff into delta string 1."); + + NSString *delta = [dmp diff_toDelta:diffs]; + STAssertEqualObjects(@"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta, @"Convert a diff into delta string 2."); + + // Convert delta string into a diff. 
+ STAssertEqualObjects(diffs, [dmp diff_fromDeltaWithText:text1 andDelta:delta error:NULL], @"Convert delta string into a diff."); + + // Generates error (19 < 20). + diffs = [dmp diff_fromDeltaWithText:[text1 stringByAppendingString:@"x"] andDelta:delta error:&error]; + if (diffs != nil || error == nil) { + STFail(@"diff_fromDelta: Too long."); + } + error = nil; + + // Generates error (19 > 18). + diffs = [dmp diff_fromDeltaWithText:[text1 substringFromIndex:1] andDelta:delta error:&error]; + if (diffs != nil || error == nil) { + STFail(@"diff_fromDelta: Too short."); + } + error = nil; + + // Generates error (%c3%xy invalid Unicode). + diffs = [dmp diff_fromDeltaWithText:@"" andDelta:@"+%c3%xy" error:&error]; + if (diffs != nil || error == nil) { + STFail(@"diff_fromDelta: Invalid character."); + } + error = nil; + + // Test deltas with special characters. + unichar zero = (unichar)0; + unichar one = (unichar)1; + unichar two = (unichar)2; + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:[NSString stringWithFormat:@"\U00000680 %C \t %%", zero]], + [Diff diffWithOperation:DIFF_DELETE andText:[NSString stringWithFormat:@"\U00000681 %C \n ^", one]], + [Diff diffWithOperation:DIFF_INSERT andText:[NSString stringWithFormat:@"\U00000682 %C \\ |", two]], nil]; + text1 = [dmp diff_text1:diffs]; + NSString *expectedString = [NSString stringWithFormat:@"\U00000680 %C \t %%\U00000681 %C \n ^", zero, one]; + STAssertEqualObjects(expectedString, text1, @"Test deltas with special characters."); + + delta = [dmp diff_toDelta:diffs]; + // Upper case, because CFURLCreateStringByAddingPercentEscapes() emits upper-case percent escapes. + STAssertEqualObjects(@"=7\t-7\t+%DA%82 %02 %5C %7C", delta, @"diff_toDelta: Unicode 1."); + + STAssertEqualObjects(diffs, [dmp diff_fromDeltaWithText:text1 andDelta:delta error:NULL], @"diff_fromDelta: Unicode 2."); + + // Verify pool of unchanged characters. 
+ diffs = [NSMutableArray arrayWithObject: + [Diff diffWithOperation:DIFF_INSERT andText:@"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # "]]; + NSString *text2 = [dmp diff_text2:diffs]; + STAssertEqualObjects(@"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ", text2, @"diff_text2: Unchanged characters 1."); + + delta = [dmp diff_toDelta:diffs]; + STAssertEqualObjects(@"+A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ", delta, @"diff_toDelta: Unchanged characters 2."); + + // Convert delta string into a diff. + expectedResult = [dmp diff_fromDeltaWithText:@"" andDelta:delta error:NULL]; + STAssertEqualObjects(diffs, expectedResult, @"diff_fromDelta: Unchanged characters. Convert delta string into a diff."); + + [dmp release]; +} + +- (void)test_diff_xIndexTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Translate a location in text1 to text2. + NSMutableArray *diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"a"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1234"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], nil] /* Diff */; + STAssertEquals((NSUInteger)5, [dmp diff_xIndexIn:diffs location:2], @"diff_xIndex: Translation on equality. 
Translate a location in text1 to text2."); + + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], + [Diff diffWithOperation:DIFF_DELETE andText:@"1234"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], nil] /* Diff */; + STAssertEquals((NSUInteger)1, [dmp diff_xIndexIn:diffs location:3], @"diff_xIndex: Translation on deletion."); + + [dmp release]; +} + +- (void)test_diff_levenshteinTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + NSMutableArray *diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1234"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], nil] /* Diff */; + STAssertEquals((NSUInteger)4, [dmp diff_levenshtein:diffs], @"diff_levenshtein: Levenshtein with trailing equality."); + + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1234"], nil] /* Diff */; + STAssertEquals((NSUInteger)4, [dmp diff_levenshtein:diffs], @"diff_levenshtein: Levenshtein with leading equality."); + + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"abc"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"xyz"], + [Diff diffWithOperation:DIFF_INSERT andText:@"1234"], nil] /* Diff */; + STAssertEquals((NSUInteger)7, [dmp diff_levenshtein:diffs], @"diff_levenshtein: Levenshtein with middle equality."); + + [dmp release]; +} + +- (void)diff_bisectTest; +{ + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Normal. + NSString *a = @"cat"; + NSString *b = @"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. 
+ NSMutableArray *diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"c"], [Diff diffWithOperation:DIFF_INSERT andText:@"m"], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"t"], [Diff diffWithOperation:DIFF_INSERT andText:@"p"], nil]; + STAssertEqualObjects(diffs, [dmp diff_bisectOfOldString:a andNewString:b deadline:[[NSDate distantFuture] timeIntervalSinceReferenceDate]], @"Bisect test."); + + // Timeout. + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"cat"], [Diff diffWithOperation:DIFF_INSERT andText:@"map"], nil]; + STAssertEqualObjects(diffs, [dmp diff_bisectOfOldString:a andNewString:b deadline:[[NSDate distantPast] timeIntervalSinceReferenceDate]], @"Bisect timeout."); + + [dmp release]; +} + +- (void)test_diff_mainTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Perform a trivial diff. + NSMutableArray *diffs = [NSMutableArray array]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"" andNewString:@"" checkLines:NO], @"diff_main: Null case."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"abc"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"abc" andNewString:@"abc" checkLines:NO], @"diff_main: Equality."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"ab"], [Diff diffWithOperation:DIFF_INSERT andText:@"123"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"abc" andNewString:@"ab123c" checkLines:NO], @"diff_main: Simple insertion."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"123"], [Diff diffWithOperation:DIFF_EQUAL andText:@"bc"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"a123bc" andNewString:@"abc" checkLines:NO], 
@"diff_main: Simple deletion."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"123"], [Diff diffWithOperation:DIFF_EQUAL andText:@"b"], [Diff diffWithOperation:DIFF_INSERT andText:@"456"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"abc" andNewString:@"a123b456c" checkLines:NO], @"diff_main: Two insertions."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"123"], [Diff diffWithOperation:DIFF_EQUAL andText:@"b"], [Diff diffWithOperation:DIFF_DELETE andText:@"456"], [Diff diffWithOperation:DIFF_EQUAL andText:@"c"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"a123b456c" andNewString:@"abc" checkLines:NO], @"diff_main: Two deletions."); + + // Perform a real diff. + // Switch off the timeout. + dmp.Diff_Timeout = 0; + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"b"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"a" andNewString:@"b" checkLines:NO], @"diff_main: Simple case #1."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"Apple"], [Diff diffWithOperation:DIFF_INSERT andText:@"Banana"], [Diff diffWithOperation:DIFF_EQUAL andText:@"s are a"], [Diff diffWithOperation:DIFF_INSERT andText:@"lso"], [Diff diffWithOperation:DIFF_EQUAL andText:@" fruit."], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"Apples are a fruit." andNewString:@"Bananas are also fruit." 
checkLines:NO], @"diff_main: Simple case #2."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"\U00000680"], [Diff diffWithOperation:DIFF_EQUAL andText:@"x"], [Diff diffWithOperation:DIFF_DELETE andText:@"\t"], [Diff diffWithOperation:DIFF_INSERT andText:[NSString stringWithFormat:@"%C", 0]], nil]; + NSString *aString = [NSString stringWithFormat:@"\U00000680x%C", 0]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"ax\t" andNewString:aString checkLines:NO], @"diff_main: Simple case #3."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"1"], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"y"], [Diff diffWithOperation:DIFF_EQUAL andText:@"b"], [Diff diffWithOperation:DIFF_DELETE andText:@"2"], [Diff diffWithOperation:DIFF_INSERT andText:@"xab"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"1ayb2" andNewString:@"abxab" checkLines:NO], @"diff_main: Overlap #1."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_INSERT andText:@"xaxcx"], [Diff diffWithOperation:DIFF_EQUAL andText:@"abc"], [Diff diffWithOperation:DIFF_DELETE andText:@"y"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"abcy" andNewString:@"xaxcxabc" checkLines:NO], @"diff_main: Overlap #2."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_DELETE andText:@"ABCD"], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_DELETE andText:@"="], [Diff diffWithOperation:DIFF_INSERT andText:@"-"], [Diff diffWithOperation:DIFF_EQUAL andText:@"bcd"], [Diff diffWithOperation:DIFF_DELETE andText:@"="], [Diff diffWithOperation:DIFF_INSERT andText:@"-"], [Diff diffWithOperation:DIFF_EQUAL andText:@"efghijklmnopqrs"], [Diff diffWithOperation:DIFF_DELETE andText:@"EFGHIJKLMNOefg"], nil]; + STAssertEqualObjects(diffs, 
[dmp diff_mainOfOldString:@"ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg" andNewString:@"a-bcd-efghijklmnopqrs" checkLines:NO], @"diff_main: Overlap #3."); + + diffs = [NSMutableArray arrayWithObjects:[Diff diffWithOperation:DIFF_INSERT andText:@" "], [Diff diffWithOperation:DIFF_EQUAL andText:@"a"], [Diff diffWithOperation:DIFF_INSERT andText:@"nd"], [Diff diffWithOperation:DIFF_EQUAL andText:@" [[Pennsylvania]]"], [Diff diffWithOperation:DIFF_DELETE andText:@" and [[New"], nil]; + STAssertEqualObjects(diffs, [dmp diff_mainOfOldString:@"a [[Pennsylvania]] and [[New" andNewString:@" and [[Pennsylvania]]" checkLines:NO], @"diff_main: Large equality."); + + dmp.Diff_Timeout = 0.1f; // 100ms + NSString *a = @"`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + NSString *b = @"I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + NSMutableString *aMutable = [NSMutableString stringWithString:a]; + NSMutableString *bMutable = [NSMutableString stringWithString:b]; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + [aMutable appendString:aMutable]; + [bMutable appendString:bMutable]; + } + a = aMutable; + b = bMutable; + NSTimeInterval startTime = [NSDate timeIntervalSinceReferenceDate]; + [dmp diff_mainOfOldString:a andNewString:b]; + NSTimeInterval endTime = [NSDate timeIntervalSinceReferenceDate]; + // Test that we took at least the timeout period. + STAssertTrue((dmp.Diff_Timeout <= (endTime - startTime)), @"Test that we took at least the timeout period."); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. 
+ // This will fail when running this as PPC code thru Rosetta on Intel. + STAssertTrue(((dmp.Diff_Timeout * 2) > (endTime - startTime)), @"Test that we didn't take forever (be forgiving)."); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 200 character cutoff. + a = @"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = @"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + STAssertEqualObjects([dmp diff_mainOfOldString:a andNewString:b checkLines:YES], [dmp diff_mainOfOldString:a andNewString:b checkLines:NO], @"diff_main: Simple line-mode."); + + a = @"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = @"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + STAssertEqualObjects([dmp diff_mainOfOldString:a andNewString:b checkLines:YES], [dmp diff_mainOfOldString:a andNewString:b checkLines:NO], @"diff_main: Single line-mode."); + + a = @"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = @"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + NSArray *texts_linemode = [self diff_rebuildtexts:[dmp diff_mainOfOldString:a andNewString:b checkLines:YES]]; + NSArray *texts_textmode = [self diff_rebuildtexts:[dmp diff_mainOfOldString:a andNewString:b checkLines:NO]]; + STAssertEqualObjects(texts_textmode, texts_linemode, @"diff_main: Overlap line-mode."); + + // CHANGEME: Test null inputs + + [dmp release]; +} + + +#pragma mark 
Match Test Functions +// MATCH TEST FUNCTIONS + + +- (void)test_match_alphabetTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Initialise the bitmasks for Bitap. + NSMutableDictionary *bitmask = [NSMutableDictionary dictionary]; + + [bitmask diff_setUnsignedIntegerValue:4 forUnicharKey:'a']; + [bitmask diff_setUnsignedIntegerValue:2 forUnicharKey:'b']; + [bitmask diff_setUnsignedIntegerValue:1 forUnicharKey:'c']; + STAssertEqualObjects(bitmask, [dmp match_alphabet:@"abc"], @"match_alphabet: Unique."); + + [bitmask removeAllObjects]; + [bitmask diff_setUnsignedIntegerValue:37 forUnicharKey:'a']; + [bitmask diff_setUnsignedIntegerValue:18 forUnicharKey:'b']; + [bitmask diff_setUnsignedIntegerValue:8 forUnicharKey:'c']; + STAssertEqualObjects(bitmask, [dmp match_alphabet:@"abcaba"], @"match_alphabet: Duplicates."); + + [dmp release]; +} + +- (void)test_match_bitapTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + STAssertEquals((NSUInteger)5, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"fgh" near:5], @"match_bitap: Exact match #1."); + + STAssertEquals((NSUInteger)5, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"fgh" near:0], @"match_bitap: Exact match #2."); + + STAssertEquals((NSUInteger)4, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"efxhi" near:0], @"match_bitap: Fuzzy match #1."); + + STAssertEquals((NSUInteger)2, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"cdefxyhijk" near:5], @"match_bitap: Fuzzy match #2."); + + STAssertEquals((NSUInteger)NSNotFound, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"bxy" near:1], @"match_bitap: Fuzzy match #3."); + + STAssertEquals((NSUInteger)2, [dmp match_bitapOfText:@"123456789xx0" andPattern:@"3456789x0" near:2], @"match_bitap: Overflow."); + + STAssertEquals((NSUInteger)0, [dmp match_bitapOfText:@"abcdef" andPattern:@"xxabc" near:4], @"match_bitap: Before start match."); + + 
STAssertEquals((NSUInteger)3, [dmp match_bitapOfText:@"abcdef" andPattern:@"defyy" near:4], @"match_bitap: Beyond end match."); + + STAssertEquals((NSUInteger)0, [dmp match_bitapOfText:@"abcdef" andPattern:@"xabcdefy" near:0], @"match_bitap: Oversized pattern."); + + dmp.Match_Threshold = 0.4f; + STAssertEquals((NSUInteger)4, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"efxyhi" near:1], @"match_bitap: Threshold #1."); + + dmp.Match_Threshold = 0.3f; + STAssertEquals((NSUInteger)NSNotFound, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"efxyhi" near:1], @"match_bitap: Threshold #2."); + + dmp.Match_Threshold = 0.0f; + STAssertEquals((NSUInteger)1, [dmp match_bitapOfText:@"abcdefghijk" andPattern:@"bcdef" near:1], @"match_bitap: Threshold #3."); + + dmp.Match_Threshold = 0.5f; + STAssertEquals((NSUInteger)0, [dmp match_bitapOfText:@"abcdexyzabcde" andPattern:@"abccde" near:3], @"match_bitap: Multiple select #1."); + + STAssertEquals((NSUInteger)8, [dmp match_bitapOfText:@"abcdexyzabcde" andPattern:@"abccde" near:5], @"match_bitap: Multiple select #2."); + + dmp.Match_Distance = 10; // Strict location. + STAssertEquals((NSUInteger)NSNotFound, [dmp match_bitapOfText:@"abcdefghijklmnopqrstuvwxyz" andPattern:@"abcdefg" near:24], @"match_bitap: Distance test #1."); + + STAssertEquals((NSUInteger)0, [dmp match_bitapOfText:@"abcdefghijklmnopqrstuvwxyz" andPattern:@"abcdxxefg" near:1], @"match_bitap: Distance test #2."); + + dmp.Match_Distance = 1000; // Loose location. + STAssertEquals((NSUInteger)0, [dmp match_bitapOfText:@"abcdefghijklmnopqrstuvwxyz" andPattern:@"abcdefg" near:24], @"match_bitap: Distance test #3."); + + [dmp release]; +} + +- (void)test_match_mainTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + // Full match. 
+ STAssertEquals((NSUInteger)0, [dmp match_mainForText:@"abcdef" pattern:@"abcdef" near:1000], @"match_main: Equality."); + + STAssertEquals((NSUInteger)NSNotFound, [dmp match_mainForText:@"" pattern:@"abcdef" near:1], @"match_main: Null text."); + + STAssertEquals((NSUInteger)3, [dmp match_mainForText:@"abcdef" pattern:@"" near:3], @"match_main: Null pattern."); + + STAssertEquals((NSUInteger)3, [dmp match_mainForText:@"abcdef" pattern:@"de" near:3], @"match_main: Exact match."); + + STAssertEquals((NSUInteger)3, [dmp match_mainForText:@"abcdef" pattern:@"defy" near:4], @"match_main: Beyond end match."); + + STAssertEquals((NSUInteger)0, [dmp match_mainForText:@"abcdef" pattern:@"abcdefy" near:0], @"match_main: Oversized pattern."); + + dmp.Match_Threshold = 0.7f; + STAssertEquals((NSUInteger)4, [dmp match_mainForText:@"I am the very model of a modern major general." pattern:@" that berry " near:5], @"match_main: Complex match."); + dmp.Match_Threshold = 0.5f; + + // CHANGEME: Test null inputs + + [dmp release]; +} + + +#pragma mark Patch Test Functions +// PATCH TEST FUNCTIONS + + +- (void)test_patch_patchObjTest { + // Patch Object. 
+ Patch *p = [[Patch new] autorelease]; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_EQUAL andText:@"jump"], + [Diff diffWithOperation:DIFF_DELETE andText:@"s"], + [Diff diffWithOperation:DIFF_INSERT andText:@"ed"], + [Diff diffWithOperation:DIFF_EQUAL andText:@" over "], + [Diff diffWithOperation:DIFF_DELETE andText:@"the"], + [Diff diffWithOperation:DIFF_INSERT andText:@"a"], + [Diff diffWithOperation:DIFF_EQUAL andText:@"\nlaz"], nil]; + NSString *strp = @"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + STAssertEqualObjects(strp, [p description], @"Patch: description."); +} + +- (void)test_patch_fromTextTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + STAssertTrue(((NSMutableArray *)[dmp patch_fromText:@"" error:NULL]).count == 0, @"patch_fromText: #0."); + + NSString *strp = @"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + STAssertEqualObjects(strp, [[[dmp patch_fromText:strp error:NULL] objectAtIndex:0] description], @"patch_fromText: #1."); + + STAssertEqualObjects(@"@@ -1 +1 @@\n-a\n+b\n", [[[dmp patch_fromText:@"@@ -1 +1 @@\n-a\n+b\n" error:NULL] objectAtIndex:0] description], @"patch_fromText: #2."); + + STAssertEqualObjects(@"@@ -1,3 +0,0 @@\n-abc\n", [[[dmp patch_fromText:@"@@ -1,3 +0,0 @@\n-abc\n" error:NULL] objectAtIndex:0] description], @"patch_fromText: #3."); + + STAssertEqualObjects(@"@@ -0,0 +1,3 @@\n+abc\n", [[[dmp patch_fromText:@"@@ -0,0 +1,3 @@\n+abc\n" error:NULL] objectAtIndex:0] description], @"patch_fromText: #4."); + + // Generates error. + NSError *error = nil; + NSMutableArray *patches = [dmp patch_fromText:@"Bad\nPatch\n" error:&error]; + if (patches != nil || error == nil) { + // Error expected. 
+ STFail(@"patch_fromText: #5."); + } + error = nil; + + [dmp release]; +} + +- (void)test_patch_toTextTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + NSString *strp = @"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + NSMutableArray *patches; + patches = [dmp patch_fromText:strp error:NULL]; + STAssertEqualObjects(strp, [dmp patch_toText:patches], @"toText Test #1"); + + strp = @"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = [dmp patch_fromText:strp error:NULL]; + STAssertEqualObjects(strp, [dmp patch_toText:patches], @"toText Test #2"); + + [dmp release]; +} + +- (void)test_patch_addContextTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + dmp.Patch_Margin = 4; + Patch *p; + p = [[dmp patch_fromText:@"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" error:NULL] objectAtIndex:0]; + [dmp patch_addContextToPatch:p sourceText:@"The quick brown fox jumps over the lazy dog."]; + STAssertEqualObjects(@"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", [p description], @"patch_addContext: Simple case."); + + p = [[dmp patch_fromText:@"@@ -21,4 +21,10 @@\n-jump\n+somersault\n" error:NULL] objectAtIndex:0]; + [dmp patch_addContextToPatch:p sourceText:@"The quick brown fox jumps."]; + STAssertEqualObjects(@"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", [p description], @"patch_addContext: Not enough trailing context."); + + p = [[dmp patch_fromText:@"@@ -3 +3,2 @@\n-e\n+at\n" error:NULL] objectAtIndex:0]; + [dmp patch_addContextToPatch:p sourceText:@"The quick brown fox jumps."]; + STAssertEqualObjects(@"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", [p description], @"patch_addContext: Not enough leading context."); + + p = [[dmp patch_fromText:@"@@ -3 +3,2 @@\n-e\n+at\n" error:NULL] objectAtIndex:0]; + [dmp patch_addContextToPatch:p sourceText:@"The quick brown fox jumps. The quick brown fox crashes."]; + STAssertEqualObjects(@"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", [p description], @"patch_addContext: Ambiguity."); + + [dmp release]; +} + +- (void)test_patch_makeTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + NSMutableArray *patches; + patches = [dmp patch_makeFromOldString:@"" andNewString:@""]; + STAssertEqualObjects(@"", [dmp patch_toText:patches], @"patch_make: Null case."); + + NSString *text1 = @"The quick brown fox jumps over the lazy dog."; + NSString *text2 = @"That quick brown fox jumped over a lazy dog."; + NSString *expectedPatch = @"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be @"-21,17 +21,18", not @"-22,17 +21,18" due to rolling context. + patches = [dmp patch_makeFromOldString:text2 andNewString:text1]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Text2+Text1 inputs."); + + expectedPatch = @"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = [dmp patch_makeFromOldString:text1 andNewString:text2]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Text1+Text2 inputs."); + + NSMutableArray *diffs = [dmp diff_mainOfOldString:text1 andNewString:text2 checkLines:NO]; + patches = [dmp patch_makeFromDiffs:diffs]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Diff input."); + + patches = [dmp patch_makeFromOldString:text1 andDiffs:diffs]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Text1+Diff inputs."); + + patches = [dmp patch_makeFromOldString:text1 newString:text2 diffs:diffs]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Text1+Text2+Diff inputs (deprecated)."); + + patches = [dmp patch_makeFromOldString:@"`1234567890-=[]\\;',./" andNewString:@"~!@#$%^&*()_+{}|:\"<>?"]; + STAssertEqualObjects(@"@@ -1,21 +1,21 
@@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + [dmp patch_toText:patches], + @"patch_toText: Character encoding."); + + diffs = [NSMutableArray arrayWithObjects: + [Diff diffWithOperation:DIFF_DELETE andText:@"`1234567890-=[]\\;',./"], + [Diff diffWithOperation:DIFF_INSERT andText:@"~!@#$%^&*()_+{}|:\"<>?"], nil]; + STAssertEqualObjects(diffs, + ((Patch *)[[dmp patch_fromText:@"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n" error:NULL] objectAtIndex:0]).diffs, + @"patch_fromText: Character decoding."); + + NSMutableString *text1Mutable = [NSMutableString string]; + for (int x = 0; x < 100; x++) { + [text1Mutable appendString:@"abcdef"]; + } + text1 = text1Mutable; + text2 = [text1 stringByAppendingString:@"123"]; + // CHANGEME: Why does this implementation produce a different, more brief patch? + //expectedPatch = @"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + expectedPatch = @"@@ -597,4 +597,7 @@\n cdef\n+123\n"; + patches = [dmp patch_makeFromOldString:text1 andNewString:text2]; + STAssertEqualObjects(expectedPatch, [dmp patch_toText:patches], @"patch_make: Long string with repeats."); + + // CHANGEME: Test null inputs + + [dmp release]; +} + + +- (void)test_patch_splitMaxTest { + // Assumes that Match_MaxBits is 32. 
+ DiffMatchPatch *dmp = [DiffMatchPatch new]; + NSMutableArray *patches; + + patches = [dmp patch_makeFromOldString:@"abcdefghijklmnopqrstuvwxyz01234567890" andNewString:@"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"]; + [dmp patch_splitMax:patches]; + STAssertEqualObjects(@"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", [dmp patch_toText:patches], @"Assumes that Match_MaxBits is 32 #1"); + + patches = [dmp patch_makeFromOldString:@"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz" andNewString:@"abcdefuvwxyz"]; + NSString *oldToText = [dmp patch_toText:patches]; + [dmp patch_splitMax:patches]; + STAssertEqualObjects(oldToText, [dmp patch_toText:patches], @"Assumes that Match_MaxBits is 32 #2"); + + patches = [dmp patch_makeFromOldString:@"1234567890123456789012345678901234567890123456789012345678901234567890" andNewString:@"abc"]; + [dmp patch_splitMax:patches]; + STAssertEqualObjects(@"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", [dmp patch_toText:patches], @"Assumes that Match_MaxBits is 32 #3"); + + patches = [dmp patch_makeFromOldString:@"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1" andNewString:@"abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"]; + [dmp patch_splitMax:patches]; + STAssertEqualObjects(@"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", [dmp patch_toText:patches], @"Assumes that Match_MaxBits is 32 #4"); + + [dmp release]; +} + +- (void)test_patch_addPaddingTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + NSMutableArray *patches; + patches = [dmp 
patch_makeFromOldString:@"" andNewString:@"test"]; + STAssertEqualObjects(@"@@ -0,0 +1,4 @@\n+test\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges full."); + [dmp patch_addPadding:patches]; + STAssertEqualObjects(@"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges full."); + + patches = [dmp patch_makeFromOldString:@"XY" andNewString:@"XtestY"]; + STAssertEqualObjects(@"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges partial."); + [dmp patch_addPadding:patches]; + STAssertEqualObjects(@"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges partial."); + + patches = [dmp patch_makeFromOldString:@"XXXXYYYY" andNewString:@"XXXXtestYYYY"]; + STAssertEqualObjects(@"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges none."); + [dmp patch_addPadding:patches]; + STAssertEqualObjects(@"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", + [dmp patch_toText:patches], + @"patch_addPadding: Both edges none."); + + [dmp release]; +} + +- (void)test_patch_applyTest { + DiffMatchPatch *dmp = [DiffMatchPatch new]; + + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + NSMutableArray *patches; + patches = [dmp patch_makeFromOldString:@"" andNewString:@""]; + NSArray *results = [dmp patch_apply:patches toString:@"Hello world."]; + NSMutableArray *boolArray = [results objectAtIndex:1]; + NSString *resultStr = [NSString stringWithFormat:@"%@\t%lu", [results objectAtIndex:0], (unsigned long)boolArray.count]; + STAssertEqualObjects(@"Hello world.\t0", resultStr, @"patch_apply: Null case."); + + patches = [dmp patch_makeFromOldString:@"The quick brown fox jumps over the lazy dog." 
andNewString:@"That quick brown fox jumped over a lazy dog."]; + results = [dmp patch_apply:patches toString:@"The quick brown fox jumps over the lazy dog."]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr, @"patch_apply: Exact match."); + + results = [dmp patch_apply:patches toString:@"The quick red rabbit jumps over the tired tiger."]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr, @"patch_apply: Partial match."); + + results = [dmp patch_apply:patches toString:@"I am the very model of a modern major general."]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"I am the very model of a modern major general.\tfalse\tfalse", resultStr, @"patch_apply: Failed match."); + + patches = [dmp patch_makeFromOldString:@"x1234567890123456789012345678901234567890123456789012345678901234567890y" andNewString:@"xabcy"]; + results = [dmp patch_apply:patches toString:@"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"xabcy\ttrue\ttrue", resultStr, @"patch_apply: Big delete, small change."); + + patches = [dmp 
patch_makeFromOldString:@"x1234567890123456789012345678901234567890123456789012345678901234567890y" andNewString:@"xabcy"]; + results = [dmp patch_apply:patches toString:@"x12345678901234567890---------------++++++++++---------------12345678901234567890y"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr, @"patch_apply: Big delete, big change 1."); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = [dmp patch_makeFromOldString:@"x1234567890123456789012345678901234567890123456789012345678901234567890y" andNewString:@"xabcy"]; + results = [dmp patch_apply:patches toString:@"x12345678901234567890---------------++++++++++---------------12345678901234567890y"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"xabcy\ttrue\ttrue", resultStr, @"patch_apply: Big delete, big change 2."); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = [dmp patch_makeFromOldString:@"abcdefghijklmnopqrstuvwxyz--------------------1234567890" andNewString:@"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"]; + results = [dmp patch_apply:patches toString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0]), stringForBOOL([boolArray objectAtIndex:1])]; + STAssertEqualObjects(@"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", 
resultStr, @"patch_apply: Compensate for failed patch."); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = [dmp patch_makeFromOldString:@"" andNewString:@"test"]; + NSString *patchStr = [dmp patch_toText:patches]; + [dmp patch_apply:patches toString:@""]; + STAssertEqualObjects(patchStr, [dmp patch_toText:patches], @"patch_apply: No side effects."); + + patches = [dmp patch_makeFromOldString:@"The quick brown fox jumps over the lazy dog." andNewString:@"Woof"]; + patchStr = [dmp patch_toText:patches]; + [dmp patch_apply:patches toString:@"The quick brown fox jumps over the lazy dog."]; + STAssertEqualObjects(patchStr, [dmp patch_toText:patches], @"patch_apply: No side effects with major delete."); + + patches = [dmp patch_makeFromOldString:@"" andNewString:@"test"]; + results = [dmp patch_apply:patches toString:@""]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0])]; + STAssertEqualObjects(@"test\ttrue", resultStr, @"patch_apply: Edge exact match."); + + patches = [dmp patch_makeFromOldString:@"XY" andNewString:@"XtestY"]; + results = [dmp patch_apply:patches toString:@"XY"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0])]; + STAssertEqualObjects(@"XtestY\ttrue", resultStr, @"patch_apply: Near edge exact match."); + + patches = [dmp patch_makeFromOldString:@"y" andNewString:@"y123"]; + results = [dmp patch_apply:patches toString:@"x"]; + boolArray = [results objectAtIndex:1]; + resultStr = [NSString stringWithFormat:@"%@\t%@", [results objectAtIndex:0], stringForBOOL([boolArray objectAtIndex:0])]; + STAssertEqualObjects(@"x123\ttrue", resultStr, @"patch_apply: Edge partial match."); + + [dmp release]; +} + + +#pragma mark Test Utility Functions +// TEST UTILITY FUNCTIONS + + +- (NSArray 
*)diff_rebuildtexts:(NSMutableArray *)diffs; +{ + NSArray *text = [NSMutableArray arrayWithObjects:[NSMutableString string], [NSMutableString string], nil]; + for (Diff *myDiff in diffs) { + if (myDiff.operation != DIFF_INSERT) { + [[text objectAtIndex:0] appendString:myDiff.text]; + } + if (myDiff.operation != DIFF_DELETE) { + [[text objectAtIndex:1] appendString:myDiff.text]; + } + } + return text; +} + +@end diff --git a/objectivec/Tests/speedtest.m b/objectivec/Tests/speedtest.m new file mode 100755 index 0000000..2112c62 --- /dev/null +++ b/objectivec/Tests/speedtest.m @@ -0,0 +1,50 @@ +/* + * Diff Match and Patch -- Test harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Author: fraser@google.com (Neil Fraser) + * ObjC port: jan@geheimwerk.de (Jan Weiß) + */ + +#import + +#import + +int main (int argc, const char * argv[]) { + NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init]; + + NSString *text1 = [NSString stringWithContentsOfFile:@"Speedtest1.txt" + encoding:NSUTF8StringEncoding + error:NULL]; + + NSString *text2 = [NSString stringWithContentsOfFile:@"Speedtest2.txt" + encoding:NSUTF8StringEncoding + error:NULL]; + + DiffMatchPatch *dmp = [DiffMatchPatch new]; + dmp.Diff_Timeout = 0; + + NSTimeInterval start = [NSDate timeIntervalSinceReferenceDate]; + [dmp diff_mainOfOldString:text1 andNewString:text2]; + NSTimeInterval duration = [NSDate timeIntervalSinceReferenceDate] - start; + + [dmp release]; + + NSLog(@"Elapsed time: %.4lf", (double)duration); + + [pool drain]; + return 0; +} diff --git a/objectivec/speedtest_Prefix.pch b/objectivec/speedtest_Prefix.pch new file mode 100755 index 0000000..c0aa561 --- /dev/null +++ b/objectivec/speedtest_Prefix.pch @@ -0,0 +1,7 @@ +// +// Prefix header for all source files of the 'speedtest' target in the 'DiffMatchPatch' project. +// + +#ifdef __OBJC__ + #import +#endif diff --git a/python2/__init__.py b/python2/__init__.py new file mode 100644 index 0000000..bd6e8b6 --- /dev/null +++ b/python2/__init__.py @@ -0,0 +1,2 @@ +from .diff_match_patch import diff_match_patch, patch_obj + diff --git a/python2/diff_match_patch.py b/python2/diff_match_patch.py new file mode 100644 index 0000000..3ae6252 --- /dev/null +++ b/python2/diff_match_patch.py @@ -0,0 +1,1918 @@ +#!/usr/bin/python2.4 + +from __future__ import division + +"""Diff Match and Patch +Copyright 2018 The diff-match-patch Authors. +https://github.com/google/diff-match-patch + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +"""Functions for diff, match and patch. + +Computes the difference between two texts to create a patch. +Applies the patch onto another text, allowing for errors. +""" + +__author__ = 'fraser@google.com (Neil Fraser)' + +import math +import re +import sys +import time +import urllib + +class diff_match_patch: + """Class containing the diff, match and patch methods. + + Also contains the behaviour settings. + """ + + def __init__(self): + """Inits a diff_match_patch object with default settings. + Redefine these in your program to override the defaults. + """ + + # Number of seconds to map a diff before giving up (0 for infinity). + self.Diff_Timeout = 1.0 + # Cost of an empty edit operation in terms of edit characters. + self.Diff_EditCost = 4 + # At what point is no match declared (0.0 = perfection, 1.0 = very loose). + self.Match_Threshold = 0.5 + # How far to search for a match (0 = exact location, 1000+ = broad match). + # A match this many characters away from the expected location will add + # 1.0 to the score (0.0 is a perfect match). + self.Match_Distance = 1000 + # When deleting a large block of text (over ~64 characters), how close do + # the contents have to be to match the expected contents. (0.0 = perfection, + # 1.0 = very loose). Note that Match_Threshold controls how closely the + # end points of a delete need to match. + self.Patch_DeleteThreshold = 0.5 + # Chunk size for context length. + self.Patch_Margin = 4 + + # The number of bits in an int. + # Python has no maximum, thus to disable patch splitting set to 0. 
+ # However to avoid long patches in certain pathological cases, use 32. + # Multiple short patches (using native ints) are much faster than long ones. + self.Match_MaxBits = 32 + + # DIFF FUNCTIONS + + # The data structure representing a diff is an array of tuples: + # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] + # which means: delete "Hello", add "Goodbye" and keep " world." + DIFF_DELETE = -1 + DIFF_INSERT = 1 + DIFF_EQUAL = 0 + + def diff_main(self, text1, text2, checklines=True, deadline=None): + """Find the differences between two texts. Simplifies the problem by + stripping any common prefix or suffix off the texts before diffing. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Optional speedup flag. If present and false, then don't run + a line-level diff first to identify the changed areas. + Defaults to true, which does a faster, slightly less optimal diff. + deadline: Optional time when the diff should be complete by. Used + internally for recursive calls. Users should set DiffTimeout instead. + + Returns: + Array of changes. + """ + # Set a deadline by which time the diff must be complete. + if deadline == None: + # Unlike in most languages, Python counts time in seconds. + if self.Diff_Timeout <= 0: + deadline = sys.maxint + else: + deadline = time.time() + self.Diff_Timeout + + # Check for null inputs. + if text1 == None or text2 == None: + raise ValueError("Null inputs. (diff_main)") + + # Check for equality (speedup). + if text1 == text2: + if text1: + return [(self.DIFF_EQUAL, text1)] + return [] + + # Trim off common prefix (speedup). + commonlength = self.diff_commonPrefix(text1, text2) + commonprefix = text1[:commonlength] + text1 = text1[commonlength:] + text2 = text2[commonlength:] + + # Trim off common suffix (speedup). 
+ commonlength = self.diff_commonSuffix(text1, text2) + if commonlength == 0: + commonsuffix = '' + else: + commonsuffix = text1[-commonlength:] + text1 = text1[:-commonlength] + text2 = text2[:-commonlength] + + # Compute the diff on the middle block. + diffs = self.diff_compute(text1, text2, checklines, deadline) + + # Restore the prefix and suffix. + if commonprefix: + diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] + if commonsuffix: + diffs.append((self.DIFF_EQUAL, commonsuffix)) + self.diff_cleanupMerge(diffs) + return diffs + + def diff_compute(self, text1, text2, checklines, deadline): + """Find the differences between two texts. Assumes that the texts do not + have any common prefix or suffix. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Speedup flag. If false, then don't run a line-level diff + first to identify the changed areas. + If true, then run a faster, slightly less optimal diff. + deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + if not text1: + # Just add some text (speedup). + return [(self.DIFF_INSERT, text2)] + + if not text2: + # Just delete some text (speedup). + return [(self.DIFF_DELETE, text1)] + + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + i = longtext.find(shorttext) + if i != -1: + # Shorter text is inside the longer text (speedup). + diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), + (self.DIFF_INSERT, longtext[i + len(shorttext):])] + # Swap insertions for deletions if diff is reversed. + if len(text1) > len(text2): + diffs[0] = (self.DIFF_DELETE, diffs[0][1]) + diffs[2] = (self.DIFF_DELETE, diffs[2][1]) + return diffs + + if len(shorttext) == 1: + # Single character string. + # After the previous speedup, the character can't be an equality. 
+ return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + # Check to see if the problem can be split in two. + hm = self.diff_halfMatch(text1, text2) + if hm: + # A half-match was found, sort out the return data. + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + # Send both pairs off for separate processing. + diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) + diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) + # Merge the results. + return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b + + if checklines and len(text1) > 100 and len(text2) > 100: + return self.diff_lineMode(text1, text2, deadline) + + return self.diff_bisect(text1, text2, deadline) + + def diff_lineMode(self, text1, text2, deadline): + """Do a quick line-level diff on both strings, then rediff the parts for + greater accuracy. + This speedup can produce non-minimal diffs. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + + # Scan the text on a line-by-line basis first. + (text1, text2, linearray) = self.diff_linesToChars(text1, text2) + + diffs = self.diff_main(text1, text2, False, deadline) + + # Convert the diff back to original text. + self.diff_charsToLines(diffs, linearray) + # Eliminate freak matches (e.g. blank lines) + self.diff_cleanupSemantic(diffs) + + # Rediff any replacement blocks, this time character-by-character. + # Add a dummy entry at the end. + diffs.append((self.DIFF_EQUAL, '')) + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. 
+ if count_delete >= 1 and count_insert >= 1: + # Delete the offending records and add the merged ones. + a = self.diff_main(text_delete, text_insert, False, deadline) + diffs[pointer - count_delete - count_insert : pointer] = a + pointer = pointer - count_delete - count_insert + len(a) + count_insert = 0 + count_delete = 0 + text_delete = '' + text_insert = '' + + pointer += 1 + + diffs.pop() # Remove the dummy entry at the end. + + return diffs + + def diff_bisect(self, text1, text2, deadline): + """Find the 'middle snake' of a diff, split the problem in two + and return the recursively constructed diff. + See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + max_d = (text1_length + text2_length + 1) // 2 + v_offset = max_d + v_length = 2 * max_d + v1 = [-1] * v_length + v1[v_offset + 1] = 0 + v2 = v1[:] + delta = text1_length - text2_length + # If the total number of characters is odd, then the front path will + # collide with the reverse path. + front = (delta % 2 != 0) + # Offsets for start and end of k loop. + # Prevents mapping of space beyond the grid. + k1start = 0 + k1end = 0 + k2start = 0 + k2end = 0 + for d in xrange(max_d): + # Bail out if deadline is reached. + if time.time() > deadline: + break + + # Walk the front path one step. + for k1 in xrange(-d + k1start, d + 1 - k1end, 2): + k1_offset = v_offset + k1 + if k1 == -d or (k1 != d and + v1[k1_offset - 1] < v1[k1_offset + 1]): + x1 = v1[k1_offset + 1] + else: + x1 = v1[k1_offset - 1] + 1 + y1 = x1 - k1 + while (x1 < text1_length and y1 < text2_length and + text1[x1] == text2[y1]): + x1 += 1 + y1 += 1 + v1[k1_offset] = x1 + if x1 > text1_length: + # Ran off the right of the graph. 
+ k1end += 2 + elif y1 > text2_length: + # Ran off the bottom of the graph. + k1start += 2 + elif front: + k2_offset = v_offset + delta - k1 + if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[k2_offset] + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Walk the reverse path one step. + for k2 in xrange(-d + k2start, d + 1 - k2end, 2): + k2_offset = v_offset + k2 + if k2 == -d or (k2 != d and + v2[k2_offset - 1] < v2[k2_offset + 1]): + x2 = v2[k2_offset + 1] + else: + x2 = v2[k2_offset - 1] + 1 + y2 = x2 - k2 + while (x2 < text1_length and y2 < text2_length and + text1[-x2 - 1] == text2[-y2 - 1]): + x2 += 1 + y2 += 1 + v2[k2_offset] = x2 + if x2 > text1_length: + # Ran off the left of the graph. + k2end += 2 + elif y2 > text2_length: + # Ran off the top of the graph. + k2start += 2 + elif not front: + k1_offset = v_offset + delta - k2 + if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: + x1 = v1[k1_offset] + y1 = v_offset + x1 - k1_offset + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2 + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Diff took too long and hit the deadline or + # number of diffs equals number of characters, no commonality at all. + return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + def diff_bisectSplit(self, text1, text2, x, y, deadline): + """Given the location of the 'middle snake', split the diff in two parts + and recurse. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + x: Index of split point in text1. + y: Index of split point in text2. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + text1a = text1[:x] + text2a = text2[:y] + text1b = text1[x:] + text2b = text2[y:] + + # Compute both diffs serially. 
+ diffs = self.diff_main(text1a, text2a, False, deadline) + diffsb = self.diff_main(text1b, text2b, False, deadline) + + return diffs + diffsb + + def diff_linesToChars(self, text1, text2): + """Split two texts into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + + Args: + text1: First string. + text2: Second string. + + Returns: + Three element tuple, containing the encoded text1, the encoded text2 and + the array of unique strings. The zeroth element of the array of unique + strings is intentionally blank. + """ + lineArray = [] # e.g. lineArray[4] == "Hello\n" + lineHash = {} # e.g. lineHash["Hello\n"] == 4 + + # "\x00" is a valid character, but various debuggers don't like it. + # So we'll insert a junk entry to avoid generating a null character. + lineArray.append('') + + def diff_linesToCharsMunge(text): + """Split a text into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + Modifies linearray and linehash through being a closure. + + Args: + text: String to encode. + + Returns: + Encoded string. + """ + chars = [] + # Walk the text, pulling out a substring for each line. + # text.split('\n') would would temporarily double our memory footprint. + # Modifying text would create many large strings to garbage collect. 
+ lineStart = 0 + lineEnd = -1 + while lineEnd < len(text) - 1: + lineEnd = text.find('\n', lineStart) + if lineEnd == -1: + lineEnd = len(text) - 1 + line = text[lineStart:lineEnd + 1] + lineStart = lineEnd + 1 + + if line in lineHash: + chars.append(unichr(lineHash[line])) + else: + lineArray.append(line) + lineHash[line] = len(lineArray) - 1 + chars.append(unichr(len(lineArray) - 1)) + return "".join(chars) + + chars1 = diff_linesToCharsMunge(text1) + chars2 = diff_linesToCharsMunge(text2) + return (chars1, chars2, lineArray) + + def diff_charsToLines(self, diffs, lineArray): + """Rehydrate the text in a diff from a string of line hashes to real lines + of text. + + Args: + diffs: Array of diff tuples. + lineArray: Array of unique strings. + """ + for x in xrange(len(diffs)): + text = [] + for char in diffs[x][1]: + text.append(lineArray[ord(char)]) + diffs[x] = (diffs[x][0], "".join(text)) + + def diff_commonPrefix(self, text1, text2): + """Determine the common prefix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the start of each string. + """ + # Quick check for common null cases. + if not text1 or not text2 or text1[0] != text2[0]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerstart = 0 + while pointermin < pointermid: + if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: + pointermin = pointermid + pointerstart = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonSuffix(self, text1, text2): + """Determine the common suffix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the end of each string. + """ + # Quick check for common null cases. 
+ if not text1 or not text2 or text1[-1] != text2[-1]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerend = 0 + while pointermin < pointermid: + if (text1[-pointermid:len(text1) - pointerend] == + text2[-pointermid:len(text2) - pointerend]): + pointermin = pointermid + pointerend = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonOverlap(self, text1, text2): + """Determine if the suffix of one string is the prefix of another. + + Args: + text1 First string. + text2 Second string. + + Returns: + The number of characters common to the end of the first + string and the start of the second string. + """ + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + # Eliminate the null case. + if text1_length == 0 or text2_length == 0: + return 0 + # Truncate the longer string. + if text1_length > text2_length: + text1 = text1[-text2_length:] + elif text1_length < text2_length: + text2 = text2[:text1_length] + text_length = min(text1_length, text2_length) + # Quick check for the worst case. + if text1 == text2: + return text_length + + # Start by looking for a single character match + # and increase length until no match is found. + # Performance analysis: http://neil.fraser.name/news/2010/11/04/ + best = 0 + length = 1 + while True: + pattern = text1[-length:] + found = text2.find(pattern) + if found == -1: + return best + length += found + if found == 0 or text1[-length:] == text2[:length]: + best = length + length += 1 + + def diff_halfMatch(self, text1, text2): + """Do the two texts share a substring which is at least half the length of + the longer text? + This speedup can produce non-minimal diffs. + + Args: + text1: First string. + text2: Second string. 
+ + Returns: + Five element Array, containing the prefix of text1, the suffix of text1, + the prefix of text2, the suffix of text2 and the common middle. Or None + if there was no match. + """ + if self.Diff_Timeout <= 0: + # Don't risk returning a non-optimal diff if we have unlimited time. + return None + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): + return None # Pointless. + + def diff_halfMatchI(longtext, shorttext, i): + """Does a substring of shorttext exist within longtext such that the + substring is at least half the length of longtext? + Closure, but does not reference any external variables. + + Args: + longtext: Longer string. + shorttext: Shorter string. + i: Start index of quarter length substring within longtext. + + Returns: + Five element Array, containing the prefix of longtext, the suffix of + longtext, the prefix of shorttext, the suffix of shorttext and the + common middle. Or None if there was no match. + """ + seed = longtext[i:i + len(longtext) // 4] + best_common = '' + j = shorttext.find(seed) + while j != -1: + prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) + suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) + if len(best_common) < suffixLength + prefixLength: + best_common = (shorttext[j - suffixLength:j] + + shorttext[j:j + prefixLength]) + best_longtext_a = longtext[:i - suffixLength] + best_longtext_b = longtext[i + prefixLength:] + best_shorttext_a = shorttext[:j - suffixLength] + best_shorttext_b = shorttext[j + prefixLength:] + j = shorttext.find(seed, j + 1) + + if len(best_common) * 2 >= len(longtext): + return (best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common) + else: + return None + + # First check if the second quarter is the seed for a half-match. 
+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) + # Check again based on the third quarter. + hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) + if not hm1 and not hm2: + return None + elif not hm2: + hm = hm1 + elif not hm1: + hm = hm2 + else: + # Both matched. Select the longest. + if len(hm1[4]) > len(hm2[4]): + hm = hm1 + else: + hm = hm2 + + # A half-match was found, sort out the return data. + if len(text1) > len(text2): + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + else: + (text2_a, text2_b, text1_a, text1_b, mid_common) = hm + return (text1_a, text1_b, text2_a, text2_b, mid_common) + + def diff_cleanupSemantic(self, diffs): + """Reduce the number of edits by eliminating semantically trivial + equalities. + + Args: + diffs: Array of diff tuples. + """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + # Number of chars that changed prior to the equality. + length_insertions1, length_deletions1 = 0, 0 + # Number of chars that changed after the equality. + length_insertions2, length_deletions2 = 0, 0 + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. + equalities.append(pointer) + length_insertions1, length_insertions2 = length_insertions2, 0 + length_deletions1, length_deletions2 = length_deletions2, 0 + lastequality = diffs[pointer][1] + else: # An insertion or deletion. + if diffs[pointer][0] == self.DIFF_INSERT: + length_insertions2 += len(diffs[pointer][1]) + else: + length_deletions2 += len(diffs[pointer][1]) + # Eliminate an equality that is smaller or equal to the edits on both + # sides of it. + if (lastequality and (len(lastequality) <= + max(length_insertions1, length_deletions1)) and + (len(lastequality) <= max(length_insertions2, length_deletions2))): + # Duplicate record. 
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) + # Change second copy to insert. + diffs[equalities[-1] + 1] = (self.DIFF_INSERT, + diffs[equalities[-1] + 1][1]) + # Throw away the equality we just deleted. + equalities.pop() + # Throw away the previous equality (it needs to be reevaluated). + if len(equalities): + equalities.pop() + if len(equalities): + pointer = equalities[-1] + else: + pointer = -1 + # Reset the counters. + length_insertions1, length_deletions1 = 0, 0 + length_insertions2, length_deletions2 = 0, 0 + lastequality = None + changes = True + pointer += 1 + + # Normalize the diff. + if changes: + self.diff_cleanupMerge(diffs) + self.diff_cleanupSemanticLossless(diffs) + + # Find any overlaps between deletions and insertions. + # e.g: abcxxxxxxdef + # -> abcxxxdef + # e.g: xxxabcdefxxx + # -> defxxxabc + # Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1 + while pointer < len(diffs): + if (diffs[pointer - 1][0] == self.DIFF_DELETE and + diffs[pointer][0] == self.DIFF_INSERT): + deletion = diffs[pointer - 1][1] + insertion = diffs[pointer][1] + overlap_length1 = self.diff_commonOverlap(deletion, insertion) + overlap_length2 = self.diff_commonOverlap(insertion, deletion) + if overlap_length1 >= overlap_length2: + if (overlap_length1 >= len(deletion) / 2.0 or + overlap_length1 >= len(insertion) / 2.0): + # Overlap found. Insert an equality and trim the surrounding edits. + diffs.insert(pointer, (self.DIFF_EQUAL, + insertion[:overlap_length1])) + diffs[pointer - 1] = (self.DIFF_DELETE, + deletion[:len(deletion) - overlap_length1]) + diffs[pointer + 1] = (self.DIFF_INSERT, + insertion[overlap_length1:]) + pointer += 1 + else: + if (overlap_length2 >= len(deletion) / 2.0 or + overlap_length2 >= len(insertion) / 2.0): + # Reverse overlap found. + # Insert an equality and swap and trim the surrounding edits. 
+ diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) + diffs[pointer - 1] = (self.DIFF_INSERT, + insertion[:len(insertion) - overlap_length2]) + diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) + pointer += 1 + pointer += 1 + pointer += 1 + + def diff_cleanupSemanticLossless(self, diffs): + """Look for single edits surrounded on both sides by equalities + which can be shifted sideways to align the edit to a word boundary. + e.g: The cat came. -> The cat came. + + Args: + diffs: Array of diff tuples. + """ + + def diff_cleanupSemanticScore(one, two): + """Given two strings, compute a score representing whether the + internal boundary falls on logical boundaries. + Scores range from 6 (best) to 0 (worst). + Closure, but does not reference any external variables. + + Args: + one: First string. + two: Second string. + + Returns: + The score. + """ + if not one or not two: + # Edges are the best. + return 6 + + # Each port of this function behaves slightly differently due to + # subtle differences in each language's definition of things like + # 'whitespace'. Since this function's purpose is largely cosmetic, + # the choice has been made to use each language's native features + # rather than force total conformity. + char1 = one[-1] + char2 = two[0] + nonAlphaNumeric1 = not char1.isalnum() + nonAlphaNumeric2 = not char2.isalnum() + whitespace1 = nonAlphaNumeric1 and char1.isspace() + whitespace2 = nonAlphaNumeric2 and char2.isspace() + lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") + lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") + blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) + blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) + + if blankLine1 or blankLine2: + # Five points for blank lines. + return 5 + elif lineBreak1 or lineBreak2: + # Four points for line breaks. + return 4 + elif nonAlphaNumeric1 and not whitespace1 and whitespace2: + # Three points for end of sentences. 
+ return 3 + elif whitespace1 or whitespace2: + # Two points for whitespace. + return 2 + elif nonAlphaNumeric1 or nonAlphaNumeric2: + # One point for non-alphanumeric. + return 1 + return 0 + + pointer = 1 + # Intentionally ignore the first and last element (don't need checking). + while pointer < len(diffs) - 1: + if (diffs[pointer - 1][0] == self.DIFF_EQUAL and + diffs[pointer + 1][0] == self.DIFF_EQUAL): + # This is a single edit surrounded by equalities. + equality1 = diffs[pointer - 1][1] + edit = diffs[pointer][1] + equality2 = diffs[pointer + 1][1] + + # First, shift the edit as far left as possible. + commonOffset = self.diff_commonSuffix(equality1, edit) + if commonOffset: + commonString = edit[-commonOffset:] + equality1 = equality1[:-commonOffset] + edit = commonString + edit[:-commonOffset] + equality2 = commonString + equality2 + + # Second, step character by character right, looking for the best fit. + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + bestScore = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + while edit and equality2 and edit[0] == equality2[0]: + equality1 += edit[0] + edit = edit[1:] + equality2[0] + equality2 = equality2[1:] + score = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + # The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore: + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + + if diffs[pointer - 1][1] != bestEquality1: + # We have an improvement, save it back to the diff. 
+ if bestEquality1: + diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) + else: + del diffs[pointer - 1] + pointer -= 1 + diffs[pointer] = (diffs[pointer][0], bestEdit) + if bestEquality2: + diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) + else: + del diffs[pointer + 1] + pointer -= 1 + pointer += 1 + + # Define some regex patterns for matching boundaries. + BLANKLINEEND = re.compile(r"\n\r?\n$"); + BLANKLINESTART = re.compile(r"^\r?\n\r?\n"); + + def diff_cleanupEfficiency(self, diffs): + """Reduce the number of edits by eliminating operationally trivial + equalities. + + Args: + diffs: Array of diff tuples. + """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + pre_ins = False # Is there an insertion operation before the last equality. + pre_del = False # Is there a deletion operation before the last equality. + post_ins = False # Is there an insertion operation after the last equality. + post_del = False # Is there a deletion operation after the last equality. + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. + if (len(diffs[pointer][1]) < self.Diff_EditCost and + (post_ins or post_del)): + # Candidate found. + equalities.append(pointer) + pre_ins = post_ins + pre_del = post_del + lastequality = diffs[pointer][1] + else: + # Not a candidate, and can never become one. + equalities = [] + lastequality = None + + post_ins = post_del = False + else: # An insertion or deletion. + if diffs[pointer][0] == self.DIFF_DELETE: + post_del = True + else: + post_ins = True + + # Five types to be split: + # ABXYCD + # AXCD + # ABXC + # AXCD + # ABXC + + if lastequality and ((pre_ins and pre_del and post_ins and post_del) or + ((len(lastequality) < self.Diff_EditCost / 2) and + (pre_ins + pre_del + post_ins + post_del) == 3)): + # Duplicate record. 
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) + # Change second copy to insert. + diffs[equalities[-1] + 1] = (self.DIFF_INSERT, + diffs[equalities[-1] + 1][1]) + equalities.pop() # Throw away the equality we just deleted. + lastequality = None + if pre_ins and pre_del: + # No changes made which could affect previous entry, keep going. + post_ins = post_del = True + equalities = [] + else: + if len(equalities): + equalities.pop() # Throw away the previous equality. + if len(equalities): + pointer = equalities[-1] + else: + pointer = -1 + post_ins = post_del = False + changes = True + pointer += 1 + + if changes: + self.diff_cleanupMerge(diffs) + + def diff_cleanupMerge(self, diffs): + """Reorder and merge like edit sections. Merge equalities. + Any edit section can move as long as it doesn't cross an equality. + + Args: + diffs: Array of diff tuples. + """ + diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1: + if count_delete != 0 and count_insert != 0: + # Factor out any common prefixies. 
+ commonlength = self.diff_commonPrefix(text_insert, text_delete) + if commonlength != 0: + x = pointer - count_delete - count_insert - 1 + if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: + diffs[x] = (diffs[x][0], diffs[x][1] + + text_insert[:commonlength]) + else: + diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) + pointer += 1 + text_insert = text_insert[commonlength:] + text_delete = text_delete[commonlength:] + # Factor out any common suffixies. + commonlength = self.diff_commonSuffix(text_insert, text_delete) + if commonlength != 0: + diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + + diffs[pointer][1]) + text_insert = text_insert[:-commonlength] + text_delete = text_delete[:-commonlength] + # Delete the offending records and add the merged ones. + if count_delete == 0: + diffs[pointer - count_insert : pointer] = [ + (self.DIFF_INSERT, text_insert)] + elif count_insert == 0: + diffs[pointer - count_delete : pointer] = [ + (self.DIFF_DELETE, text_delete)] + else: + diffs[pointer - count_delete - count_insert : pointer] = [ + (self.DIFF_DELETE, text_delete), + (self.DIFF_INSERT, text_insert)] + pointer = pointer - count_delete - count_insert + 1 + if count_delete != 0: + pointer += 1 + if count_insert != 0: + pointer += 1 + elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: + # Merge this equality with the previous one. + diffs[pointer - 1] = (diffs[pointer - 1][0], + diffs[pointer - 1][1] + diffs[pointer][1]) + del diffs[pointer] + else: + pointer += 1 + + count_insert = 0 + count_delete = 0 + text_delete = '' + text_insert = '' + + if diffs[-1][1] == '': + diffs.pop() # Remove the dummy entry at the end. + + # Second pass: look for single edits surrounded on both sides by equalities + # which can be shifted sideways to eliminate an equality. + # e.g: ABAC -> ABAC + changes = False + pointer = 1 + # Intentionally ignore the first and last element (don't need checking). 
+ while pointer < len(diffs) - 1: + if (diffs[pointer - 1][0] == self.DIFF_EQUAL and + diffs[pointer + 1][0] == self.DIFF_EQUAL): + # This is a single edit surrounded by equalities. + if diffs[pointer][1].endswith(diffs[pointer - 1][1]): + # Shift the edit over the previous equality. + diffs[pointer] = (diffs[pointer][0], + diffs[pointer - 1][1] + + diffs[pointer][1][:-len(diffs[pointer - 1][1])]) + diffs[pointer + 1] = (diffs[pointer + 1][0], + diffs[pointer - 1][1] + diffs[pointer + 1][1]) + del diffs[pointer - 1] + changes = True + elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): + # Shift the edit over the next equality. + diffs[pointer - 1] = (diffs[pointer - 1][0], + diffs[pointer - 1][1] + diffs[pointer + 1][1]) + diffs[pointer] = (diffs[pointer][0], + diffs[pointer][1][len(diffs[pointer + 1][1]):] + + diffs[pointer + 1][1]) + del diffs[pointer + 1] + changes = True + pointer += 1 + + # If shifts were made, the diff needs reordering and another shift sweep. + if changes: + self.diff_cleanupMerge(diffs) + + def diff_xIndex(self, diffs, loc): + """loc is a location in text1, compute and return the equivalent location + in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 + + Args: + diffs: Array of diff tuples. + loc: Location within text1. + + Returns: + Location within text2. + """ + chars1 = 0 + chars2 = 0 + last_chars1 = 0 + last_chars2 = 0 + for x in xrange(len(diffs)): + (op, text) = diffs[x] + if op != self.DIFF_INSERT: # Equality or deletion. + chars1 += len(text) + if op != self.DIFF_DELETE: # Equality or insertion. + chars2 += len(text) + if chars1 > loc: # Overshot the location. + break + last_chars1 = chars1 + last_chars2 = chars2 + + if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: + # The location was deleted. + return last_chars2 + # Add the remaining len(character). + return last_chars2 + (loc - last_chars1) + + def diff_prettyHtml(self, diffs): + """Convert a diff array into a pretty HTML report. 
+ + Args: + diffs: Array of diff tuples. + + Returns: + HTML representation. + """ + html = [] + for (op, data) in diffs: + text = (data.replace("&", "&").replace("<", "<") + .replace(">", ">").replace("\n", "¶
    ")) + if op == self.DIFF_INSERT: + html.append("%s" % text) + elif op == self.DIFF_DELETE: + html.append("%s" % text) + elif op == self.DIFF_EQUAL: + html.append("%s" % text) + return "".join(html) + + def diff_text1(self, diffs): + """Compute and return the source text (all equalities and deletions). + + Args: + diffs: Array of diff tuples. + + Returns: + Source text. + """ + text = [] + for (op, data) in diffs: + if op != self.DIFF_INSERT: + text.append(data) + return "".join(text) + + def diff_text2(self, diffs): + """Compute and return the destination text (all equalities and insertions). + + Args: + diffs: Array of diff tuples. + + Returns: + Destination text. + """ + text = [] + for (op, data) in diffs: + if op != self.DIFF_DELETE: + text.append(data) + return "".join(text) + + def diff_levenshtein(self, diffs): + """Compute the Levenshtein distance; the number of inserted, deleted or + substituted characters. + + Args: + diffs: Array of diff tuples. + + Returns: + Number of changes. + """ + levenshtein = 0 + insertions = 0 + deletions = 0 + for (op, data) in diffs: + if op == self.DIFF_INSERT: + insertions += len(data) + elif op == self.DIFF_DELETE: + deletions += len(data) + elif op == self.DIFF_EQUAL: + # A deletion and an insertion is one substitution. + levenshtein += max(insertions, deletions) + insertions = 0 + deletions = 0 + levenshtein += max(insertions, deletions) + return levenshtein + + def diff_toDelta(self, diffs): + """Crush the diff into an encoded string which describes the operations + required to transform text1 into text2. + E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + Operations are tab-separated. Inserted text is escaped using %xx notation. + + Args: + diffs: Array of diff tuples. + + Returns: + Delta text. + """ + text = [] + for (op, data) in diffs: + if op == self.DIFF_INSERT: + # High ascii will raise UnicodeDecodeError. Use Unicode instead. 
+ data = data.encode("utf-8") + text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# ")) + elif op == self.DIFF_DELETE: + text.append("-%d" % len(data)) + elif op == self.DIFF_EQUAL: + text.append("=%d" % len(data)) + return "\t".join(text) + + def diff_fromDelta(self, text1, delta): + """Given the original text1, and an encoded string which describes the + operations required to transform text1 into text2, compute the full diff. + + Args: + text1: Source string for the diff. + delta: Delta text. + + Returns: + Array of diff tuples. + + Raises: + ValueError: If invalid input. + """ + if type(delta) == unicode: + # Deltas should be composed of a subset of ascii chars, Unicode not + # required. If this encode raises UnicodeEncodeError, delta is invalid. + delta = delta.encode("ascii") + diffs = [] + pointer = 0 # Cursor in text1 + tokens = delta.split("\t") + for token in tokens: + if token == "": + # Blank tokens are ok (from a trailing \t). + continue + # Each token begins with a one character parameter which specifies the + # operation of this token (delete, insert, equality). + param = token[1:] + if token[0] == "+": + param = urllib.unquote(param).decode("utf-8") + diffs.append((self.DIFF_INSERT, param)) + elif token[0] == "-" or token[0] == "=": + try: + n = int(param) + except ValueError: + raise ValueError("Invalid number in diff_fromDelta: " + param) + if n < 0: + raise ValueError("Negative number in diff_fromDelta: " + param) + text = text1[pointer : pointer + n] + pointer += n + if token[0] == "=": + diffs.append((self.DIFF_EQUAL, text)) + else: + diffs.append((self.DIFF_DELETE, text)) + else: + # Anything else is an error. + raise ValueError("Invalid diff operation in diff_fromDelta: " + + token[0]) + if pointer != len(text1): + raise ValueError( + "Delta length (%d) does not equal source text length (%d)." 
% + (pointer, len(text1))) + return diffs + + # MATCH FUNCTIONS + + def match_main(self, text, pattern, loc): + """Locate the best instance of 'pattern' in 'text' near 'loc'. + + Args: + text: The text to search. + pattern: The pattern to search for. + loc: The location to search around. + + Returns: + Best match index or -1. + """ + # Check for null inputs. + if text == None or pattern == None: + raise ValueError("Null inputs. (match_main)") + + loc = max(0, min(loc, len(text))) + if text == pattern: + # Shortcut (potentially not guaranteed by the algorithm) + return 0 + elif not text: + # Nothing to match. + return -1 + elif text[loc:loc + len(pattern)] == pattern: + # Perfect match at the perfect spot! (Includes case of null pattern) + return loc + else: + # Do a fuzzy compare. + match = self.match_bitap(text, pattern, loc) + return match + + def match_bitap(self, text, pattern, loc): + """Locate the best instance of 'pattern' in 'text' near 'loc' using the + Bitap algorithm. + + Args: + text: The text to search. + pattern: The pattern to search for. + loc: The location to search around. + + Returns: + Best match index or -1. + """ + # Python doesn't have a maxint limit, so ignore this check. + #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: + # raise ValueError("Pattern too long for this application.") + + # Initialise the alphabet. + s = self.match_alphabet(pattern) + + def match_bitapScore(e, x): + """Compute and return the score for a match with e errors and x location. + Accesses loc and pattern through being a closure. + + Args: + e: Number of errors in match. + x: Location of match. + + Returns: + Overall score for match (0.0 = good, 1.0 = bad). + """ + accuracy = float(e) / len(pattern) + proximity = abs(loc - x) + if not self.Match_Distance: + # Dodge divide by zero error. + return proximity and 1.0 or accuracy + return accuracy + (proximity / float(self.Match_Distance)) + + # Highest score beyond which we give up. 
+ score_threshold = self.Match_Threshold + # Is there a nearby exact match? (speedup) + best_loc = text.find(pattern, loc) + if best_loc != -1: + score_threshold = min(match_bitapScore(0, best_loc), score_threshold) + # What about in the other direction? (speedup) + best_loc = text.rfind(pattern, loc + len(pattern)) + if best_loc != -1: + score_threshold = min(match_bitapScore(0, best_loc), score_threshold) + + # Initialise the bit arrays. + matchmask = 1 << (len(pattern) - 1) + best_loc = -1 + + bin_max = len(pattern) + len(text) + # Empty initialization added to appease pychecker. + last_rd = None + for d in xrange(len(pattern)): + # Scan for the best match each iteration allows for one more error. + # Run a binary search to determine how far from 'loc' we can stray at + # this error level. + bin_min = 0 + bin_mid = bin_max + while bin_min < bin_mid: + if match_bitapScore(d, loc + bin_mid) <= score_threshold: + bin_min = bin_mid + else: + bin_max = bin_mid + bin_mid = (bin_max - bin_min) // 2 + bin_min + + # Use the result from this iteration as the maximum for the next. + bin_max = bin_mid + start = max(1, loc - bin_mid + 1) + finish = min(loc + bin_mid, len(text)) + len(pattern) + + rd = [0] * (finish + 2) + rd[finish + 1] = (1 << d) - 1 + for j in xrange(finish, start - 1, -1): + if len(text) <= j - 1: + # Out of range. + charMatch = 0 + else: + charMatch = s.get(text[j - 1], 0) + if d == 0: # First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch + else: # Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( + ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] + if rd[j] & matchmask: + score = match_bitapScore(d, j - 1) + # This match will almost certainly be better than any existing match. + # But check anyway. + if score <= score_threshold: + # Told you so. + score_threshold = score + best_loc = j - 1 + if best_loc > loc: + # When passing loc, don't exceed our current distance from loc. 
+ start = max(1, 2 * loc - best_loc) + else: + # Already passed loc, downhill from here on in. + break + # No hope for a (better) match at greater error levels. + if match_bitapScore(d + 1, loc) > score_threshold: + break + last_rd = rd + return best_loc + + def match_alphabet(self, pattern): + """Initialise the alphabet for the Bitap algorithm. + + Args: + pattern: The text to encode. + + Returns: + Hash of character locations. + """ + s = {} + for char in pattern: + s[char] = 0 + for i in xrange(len(pattern)): + s[pattern[i]] |= 1 << (len(pattern) - i - 1) + return s + + # PATCH FUNCTIONS + + def patch_addContext(self, patch, text): + """Increase the context until it is unique, + but don't let the pattern expand beyond Match_MaxBits. + + Args: + patch: The patch to grow. + text: Source text. + """ + if len(text) == 0: + return + pattern = text[patch.start2 : patch.start2 + patch.length1] + padding = 0 + + # Look for the first and last matches of pattern in text. If two different + # matches are found, increase the pattern length. + while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == + 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - + self.Patch_Margin)): + padding += self.Patch_Margin + pattern = text[max(0, patch.start2 - padding) : + patch.start2 + patch.length1 + padding] + # Add one chunk for good luck. + padding += self.Patch_Margin + + # Add the prefix. + prefix = text[max(0, patch.start2 - padding) : patch.start2] + if prefix: + patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] + # Add the suffix. + suffix = text[patch.start2 + patch.length1 : + patch.start2 + patch.length1 + padding] + if suffix: + patch.diffs.append((self.DIFF_EQUAL, suffix)) + + # Roll back the start points. + patch.start1 -= len(prefix) + patch.start2 -= len(prefix) + # Extend lengths. 
+ patch.length1 += len(prefix) + len(suffix) + patch.length2 += len(prefix) + len(suffix) + + def patch_make(self, a, b=None, c=None): + """Compute a list of patches to turn text1 into text2. + Use diffs if provided, otherwise compute it ourselves. + There are four ways to call this function, depending on what data is + available to the caller: + Method 1: + a = text1, b = text2 + Method 2: + a = diffs + Method 3 (optimal): + a = text1, b = diffs + Method 4 (deprecated, use method 3): + a = text1, b = text2, c = diffs + + Args: + a: text1 (methods 1,3,4) or Array of diff tuples for text1 to + text2 (method 2). + b: text2 (methods 1,4) or Array of diff tuples for text1 to + text2 (method 3) or undefined (method 2). + c: Array of diff tuples for text1 to text2 (method 4) or + undefined (methods 1,2,3). + + Returns: + Array of Patch objects. + """ + text1 = None + diffs = None + # Note that texts may arrive as 'str' or 'unicode'. + if isinstance(a, basestring) and isinstance(b, basestring) and c is None: + # Method 1: text1, text2 + # Compute diffs from text1 and text2. + text1 = a + diffs = self.diff_main(text1, b, True) + if len(diffs) > 2: + self.diff_cleanupSemantic(diffs) + self.diff_cleanupEfficiency(diffs) + elif isinstance(a, list) and b is None and c is None: + # Method 2: diffs + # Compute text1 from diffs. + diffs = a + text1 = self.diff_text1(diffs) + elif isinstance(a, basestring) and isinstance(b, list) and c is None: + # Method 3: text1, diffs + text1 = a + diffs = b + elif (isinstance(a, basestring) and isinstance(b, basestring) and + isinstance(c, list)): + # Method 4: text1, text2, diffs + # text2 is not used. + text1 = a + diffs = c + else: + raise ValueError("Unknown call format to patch_make.") + + if not diffs: + return [] # Get rid of the None case. + patches = [] + patch = patch_obj() + char_count1 = 0 # Number of characters into the text1 string. + char_count2 = 0 # Number of characters into the text2 string. 
+ prepatch_text = text1 # Recreate the patches to determine context info. + postpatch_text = text1 + for x in xrange(len(diffs)): + (diff_type, diff_text) = diffs[x] + if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: + # A new patch starts here. + patch.start1 = char_count1 + patch.start2 = char_count2 + if diff_type == self.DIFF_INSERT: + # Insertion + patch.diffs.append(diffs[x]) + patch.length2 += len(diff_text) + postpatch_text = (postpatch_text[:char_count2] + diff_text + + postpatch_text[char_count2:]) + elif diff_type == self.DIFF_DELETE: + # Deletion. + patch.length1 += len(diff_text) + patch.diffs.append(diffs[x]) + postpatch_text = (postpatch_text[:char_count2] + + postpatch_text[char_count2 + len(diff_text):]) + elif (diff_type == self.DIFF_EQUAL and + len(diff_text) <= 2 * self.Patch_Margin and + len(patch.diffs) != 0 and len(diffs) != x + 1): + # Small equality inside a patch. + patch.diffs.append(diffs[x]) + patch.length1 += len(diff_text) + patch.length2 += len(diff_text) + + if (diff_type == self.DIFF_EQUAL and + len(diff_text) >= 2 * self.Patch_Margin): + # Time for a new patch. + if len(patch.diffs) != 0: + self.patch_addContext(patch, prepatch_text) + patches.append(patch) + patch = patch_obj() + # Unlike Unidiff, our patch lists have a rolling context. + # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + # Update prepatch text & pos to reflect the application of the + # just completed patch. + prepatch_text = postpatch_text + char_count1 = char_count2 + + # Update the current character count. + if diff_type != self.DIFF_INSERT: + char_count1 += len(diff_text) + if diff_type != self.DIFF_DELETE: + char_count2 += len(diff_text) + + # Pick up the leftover patch if not empty. + if len(patch.diffs) != 0: + self.patch_addContext(patch, prepatch_text) + patches.append(patch) + return patches + + def patch_deepCopy(self, patches): + """Given an array of patches, return another array that is identical. 
+ + Args: + patches: Array of Patch objects. + + Returns: + Array of Patch objects. + """ + patchesCopy = [] + for patch in patches: + patchCopy = patch_obj() + # No need to deep copy the tuples since they are immutable. + patchCopy.diffs = patch.diffs[:] + patchCopy.start1 = patch.start1 + patchCopy.start2 = patch.start2 + patchCopy.length1 = patch.length1 + patchCopy.length2 = patch.length2 + patchesCopy.append(patchCopy) + return patchesCopy + + def patch_apply(self, patches, text): + """Merge a set of patches onto the text. Return a patched text, as well + as a list of true/false values indicating which patches were applied. + + Args: + patches: Array of Patch objects. + text: Old text. + + Returns: + Two element Array, containing the new text and an array of boolean values. + """ + if not patches: + return (text, []) + + # Deep copy the patches so that no changes are made to originals. + patches = self.patch_deepCopy(patches) + + nullPadding = self.patch_addPadding(patches) + text = nullPadding + text + nullPadding + self.patch_splitMax(patches) + + # delta keeps track of the offset between the expected and actual location + # of the previous patch. If there are patches expected at positions 10 and + # 20, but the first patch was found at 12, delta is 2 and the second patch + # has an effective expected position of 22. + delta = 0 + results = [] + for patch in patches: + expected_loc = patch.start2 + delta + text1 = self.diff_text1(patch.diffs) + end_loc = -1 + if len(text1) > self.Match_MaxBits: + # patch_splitMax will only provide an oversized pattern in the case of + # a monster delete. + start_loc = self.match_main(text, text1[:self.Match_MaxBits], + expected_loc) + if start_loc != -1: + end_loc = self.match_main(text, text1[-self.Match_MaxBits:], + expected_loc + len(text1) - self.Match_MaxBits) + if end_loc == -1 or start_loc >= end_loc: + # Can't find valid trailing context. Drop this patch. 
+ start_loc = -1 + else: + start_loc = self.match_main(text, text1, expected_loc) + if start_loc == -1: + # No match found. :( + results.append(False) + # Subtract the delta for this failed patch from subsequent patches. + delta -= patch.length2 - patch.length1 + else: + # Found a match. :) + results.append(True) + delta = start_loc - expected_loc + if end_loc == -1: + text2 = text[start_loc : start_loc + len(text1)] + else: + text2 = text[start_loc : end_loc + self.Match_MaxBits] + if text1 == text2: + # Perfect match, just shove the replacement text in. + text = (text[:start_loc] + self.diff_text2(patch.diffs) + + text[start_loc + len(text1):]) + else: + # Imperfect match. + # Run a diff to get a framework of equivalent indices. + diffs = self.diff_main(text1, text2, False) + if (len(text1) > self.Match_MaxBits and + self.diff_levenshtein(diffs) / float(len(text1)) > + self.Patch_DeleteThreshold): + # The end points match, but the content is unacceptably bad. + results[-1] = False + else: + self.diff_cleanupSemanticLossless(diffs) + index1 = 0 + for (op, data) in patch.diffs: + if op != self.DIFF_EQUAL: + index2 = self.diff_xIndex(diffs, index1) + if op == self.DIFF_INSERT: # Insertion + text = text[:start_loc + index2] + data + text[start_loc + + index2:] + elif op == self.DIFF_DELETE: # Deletion + text = text[:start_loc + index2] + text[start_loc + + self.diff_xIndex(diffs, index1 + len(data)):] + if op != self.DIFF_DELETE: + index1 += len(data) + # Strip the padding off. + text = text[len(nullPadding):-len(nullPadding)] + return (text, results) + + def patch_addPadding(self, patches): + """Add some padding on text start and end so that edges can match + something. Intended to be called only from within patch_apply. + + Args: + patches: Array of Patch objects. + + Returns: + The padding string added to each side. 
+ """ + paddingLength = self.Patch_Margin + nullPadding = "" + for x in xrange(1, paddingLength + 1): + nullPadding += chr(x) + + # Bump all the patches forward. + for patch in patches: + patch.start1 += paddingLength + patch.start2 += paddingLength + + # Add some padding on start of first diff. + patch = patches[0] + diffs = patch.diffs + if not diffs or diffs[0][0] != self.DIFF_EQUAL: + # Add nullPadding equality. + diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) + patch.start1 -= paddingLength # Should be 0. + patch.start2 -= paddingLength # Should be 0. + patch.length1 += paddingLength + patch.length2 += paddingLength + elif paddingLength > len(diffs[0][1]): + # Grow first equality. + extraLength = paddingLength - len(diffs[0][1]) + newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] + diffs[0] = (diffs[0][0], newText) + patch.start1 -= extraLength + patch.start2 -= extraLength + patch.length1 += extraLength + patch.length2 += extraLength + + # Add some padding on end of last diff. + patch = patches[-1] + diffs = patch.diffs + if not diffs or diffs[-1][0] != self.DIFF_EQUAL: + # Add nullPadding equality. + diffs.append((self.DIFF_EQUAL, nullPadding)) + patch.length1 += paddingLength + patch.length2 += paddingLength + elif paddingLength > len(diffs[-1][1]): + # Grow last equality. + extraLength = paddingLength - len(diffs[-1][1]) + newText = diffs[-1][1] + nullPadding[:extraLength] + diffs[-1] = (diffs[-1][0], newText) + patch.length1 += extraLength + patch.length2 += extraLength + + return nullPadding + + def patch_splitMax(self, patches): + """Look through the patches and break up any which are longer than the + maximum limit of the match algorithm. + Intended to be called only from within patch_apply. + + Args: + patches: Array of Patch objects. + """ + patch_size = self.Match_MaxBits + if patch_size == 0: + # Python has the option of not splitting strings due to its ability + # to handle integers of arbitrary precision. 
+ return + for x in xrange(len(patches)): + if patches[x].length1 <= patch_size: + continue + bigpatch = patches[x] + # Remove the big old patch. + del patches[x] + x -= 1 + start1 = bigpatch.start1 + start2 = bigpatch.start2 + precontext = '' + while len(bigpatch.diffs) != 0: + # Create one of several smaller patches. + patch = patch_obj() + empty = True + patch.start1 = start1 - len(precontext) + patch.start2 = start2 - len(precontext) + if precontext: + patch.length1 = patch.length2 = len(precontext) + patch.diffs.append((self.DIFF_EQUAL, precontext)) + + while (len(bigpatch.diffs) != 0 and + patch.length1 < patch_size - self.Patch_Margin): + (diff_type, diff_text) = bigpatch.diffs[0] + if diff_type == self.DIFF_INSERT: + # Insertions are harmless. + patch.length2 += len(diff_text) + start2 += len(diff_text) + patch.diffs.append(bigpatch.diffs.pop(0)) + empty = False + elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and + patch.diffs[0][0] == self.DIFF_EQUAL and + len(diff_text) > 2 * patch_size): + # This is a large deletion. Let it pass in one chunk. + patch.length1 += len(diff_text) + start1 += len(diff_text) + empty = False + patch.diffs.append((diff_type, diff_text)) + del bigpatch.diffs[0] + else: + # Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text[:patch_size - patch.length1 - + self.Patch_Margin] + patch.length1 += len(diff_text) + start1 += len(diff_text) + if diff_type == self.DIFF_EQUAL: + patch.length2 += len(diff_text) + start2 += len(diff_text) + else: + empty = False + + patch.diffs.append((diff_type, diff_text)) + if diff_text == bigpatch.diffs[0][1]: + del bigpatch.diffs[0] + else: + bigpatch.diffs[0] = (bigpatch.diffs[0][0], + bigpatch.diffs[0][1][len(diff_text):]) + + # Compute the head context for the next patch. + precontext = self.diff_text2(patch.diffs) + precontext = precontext[-self.Patch_Margin:] + # Append the end context for this patch. 
+ postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] + if postcontext: + patch.length1 += len(postcontext) + patch.length2 += len(postcontext) + if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: + patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + + postcontext) + else: + patch.diffs.append((self.DIFF_EQUAL, postcontext)) + + if not empty: + x += 1 + patches.insert(x, patch) + + def patch_toText(self, patches): + """Take a list of patches and return a textual representation. + + Args: + patches: Array of Patch objects. + + Returns: + Text representation of patches. + """ + text = [] + for patch in patches: + text.append(str(patch)) + return "".join(text) + + def patch_fromText(self, textline): + """Parse a textual representation of patches and return a list of patch + objects. + + Args: + textline: Text representation of patches. + + Returns: + Array of Patch objects. + + Raises: + ValueError: If invalid input. + """ + if type(textline) == unicode: + # Patches should be composed of a subset of ascii chars, Unicode not + # required. If this encode raises UnicodeEncodeError, patch is invalid. 
+ textline = textline.encode("ascii") + patches = [] + if not textline: + return patches + text = textline.split('\n') + while len(text) != 0: + m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) + if not m: + raise ValueError("Invalid patch string: " + text[0]) + patch = patch_obj() + patches.append(patch) + patch.start1 = int(m.group(1)) + if m.group(2) == '': + patch.start1 -= 1 + patch.length1 = 1 + elif m.group(2) == '0': + patch.length1 = 0 + else: + patch.start1 -= 1 + patch.length1 = int(m.group(2)) + + patch.start2 = int(m.group(3)) + if m.group(4) == '': + patch.start2 -= 1 + patch.length2 = 1 + elif m.group(4) == '0': + patch.length2 = 0 + else: + patch.start2 -= 1 + patch.length2 = int(m.group(4)) + + del text[0] + + while len(text) != 0: + if text[0]: + sign = text[0][0] + else: + sign = '' + line = urllib.unquote(text[0][1:]) + line = line.decode("utf-8") + if sign == '+': + # Insertion. + patch.diffs.append((self.DIFF_INSERT, line)) + elif sign == '-': + # Deletion. + patch.diffs.append((self.DIFF_DELETE, line)) + elif sign == ' ': + # Minor equality. + patch.diffs.append((self.DIFF_EQUAL, line)) + elif sign == '@': + # Start of next patch. + break + elif sign == '': + # Blank line? Whatever. + pass + else: + # WTF? + raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) + del text[0] + return patches + + +class patch_obj: + """Class representing one patch operation. + """ + + def __init__(self): + """Initializes with an empty list of diffs. + """ + self.diffs = [] + self.start1 = None + self.start2 = None + self.length1 = 0 + self.length2 = 0 + + def __str__(self): + """Emmulate GNU diff's format. + Header: @@ -382,8 +481,9 @@ + Indicies are printed as 1-based, not 0-based. + + Returns: + The GNU diff string. 
+ """ + if self.length1 == 0: + coords1 = str(self.start1) + ",0" + elif self.length1 == 1: + coords1 = str(self.start1 + 1) + else: + coords1 = str(self.start1 + 1) + "," + str(self.length1) + if self.length2 == 0: + coords2 = str(self.start2) + ",0" + elif self.length2 == 1: + coords2 = str(self.start2 + 1) + else: + coords2 = str(self.start2 + 1) + "," + str(self.length2) + text = ["@@ -", coords1, " +", coords2, " @@\n"] + # Escape the body of the patch with %xx notation. + for (op, data) in self.diffs: + if op == diff_match_patch.DIFF_INSERT: + text.append("+") + elif op == diff_match_patch.DIFF_DELETE: + text.append("-") + elif op == diff_match_patch.DIFF_EQUAL: + text.append(" ") + # High ascii will raise UnicodeDecodeError. Use Unicode instead. + data = data.encode("utf-8") + text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n") + return "".join(text) diff --git a/python2/diff_match_patch_test.py b/python2/diff_match_patch_test.py new file mode 100644 index 0000000..a034272 --- /dev/null +++ b/python2/diff_match_patch_test.py @@ -0,0 +1,868 @@ +#!/usr/bin/python2.4 + +"""Diff Match and Patch -- Test harness +Copyright 2018 The diff-match-patch Authors. +https://github.com/google/diff-match-patch + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import sys +import time +import unittest +import diff_match_patch as dmp_module +# Force a module reload. Allows one to edit the DMP module and rerun the tests +# without leaving the Python interpreter. 
+reload(dmp_module) + +class DiffMatchPatchTest(unittest.TestCase): + + def setUp(self): + "Test harness for dmp_module." + self.dmp = dmp_module.diff_match_patch() + + def diff_rebuildtexts(self, diffs): + # Construct the two texts which made up the diff originally. + text1 = "" + text2 = "" + for x in range(0, len(diffs)): + if diffs[x][0] != dmp_module.diff_match_patch.DIFF_INSERT: + text1 += diffs[x][1] + if diffs[x][0] != dmp_module.diff_match_patch.DIFF_DELETE: + text2 += diffs[x][1] + return (text1, text2) + + +class DiffTest(DiffMatchPatchTest): + """DIFF TEST FUNCTIONS""" + + def testDiffCommonPrefix(self): + # Detect any common prefix. + # Null case. + self.assertEquals(0, self.dmp.diff_commonPrefix("abc", "xyz")) + + # Non-null case. + self.assertEquals(4, self.dmp.diff_commonPrefix("1234abcdef", "1234xyz")) + + # Whole case. + self.assertEquals(4, self.dmp.diff_commonPrefix("1234", "1234xyz")) + + def testDiffCommonSuffix(self): + # Detect any common suffix. + # Null case. + self.assertEquals(0, self.dmp.diff_commonSuffix("abc", "xyz")) + + # Non-null case. + self.assertEquals(4, self.dmp.diff_commonSuffix("abcdef1234", "xyz1234")) + + # Whole case. + self.assertEquals(4, self.dmp.diff_commonSuffix("1234", "xyz1234")) + + def testDiffCommonOverlap(self): + # Null case. + self.assertEquals(0, self.dmp.diff_commonOverlap("", "abcd")) + + # Whole case. + self.assertEquals(3, self.dmp.diff_commonOverlap("abc", "abcd")) + + # No overlap. + self.assertEquals(0, self.dmp.diff_commonOverlap("123456", "abcd")) + + # Overlap. + self.assertEquals(3, self.dmp.diff_commonOverlap("123456xxx", "xxxabcd")) + + # Unicode. + # Some overly clever languages (C#) may treat ligatures as equal to their + # component letters. E.g. U+FB01 == 'fi' + self.assertEquals(0, self.dmp.diff_commonOverlap("fi", u"\ufb01i")) + + def testDiffHalfMatch(self): + # Detect a halfmatch. + self.dmp.Diff_Timeout = 1 + # No match. 
+ self.assertEquals(None, self.dmp.diff_halfMatch("1234567890", "abcdef")) + + self.assertEquals(None, self.dmp.diff_halfMatch("12345", "23")) + + # Single Match. + self.assertEquals(("12", "90", "a", "z", "345678"), self.dmp.diff_halfMatch("1234567890", "a345678z")) + + self.assertEquals(("a", "z", "12", "90", "345678"), self.dmp.diff_halfMatch("a345678z", "1234567890")) + + self.assertEquals(("abc", "z", "1234", "0", "56789"), self.dmp.diff_halfMatch("abc56789z", "1234567890")) + + self.assertEquals(("a", "xyz", "1", "7890", "23456"), self.dmp.diff_halfMatch("a23456xyz", "1234567890")) + + # Multiple Matches. + self.assertEquals(("12123", "123121", "a", "z", "1234123451234"), self.dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")) + + self.assertEquals(("", "-=-=-=-=-=", "x", "", "x-=-=-=-=-=-=-="), self.dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")) + + self.assertEquals(("-=-=-=-=-=", "", "", "y", "-=-=-=-=-=-=-=y"), self.dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")) + + # Non-optimal halfmatch. + # Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + self.assertEquals(("qHillo", "w", "x", "Hulloy", "HelloHe"), self.dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")) + + # Optimal no halfmatch. + self.dmp.Diff_Timeout = 0 + self.assertEquals(None, self.dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")) + + def testDiffLinesToChars(self): + # Convert lines down to characters. + self.assertEquals(("\x01\x02\x01", "\x02\x01\x02", ["", "alpha\n", "beta\n"]), self.dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")) + + self.assertEquals(("", "\x01\x02\x03\x03", ["", "alpha\r\n", "beta\r\n", "\r\n"]), self.dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")) + + self.assertEquals(("\x01", "\x02", ["", "a", "b"]), self.dmp.diff_linesToChars("a", "b")) + + # More than 256 to reveal any 8-bit limitations. 
+ n = 300 + lineList = [] + charList = [] + for x in range(1, n + 1): + lineList.append(str(x) + "\n") + charList.append(unichr(x)) + self.assertEquals(n, len(lineList)) + lines = "".join(lineList) + chars = "".join(charList) + self.assertEquals(n, len(chars)) + lineList.insert(0, "") + self.assertEquals((chars, "", lineList), self.dmp.diff_linesToChars(lines, "")) + + def testDiffCharsToLines(self): + # Convert chars up to lines. + diffs = [(self.dmp.DIFF_EQUAL, "\x01\x02\x01"), (self.dmp.DIFF_INSERT, "\x02\x01\x02")] + self.dmp.diff_charsToLines(diffs, ["", "alpha\n", "beta\n"]) + self.assertEquals([(self.dmp.DIFF_EQUAL, "alpha\nbeta\nalpha\n"), (self.dmp.DIFF_INSERT, "beta\nalpha\nbeta\n")], diffs) + + # More than 256 to reveal any 8-bit limitations. + n = 300 + lineList = [] + charList = [] + for x in range(1, n + 1): + lineList.append(str(x) + "\n") + charList.append(unichr(x)) + self.assertEquals(n, len(lineList)) + lines = "".join(lineList) + chars = "".join(charList) + self.assertEquals(n, len(chars)) + lineList.insert(0, "") + diffs = [(self.dmp.DIFF_DELETE, chars)] + self.dmp.diff_charsToLines(diffs, lineList) + self.assertEquals([(self.dmp.DIFF_DELETE, lines)], diffs) + + def testDiffCleanupMerge(self): + # Cleanup a messy diff. + # Null case. + diffs = [] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([], diffs) + + # No change case. + diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_INSERT, "c")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_INSERT, "c")], diffs) + + # Merge equalities. + diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_EQUAL, "b"), (self.dmp.DIFF_EQUAL, "c")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "abc")], diffs) + + # Merge deletions. 
+ diffs = [(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_DELETE, "c")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abc")], diffs) + + # Merge insertions. + diffs = [(self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_INSERT, "b"), (self.dmp.DIFF_INSERT, "c")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_INSERT, "abc")], diffs) + + # Merge interweave. + diffs = [(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, "b"), (self.dmp.DIFF_DELETE, "c"), (self.dmp.DIFF_INSERT, "d"), (self.dmp.DIFF_EQUAL, "e"), (self.dmp.DIFF_EQUAL, "f")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "ac"), (self.dmp.DIFF_INSERT, "bd"), (self.dmp.DIFF_EQUAL, "ef")], diffs) + + # Prefix and suffix detection. + diffs = [(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, "abc"), (self.dmp.DIFF_DELETE, "dc")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "d"), (self.dmp.DIFF_INSERT, "b"), (self.dmp.DIFF_EQUAL, "c")], diffs) + + # Prefix and suffix detection with equalities. + diffs = [(self.dmp.DIFF_EQUAL, "x"), (self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, "abc"), (self.dmp.DIFF_DELETE, "dc"), (self.dmp.DIFF_EQUAL, "y")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "xa"), (self.dmp.DIFF_DELETE, "d"), (self.dmp.DIFF_INSERT, "b"), (self.dmp.DIFF_EQUAL, "cy")], diffs) + + # Slide edit left. + diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_INSERT, "ba"), (self.dmp.DIFF_EQUAL, "c")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_INSERT, "ab"), (self.dmp.DIFF_EQUAL, "ac")], diffs) + + # Slide edit right. + diffs = [(self.dmp.DIFF_EQUAL, "c"), (self.dmp.DIFF_INSERT, "ab"), (self.dmp.DIFF_EQUAL, "a")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "ca"), (self.dmp.DIFF_INSERT, "ba")], diffs) + + # Slide edit left recursive. 
+ diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_EQUAL, "c"), (self.dmp.DIFF_DELETE, "ac"), (self.dmp.DIFF_EQUAL, "x")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_EQUAL, "acx")], diffs) + + # Slide edit right recursive. + diffs = [(self.dmp.DIFF_EQUAL, "x"), (self.dmp.DIFF_DELETE, "ca"), (self.dmp.DIFF_EQUAL, "c"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_EQUAL, "a")] + self.dmp.diff_cleanupMerge(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "xca"), (self.dmp.DIFF_DELETE, "cba")], diffs) + + def testDiffCleanupSemanticLossless(self): + # Slide diffs to match logical boundaries. + # Null case. + diffs = [] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([], diffs) + + # Blank lines. + diffs = [(self.dmp.DIFF_EQUAL, "AAA\r\n\r\nBBB"), (self.dmp.DIFF_INSERT, "\r\nDDD\r\n\r\nBBB"), (self.dmp.DIFF_EQUAL, "\r\nEEE")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "AAA\r\n\r\n"), (self.dmp.DIFF_INSERT, "BBB\r\nDDD\r\n\r\n"), (self.dmp.DIFF_EQUAL, "BBB\r\nEEE")], diffs) + + # Line boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "AAA\r\nBBB"), (self.dmp.DIFF_INSERT, " DDD\r\nBBB"), (self.dmp.DIFF_EQUAL, " EEE")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "AAA\r\n"), (self.dmp.DIFF_INSERT, "BBB DDD\r\n"), (self.dmp.DIFF_EQUAL, "BBB EEE")], diffs) + + # Word boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The c"), (self.dmp.DIFF_INSERT, "ow and the c"), (self.dmp.DIFF_EQUAL, "at.")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "The "), (self.dmp.DIFF_INSERT, "cow and the "), (self.dmp.DIFF_EQUAL, "cat.")], diffs) + + # Alphanumeric boundaries. 
+ diffs = [(self.dmp.DIFF_EQUAL, "The-c"), (self.dmp.DIFF_INSERT, "ow-and-the-c"), (self.dmp.DIFF_EQUAL, "at.")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "The-"), (self.dmp.DIFF_INSERT, "cow-and-the-"), (self.dmp.DIFF_EQUAL, "cat.")], diffs) + + # Hitting the start. + diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "ax")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "aax")], diffs) + + # Hitting the end. + diffs = [(self.dmp.DIFF_EQUAL, "xa"), (self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "a")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "xaa"), (self.dmp.DIFF_DELETE, "a")], diffs) + + # Sentence boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The xxx. The "), (self.dmp.DIFF_INSERT, "zzz. The "), (self.dmp.DIFF_EQUAL, "yyy.")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "The xxx."), (self.dmp.DIFF_INSERT, " The zzz."), (self.dmp.DIFF_EQUAL, " The yyy.")], diffs) + + def testDiffCleanupSemantic(self): + # Cleanup semantically trivial equalities. + # Null case. + diffs = [] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([], diffs) + + # No elimination #1. + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "cd"), (self.dmp.DIFF_EQUAL, "12"), (self.dmp.DIFF_DELETE, "e")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "cd"), (self.dmp.DIFF_EQUAL, "12"), (self.dmp.DIFF_DELETE, "e")], diffs) + + # No elimination #2. 
+ diffs = [(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_INSERT, "ABC"), (self.dmp.DIFF_EQUAL, "1234"), (self.dmp.DIFF_DELETE, "wxyz")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_INSERT, "ABC"), (self.dmp.DIFF_EQUAL, "1234"), (self.dmp.DIFF_DELETE, "wxyz")], diffs) + + # Simple elimination. + diffs = [(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "b"), (self.dmp.DIFF_DELETE, "c")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_INSERT, "b")], diffs) + + # Backpass elimination. + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_EQUAL, "cd"), (self.dmp.DIFF_DELETE, "e"), (self.dmp.DIFF_EQUAL, "f"), (self.dmp.DIFF_INSERT, "g")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abcdef"), (self.dmp.DIFF_INSERT, "cdfg")], diffs) + + # Multiple eliminations. + diffs = [(self.dmp.DIFF_INSERT, "1"), (self.dmp.DIFF_EQUAL, "A"), (self.dmp.DIFF_DELETE, "B"), (self.dmp.DIFF_INSERT, "2"), (self.dmp.DIFF_EQUAL, "_"), (self.dmp.DIFF_INSERT, "1"), (self.dmp.DIFF_EQUAL, "A"), (self.dmp.DIFF_DELETE, "B"), (self.dmp.DIFF_INSERT, "2")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "AB_AB"), (self.dmp.DIFF_INSERT, "1A2_1A2")], diffs) + + # Word boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The c"), (self.dmp.DIFF_DELETE, "ow and the c"), (self.dmp.DIFF_EQUAL, "at.")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_EQUAL, "The "), (self.dmp.DIFF_DELETE, "cow and the "), (self.dmp.DIFF_EQUAL, "cat.")], diffs) + + # No overlap elimination. + diffs = [(self.dmp.DIFF_DELETE, "abcxx"), (self.dmp.DIFF_INSERT, "xxdef")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abcxx"), (self.dmp.DIFF_INSERT, "xxdef")], diffs) + + # Overlap elimination. 
+ diffs = [(self.dmp.DIFF_DELETE, "abcxxx"), (self.dmp.DIFF_INSERT, "xxxdef")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_INSERT, "def")], diffs) + + # Reverse overlap elimination. + diffs = [(self.dmp.DIFF_DELETE, "xxxabc"), (self.dmp.DIFF_INSERT, "defxxx")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_INSERT, "def"), (self.dmp.DIFF_EQUAL, "xxx"), (self.dmp.DIFF_DELETE, "abc")], diffs) + + # Two overlap eliminations. + diffs = [(self.dmp.DIFF_DELETE, "abcd1212"), (self.dmp.DIFF_INSERT, "1212efghi"), (self.dmp.DIFF_EQUAL, "----"), (self.dmp.DIFF_DELETE, "A3"), (self.dmp.DIFF_INSERT, "3BC")] + self.dmp.diff_cleanupSemantic(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abcd"), (self.dmp.DIFF_EQUAL, "1212"), (self.dmp.DIFF_INSERT, "efghi"), (self.dmp.DIFF_EQUAL, "----"), (self.dmp.DIFF_DELETE, "A"), (self.dmp.DIFF_EQUAL, "3"), (self.dmp.DIFF_INSERT, "BC")], diffs) + + def testDiffCleanupEfficiency(self): + # Cleanup operationally trivial equalities. + self.dmp.Diff_EditCost = 4 + # Null case. + diffs = [] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([], diffs) + + # No elimination. + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "wxyz"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "wxyz"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")], diffs) + + # Four-edit elimination. + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "xyz"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abxyzcd"), (self.dmp.DIFF_INSERT, "12xyz34")], diffs) + + # Three-edit elimination. 
+ diffs = [(self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "x"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "xcd"), (self.dmp.DIFF_INSERT, "12x34")], diffs) + + # Backpass elimination. + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "xy"), (self.dmp.DIFF_INSERT, "34"), (self.dmp.DIFF_EQUAL, "z"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "56")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abxyzcd"), (self.dmp.DIFF_INSERT, "12xy34z56")], diffs) + + # High cost elimination. + self.dmp.Diff_EditCost = 5 + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "wxyz"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEquals([(self.dmp.DIFF_DELETE, "abwxyzcd"), (self.dmp.DIFF_INSERT, "12wxyz34")], diffs) + self.dmp.Diff_EditCost = 4 + + def testDiffPrettyHtml(self): + # Pretty print. + diffs = [(self.dmp.DIFF_EQUAL, "a\n"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_INSERT, "c&d")] + self.assertEquals("
    <B>b</B>c&d", self.dmp.diff_prettyHtml(diffs)) + + def testDiffText(self): + # Compute the source and destination texts. + diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, " lazy")] + self.assertEquals("jumps over the lazy", self.dmp.diff_text1(diffs)) + + self.assertEquals("jumped over a lazy", self.dmp.diff_text2(diffs)) + + def testDiffDelta(self): + # Convert a diff into delta string. + diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, " lazy"), (self.dmp.DIFF_INSERT, "old dog")] + text1 = self.dmp.diff_text1(diffs) + self.assertEquals("jumps over the lazy", text1) + + delta = self.dmp.diff_toDelta(diffs) + self.assertEquals("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta) + + # Convert delta string into a diff. + self.assertEquals(diffs, self.dmp.diff_fromDelta(text1, delta)) + + # Generates error (19 != 20). + try: + self.dmp.diff_fromDelta(text1 + "x", delta) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + # Generates error (19 != 18). + try: + self.dmp.diff_fromDelta(text1[1:], delta) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + # Generates error (%c3%xy invalid Unicode). + try: + self.dmp.diff_fromDelta("", "+%c3xy") + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + # Test deltas with special characters. 
+ diffs = [(self.dmp.DIFF_EQUAL, u"\u0680 \x00 \t %"), (self.dmp.DIFF_DELETE, u"\u0681 \x01 \n ^"), (self.dmp.DIFF_INSERT, u"\u0682 \x02 \\ |")] + text1 = self.dmp.diff_text1(diffs) + self.assertEquals(u"\u0680 \x00 \t %\u0681 \x01 \n ^", text1) + + delta = self.dmp.diff_toDelta(diffs) + self.assertEquals("=7\t-7\t+%DA%82 %02 %5C %7C", delta) + + # Convert delta string into a diff. + self.assertEquals(diffs, self.dmp.diff_fromDelta(text1, delta)) + + # Verify pool of unchanged characters. + diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")] + text2 = self.dmp.diff_text2(diffs) + self.assertEquals("A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2) + + delta = self.dmp.diff_toDelta(diffs) + self.assertEquals("+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta) + + # Convert delta string into a diff. + self.assertEquals(diffs, self.dmp.diff_fromDelta("", delta)) + + def testDiffXIndex(self): + # Translate a location in text1 to text2. + self.assertEquals(5, self.dmp.diff_xIndex([(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, "1234"), (self.dmp.DIFF_EQUAL, "xyz")], 2)) + + # Translation on deletion. + self.assertEquals(1, self.dmp.diff_xIndex([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "1234"), (self.dmp.DIFF_EQUAL, "xyz")], 3)) + + def testDiffLevenshtein(self): + # Levenshtein with trailing equality. + self.assertEquals(4, self.dmp.diff_levenshtein([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_INSERT, "1234"), (self.dmp.DIFF_EQUAL, "xyz")])) + # Levenshtein with leading equality. + self.assertEquals(4, self.dmp.diff_levenshtein([(self.dmp.DIFF_EQUAL, "xyz"), (self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_INSERT, "1234")])) + # Levenshtein with middle equality. + self.assertEquals(7, self.dmp.diff_levenshtein([(self.dmp.DIFF_DELETE, "abc"), (self.dmp.DIFF_EQUAL, "xyz"), (self.dmp.DIFF_INSERT, "1234")])) + + def testDiffBisect(self): + # Normal. 
+ a = "cat" + b = "map" + # Since the resulting diff hasn't been normalized, it would be ok if + # the insertion and deletion pairs are swapped. + # If the order changes, tweak this test as required. + self.assertEquals([(self.dmp.DIFF_DELETE, "c"), (self.dmp.DIFF_INSERT, "m"), (self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "t"), (self.dmp.DIFF_INSERT, "p")], self.dmp.diff_bisect(a, b, sys.maxint)) + + # Timeout. + self.assertEquals([(self.dmp.DIFF_DELETE, "cat"), (self.dmp.DIFF_INSERT, "map")], self.dmp.diff_bisect(a, b, 0)) + + def testDiffMain(self): + # Perform a trivial diff. + # Null case. + self.assertEquals([], self.dmp.diff_main("", "", False)) + + # Equality. + self.assertEquals([(self.dmp.DIFF_EQUAL, "abc")], self.dmp.diff_main("abc", "abc", False)) + + # Simple insertion. + self.assertEquals([(self.dmp.DIFF_EQUAL, "ab"), (self.dmp.DIFF_INSERT, "123"), (self.dmp.DIFF_EQUAL, "c")], self.dmp.diff_main("abc", "ab123c", False)) + + # Simple deletion. + self.assertEquals([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "123"), (self.dmp.DIFF_EQUAL, "bc")], self.dmp.diff_main("a123bc", "abc", False)) + + # Two insertions. + self.assertEquals([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_INSERT, "123"), (self.dmp.DIFF_EQUAL, "b"), (self.dmp.DIFF_INSERT, "456"), (self.dmp.DIFF_EQUAL, "c")], self.dmp.diff_main("abc", "a123b456c", False)) + + # Two deletions. + self.assertEquals([(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "123"), (self.dmp.DIFF_EQUAL, "b"), (self.dmp.DIFF_DELETE, "456"), (self.dmp.DIFF_EQUAL, "c")], self.dmp.diff_main("a123b456c", "abc", False)) + + # Perform a real diff. + # Switch off the timeout. + self.dmp.Diff_Timeout = 0 + # Simple cases. 
+ self.assertEquals([(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, "b")], self.dmp.diff_main("a", "b", False)) + + self.assertEquals([(self.dmp.DIFF_DELETE, "Apple"), (self.dmp.DIFF_INSERT, "Banana"), (self.dmp.DIFF_EQUAL, "s are a"), (self.dmp.DIFF_INSERT, "lso"), (self.dmp.DIFF_EQUAL, " fruit.")], self.dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", False)) + + self.assertEquals([(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_INSERT, u"\u0680"), (self.dmp.DIFF_EQUAL, "x"), (self.dmp.DIFF_DELETE, "\t"), (self.dmp.DIFF_INSERT, "\x00")], self.dmp.diff_main("ax\t", u"\u0680x\x00", False)) + + # Overlaps. + self.assertEquals([(self.dmp.DIFF_DELETE, "1"), (self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "y"), (self.dmp.DIFF_EQUAL, "b"), (self.dmp.DIFF_DELETE, "2"), (self.dmp.DIFF_INSERT, "xab")], self.dmp.diff_main("1ayb2", "abxab", False)) + + self.assertEquals([(self.dmp.DIFF_INSERT, "xaxcx"), (self.dmp.DIFF_EQUAL, "abc"), (self.dmp.DIFF_DELETE, "y")], self.dmp.diff_main("abcy", "xaxcxabc", False)) + + self.assertEquals([(self.dmp.DIFF_DELETE, "ABCD"), (self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "="), (self.dmp.DIFF_INSERT, "-"), (self.dmp.DIFF_EQUAL, "bcd"), (self.dmp.DIFF_DELETE, "="), (self.dmp.DIFF_INSERT, "-"), (self.dmp.DIFF_EQUAL, "efghijklmnopqrs"), (self.dmp.DIFF_DELETE, "EFGHIJKLMNOefg")], self.dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", False)) + + # Large equality. + self.assertEquals([(self.dmp.DIFF_INSERT, " "), (self.dmp.DIFF_EQUAL,"a"), (self.dmp.DIFF_INSERT,"nd"), (self.dmp.DIFF_EQUAL," [[Pennsylvania]]"), (self.dmp.DIFF_DELETE," and [[New")], self.dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", False)) + + # Timeout. 
+ self.dmp.Diff_Timeout = 0.1 # 100ms + a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n" + b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n" + # Increase the text lengths by 1024 times to ensure a timeout. + for x in range(10): + a = a + a + b = b + b + startTime = time.time() + self.dmp.diff_main(a, b) + endTime = time.time() + # Test that we took at least the timeout period. + self.assertTrue(self.dmp.Diff_Timeout <= endTime - startTime) + # Test that we didn't take forever (be forgiving). + # Theoretically this test could fail very occasionally if the + # OS task swaps or locks up for a second at the wrong moment. + self.assertTrue(self.dmp.Diff_Timeout * 2 > endTime - startTime) + self.dmp.Diff_Timeout = 0 + + # Test the linemode speedup. + # Must be long to pass the 100 char cutoff. + # Simple line-mode. + a = "1234567890\n" * 13 + b = "abcdefghij\n" * 13 + self.assertEquals(self.dmp.diff_main(a, b, False), self.dmp.diff_main(a, b, True)) + + # Single line-mode. + a = "1234567890" * 13 + b = "abcdefghij" * 13 + self.assertEquals(self.dmp.diff_main(a, b, False), self.dmp.diff_main(a, b, True)) + + # Overlap line-mode. + a = "1234567890\n" * 13 + b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n" + texts_linemode = self.diff_rebuildtexts(self.dmp.diff_main(a, b, True)) + texts_textmode = self.diff_rebuildtexts(self.dmp.diff_main(a, b, False)) + self.assertEquals(texts_textmode, texts_linemode) + + # Test null inputs. + try: + self.dmp.diff_main(None, None) + self.assertFalse(True) + except ValueError: + # Exception expected. 
+ pass + + +class MatchTest(DiffMatchPatchTest): + """MATCH TEST FUNCTIONS""" + + def testMatchAlphabet(self): + # Initialise the bitmasks for Bitap. + self.assertEquals({"a":4, "b":2, "c":1}, self.dmp.match_alphabet("abc")) + + self.assertEquals({"a":37, "b":18, "c":8}, self.dmp.match_alphabet("abcaba")) + + def testMatchBitap(self): + self.dmp.Match_Distance = 100 + self.dmp.Match_Threshold = 0.5 + # Exact matches. + self.assertEquals(5, self.dmp.match_bitap("abcdefghijk", "fgh", 5)) + + self.assertEquals(5, self.dmp.match_bitap("abcdefghijk", "fgh", 0)) + + # Fuzzy matches. + self.assertEquals(4, self.dmp.match_bitap("abcdefghijk", "efxhi", 0)) + + self.assertEquals(2, self.dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)) + + self.assertEquals(-1, self.dmp.match_bitap("abcdefghijk", "bxy", 1)) + + # Overflow. + self.assertEquals(2, self.dmp.match_bitap("123456789xx0", "3456789x0", 2)) + + self.assertEquals(0, self.dmp.match_bitap("abcdef", "xxabc", 4)) + + self.assertEquals(3, self.dmp.match_bitap("abcdef", "defyy", 4)) + + self.assertEquals(0, self.dmp.match_bitap("abcdef", "xabcdefy", 0)) + + # Threshold test. + self.dmp.Match_Threshold = 0.4 + self.assertEquals(4, self.dmp.match_bitap("abcdefghijk", "efxyhi", 1)) + + self.dmp.Match_Threshold = 0.3 + self.assertEquals(-1, self.dmp.match_bitap("abcdefghijk", "efxyhi", 1)) + + self.dmp.Match_Threshold = 0.0 + self.assertEquals(1, self.dmp.match_bitap("abcdefghijk", "bcdef", 1)) + self.dmp.Match_Threshold = 0.5 + + # Multiple select. + self.assertEquals(0, self.dmp.match_bitap("abcdexyzabcde", "abccde", 3)) + + self.assertEquals(8, self.dmp.match_bitap("abcdexyzabcde", "abccde", 5)) + + # Distance test. + self.dmp.Match_Distance = 10 # Strict location. + self.assertEquals(-1, self.dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)) + + self.assertEquals(0, self.dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)) + + self.dmp.Match_Distance = 1000 # Loose location. 
+ self.assertEquals(0, self.dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)) + + + def testMatchMain(self): + # Full match. + # Shortcut matches. + self.assertEquals(0, self.dmp.match_main("abcdef", "abcdef", 1000)) + + self.assertEquals(-1, self.dmp.match_main("", "abcdef", 1)) + + self.assertEquals(3, self.dmp.match_main("abcdef", "", 3)) + + self.assertEquals(3, self.dmp.match_main("abcdef", "de", 3)) + + self.assertEquals(3, self.dmp.match_main("abcdef", "defy", 4)) + + self.assertEquals(0, self.dmp.match_main("abcdef", "abcdefy", 0)) + + # Complex match. + self.dmp.Match_Threshold = 0.7 + self.assertEquals(4, self.dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)) + self.dmp.Match_Threshold = 0.5 + + # Test null inputs. + try: + self.dmp.match_main(None, None, 0) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + +class PatchTest(DiffMatchPatchTest): + """PATCH TEST FUNCTIONS""" + + def testPatchObj(self): + # Patch Object. 
+ p = dmp_module.patch_obj() + p.start1 = 20 + p.start2 = 21 + p.length1 = 18 + p.length2 = 17 + p.diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, "\nlaz")] + strp = str(p) + self.assertEquals("@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n", strp) + + def testPatchFromText(self): + self.assertEquals([], self.dmp.patch_fromText("")) + + strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n" + self.assertEquals(strp, str(self.dmp.patch_fromText(strp)[0])) + + self.assertEquals("@@ -1 +1 @@\n-a\n+b\n", str(self.dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n")[0])) + + self.assertEquals("@@ -1,3 +0,0 @@\n-abc\n", str(self.dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n")[0])) + + self.assertEquals("@@ -0,0 +1,3 @@\n+abc\n", str(self.dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n")[0])) + + # Generates error. + try: + self.dmp.patch_fromText("Bad\nPatch\n") + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + def testPatchToText(self): + strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n" + p = self.dmp.patch_fromText(strp) + self.assertEquals(strp, self.dmp.patch_toText(p)) + + strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n" + p = self.dmp.patch_fromText(strp) + self.assertEquals(strp, self.dmp.patch_toText(p)) + + def testPatchAddContext(self): + self.dmp.Patch_Margin = 4 + p = self.dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0] + self.dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog.") + self.assertEquals("@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", str(p)) + + # Same, but not enough trailing context. 
+ p = self.dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0] + self.dmp.patch_addContext(p, "The quick brown fox jumps.") + self.assertEquals("@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", str(p)) + + # Same, but not enough leading context. + p = self.dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0] + self.dmp.patch_addContext(p, "The quick brown fox jumps.") + self.assertEquals("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", str(p)) + + # Same, but with ambiguity. + p = self.dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0] + self.dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes.") + self.assertEquals("@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", str(p)) + + def testPatchMake(self): + # Null case. + patches = self.dmp.patch_make("", "") + self.assertEquals("", self.dmp.patch_toText(patches)) + + text1 = "The quick brown fox jumps over the lazy dog." + text2 = "That quick brown fox jumped over a lazy dog." + # Text2+Text1 inputs. + expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n" + # The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = self.dmp.patch_make(text2, text1) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Text1+Text2 inputs. + expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n" + patches = self.dmp.patch_make(text1, text2) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Diff input. + diffs = self.dmp.diff_main(text1, text2, False) + patches = self.dmp.patch_make(diffs) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Text1+Diff inputs. + patches = self.dmp.patch_make(text1, diffs) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Text1+Text2+Diff inputs (deprecated). 
+ patches = self.dmp.patch_make(text1, text2, diffs) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Character encoding. + patches = self.dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?") + self.assertEquals("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", self.dmp.patch_toText(patches)) + + # Character decoding. + diffs = [(self.dmp.DIFF_DELETE, "`1234567890-=[]\\;',./"), (self.dmp.DIFF_INSERT, "~!@#$%^&*()_+{}|:\"<>?")] + self.assertEquals(diffs, self.dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0].diffs) + + # Long string with repeats. + text1 = "" + for x in range(100): + text1 += "abcdef" + text2 = text1 + "123" + expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n" + patches = self.dmp.patch_make(text1, text2) + self.assertEquals(expectedPatch, self.dmp.patch_toText(patches)) + + # Test null inputs. + try: + self.dmp.patch_make(None, None) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + def testPatchSplitMax(self): + # Assumes that Match_MaxBits is 32. 
+ patches = self.dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0") + self.dmp.patch_splitMax(patches) + self.assertEquals("@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", self.dmp.patch_toText(patches)) + + patches = self.dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz") + oldToText = self.dmp.patch_toText(patches) + self.dmp.patch_splitMax(patches) + self.assertEquals(oldToText, self.dmp.patch_toText(patches)) + + patches = self.dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc") + self.dmp.patch_splitMax(patches) + self.assertEquals("@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", self.dmp.patch_toText(patches)) + + patches = self.dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1") + self.dmp.patch_splitMax(patches) + self.assertEquals("@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", self.dmp.patch_toText(patches)) + + def testPatchAddPadding(self): + # Both edges full. + patches = self.dmp.patch_make("", "test") + self.assertEquals("@@ -0,0 +1,4 @@\n+test\n", self.dmp.patch_toText(patches)) + self.dmp.patch_addPadding(patches) + self.assertEquals("@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", self.dmp.patch_toText(patches)) + + # Both edges partial. 
+ patches = self.dmp.patch_make("XY", "XtestY") + self.assertEquals("@@ -1,2 +1,6 @@\n X\n+test\n Y\n", self.dmp.patch_toText(patches)) + self.dmp.patch_addPadding(patches) + self.assertEquals("@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", self.dmp.patch_toText(patches)) + + # Both edges none. + patches = self.dmp.patch_make("XXXXYYYY", "XXXXtestYYYY") + self.assertEquals("@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", self.dmp.patch_toText(patches)) + self.dmp.patch_addPadding(patches) + self.assertEquals("@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", self.dmp.patch_toText(patches)) + + def testPatchApply(self): + self.dmp.Match_Distance = 1000 + self.dmp.Match_Threshold = 0.5 + self.dmp.Patch_DeleteThreshold = 0.5 + # Null case. + patches = self.dmp.patch_make("", "") + results = self.dmp.patch_apply(patches, "Hello world.") + self.assertEquals(("Hello world.", []), results) + + # Exact match. + patches = self.dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.") + results = self.dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog.") + self.assertEquals(("That quick brown fox jumped over a lazy dog.", [True, True]), results) + + # Partial match. + results = self.dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger.") + self.assertEquals(("That quick red rabbit jumped over a tired tiger.", [True, True]), results) + + # Failed match. + results = self.dmp.patch_apply(patches, "I am the very model of a modern major general.") + self.assertEquals(("I am the very model of a modern major general.", [False, False]), results) + + # Big delete, small change. 
+ patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy") + results = self.dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y") + self.assertEquals(("xabcy", [True, True]), results) + + # Big delete, big change 1. + patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy") + results = self.dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y") + self.assertEquals(("xabc12345678901234567890---------------++++++++++---------------12345678901234567890y", [False, True]), results) + + # Big delete, big change 2. + self.dmp.Patch_DeleteThreshold = 0.6 + patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy") + results = self.dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y") + self.assertEquals(("xabcy", [True, True]), results) + self.dmp.Patch_DeleteThreshold = 0.5 + + # Compensate for failed patch. + self.dmp.Match_Threshold = 0.0 + self.dmp.Match_Distance = 0 + patches = self.dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890") + results = self.dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890") + self.assertEquals(("ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890", [False, True]), results) + self.dmp.Match_Threshold = 0.5 + self.dmp.Match_Distance = 1000 + + # No side effects. + patches = self.dmp.patch_make("", "test") + patchstr = self.dmp.patch_toText(patches) + results = self.dmp.patch_apply(patches, "") + self.assertEquals(patchstr, self.dmp.patch_toText(patches)) + + # No side effects with major delete. 
+ patches = self.dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof") + patchstr = self.dmp.patch_toText(patches) + self.dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog.") + self.assertEquals(patchstr, self.dmp.patch_toText(patches)) + + # Edge exact match. + patches = self.dmp.patch_make("", "test") + self.dmp.patch_apply(patches, "") + self.assertEquals(("test", [True]), results) + + # Near edge exact match. + patches = self.dmp.patch_make("XY", "XtestY") + results = self.dmp.patch_apply(patches, "XY") + self.assertEquals(("XtestY", [True]), results) + + # Edge partial match. + patches = self.dmp.patch_make("y", "y123") + results = self.dmp.patch_apply(patches, "x") + self.assertEquals(("x123", [True]), results) + + +if __name__ == "__main__": + unittest.main() diff --git a/python3/__init__.py b/python3/__init__.py new file mode 100644 index 0000000..bd6e8b6 --- /dev/null +++ b/python3/__init__.py @@ -0,0 +1,2 @@ +from .diff_match_patch import diff_match_patch, patch_obj + diff --git a/python3/diff_match_patch.py b/python3/diff_match_patch.py new file mode 100644 index 0000000..0a9b5e8 --- /dev/null +++ b/python3/diff_match_patch.py @@ -0,0 +1,1906 @@ +#!/usr/bin/python3 + +"""Diff Match and Patch +Copyright 2018 The diff-match-patch Authors. +https://github.com/google/diff-match-patch + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +"""Functions for diff, match and patch. + +Computes the difference between two texts to create a patch. 
+Applies the patch onto another text, allowing for errors. +""" + +__author__ = 'fraser@google.com (Neil Fraser)' + +import math +import re +import sys +import time +import urllib.parse + +class diff_match_patch: + """Class containing the diff, match and patch methods. + + Also contains the behaviour settings. + """ + + def __init__(self): + """Inits a diff_match_patch object with default settings. + Redefine these in your program to override the defaults. + """ + + # Number of seconds to map a diff before giving up (0 for infinity). + self.Diff_Timeout = 1.0 + # Cost of an empty edit operation in terms of edit characters. + self.Diff_EditCost = 4 + # At what point is no match declared (0.0 = perfection, 1.0 = very loose). + self.Match_Threshold = 0.5 + # How far to search for a match (0 = exact location, 1000+ = broad match). + # A match this many characters away from the expected location will add + # 1.0 to the score (0.0 is a perfect match). + self.Match_Distance = 1000 + # When deleting a large block of text (over ~64 characters), how close do + # the contents have to be to match the expected contents. (0.0 = perfection, + # 1.0 = very loose). Note that Match_Threshold controls how closely the + # end points of a delete need to match. + self.Patch_DeleteThreshold = 0.5 + # Chunk size for context length. + self.Patch_Margin = 4 + + # The number of bits in an int. + # Python has no maximum, thus to disable patch splitting set to 0. + # However to avoid long patches in certain pathological cases, use 32. + # Multiple short patches (using native ints) are much faster than long ones. + self.Match_MaxBits = 32 + + # DIFF FUNCTIONS + + # The data structure representing a diff is an array of tuples: + # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] + # which means: delete "Hello", add "Goodbye" and keep " world." 
+ DIFF_DELETE = -1 + DIFF_INSERT = 1 + DIFF_EQUAL = 0 + + def diff_main(self, text1, text2, checklines=True, deadline=None): + """Find the differences between two texts. Simplifies the problem by + stripping any common prefix or suffix off the texts before diffing. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Optional speedup flag. If present and false, then don't run + a line-level diff first to identify the changed areas. + Defaults to true, which does a faster, slightly less optimal diff. + deadline: Optional time when the diff should be complete by. Used + internally for recursive calls. Users should set DiffTimeout instead. + + Returns: + Array of changes. + """ + # Set a deadline by which time the diff must be complete. + if deadline == None: + # Unlike in most languages, Python counts time in seconds. + if self.Diff_Timeout <= 0: + deadline = sys.maxsize + else: + deadline = time.time() + self.Diff_Timeout + + # Check for null inputs. + if text1 == None or text2 == None: + raise ValueError("Null inputs. (diff_main)") + + # Check for equality (speedup). + if text1 == text2: + if text1: + return [(self.DIFF_EQUAL, text1)] + return [] + + # Trim off common prefix (speedup). + commonlength = self.diff_commonPrefix(text1, text2) + commonprefix = text1[:commonlength] + text1 = text1[commonlength:] + text2 = text2[commonlength:] + + # Trim off common suffix (speedup). + commonlength = self.diff_commonSuffix(text1, text2) + if commonlength == 0: + commonsuffix = '' + else: + commonsuffix = text1[-commonlength:] + text1 = text1[:-commonlength] + text2 = text2[:-commonlength] + + # Compute the diff on the middle block. + diffs = self.diff_compute(text1, text2, checklines, deadline) + + # Restore the prefix and suffix. 
+ if commonprefix: + diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] + if commonsuffix: + diffs.append((self.DIFF_EQUAL, commonsuffix)) + self.diff_cleanupMerge(diffs) + return diffs + + def diff_compute(self, text1, text2, checklines, deadline): + """Find the differences between two texts. Assumes that the texts do not + have any common prefix or suffix. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + checklines: Speedup flag. If false, then don't run a line-level diff + first to identify the changed areas. + If true, then run a faster, slightly less optimal diff. + deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + if not text1: + # Just add some text (speedup). + return [(self.DIFF_INSERT, text2)] + + if not text2: + # Just delete some text (speedup). + return [(self.DIFF_DELETE, text1)] + + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + i = longtext.find(shorttext) + if i != -1: + # Shorter text is inside the longer text (speedup). + diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), + (self.DIFF_INSERT, longtext[i + len(shorttext):])] + # Swap insertions for deletions if diff is reversed. + if len(text1) > len(text2): + diffs[0] = (self.DIFF_DELETE, diffs[0][1]) + diffs[2] = (self.DIFF_DELETE, diffs[2][1]) + return diffs + + if len(shorttext) == 1: + # Single character string. + # After the previous speedup, the character can't be an equality. + return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + # Check to see if the problem can be split in two. + hm = self.diff_halfMatch(text1, text2) + if hm: + # A half-match was found, sort out the return data. + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + # Send both pairs off for separate processing. 
+ diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) + diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) + # Merge the results. + return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b + + if checklines and len(text1) > 100 and len(text2) > 100: + return self.diff_lineMode(text1, text2, deadline) + + return self.diff_bisect(text1, text2, deadline) + + def diff_lineMode(self, text1, text2, deadline): + """Do a quick line-level diff on both strings, then rediff the parts for + greater accuracy. + This speedup can produce non-minimal diffs. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + deadline: Time when the diff should be complete by. + + Returns: + Array of changes. + """ + + # Scan the text on a line-by-line basis first. + (text1, text2, linearray) = self.diff_linesToChars(text1, text2) + + diffs = self.diff_main(text1, text2, False, deadline) + + # Convert the diff back to original text. + self.diff_charsToLines(diffs, linearray) + # Eliminate freak matches (e.g. blank lines) + self.diff_cleanupSemantic(diffs) + + # Rediff any replacement blocks, this time character-by-character. + # Add a dummy entry at the end. + diffs.append((self.DIFF_EQUAL, '')) + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. + if count_delete >= 1 and count_insert >= 1: + # Delete the offending records and add the merged ones. 
+ a = self.diff_main(text_delete, text_insert, False, deadline) + diffs[pointer - count_delete - count_insert : pointer] = a + pointer = pointer - count_delete - count_insert + len(a) + count_insert = 0 + count_delete = 0 + text_delete = '' + text_insert = '' + + pointer += 1 + + diffs.pop() # Remove the dummy entry at the end. + + return diffs + + def diff_bisect(self, text1, text2, deadline): + """Find the 'middle snake' of a diff, split the problem in two + and return the recursively constructed diff. + See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + max_d = (text1_length + text2_length + 1) // 2 + v_offset = max_d + v_length = 2 * max_d + v1 = [-1] * v_length + v1[v_offset + 1] = 0 + v2 = v1[:] + delta = text1_length - text2_length + # If the total number of characters is odd, then the front path will + # collide with the reverse path. + front = (delta % 2 != 0) + # Offsets for start and end of k loop. + # Prevents mapping of space beyond the grid. + k1start = 0 + k1end = 0 + k2start = 0 + k2end = 0 + for d in range(max_d): + # Bail out if deadline is reached. + if time.time() > deadline: + break + + # Walk the front path one step. + for k1 in range(-d + k1start, d + 1 - k1end, 2): + k1_offset = v_offset + k1 + if k1 == -d or (k1 != d and + v1[k1_offset - 1] < v1[k1_offset + 1]): + x1 = v1[k1_offset + 1] + else: + x1 = v1[k1_offset - 1] + 1 + y1 = x1 - k1 + while (x1 < text1_length and y1 < text2_length and + text1[x1] == text2[y1]): + x1 += 1 + y1 += 1 + v1[k1_offset] = x1 + if x1 > text1_length: + # Ran off the right of the graph. + k1end += 2 + elif y1 > text2_length: + # Ran off the bottom of the graph. 
+ k1start += 2 + elif front: + k2_offset = v_offset + delta - k1 + if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[k2_offset] + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Walk the reverse path one step. + for k2 in range(-d + k2start, d + 1 - k2end, 2): + k2_offset = v_offset + k2 + if k2 == -d or (k2 != d and + v2[k2_offset - 1] < v2[k2_offset + 1]): + x2 = v2[k2_offset + 1] + else: + x2 = v2[k2_offset - 1] + 1 + y2 = x2 - k2 + while (x2 < text1_length and y2 < text2_length and + text1[-x2 - 1] == text2[-y2 - 1]): + x2 += 1 + y2 += 1 + v2[k2_offset] = x2 + if x2 > text1_length: + # Ran off the left of the graph. + k2end += 2 + elif y2 > text2_length: + # Ran off the top of the graph. + k2start += 2 + elif not front: + k1_offset = v_offset + delta - k2 + if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: + x1 = v1[k1_offset] + y1 = v_offset + x1 - k1_offset + # Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2 + if x1 >= x2: + # Overlap detected. + return self.diff_bisectSplit(text1, text2, x1, y1, deadline) + + # Diff took too long and hit the deadline or + # number of diffs equals number of characters, no commonality at all. + return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] + + def diff_bisectSplit(self, text1, text2, x, y, deadline): + """Given the location of the 'middle snake', split the diff in two parts + and recurse. + + Args: + text1: Old string to be diffed. + text2: New string to be diffed. + x: Index of split point in text1. + y: Index of split point in text2. + deadline: Time at which to bail if not yet complete. + + Returns: + Array of diff tuples. + """ + text1a = text1[:x] + text2a = text2[:y] + text1b = text1[x:] + text2b = text2[y:] + + # Compute both diffs serially. 
+ diffs = self.diff_main(text1a, text2a, False, deadline) + diffsb = self.diff_main(text1b, text2b, False, deadline) + + return diffs + diffsb + + def diff_linesToChars(self, text1, text2): + """Split two texts into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + + Args: + text1: First string. + text2: Second string. + + Returns: + Three element tuple, containing the encoded text1, the encoded text2 and + the array of unique strings. The zeroth element of the array of unique + strings is intentionally blank. + """ + lineArray = [] # e.g. lineArray[4] == "Hello\n" + lineHash = {} # e.g. lineHash["Hello\n"] == 4 + + # "\x00" is a valid character, but various debuggers don't like it. + # So we'll insert a junk entry to avoid generating a null character. + lineArray.append('') + + def diff_linesToCharsMunge(text): + """Split a text into an array of strings. Reduce the texts to a string + of hashes where each Unicode character represents one line. + Modifies linearray and linehash through being a closure. + + Args: + text: String to encode. + + Returns: + Encoded string. + """ + chars = [] + # Walk the text, pulling out a substring for each line. + # text.split('\n') would would temporarily double our memory footprint. + # Modifying text would create many large strings to garbage collect. 
+ lineStart = 0 + lineEnd = -1 + while lineEnd < len(text) - 1: + lineEnd = text.find('\n', lineStart) + if lineEnd == -1: + lineEnd = len(text) - 1 + line = text[lineStart:lineEnd + 1] + lineStart = lineEnd + 1 + + if line in lineHash: + chars.append(chr(lineHash[line])) + else: + lineArray.append(line) + lineHash[line] = len(lineArray) - 1 + chars.append(chr(len(lineArray) - 1)) + return "".join(chars) + + chars1 = diff_linesToCharsMunge(text1) + chars2 = diff_linesToCharsMunge(text2) + return (chars1, chars2, lineArray) + + def diff_charsToLines(self, diffs, lineArray): + """Rehydrate the text in a diff from a string of line hashes to real lines + of text. + + Args: + diffs: Array of diff tuples. + lineArray: Array of unique strings. + """ + for x in range(len(diffs)): + text = [] + for char in diffs[x][1]: + text.append(lineArray[ord(char)]) + diffs[x] = (diffs[x][0], "".join(text)) + + def diff_commonPrefix(self, text1, text2): + """Determine the common prefix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the start of each string. + """ + # Quick check for common null cases. + if not text1 or not text2 or text1[0] != text2[0]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerstart = 0 + while pointermin < pointermid: + if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: + pointermin = pointermid + pointerstart = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonSuffix(self, text1, text2): + """Determine the common suffix of two strings. + + Args: + text1: First string. + text2: Second string. + + Returns: + The number of characters common to the end of each string. + """ + # Quick check for common null cases. 
+ if not text1 or not text2 or text1[-1] != text2[-1]: + return 0 + # Binary search. + # Performance analysis: http://neil.fraser.name/news/2007/10/09/ + pointermin = 0 + pointermax = min(len(text1), len(text2)) + pointermid = pointermax + pointerend = 0 + while pointermin < pointermid: + if (text1[-pointermid:len(text1) - pointerend] == + text2[-pointermid:len(text2) - pointerend]): + pointermin = pointermid + pointerend = pointermin + else: + pointermax = pointermid + pointermid = (pointermax - pointermin) // 2 + pointermin + return pointermid + + def diff_commonOverlap(self, text1, text2): + """Determine if the suffix of one string is the prefix of another. + + Args: + text1 First string. + text2 Second string. + + Returns: + The number of characters common to the end of the first + string and the start of the second string. + """ + # Cache the text lengths to prevent multiple calls. + text1_length = len(text1) + text2_length = len(text2) + # Eliminate the null case. + if text1_length == 0 or text2_length == 0: + return 0 + # Truncate the longer string. + if text1_length > text2_length: + text1 = text1[-text2_length:] + elif text1_length < text2_length: + text2 = text2[:text1_length] + text_length = min(text1_length, text2_length) + # Quick check for the worst case. + if text1 == text2: + return text_length + + # Start by looking for a single character match + # and increase length until no match is found. + # Performance analysis: http://neil.fraser.name/news/2010/11/04/ + best = 0 + length = 1 + while True: + pattern = text1[-length:] + found = text2.find(pattern) + if found == -1: + return best + length += found + if found == 0 or text1[-length:] == text2[:length]: + best = length + length += 1 + + def diff_halfMatch(self, text1, text2): + """Do the two texts share a substring which is at least half the length of + the longer text? + This speedup can produce non-minimal diffs. + + Args: + text1: First string. + text2: Second string. 
+ + Returns: + Five element Array, containing the prefix of text1, the suffix of text1, + the prefix of text2, the suffix of text2 and the common middle. Or None + if there was no match. + """ + if self.Diff_Timeout <= 0: + # Don't risk returning a non-optimal diff if we have unlimited time. + return None + if len(text1) > len(text2): + (longtext, shorttext) = (text1, text2) + else: + (shorttext, longtext) = (text1, text2) + if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): + return None # Pointless. + + def diff_halfMatchI(longtext, shorttext, i): + """Does a substring of shorttext exist within longtext such that the + substring is at least half the length of longtext? + Closure, but does not reference any external variables. + + Args: + longtext: Longer string. + shorttext: Shorter string. + i: Start index of quarter length substring within longtext. + + Returns: + Five element Array, containing the prefix of longtext, the suffix of + longtext, the prefix of shorttext, the suffix of shorttext and the + common middle. Or None if there was no match. + """ + seed = longtext[i:i + len(longtext) // 4] + best_common = '' + j = shorttext.find(seed) + while j != -1: + prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) + suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) + if len(best_common) < suffixLength + prefixLength: + best_common = (shorttext[j - suffixLength:j] + + shorttext[j:j + prefixLength]) + best_longtext_a = longtext[:i - suffixLength] + best_longtext_b = longtext[i + prefixLength:] + best_shorttext_a = shorttext[:j - suffixLength] + best_shorttext_b = shorttext[j + prefixLength:] + j = shorttext.find(seed, j + 1) + + if len(best_common) * 2 >= len(longtext): + return (best_longtext_a, best_longtext_b, + best_shorttext_a, best_shorttext_b, best_common) + else: + return None + + # First check if the second quarter is the seed for a half-match. 
+ hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) + # Check again based on the third quarter. + hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) + if not hm1 and not hm2: + return None + elif not hm2: + hm = hm1 + elif not hm1: + hm = hm2 + else: + # Both matched. Select the longest. + if len(hm1[4]) > len(hm2[4]): + hm = hm1 + else: + hm = hm2 + + # A half-match was found, sort out the return data. + if len(text1) > len(text2): + (text1_a, text1_b, text2_a, text2_b, mid_common) = hm + else: + (text2_a, text2_b, text1_a, text1_b, mid_common) = hm + return (text1_a, text1_b, text2_a, text2_b, mid_common) + + def diff_cleanupSemantic(self, diffs): + """Reduce the number of edits by eliminating semantically trivial + equalities. + + Args: + diffs: Array of diff tuples. + """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + # Number of chars that changed prior to the equality. + length_insertions1, length_deletions1 = 0, 0 + # Number of chars that changed after the equality. + length_insertions2, length_deletions2 = 0, 0 + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. + equalities.append(pointer) + length_insertions1, length_insertions2 = length_insertions2, 0 + length_deletions1, length_deletions2 = length_deletions2, 0 + lastequality = diffs[pointer][1] + else: # An insertion or deletion. + if diffs[pointer][0] == self.DIFF_INSERT: + length_insertions2 += len(diffs[pointer][1]) + else: + length_deletions2 += len(diffs[pointer][1]) + # Eliminate an equality that is smaller or equal to the edits on both + # sides of it. + if (lastequality and (len(lastequality) <= + max(length_insertions1, length_deletions1)) and + (len(lastequality) <= max(length_insertions2, length_deletions2))): + # Duplicate record. 
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) + # Change second copy to insert. + diffs[equalities[-1] + 1] = (self.DIFF_INSERT, + diffs[equalities[-1] + 1][1]) + # Throw away the equality we just deleted. + equalities.pop() + # Throw away the previous equality (it needs to be reevaluated). + if len(equalities): + equalities.pop() + if len(equalities): + pointer = equalities[-1] + else: + pointer = -1 + # Reset the counters. + length_insertions1, length_deletions1 = 0, 0 + length_insertions2, length_deletions2 = 0, 0 + lastequality = None + changes = True + pointer += 1 + + # Normalize the diff. + if changes: + self.diff_cleanupMerge(diffs) + self.diff_cleanupSemanticLossless(diffs) + + # Find any overlaps between deletions and insertions. + # e.g: abcxxxxxxdef + # -> abcxxxdef + # e.g: xxxabcdefxxx + # -> defxxxabc + # Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = 1 + while pointer < len(diffs): + if (diffs[pointer - 1][0] == self.DIFF_DELETE and + diffs[pointer][0] == self.DIFF_INSERT): + deletion = diffs[pointer - 1][1] + insertion = diffs[pointer][1] + overlap_length1 = self.diff_commonOverlap(deletion, insertion) + overlap_length2 = self.diff_commonOverlap(insertion, deletion) + if overlap_length1 >= overlap_length2: + if (overlap_length1 >= len(deletion) / 2.0 or + overlap_length1 >= len(insertion) / 2.0): + # Overlap found. Insert an equality and trim the surrounding edits. + diffs.insert(pointer, (self.DIFF_EQUAL, + insertion[:overlap_length1])) + diffs[pointer - 1] = (self.DIFF_DELETE, + deletion[:len(deletion) - overlap_length1]) + diffs[pointer + 1] = (self.DIFF_INSERT, + insertion[overlap_length1:]) + pointer += 1 + else: + if (overlap_length2 >= len(deletion) / 2.0 or + overlap_length2 >= len(insertion) / 2.0): + # Reverse overlap found. + # Insert an equality and swap and trim the surrounding edits. 
+ diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) + diffs[pointer - 1] = (self.DIFF_INSERT, + insertion[:len(insertion) - overlap_length2]) + diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) + pointer += 1 + pointer += 1 + pointer += 1 + + def diff_cleanupSemanticLossless(self, diffs): + """Look for single edits surrounded on both sides by equalities + which can be shifted sideways to align the edit to a word boundary. + e.g: The cat came. -> The cat came. + + Args: + diffs: Array of diff tuples. + """ + + def diff_cleanupSemanticScore(one, two): + """Given two strings, compute a score representing whether the + internal boundary falls on logical boundaries. + Scores range from 6 (best) to 0 (worst). + Closure, but does not reference any external variables. + + Args: + one: First string. + two: Second string. + + Returns: + The score. + """ + if not one or not two: + # Edges are the best. + return 6 + + # Each port of this function behaves slightly differently due to + # subtle differences in each language's definition of things like + # 'whitespace'. Since this function's purpose is largely cosmetic, + # the choice has been made to use each language's native features + # rather than force total conformity. + char1 = one[-1] + char2 = two[0] + nonAlphaNumeric1 = not char1.isalnum() + nonAlphaNumeric2 = not char2.isalnum() + whitespace1 = nonAlphaNumeric1 and char1.isspace() + whitespace2 = nonAlphaNumeric2 and char2.isspace() + lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") + lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") + blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) + blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) + + if blankLine1 or blankLine2: + # Five points for blank lines. + return 5 + elif lineBreak1 or lineBreak2: + # Four points for line breaks. + return 4 + elif nonAlphaNumeric1 and not whitespace1 and whitespace2: + # Three points for end of sentences. 
+ return 3 + elif whitespace1 or whitespace2: + # Two points for whitespace. + return 2 + elif nonAlphaNumeric1 or nonAlphaNumeric2: + # One point for non-alphanumeric. + return 1 + return 0 + + pointer = 1 + # Intentionally ignore the first and last element (don't need checking). + while pointer < len(diffs) - 1: + if (diffs[pointer - 1][0] == self.DIFF_EQUAL and + diffs[pointer + 1][0] == self.DIFF_EQUAL): + # This is a single edit surrounded by equalities. + equality1 = diffs[pointer - 1][1] + edit = diffs[pointer][1] + equality2 = diffs[pointer + 1][1] + + # First, shift the edit as far left as possible. + commonOffset = self.diff_commonSuffix(equality1, edit) + if commonOffset: + commonString = edit[-commonOffset:] + equality1 = equality1[:-commonOffset] + edit = commonString + edit[:-commonOffset] + equality2 = commonString + equality2 + + # Second, step character by character right, looking for the best fit. + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + bestScore = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + while edit and equality2 and edit[0] == equality2[0]: + equality1 += edit[0] + edit = edit[1:] + equality2[0] + equality2 = equality2[1:] + score = (diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2)) + # The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore: + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + + if diffs[pointer - 1][1] != bestEquality1: + # We have an improvement, save it back to the diff. 
+ if bestEquality1: + diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) + else: + del diffs[pointer - 1] + pointer -= 1 + diffs[pointer] = (diffs[pointer][0], bestEdit) + if bestEquality2: + diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) + else: + del diffs[pointer + 1] + pointer -= 1 + pointer += 1 + + # Define some regex patterns for matching boundaries. + BLANKLINEEND = re.compile(r"\n\r?\n$"); + BLANKLINESTART = re.compile(r"^\r?\n\r?\n"); + + def diff_cleanupEfficiency(self, diffs): + """Reduce the number of edits by eliminating operationally trivial + equalities. + + Args: + diffs: Array of diff tuples. + """ + changes = False + equalities = [] # Stack of indices where equalities are found. + lastequality = None # Always equal to diffs[equalities[-1]][1] + pointer = 0 # Index of current position. + pre_ins = False # Is there an insertion operation before the last equality. + pre_del = False # Is there a deletion operation before the last equality. + post_ins = False # Is there an insertion operation after the last equality. + post_del = False # Is there a deletion operation after the last equality. + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. + if (len(diffs[pointer][1]) < self.Diff_EditCost and + (post_ins or post_del)): + # Candidate found. + equalities.append(pointer) + pre_ins = post_ins + pre_del = post_del + lastequality = diffs[pointer][1] + else: + # Not a candidate, and can never become one. + equalities = [] + lastequality = None + + post_ins = post_del = False + else: # An insertion or deletion. + if diffs[pointer][0] == self.DIFF_DELETE: + post_del = True + else: + post_ins = True + + # Five types to be split: + # ABXYCD + # AXCD + # ABXC + # AXCD + # ABXC + + if lastequality and ((pre_ins and pre_del and post_ins and post_del) or + ((len(lastequality) < self.Diff_EditCost / 2) and + (pre_ins + pre_del + post_ins + post_del) == 3)): + # Duplicate record. 
+ diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality)) + # Change second copy to insert. + diffs[equalities[-1] + 1] = (self.DIFF_INSERT, + diffs[equalities[-1] + 1][1]) + equalities.pop() # Throw away the equality we just deleted. + lastequality = None + if pre_ins and pre_del: + # No changes made which could affect previous entry, keep going. + post_ins = post_del = True + equalities = [] + else: + if len(equalities): + equalities.pop() # Throw away the previous equality. + if len(equalities): + pointer = equalities[-1] + else: + pointer = -1 + post_ins = post_del = False + changes = True + pointer += 1 + + if changes: + self.diff_cleanupMerge(diffs) + + def diff_cleanupMerge(self, diffs): + """Reorder and merge like edit sections. Merge equalities. + Any edit section can move as long as it doesn't cross an equality. + + Args: + diffs: Array of diff tuples. + """ + diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. + pointer = 0 + count_delete = 0 + count_insert = 0 + text_delete = '' + text_insert = '' + while pointer < len(diffs): + if diffs[pointer][0] == self.DIFF_INSERT: + count_insert += 1 + text_insert += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_DELETE: + count_delete += 1 + text_delete += diffs[pointer][1] + pointer += 1 + elif diffs[pointer][0] == self.DIFF_EQUAL: + # Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1: + if count_delete != 0 and count_insert != 0: + # Factor out any common prefixies. 
+ commonlength = self.diff_commonPrefix(text_insert, text_delete) + if commonlength != 0: + x = pointer - count_delete - count_insert - 1 + if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: + diffs[x] = (diffs[x][0], diffs[x][1] + + text_insert[:commonlength]) + else: + diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) + pointer += 1 + text_insert = text_insert[commonlength:] + text_delete = text_delete[commonlength:] + # Factor out any common suffixies. + commonlength = self.diff_commonSuffix(text_insert, text_delete) + if commonlength != 0: + diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + + diffs[pointer][1]) + text_insert = text_insert[:-commonlength] + text_delete = text_delete[:-commonlength] + # Delete the offending records and add the merged ones. + if count_delete == 0: + diffs[pointer - count_insert : pointer] = [ + (self.DIFF_INSERT, text_insert)] + elif count_insert == 0: + diffs[pointer - count_delete : pointer] = [ + (self.DIFF_DELETE, text_delete)] + else: + diffs[pointer - count_delete - count_insert : pointer] = [ + (self.DIFF_DELETE, text_delete), + (self.DIFF_INSERT, text_insert)] + pointer = pointer - count_delete - count_insert + 1 + if count_delete != 0: + pointer += 1 + if count_insert != 0: + pointer += 1 + elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: + # Merge this equality with the previous one. + diffs[pointer - 1] = (diffs[pointer - 1][0], + diffs[pointer - 1][1] + diffs[pointer][1]) + del diffs[pointer] + else: + pointer += 1 + + count_insert = 0 + count_delete = 0 + text_delete = '' + text_insert = '' + + if diffs[-1][1] == '': + diffs.pop() # Remove the dummy entry at the end. + + # Second pass: look for single edits surrounded on both sides by equalities + # which can be shifted sideways to eliminate an equality. + # e.g: ABAC -> ABAC + changes = False + pointer = 1 + # Intentionally ignore the first and last element (don't need checking). 
def diff_xIndex(self, diffs, loc):
  """Translate a location in text1 into the equivalent location in text2.

  e.g. "The cat" vs "The big cat", 1->1, 5->8

  Args:
    diffs: Array of diff tuples.
    loc: Location within text1.

  Returns:
    Location within text2.  For an empty diff array the location is
    returned unchanged.
  """
  if not diffs:
    # Without this guard the loop below never runs and the check after it
    # would read a variable that was never assigned.
    return loc
  chars1 = 0
  chars2 = 0
  last_chars1 = 0
  last_chars2 = 0
  op = None
  for (op, text) in diffs:
    if op != self.DIFF_INSERT:  # Equality or deletion.
      chars1 += len(text)
    if op != self.DIFF_DELETE:  # Equality or insertion.
      chars2 += len(text)
    if chars1 > loc:  # Overshot the location.
      break
    last_chars1 = chars1
    last_chars2 = chars2

  # 'op' is the operation of the tuple the loop stopped on (or the final
  # tuple when the loop ran to completion).
  if op == self.DIFF_DELETE:
    # The location was deleted.
    return last_chars2
  # Add the remaining character length.
  return last_chars2 + (loc - last_chars1)
def diff_prettyHtml(self, diffs):
  """Convert a diff array into a pretty HTML report.

  The text of every tuple is HTML-escaped (ampersand and angle brackets),
  newlines are shown as a pilcrow followed by a line break, and each tuple
  is wrapped in an element that identifies its operation.

  Args:
    diffs: Array of diff tuples.

  Returns:
    HTML representation.
  """
  html = []
  for (op, data) in diffs:
    # '&' must be escaped first so the entities added below stay intact.
    text = (data.replace("&", "&amp;").replace("<", "&lt;")
            .replace(">", "&gt;").replace("\n", "&para;<br>"))
    if op == self.DIFF_INSERT:
      html.append("<ins style=\"background:#e6ffe6;\">%s</ins>" % text)
    elif op == self.DIFF_DELETE:
      html.append("<del style=\"background:#ffe6e6;\">%s</del>" % text)
    elif op == self.DIFF_EQUAL:
      html.append("<span>%s</span>" % text)
  return "".join(html)

def diff_text1(self, diffs):
  """Compute and return the source text (all equalities and deletions).

  Args:
    diffs: Array of diff tuples.

  Returns:
    Source text.
  """
  return "".join(data for (op, data) in diffs if op != self.DIFF_INSERT)

def diff_text2(self, diffs):
  """Compute and return the destination text (all equalities and insertions).

  Args:
    diffs: Array of diff tuples.

  Returns:
    Destination text.
  """
  return "".join(data for (op, data) in diffs if op != self.DIFF_DELETE)

def diff_levenshtein(self, diffs):
  """Compute the Levenshtein distance; the number of inserted, deleted or
  substituted characters.

  Args:
    diffs: Array of diff tuples.

  Returns:
    Number of changes.
  """
  total = 0
  pending_insert = 0
  pending_delete = 0
  for (op, data) in diffs:
    if op == self.DIFF_EQUAL:
      # A deletion and an insertion is one substitution.
      total += max(pending_insert, pending_delete)
      pending_insert = 0
      pending_delete = 0
    elif op == self.DIFF_INSERT:
      pending_insert += len(data)
    elif op == self.DIFF_DELETE:
      pending_delete += len(data)
  # Account for edits that follow the final equality.
  return total + max(pending_insert, pending_delete)
def diff_toDelta(self, diffs):
  """Crush the diff into an encoded string which describes the operations
  required to transform text1 into text2.

  E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
  Operations are tab-separated.  Inserted text is escaped using %xx notation.

  Args:
    diffs: Array of diff tuples.

  Returns:
    Delta text.
  """
  tokens = []
  for (op, data) in diffs:
    if op == self.DIFF_INSERT:
      # Quote the UTF-8 bytes so non-ASCII text is escaped consistently.
      escaped = urllib.parse.quote(data.encode("utf-8"),
                                   "!~*'();/?:@&=+$,# ")
      tokens.append("+" + escaped)
    elif op == self.DIFF_DELETE:
      tokens.append("-%d" % len(data))
    elif op == self.DIFF_EQUAL:
      tokens.append("=%d" % len(data))
  return "\t".join(tokens)

def diff_fromDelta(self, text1, delta):
  """Given the original text1, and an encoded string which describes the
  operations required to transform text1 into text2, compute the full diff.

  Args:
    text1: Source string for the diff.
    delta: Delta text.

  Returns:
    Array of diff tuples.

  Raises:
    ValueError: If invalid input.
  """
  diffs = []
  cursor = 0  # Position in text1.
  for token in delta.split("\t"):
    if token == "":
      # Blank tokens are ok (from a trailing \t).
      continue
    # The first character of each token selects the operation; the rest is
    # its parameter.
    opcode = token[0]
    param = token[1:]
    if opcode == "+":
      diffs.append((self.DIFF_INSERT, urllib.parse.unquote(param)))
      continue
    if opcode != "-" and opcode != "=":
      # Anything else is an error.
      raise ValueError("Invalid diff operation in diff_fromDelta: " + opcode)
    try:
      n = int(param)
    except ValueError:
      raise ValueError("Invalid number in diff_fromDelta: " + param)
    if n < 0:
      raise ValueError("Negative number in diff_fromDelta: " + param)
    chunk = text1[cursor : cursor + n]
    cursor += n
    if opcode == "=":
      diffs.append((self.DIFF_EQUAL, chunk))
    else:
      diffs.append((self.DIFF_DELETE, chunk))
  if cursor != len(text1):
    raise ValueError(
        "Delta length (%d) does not equal source text length (%d)." %
        (cursor, len(text1)))
  return diffs

#  MATCH FUNCTIONS

def match_main(self, text, pattern, loc):
  """Locate the best instance of 'pattern' in 'text' near 'loc'.

  Args:
    text: The text to search.
    pattern: The pattern to search for.
    loc: The location to search around.

  Returns:
    Best match index or -1.

  Raises:
    ValueError: If text or pattern is None.
  """
  # Check for null inputs.  Identity comparison is the reliable None test;
  # '==' would defer to whatever __eq__ the argument happens to define.
  if text is None or pattern is None:
    raise ValueError("Null inputs. (match_main)")

  loc = max(0, min(loc, len(text)))
  if text == pattern:
    # Shortcut (potentially not guaranteed by the algorithm)
    return 0
  if not text:
    # Nothing to match.
    return -1
  if text[loc:loc + len(pattern)] == pattern:
    # Perfect match at the perfect spot!  (Includes case of null pattern)
    return loc
  # Do a fuzzy compare.
  return self.match_bitap(text, pattern, loc)
def match_bitap(self, text, pattern, loc):
  """Locate the best instance of 'pattern' in 'text' near 'loc' using the
  Bitap algorithm.

  Args:
    text: The text to search.
    pattern: The pattern to search for.
    loc: The location to search around.

  Returns:
    Best match index or -1.
  """
  # Python doesn't have a maxint limit, so no pattern-length check is made.

  # Initialise the alphabet.
  alphabet = self.match_alphabet(pattern)
  pattern_len = len(pattern)
  text_len = len(text)

  def score_of(errors, position):
    """Score a match with the given error count at the given position.

    Reads loc and pattern_len from the enclosing scope.

    Args:
      errors: Number of errors in match.
      position: Location of match.

    Returns:
      Overall score for match (0.0 = good, 1.0 = bad).
    """
    accuracy = float(errors) / pattern_len
    proximity = abs(loc - position)
    if not self.Match_Distance:
      # Dodge divide by zero error.
      return 1.0 if proximity else accuracy
    return accuracy + (proximity / float(self.Match_Distance))

  # Highest score beyond which we give up.
  score_threshold = self.Match_Threshold
  # Is there a nearby exact match? (speedup)
  exact_loc = text.find(pattern, loc)
  if exact_loc != -1:
    score_threshold = min(score_of(0, exact_loc), score_threshold)
    # What about in the other direction? (speedup)
    exact_loc = text.rfind(pattern, loc + pattern_len)
    if exact_loc != -1:
      score_threshold = min(score_of(0, exact_loc), score_threshold)

  # Initialise the bit arrays.
  matchmask = 1 << (pattern_len - 1)
  best_loc = -1

  bin_max = pattern_len + text_len
  last_rd = None
  for d in range(pattern_len):
    # Scan for the best match; each iteration allows for one more error.
    # Run a binary search to determine how far from 'loc' we can stray at
    # this error level.
    bin_min = 0
    bin_mid = bin_max
    while bin_min < bin_mid:
      if score_of(d, loc + bin_mid) <= score_threshold:
        bin_min = bin_mid
      else:
        bin_max = bin_mid
      bin_mid = (bin_max - bin_min) // 2 + bin_min

    # Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    start = max(1, loc - bin_mid + 1)
    finish = min(loc + bin_mid, text_len) + pattern_len

    rd = [0] * (finish + 2)
    rd[finish + 1] = (1 << d) - 1
    for j in range(finish, start - 1, -1):
      if text_len <= j - 1:
        # Out of range.
        char_match = 0
      else:
        char_match = alphabet.get(text[j - 1], 0)
      shifted = ((rd[j + 1] << 1) | 1) & char_match
      if d == 0:  # First pass: exact match.
        rd[j] = shifted
      else:  # Subsequent passes: fuzzy match.
        rd[j] = shifted | (
            ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
      if rd[j] & matchmask:
        score = score_of(d, j - 1)
        # This match will almost certainly be better than any existing match.
        # But check anyway.
        if score <= score_threshold:
          score_threshold = score
          best_loc = j - 1
          if best_loc > loc:
            # When passing loc, don't exceed our current distance from loc.
            start = max(1, 2 * loc - best_loc)
          else:
            # Already passed loc, downhill from here on in.
            break
    # No hope for a (better) match at greater error levels.
    if score_of(d + 1, loc) > score_threshold:
      break
    last_rd = rd
  return best_loc
def match_alphabet(self, pattern):
  """Initialise the alphabet for the Bitap algorithm.

  Args:
    pattern: The text to encode.

  Returns:
    Hash of character locations: each character maps to a bitmask with one
    bit set per occurrence, the first pattern character using the highest
    bit.
  """
  masks = {}
  top_bit = len(pattern) - 1
  for index, char in enumerate(pattern):
    masks[char] = masks.get(char, 0) | (1 << (top_bit - index))
  return masks

#  PATCH FUNCTIONS

def patch_addContext(self, patch, text):
  """Increase the context until it is unique,
  but don't let the pattern expand beyond Match_MaxBits.

  The patch is modified in place.

  Args:
    patch: The patch to grow.
    text: Source text.
  """
  if len(text) == 0:
    return
  margin = self.Patch_Margin
  max_bits = self.Match_MaxBits
  pattern = text[patch.start2 : patch.start2 + patch.length1]
  padding = 0

  # Look for the first and last matches of pattern in text.  If two different
  # matches are found, increase the pattern length.
  while text.find(pattern) != text.rfind(pattern):
    if max_bits != 0 and len(pattern) >= max_bits - margin - margin:
      # The pattern may not grow any further.
      break
    padding += margin
    pattern = text[max(0, patch.start2 - padding) :
                   patch.start2 + patch.length1 + padding]
  # Add one chunk for good luck.
  padding += margin

  # Context taken from either side of the patched region.
  prefix = text[max(0, patch.start2 - padding) : patch.start2]
  suffix = text[patch.start2 + patch.length1 :
                patch.start2 + patch.length1 + padding]
  if prefix:
    patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
  if suffix:
    patch.diffs.append((self.DIFF_EQUAL, suffix))

  # Roll back the start points.
  patch.start1 -= len(prefix)
  patch.start2 -= len(prefix)
  # Extend lengths.
  grown = len(prefix) + len(suffix)
  patch.length1 += grown
  patch.length2 += grown
def patch_make(self, a, b=None, c=None):
  """Compute a list of patches to turn text1 into text2.
  Use diffs if provided, otherwise compute it ourselves.

  There are four ways to call this function, depending on what data is
  available to the caller:
  Method 1:
  a = text1, b = text2
  Method 2:
  a = diffs
  Method 3 (optimal):
  a = text1, b = diffs
  Method 4 (deprecated, use method 3):
  a = text1, b = text2, c = diffs

  Args:
    a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
        text2 (method 2).
    b: text2 (methods 1,4) or Array of diff tuples for text1 to
        text2 (method 3) or undefined (method 2).
    c: Array of diff tuples for text1 to text2 (method 4) or
        undefined (methods 1,2,3).

  Returns:
    Array of Patch objects.

  Raises:
    ValueError: If the arguments match none of the four call formats.
  """
  a_is_text = isinstance(a, str)
  if a_is_text and isinstance(b, str) and c is None:
    # Method 1: text1, text2
    # Compute diffs from text1 and text2.
    text1 = a
    diffs = self.diff_main(text1, b, True)
    if len(diffs) > 2:
      self.diff_cleanupSemantic(diffs)
      self.diff_cleanupEfficiency(diffs)
  elif isinstance(a, list) and b is None and c is None:
    # Method 2: diffs
    # Compute text1 from diffs.
    diffs = a
    text1 = self.diff_text1(diffs)
  elif a_is_text and isinstance(b, list) and c is None:
    # Method 3: text1, diffs
    text1 = a
    diffs = b
  elif a_is_text and isinstance(b, str) and isinstance(c, list):
    # Method 4: text1, text2, diffs
    # text2 is not used.
    text1 = a
    diffs = c
  else:
    raise ValueError("Unknown call format to patch_make.")

  if not diffs:
    return []  # Nothing to patch.

  margin2 = 2 * self.Patch_Margin
  last_index = len(diffs) - 1
  patches = []
  patch = patch_obj()
  char_count1 = 0  # Number of characters into the text1 string.
  char_count2 = 0  # Number of characters into the text2 string.
  prepatch_text = text1  # Recreate the patches to determine context info.
  postpatch_text = text1
  for index, entry in enumerate(diffs):
    (diff_type, diff_text) = entry
    size = len(diff_text)
    is_equal = diff_type == self.DIFF_EQUAL
    if len(patch.diffs) == 0 and not is_equal:
      # A new patch starts here.
      patch.start1 = char_count1
      patch.start2 = char_count2
    if diff_type == self.DIFF_INSERT:
      # Insertion
      patch.diffs.append(entry)
      patch.length2 += size
      postpatch_text = (postpatch_text[:char_count2] + diff_text +
                        postpatch_text[char_count2:])
    elif diff_type == self.DIFF_DELETE:
      # Deletion.
      patch.length1 += size
      patch.diffs.append(entry)
      postpatch_text = (postpatch_text[:char_count2] +
                        postpatch_text[char_count2 + size:])
    elif (is_equal and size <= margin2 and
          len(patch.diffs) != 0 and index != last_index):
      # Small equality inside a patch.
      patch.diffs.append(entry)
      patch.length1 += size
      patch.length2 += size

    if is_equal and size >= margin2 and len(patch.diffs) != 0:
      # Time for a new patch.
      self.patch_addContext(patch, prepatch_text)
      patches.append(patch)
      patch = patch_obj()
      # Unlike Unidiff, our patch lists have a rolling context.
      # Update prepatch text & pos to reflect the application of the
      # just completed patch.
      prepatch_text = postpatch_text
      char_count1 = char_count2

    # Update the current character count.
    if diff_type != self.DIFF_INSERT:
      char_count1 += size
    if diff_type != self.DIFF_DELETE:
      char_count2 += size

  # Pick up the leftover patch if not empty.
  if len(patch.diffs) != 0:
    self.patch_addContext(patch, prepatch_text)
    patches.append(patch)
  return patches
def patch_deepCopy(self, patches):
  """Given an array of patches, return another array that is identical.

  Args:
    patches: Array of Patch objects.

  Returns:
    Array of Patch objects.
  """
  copies = []
  for original in patches:
    clone = patch_obj()
    # A shallow list copy is enough: the diff tuples are immutable.
    clone.diffs = list(original.diffs)
    for field in ("start1", "start2", "length1", "length2"):
      setattr(clone, field, getattr(original, field))
    copies.append(clone)
  return copies
def patch_apply(self, patches, text):
  """Merge a set of patches onto the text.  Return a patched text, as well
  as a list of true/false values indicating which patches were applied.

  The patches passed in are not modified; a copy is padded and split.

  Args:
    patches: Array of Patch objects.
    text: Old text.

  Returns:
    Two element Array, containing the new text and an array of boolean values.
  """
  if not patches:
    return (text, [])

  # Deep copy the patches so that no changes are made to originals.
  patches = self.patch_deepCopy(patches)

  nullPadding = self.patch_addPadding(patches)
  text = nullPadding + text + nullPadding
  self.patch_splitMax(patches)

  # delta keeps track of the offset between the expected and actual location
  # of the previous patch.  If there are patches expected at positions 10 and
  # 20, but the first patch was found at 12, delta is 2 and the second patch
  # has an effective expected position of 22.
  delta = 0
  results = []
  for patch in patches:
    expected_loc = patch.start2 + delta
    text1 = self.diff_text1(patch.diffs)
    end_loc = -1
    if len(text1) > self.Match_MaxBits:
      # patch_splitMax will only provide an oversized pattern in the case of
      # a monster delete.
      start_loc = self.match_main(text, text1[:self.Match_MaxBits],
                                  expected_loc)
      if start_loc != -1:
        end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
            expected_loc + len(text1) - self.Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc:
          # Can't find valid trailing context.  Drop this patch.
          start_loc = -1
    else:
      start_loc = self.match_main(text, text1, expected_loc)
    if start_loc == -1:
      # No match found.  :(
      results.append(False)
      # Subtract the delta for this failed patch from subsequent patches.
      delta -= patch.length2 - patch.length1
    else:
      # Found a match.  :)
      results.append(True)
      delta = start_loc - expected_loc
      if end_loc == -1:
        text2 = text[start_loc : start_loc + len(text1)]
      else:
        text2 = text[start_loc : end_loc + self.Match_MaxBits]
      if text1 == text2:
        # Perfect match, just shove the replacement text in.
        text = (text[:start_loc] + self.diff_text2(patch.diffs) +
                text[start_loc + len(text1):])
      else:
        # Imperfect match.
        # Run a diff to get a framework of equivalent indices.
        diffs = self.diff_main(text1, text2, False)
        if (len(text1) > self.Match_MaxBits and
            self.diff_levenshtein(diffs) / float(len(text1)) >
            self.Patch_DeleteThreshold):
          # The end points match, but the content is unacceptably bad.
          results[-1] = False
        else:
          self.diff_cleanupSemanticLossless(diffs)
          index1 = 0
          for (op, data) in patch.diffs:
            if op != self.DIFF_EQUAL:
              index2 = self.diff_xIndex(diffs, index1)
            if op == self.DIFF_INSERT:  # Insertion
              text = (text[:start_loc + index2] + data +
                      text[start_loc + index2:])
            elif op == self.DIFF_DELETE:  # Deletion
              text = (text[:start_loc + index2] +
                      text[start_loc +
                           self.diff_xIndex(diffs, index1 + len(data)):])
            if op != self.DIFF_DELETE:
              index1 += len(data)
  # Strip the padding off.  The end index is computed explicitly because a
  # negative-length slice bound of -0 would return an empty string when the
  # padding itself is empty.
  text = text[len(nullPadding) : len(text) - len(nullPadding)]
  return (text, results)
def patch_addPadding(self, patches):
  """Add some padding on text start and end so that edges can match
  something.  Intended to be called only from within patch_apply.

  The patches are modified in place.

  Args:
    patches: Array of Patch objects.

  Returns:
    The padding string added to each side.
  """
  paddingLength = self.Patch_Margin
  # Padding is the characters chr(1) .. chr(paddingLength).
  nullPadding = "".join(chr(code) for code in range(1, paddingLength + 1))

  # Bump all the patches forward.
  for patch in patches:
    patch.start1 += paddingLength
    patch.start2 += paddingLength

  # Add some padding on start of first diff.
  first = patches[0]
  head = first.diffs
  if not head or head[0][0] != self.DIFF_EQUAL:
    # Add nullPadding equality.
    head.insert(0, (self.DIFF_EQUAL, nullPadding))
    first.start1 -= paddingLength  # Should be 0.
    first.start2 -= paddingLength  # Should be 0.
    first.length1 += paddingLength
    first.length2 += paddingLength
  elif paddingLength > len(head[0][1]):
    # Grow first equality.
    existing = head[0][1]
    shortfall = paddingLength - len(existing)
    head[0] = (head[0][0], nullPadding[len(existing):] + existing)
    first.start1 -= shortfall
    first.start2 -= shortfall
    first.length1 += shortfall
    first.length2 += shortfall

  # Add some padding on end of last diff.
  last = patches[-1]
  tail = last.diffs
  if not tail or tail[-1][0] != self.DIFF_EQUAL:
    # Add nullPadding equality.
    tail.append((self.DIFF_EQUAL, nullPadding))
    last.length1 += paddingLength
    last.length2 += paddingLength
  elif paddingLength > len(tail[-1][1]):
    # Grow last equality.
    existing = tail[-1][1]
    shortfall = paddingLength - len(existing)
    tail[-1] = (tail[-1][0], existing + nullPadding[:shortfall])
    last.length1 += shortfall
    last.length2 += shortfall

  return nullPadding
def patch_splitMax(self, patches):
  """Look through the patches and break up any which are longer than the
  maximum limit of the match algorithm.
  Intended to be called only from within patch_apply.

  The list is modified in place: each oversized patch is replaced by the
  smaller patches produced from it.

  Args:
    patches: Array of Patch objects.
  """
  patch_size = self.Match_MaxBits
  if patch_size == 0:
    # Python has the option of not splitting strings due to its ability
    # to handle integers of arbitrary precision.
    return
  # An explicit index is required here.  The list grows and shrinks while it
  # is walked, and the index must skip over the pieces just inserted; a
  # 'for x in range(...)' loop would ignore both the changed length and any
  # adjustment made to x inside the body.
  x = 0
  while x < len(patches):
    if patches[x].length1 <= patch_size:
      x += 1
      continue
    bigpatch = patches[x]
    # Remove the big old patch.
    del patches[x]
    x -= 1
    start1 = bigpatch.start1
    start2 = bigpatch.start2
    precontext = ''
    while len(bigpatch.diffs) != 0:
      # Create one of several smaller patches.
      patch = patch_obj()
      empty = True
      patch.start1 = start1 - len(precontext)
      patch.start2 = start2 - len(precontext)
      if precontext:
        patch.length1 = patch.length2 = len(precontext)
        patch.diffs.append((self.DIFF_EQUAL, precontext))

      while (len(bigpatch.diffs) != 0 and
             patch.length1 < patch_size - self.Patch_Margin):
        (diff_type, diff_text) = bigpatch.diffs[0]
        if diff_type == self.DIFF_INSERT:
          # Insertions are harmless.
          patch.length2 += len(diff_text)
          start2 += len(diff_text)
          patch.diffs.append(bigpatch.diffs.pop(0))
          empty = False
        elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
              patch.diffs[0][0] == self.DIFF_EQUAL and
              len(diff_text) > 2 * patch_size):
          # This is a large deletion.  Let it pass in one chunk.
          patch.length1 += len(diff_text)
          start1 += len(diff_text)
          empty = False
          patch.diffs.append((diff_type, diff_text))
          del bigpatch.diffs[0]
        else:
          # Deletion or equality.  Only take as much as we can stomach.
          diff_text = diff_text[:patch_size - patch.length1 -
                                self.Patch_Margin]
          patch.length1 += len(diff_text)
          start1 += len(diff_text)
          if diff_type == self.DIFF_EQUAL:
            patch.length2 += len(diff_text)
            start2 += len(diff_text)
          else:
            empty = False

          patch.diffs.append((diff_type, diff_text))
          if diff_text == bigpatch.diffs[0][1]:
            del bigpatch.diffs[0]
          else:
            bigpatch.diffs[0] = (bigpatch.diffs[0][0],
                                 bigpatch.diffs[0][1][len(diff_text):])

      # Compute the head context for the next patch.
      precontext = self.diff_text2(patch.diffs)
      precontext = precontext[-self.Patch_Margin:]
      # Append the end context for this patch.
      postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
      if postcontext:
        patch.length1 += len(postcontext)
        patch.length2 += len(postcontext)
        if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
          patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
                             postcontext)
        else:
          patch.diffs.append((self.DIFF_EQUAL, postcontext))

      if not empty:
        x += 1
        patches.insert(x, patch)
    # Step past the last piece inserted for this big patch.
    x += 1
def patch_toText(self, patches):
  """Take a list of patches and return a textual representation.

  Args:
    patches: Array of Patch objects.

  Returns:
    Text representation of patches.
  """
  return "".join(str(patch) for patch in patches)

def patch_fromText(self, textline):
  """Parse a textual representation of patches and return a list of patch
  objects.

  Args:
    textline: Text representation of patches.

  Returns:
    Array of Patch objects.

  Raises:
    ValueError: If invalid input.
  """
  patches = []
  if not textline:
    return patches

  def parse_coords(start_text, length_text):
    """Turn one 'start,length' header pair into 0-based (start, length)."""
    start = int(start_text)
    if length_text == '':
      # An omitted length means a length of one.
      return (start - 1, 1)
    if length_text == '0':
      return (start, 0)
    return (start - 1, int(length_text))

  text = textline.split('\n')
  while len(text) != 0:
    # Raw string: the pattern contains backslash escapes meant for the
    # regex engine, not for the Python string literal.
    m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
    if not m:
      raise ValueError("Invalid patch string: " + text[0])
    patch = patch_obj()
    patches.append(patch)
    (patch.start1, patch.length1) = parse_coords(m.group(1), m.group(2))
    (patch.start2, patch.length2) = parse_coords(m.group(3), m.group(4))

    del text[0]

    while len(text) != 0:
      if text[0]:
        sign = text[0][0]
      else:
        sign = ''
      line = urllib.parse.unquote(text[0][1:])
      if sign == '+':
        # Insertion.
        patch.diffs.append((self.DIFF_INSERT, line))
      elif sign == '-':
        # Deletion.
        patch.diffs.append((self.DIFF_DELETE, line))
      elif sign == ' ':
        # Minor equality.
        patch.diffs.append((self.DIFF_EQUAL, line))
      elif sign == '@':
        # Start of next patch.
        break
      elif sign == '':
        # Blank line; ignore it.
        pass
      else:
        # Unknown line prefix.
        raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
      del text[0]
  return patches
class patch_obj:
  """Class representing one patch operation.
  """

  def __init__(self):
    """Initializes with an empty list of diffs.
    """
    self.diffs = []
    self.start1 = None
    self.start2 = None
    self.length1 = 0
    self.length2 = 0

  def __str__(self):
    """Emulate GNU diff's format.
    Header: @@ -382,8 +481,9 @@
    Indices are printed as 1-based, not 0-based.

    Returns:
      The GNU diff string.
    """
    def coords(start, length):
      # A zero length keeps the raw start; otherwise the start is shown
      # 1-based and a length of one is left implicit.
      if length == 0:
        return str(start) + ",0"
      if length == 1:
        return str(start + 1)
      return str(start + 1) + "," + str(length)

    pieces = ["@@ -", coords(self.start1, self.length1),
              " +", coords(self.start2, self.length2), " @@\n"]
    # Escape the body of the patch with %xx notation.
    for (op, data) in self.diffs:
      if op == diff_match_patch.DIFF_INSERT:
        pieces.append("+")
      elif op == diff_match_patch.DIFF_DELETE:
        pieces.append("-")
      elif op == diff_match_patch.DIFF_EQUAL:
        pieces.append(" ")
      # Quote the UTF-8 bytes so non-ASCII text is escaped consistently.
      escaped = urllib.parse.quote(data.encode("utf-8"),
                                   "!~*'();/?:@&=+$,# ")
      pieces.append(escaped + "\n")
    return "".join(pieces)
class DiffMatchPatchTest(unittest.TestCase):
  """Shared fixture for the test cases in this file."""

  def setUp(self):
    "Test harness for dmp_module."
    self.dmp = dmp_module.diff_match_patch()

  def diff_rebuildtexts(self, diffs):
    """Construct the two texts which made up the diff originally.

    Args:
      diffs: Array of diff tuples.

    Returns:
      Tuple (text1, text2).
    """
    ops = dmp_module.diff_match_patch
    text1 = ""
    text2 = ""
    for (op, text) in diffs:
      if op != ops.DIFF_INSERT:
        text1 += text
      if op != ops.DIFF_DELETE:
        text2 += text
    return (text1, text2)
def testDiffCommonPrefix(self):
  """Detect any common prefix."""
  cases = [
      (0, "abc", "xyz"),  # Null case.
      (4, "1234abcdef", "1234xyz"),  # Non-null case.
      (4, "1234", "1234xyz"),  # Whole case.
  ]
  for (expected, text1, text2) in cases:
    self.assertEqual(expected, self.dmp.diff_commonPrefix(text1, text2))

def testDiffCommonSuffix(self):
  """Detect any common suffix."""
  cases = [
      (0, "abc", "xyz"),  # Null case.
      (4, "abcdef1234", "xyz1234"),  # Non-null case.
      (4, "1234", "xyz1234"),  # Whole case.
  ]
  for (expected, text1, text2) in cases:
    self.assertEqual(expected, self.dmp.diff_commonSuffix(text1, text2))

def testDiffCommonOverlap(self):
  """Detect the overlap between the end of one text and the start of another."""
  cases = [
      (0, "", "abcd"),  # Null case.
      (3, "abc", "abcd"),  # Whole case.
      (0, "123456", "abcd"),  # No overlap.
      (3, "123456xxx", "xxxabcd"),  # Overlap.
      # Unicode.
      # Some overly clever languages (C#) may treat ligatures as equal to
      # their component letters.  E.g. U+FB01 == 'fi'
      (0, "fi", "\ufb01i"),
  ]
  for (expected, text1, text2) in cases:
    self.assertEqual(expected, self.dmp.diff_commonOverlap(text1, text2))
def testDiffHalfMatch(self):
  """Detect a halfmatch."""
  dmp = self.dmp
  dmp.Diff_Timeout = 1
  cases = [
      # No match.
      (None, "1234567890", "abcdef"),
      (None, "12345", "23"),
      # Single Match.
      (("12", "90", "a", "z", "345678"), "1234567890", "a345678z"),
      (("a", "z", "12", "90", "345678"), "a345678z", "1234567890"),
      (("abc", "z", "1234", "0", "56789"), "abc56789z", "1234567890"),
      (("a", "xyz", "1", "7890", "23456"), "a23456xyz", "1234567890"),
      # Multiple Matches.
      (("12123", "123121", "a", "z", "1234123451234"),
       "121231234123451234123121", "a1234123451234z"),
      (("", "-=-=-=-=-=", "x", "", "x-=-=-=-=-=-=-="),
       "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-="),
      (("-=-=-=-=-=", "", "", "y", "-=-=-=-=-=-=-=y"),
       "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy"),
      # Non-optimal halfmatch.
      # Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not
      # -qHillo+x=HelloHe-w+Hulloy
      (("qHillo", "w", "x", "Hulloy", "HelloHe"),
       "qHilloHelloHew", "xHelloHeHulloy"),
  ]
  for (expected, text1, text2) in cases:
    self.assertEqual(expected, dmp.diff_halfMatch(text1, text2))

  # Optimal no halfmatch.
  dmp.Diff_Timeout = 0
  self.assertEqual(None, dmp.diff_halfMatch("qHilloHelloHew",
                                            "xHelloHeHulloy"))

def testDiffLinesToChars(self):
  """Convert lines down to characters."""
  dmp = self.dmp
  self.assertEqual(
      ("\x01\x02\x01", "\x02\x01\x02", ["", "alpha\n", "beta\n"]),
      dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n"))

  self.assertEqual(
      ("", "\x01\x02\x03\x03", ["", "alpha\r\n", "beta\r\n", "\r\n"]),
      dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n"))

  self.assertEqual(("\x01", "\x02", ["", "a", "b"]),
                   dmp.diff_linesToChars("a", "b"))

  # More than 256 to reveal any 8-bit limitations.
  n = 300
  lineList = [str(x) + "\n" for x in range(1, n + 1)]
  charList = [chr(x) for x in range(1, n + 1)]
  self.assertEqual(n, len(lineList))
  lines = "".join(lineList)
  chars = "".join(charList)
  self.assertEqual(n, len(chars))
  lineList.insert(0, "")
  self.assertEqual((chars, "", lineList), dmp.diff_linesToChars(lines, ""))
def testDiffCleanupMerge(self):
    # Cleanup a messy diff.
    # diff_cleanupMerge rewrites the list it is given in place, so every
    # row pairs a fresh input list with the contents expected afterwards.
    delete = self.dmp.DIFF_DELETE
    insert = self.dmp.DIFF_INSERT
    equal = self.dmp.DIFF_EQUAL
    scenarios = [
        # Null case.
        ([], []),
        # No change case.
        (
            [(equal, "a"), (delete, "b"), (insert, "c")],
            [(equal, "a"), (delete, "b"), (insert, "c")],
        ),
        # Merge equalities.
        (
            [(equal, "a"), (equal, "b"), (equal, "c")],
            [(equal, "abc")],
        ),
        # Merge deletions.
        (
            [(delete, "a"), (delete, "b"), (delete, "c")],
            [(delete, "abc")],
        ),
        # Merge insertions.
        (
            [(insert, "a"), (insert, "b"), (insert, "c")],
            [(insert, "abc")],
        ),
        # Merge interweave.
        (
            [(delete, "a"), (insert, "b"), (delete, "c"), (insert, "d"), (equal, "e"), (equal, "f")],
            [(delete, "ac"), (insert, "bd"), (equal, "ef")],
        ),
        # Prefix and suffix detection.
        (
            [(delete, "a"), (insert, "abc"), (delete, "dc")],
            [(equal, "a"), (delete, "d"), (insert, "b"), (equal, "c")],
        ),
        # Prefix and suffix detection with equalities.
        (
            [(equal, "x"), (delete, "a"), (insert, "abc"), (delete, "dc"), (equal, "y")],
            [(equal, "xa"), (delete, "d"), (insert, "b"), (equal, "cy")],
        ),
        # Slide edit left.
        (
            [(equal, "a"), (insert, "ba"), (equal, "c")],
            [(insert, "ab"), (equal, "ac")],
        ),
        # Slide edit right.
        (
            [(equal, "c"), (insert, "ab"), (equal, "a")],
            [(equal, "ca"), (insert, "ba")],
        ),
        # Slide edit left recursive.
        (
            [(equal, "a"), (delete, "b"), (equal, "c"), (delete, "ac"), (equal, "x")],
            [(delete, "abc"), (equal, "acx")],
        ),
        # Slide edit right recursive.
        (
            [(equal, "x"), (delete, "ca"), (equal, "c"), (delete, "b"), (equal, "a")],
            [(equal, "xca"), (delete, "cba")],
        ),
    ]
    for diffs, expected in scenarios:
        self.dmp.diff_cleanupMerge(diffs)
        self.assertEqual(expected, diffs)
+ diffs = [(self.dmp.DIFF_EQUAL, "AAA\r\n\r\nBBB"), (self.dmp.DIFF_INSERT, "\r\nDDD\r\n\r\nBBB"), (self.dmp.DIFF_EQUAL, "\r\nEEE")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_EQUAL, "AAA\r\n\r\n"), (self.dmp.DIFF_INSERT, "BBB\r\nDDD\r\n\r\n"), (self.dmp.DIFF_EQUAL, "BBB\r\nEEE")], diffs) + + # Line boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "AAA\r\nBBB"), (self.dmp.DIFF_INSERT, " DDD\r\nBBB"), (self.dmp.DIFF_EQUAL, " EEE")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_EQUAL, "AAA\r\n"), (self.dmp.DIFF_INSERT, "BBB DDD\r\n"), (self.dmp.DIFF_EQUAL, "BBB EEE")], diffs) + + # Word boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The c"), (self.dmp.DIFF_INSERT, "ow and the c"), (self.dmp.DIFF_EQUAL, "at.")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_EQUAL, "The "), (self.dmp.DIFF_INSERT, "cow and the "), (self.dmp.DIFF_EQUAL, "cat.")], diffs) + + # Alphanumeric boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The-c"), (self.dmp.DIFF_INSERT, "ow-and-the-c"), (self.dmp.DIFF_EQUAL, "at.")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_EQUAL, "The-"), (self.dmp.DIFF_INSERT, "cow-and-the-"), (self.dmp.DIFF_EQUAL, "cat.")], diffs) + + # Hitting the start. + diffs = [(self.dmp.DIFF_EQUAL, "a"), (self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "ax")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "aax")], diffs) + + # Hitting the end. + diffs = [(self.dmp.DIFF_EQUAL, "xa"), (self.dmp.DIFF_DELETE, "a"), (self.dmp.DIFF_EQUAL, "a")] + self.dmp.diff_cleanupSemanticLossless(diffs) + self.assertEqual([(self.dmp.DIFF_EQUAL, "xaa"), (self.dmp.DIFF_DELETE, "a")], diffs) + + # Sentence boundaries. + diffs = [(self.dmp.DIFF_EQUAL, "The xxx. The "), (self.dmp.DIFF_INSERT, "zzz. 
def testDiffCleanupSemantic(self):
    # Cleanup semantically trivial equalities.
    # diff_cleanupSemantic edits its argument in place; each row pairs the
    # list handed in with the contents expected once the call returns.
    delete = self.dmp.DIFF_DELETE
    insert = self.dmp.DIFF_INSERT
    equal = self.dmp.DIFF_EQUAL
    scenarios = [
        # Null case.
        ([], []),
        # No elimination #1.
        (
            [(delete, "ab"), (insert, "cd"), (equal, "12"), (delete, "e")],
            [(delete, "ab"), (insert, "cd"), (equal, "12"), (delete, "e")],
        ),
        # No elimination #2.
        (
            [(delete, "abc"), (insert, "ABC"), (equal, "1234"), (delete, "wxyz")],
            [(delete, "abc"), (insert, "ABC"), (equal, "1234"), (delete, "wxyz")],
        ),
        # Simple elimination.
        (
            [(delete, "a"), (equal, "b"), (delete, "c")],
            [(delete, "abc"), (insert, "b")],
        ),
        # Backpass elimination.
        (
            [(delete, "ab"), (equal, "cd"), (delete, "e"), (equal, "f"), (insert, "g")],
            [(delete, "abcdef"), (insert, "cdfg")],
        ),
        # Multiple eliminations.
        (
            [
                (insert, "1"), (equal, "A"), (delete, "B"), (insert, "2"),
                (equal, "_"),
                (insert, "1"), (equal, "A"), (delete, "B"), (insert, "2"),
            ],
            [(delete, "AB_AB"), (insert, "1A2_1A2")],
        ),
        # Word boundaries.
        (
            [(equal, "The c"), (delete, "ow and the c"), (equal, "at.")],
            [(equal, "The "), (delete, "cow and the "), (equal, "cat.")],
        ),
        # No overlap elimination.
        (
            [(delete, "abcxx"), (insert, "xxdef")],
            [(delete, "abcxx"), (insert, "xxdef")],
        ),
        # Overlap elimination.
        (
            [(delete, "abcxxx"), (insert, "xxxdef")],
            [(delete, "abc"), (equal, "xxx"), (insert, "def")],
        ),
        # Reverse overlap elimination.
        (
            [(delete, "xxxabc"), (insert, "defxxx")],
            [(insert, "def"), (equal, "xxx"), (delete, "abc")],
        ),
        # Two overlap eliminations.
        (
            [(delete, "abcd1212"), (insert, "1212efghi"), (equal, "----"), (delete, "A3"), (insert, "3BC")],
            [
                (delete, "abcd"), (equal, "1212"), (insert, "efghi"),
                (equal, "----"),
                (delete, "A"), (equal, "3"), (insert, "BC"),
            ],
        ),
    ]
    for diffs, expected in scenarios:
        self.dmp.diff_cleanupSemantic(diffs)
        self.assertEqual(expected, diffs)
+ diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "xy"), (self.dmp.DIFF_INSERT, "34"), (self.dmp.DIFF_EQUAL, "z"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "56")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEqual([(self.dmp.DIFF_DELETE, "abxyzcd"), (self.dmp.DIFF_INSERT, "12xy34z56")], diffs) + + # High cost elimination. + self.dmp.Diff_EditCost = 5 + diffs = [(self.dmp.DIFF_DELETE, "ab"), (self.dmp.DIFF_INSERT, "12"), (self.dmp.DIFF_EQUAL, "wxyz"), (self.dmp.DIFF_DELETE, "cd"), (self.dmp.DIFF_INSERT, "34")] + self.dmp.diff_cleanupEfficiency(diffs) + self.assertEqual([(self.dmp.DIFF_DELETE, "abwxyzcd"), (self.dmp.DIFF_INSERT, "12wxyz34")], diffs) + self.dmp.Diff_EditCost = 4 + + def testDiffPrettyHtml(self): + # Pretty print. + diffs = [(self.dmp.DIFF_EQUAL, "a\n"), (self.dmp.DIFF_DELETE, "b"), (self.dmp.DIFF_INSERT, "c&d")] + self.assertEqual("
    <B>b</B>c&d", self.dmp.diff_prettyHtml(diffs)) + + def testDiffText(self): + # Compute the source and destination texts. + diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, " lazy")] + self.assertEqual("jumps over the lazy", self.dmp.diff_text1(diffs)) + + self.assertEqual("jumped over a lazy", self.dmp.diff_text2(diffs)) + + def testDiffDelta(self): + # Convert a diff into delta string. + diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, " lazy"), (self.dmp.DIFF_INSERT, "old dog")] + text1 = self.dmp.diff_text1(diffs) + self.assertEqual("jumps over the lazy", text1) + + delta = self.dmp.diff_toDelta(diffs) + self.assertEqual("=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta) + + # Convert delta string into a diff. + self.assertEqual(diffs, self.dmp.diff_fromDelta(text1, delta)) + + # Generates error (19 != 20). + try: + self.dmp.diff_fromDelta(text1 + "x", delta) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + # Generates error (19 != 18). + try: + self.dmp.diff_fromDelta(text1[1:], delta) + self.assertFalse(True) + except ValueError: + # Exception expected. + pass + + # Generates error (%c3%xy invalid Unicode). + # Note: Python 3 can decode this. + #try: + # self.dmp.diff_fromDelta("", "+%c3xy") + # self.assertFalse(True) + #except ValueError: + # # Exception expected. + # pass + + # Test deltas with special characters. 
def testDiffLevenshtein(self):
    # The same three diff tuples are fed to diff_levenshtein in three
    # different orders; each row is (expected distance, diffs).
    removed = (self.dmp.DIFF_DELETE, "abc")
    added = (self.dmp.DIFF_INSERT, "1234")
    kept = (self.dmp.DIFF_EQUAL, "xyz")
    scenarios = (
        # Levenshtein with trailing equality.
        (4, [removed, added, kept]),
        # Levenshtein with leading equality.
        (4, [kept, removed, added]),
        # Levenshtein with middle equality.
        (7, [removed, kept, added]),
    )
    for expected, diffs in scenarios:
        self.assertEqual(expected, self.dmp.diff_levenshtein(diffs))
def testDiffMain(self):
    # End-to-end checks of diff_main: trivial diffs, real diffs with the
    # timeout disabled, the timeout itself, line-mode versus text-mode
    # agreement, and rejection of None inputs.
    delete = self.dmp.DIFF_DELETE
    insert = self.dmp.DIFF_INSERT
    equal = self.dmp.DIFF_EQUAL

    # Perform a trivial diff.
    # Null case.
    self.assertEqual([], self.dmp.diff_main("", "", False))

    # Equality.
    self.assertEqual([(equal, "abc")], self.dmp.diff_main("abc", "abc", False))

    # Simple insertion.
    self.assertEqual(
        [(equal, "ab"), (insert, "123"), (equal, "c")],
        self.dmp.diff_main("abc", "ab123c", False))

    # Simple deletion.
    self.assertEqual(
        [(equal, "a"), (delete, "123"), (equal, "bc")],
        self.dmp.diff_main("a123bc", "abc", False))

    # Two insertions.
    self.assertEqual(
        [(equal, "a"), (insert, "123"), (equal, "b"), (insert, "456"), (equal, "c")],
        self.dmp.diff_main("abc", "a123b456c", False))

    # Two deletions.
    self.assertEqual(
        [(equal, "a"), (delete, "123"), (equal, "b"), (delete, "456"), (equal, "c")],
        self.dmp.diff_main("a123b456c", "abc", False))

    # Perform a real diff.
    # Switch off the timeout.
    self.dmp.Diff_Timeout = 0
    # Simple cases.
    self.assertEqual(
        [(delete, "a"), (insert, "b")],
        self.dmp.diff_main("a", "b", False))

    self.assertEqual(
        [(delete, "Apple"), (insert, "Banana"), (equal, "s are a"), (insert, "lso"), (equal, " fruit.")],
        self.dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", False))

    self.assertEqual(
        [(delete, "a"), (insert, "\u0680"), (equal, "x"), (delete, "\t"), (insert, "\x00")],
        self.dmp.diff_main("ax\t", "\u0680x\x00", False))

    # Overlaps.
    self.assertEqual(
        [(delete, "1"), (equal, "a"), (delete, "y"), (equal, "b"), (delete, "2"), (insert, "xab")],
        self.dmp.diff_main("1ayb2", "abxab", False))

    self.assertEqual(
        [(insert, "xaxcx"), (equal, "abc"), (delete, "y")],
        self.dmp.diff_main("abcy", "xaxcxabc", False))

    self.assertEqual(
        [
            (delete, "ABCD"), (equal, "a"), (delete, "="), (insert, "-"),
            (equal, "bcd"), (delete, "="), (insert, "-"),
            (equal, "efghijklmnopqrs"), (delete, "EFGHIJKLMNOefg"),
        ],
        self.dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", False))

    # Large equality.
    self.assertEqual(
        [(insert, " "), (equal, "a"), (insert, "nd"), (equal, " [[Pennsylvania]]"), (delete, " and [[New")],
        self.dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", False))

    # Timeout.
    self.dmp.Diff_Timeout = 0.1  # 100ms
    a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
    b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"
    # Increase the text lengths by 1024 times to ensure a timeout
    # (equivalent to doubling each text ten times).
    a *= 1024
    b *= 1024
    startTime = time.time()
    self.dmp.diff_main(a, b)
    elapsed = time.time() - startTime
    # Test that we took at least the timeout period.
    self.assertLessEqual(self.dmp.Diff_Timeout, elapsed)
    # Test that we didn't take forever (be forgiving).
    # Theoretically this test could fail very occasionally if the
    # OS task swaps or locks up for a second at the wrong moment.
    self.assertGreater(self.dmp.Diff_Timeout * 2, elapsed)
    self.dmp.Diff_Timeout = 0

    # Test the linemode speedup.
    # Must be long to pass the 100 char cutoff.
    # Simple line-mode.
    a = "1234567890\n" * 13
    b = "abcdefghij\n" * 13
    self.assertEqual(self.dmp.diff_main(a, b, False), self.dmp.diff_main(a, b, True))

    # Single line-mode.
    a = "1234567890" * 13
    b = "abcdefghij" * 13
    self.assertEqual(self.dmp.diff_main(a, b, False), self.dmp.diff_main(a, b, True))

    # Overlap line-mode.
    # The two modes may produce different diffs here, so only the texts
    # rebuilt from each diff are compared.
    a = "1234567890\n" * 13
    b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"
    texts_linemode = self.diff_rebuildtexts(self.dmp.diff_main(a, b, True))
    texts_textmode = self.diff_rebuildtexts(self.dmp.diff_main(a, b, False))
    self.assertEqual(texts_textmode, texts_linemode)

    # Test null inputs: a ValueError is expected.
    with self.assertRaises(ValueError):
        self.dmp.diff_main(None, None)
class PatchTest(DiffMatchPatchTest):
    """PATCH TEST FUNCTIONS"""

    # Note on the expected patch texts below: every body line of a patch is
    # a one-character operation prefix (' ', '-' or '+') followed by the
    # text.  An equality whose text itself begins with a space therefore
    # shows up with TWO leading spaces (e.g. "  over ").  The hunk header
    # lengths only add up when those spaces are present.

    def testPatchObj(self):
        # Patch Object.
        p = dmp_module.patch_obj()
        p.start1 = 20
        p.start2 = 21
        p.length1 = 18
        p.length2 = 17
        p.diffs = [(self.dmp.DIFF_EQUAL, "jump"), (self.dmp.DIFF_DELETE, "s"), (self.dmp.DIFF_INSERT, "ed"), (self.dmp.DIFF_EQUAL, " over "), (self.dmp.DIFF_DELETE, "the"), (self.dmp.DIFF_INSERT, "a"), (self.dmp.DIFF_EQUAL, "\nlaz")]
        strp = str(p)
        self.assertEqual("@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n %0Alaz\n", strp)

    def testPatchFromText(self):
        # Parsing an empty string yields no patches.
        self.assertEqual([], self.dmp.patch_fromText(""))

        # Each text must survive a parse / str() round trip unchanged.
        round_trip_texts = (
            "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n %0Alaz\n",
            "@@ -1 +1 @@\n-a\n+b\n",
            "@@ -1,3 +0,0 @@\n-abc\n",
            "@@ -0,0 +1,3 @@\n+abc\n",
        )
        for strp in round_trip_texts:
            self.assertEqual(strp, str(self.dmp.patch_fromText(strp)[0]))

        # Generates error.
        with self.assertRaises(ValueError):
            self.dmp.patch_fromText("Bad\nPatch\n")

    def testPatchToText(self):
        # patch_toText must reproduce the text that patch_fromText parsed,
        # for a single patch and for two consecutive patches.
        strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n  laz\n"
        p = self.dmp.patch_fromText(strp)
        self.assertEqual(strp, self.dmp.patch_toText(p))

        strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n  tes\n"
        p = self.dmp.patch_fromText(strp)
        self.assertEqual(strp, self.dmp.patch_toText(p))

    def testPatchAddContext(self):
        self.dmp.Patch_Margin = 4
        p = self.dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]
        self.dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog.")
        self.assertEqual("@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", str(p))

        # Same, but not enough trailing context.
        p = self.dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]
        self.dmp.patch_addContext(p, "The quick brown fox jumps.")
        self.assertEqual("@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", str(p))

        # Same, but not enough leading context.
        p = self.dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]
        self.dmp.patch_addContext(p, "The quick brown fox jumps.")
        self.assertEqual("@@ -1,7 +1,8 @@\n Th\n-e\n+at\n  qui\n", str(p))

        # Same, but with ambiguity.
        p = self.dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]
        self.dmp.patch_addContext(p, "The quick brown fox jumps.  The quick brown fox crashes.")
        self.assertEqual("@@ -1,27 +1,28 @@\n Th\n-e\n+at\n  quick brown fox jumps. \n", str(p))

    def testPatchMake(self):
        # Null case.
        patches = self.dmp.patch_make("", "")
        self.assertEqual("", self.dmp.patch_toText(patches))

        text1 = "The quick brown fox jumps over the lazy dog."
        text2 = "That quick brown fox jumped over a lazy dog."
        # Text2+Text1 inputs.
        expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n  qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n  over \n-a\n+the\n  laz\n"
        # The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
        patches = self.dmp.patch_make(text2, text1)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Text1+Text2 inputs.
        expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n  quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n  over \n-the\n+a\n  laz\n"
        patches = self.dmp.patch_make(text1, text2)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Diff input.
        diffs = self.dmp.diff_main(text1, text2, False)
        patches = self.dmp.patch_make(diffs)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Text1+Diff inputs.
        patches = self.dmp.patch_make(text1, diffs)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Text1+Text2+Diff inputs (deprecated).
        patches = self.dmp.patch_make(text1, text2, diffs)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Character encoding.
        patches = self.dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?")
        self.assertEqual("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", self.dmp.patch_toText(patches))

        # Character decoding.
        diffs = [(self.dmp.DIFF_DELETE, "`1234567890-=[]\\;',./"), (self.dmp.DIFF_INSERT, "~!@#$%^&*()_+{}|:\"<>?")]
        self.assertEqual(diffs, self.dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0].diffs)

        # Long string with repeats.
        text1 = "abcdef" * 100
        text2 = text1 + "123"
        expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"
        patches = self.dmp.patch_make(text1, text2)
        self.assertEqual(expectedPatch, self.dmp.patch_toText(patches))

        # Test null inputs: a ValueError is expected.
        with self.assertRaises(ValueError):
            self.dmp.patch_make(None, None)

    def testPatchSplitMax(self):
        # Assumes that Match_MaxBits is 32.
        patches = self.dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0")
        self.dmp.patch_splitMax(patches)
        self.assertEqual("@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", self.dmp.patch_toText(patches))

        # A patch that does not need splitting must come back unchanged.
        patches = self.dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz")
        oldToText = self.dmp.patch_toText(patches)
        self.dmp.patch_splitMax(patches)
        self.assertEqual(oldToText, self.dmp.patch_toText(patches))

        patches = self.dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc")
        self.dmp.patch_splitMax(patches)
        self.assertEqual("@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", self.dmp.patch_toText(patches))

        patches = self.dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1")
        self.dmp.patch_splitMax(patches)
        self.assertEqual("@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n  , t : 1 abcdef\n", self.dmp.patch_toText(patches))

    def testPatchAddPadding(self):
        # Both edges full.
        patches = self.dmp.patch_make("", "test")
        self.assertEqual("@@ -0,0 +1,4 @@\n+test\n", self.dmp.patch_toText(patches))
        self.dmp.patch_addPadding(patches)
        self.assertEqual("@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", self.dmp.patch_toText(patches))

        # Both edges partial.
        patches = self.dmp.patch_make("XY", "XtestY")
        self.assertEqual("@@ -1,2 +1,6 @@\n X\n+test\n Y\n", self.dmp.patch_toText(patches))
        self.dmp.patch_addPadding(patches)
        self.assertEqual("@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", self.dmp.patch_toText(patches))

        # Both edges none.
        patches = self.dmp.patch_make("XXXXYYYY", "XXXXtestYYYY")
        self.assertEqual("@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", self.dmp.patch_toText(patches))
        self.dmp.patch_addPadding(patches)
        self.assertEqual("@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", self.dmp.patch_toText(patches))

    def testPatchApply(self):
        self.dmp.Match_Distance = 1000
        self.dmp.Match_Threshold = 0.5
        self.dmp.Patch_DeleteThreshold = 0.5
        # Null case.
        patches = self.dmp.patch_make("", "")
        results = self.dmp.patch_apply(patches, "Hello world.")
        self.assertEqual(("Hello world.", []), results)

        # Exact match.
        patches = self.dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog.")
        results = self.dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog.")
        self.assertEqual(("That quick brown fox jumped over a lazy dog.", [True, True]), results)

        # Partial match.
        results = self.dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger.")
        self.assertEqual(("That quick red rabbit jumped over a tired tiger.", [True, True]), results)

        # Failed match.
        results = self.dmp.patch_apply(patches, "I am the very model of a modern major general.")
        self.assertEqual(("I am the very model of a modern major general.", [False, False]), results)

        # Big delete, small change.
        patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy")
        results = self.dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y")
        self.assertEqual(("xabcy", [True, True]), results)

        # Big delete, big change 1.
        patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy")
        results = self.dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y")
        self.assertEqual(("xabc12345678901234567890---------------++++++++++---------------12345678901234567890y", [False, True]), results)

        # Big delete, big change 2.
        self.dmp.Patch_DeleteThreshold = 0.6
        patches = self.dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy")
        results = self.dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y")
        self.assertEqual(("xabcy", [True, True]), results)
        self.dmp.Patch_DeleteThreshold = 0.5

        # Compensate for failed patch.
        self.dmp.Match_Threshold = 0.0
        self.dmp.Match_Distance = 0
        patches = self.dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890")
        results = self.dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890")
        self.assertEqual(("ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890", [False, True]), results)
        self.dmp.Match_Threshold = 0.5
        self.dmp.Match_Distance = 1000

        # No side effects: applying the patches must not alter them.
        patches = self.dmp.patch_make("", "test")
        patchstr = self.dmp.patch_toText(patches)
        self.dmp.patch_apply(patches, "")
        self.assertEqual(patchstr, self.dmp.patch_toText(patches))

        # No side effects with major delete.
        patches = self.dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof")
        patchstr = self.dmp.patch_toText(patches)
        self.dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog.")
        self.assertEqual(patchstr, self.dmp.patch_toText(patches))

        # Edge exact match.
        # The return value must be captured here; otherwise the assertion
        # would inspect a stale `results` left over from an earlier case.
        patches = self.dmp.patch_make("", "test")
        results = self.dmp.patch_apply(patches, "")
        self.assertEqual(("test", [True]), results)

        # Near edge exact match.
        patches = self.dmp.patch_make("XY", "XtestY")
        results = self.dmp.patch_apply(patches, "XY")
        self.assertEqual(("XtestY", [True]), results)

        # Edge partial match.
        patches = self.dmp.patch_make("y", "y123")
        results = self.dmp.patch_apply(patches, "x")
        self.assertEqual(("x123", [True]), results)