ENH: Stop printing false positive differences when logging cached nodes (#3376)

oesteban · effigies · web-flow · commit c0d450e16cbc · 2021-10-13T15:41:15.000-04:00
* enh: cast lists into tuples when printing inputs diffs

* fix: correctly deal with dictionaries, insert ellipsis for very long diffs

* sty: fix some style errors

* Apply suggestions from code review

Co-authored-by: Chris Markiewicz <effigies@gmail.com>

* enh: apply comments from review

Co-authored-by: Chris Markiewicz <effigies@gmail.com>

* TEST: Thoroughly test dict_diff()

* FIX: Correct call of indent; drop compatibility shim

Co-authored-by: Chris Markiewicz <effigies@gmail.com>
Co-authored-by: Christopher J. Markiewicz <markiewicz@stanford.edu>
diff --git a/nipype/utils/misc.py b/nipype/utils/misc.py
@@ -13,16 +13,7 @@
 
 import numpy as np
 
-try:
-    from textwrap import indent as textwrap_indent
-except ImportError:
-
-    def textwrap_indent(text, prefix):
-        """A textwrap.indent replacement for Python < 3.3"""
-        if not prefix:
-            return text
-        splittext = text.splitlines(True)
-        return prefix + prefix.join(splittext)
+import textwrap
 
 
 def human_order_sorted(l):
@@ -296,12 +287,16 @@ def dict_diff(dold, dnew, indent=0):
 
     typical use -- log difference for hashed_inputs
     """
-    # First check inputs, since they usually are lists of tuples
-    # and dicts are required.
-    if isinstance(dnew, list):
-        dnew = dict(dnew)
-    if isinstance(dold, list):
-        dold = dict(dold)
+    try:
+        dnew, dold = dict(dnew), dict(dold)
+    except Exception:
+        return textwrap.indent(
+            f"""\
+Diff between nipype inputs failed:
+* Cached inputs: {dold}
+* New inputs: {dnew}""",
+            " " * indent,
+        )
 
     # Compare against hashed_inputs
     # Keys: should rarely differ
@@ -321,26 +316,36 @@ def dict_diff(dold, dnew, indent=0):
 
     diffkeys = len(diff)
 
+    def _shorten(value):
+        if isinstance(value, str) and len(value) > 50:
+            return f"{value[:10]}...{value[-10:]}"
+        if isinstance(value, (tuple, list)) and len(value) > 10:
+            return tuple(list(value[:2]) + ["..."] + list(value[-2:]))
+        return value
+
+    def _uniformize(val):
+        if isinstance(val, dict):
+            return {k: _uniformize(v) for k, v in val.items()}
+        if isinstance(val, (list, tuple)):
+            return tuple(_uniformize(el) for el in val)
+        return val
+
     # Values in common keys would differ quite often,
     # so we need to join the messages together
     for k in new_keys.intersection(old_keys):
-        try:
-            new, old = dnew[k], dold[k]
-            same = new == old
-            if not same:
-                # Since JSON does not discriminate between lists and
-                # tuples, we might need to cast them into the same type
-                # as the last resort.  And lets try to be more generic
-                same = old.__class__(new) == old
-        except Exception:
-            same = False
-        if not same:
-            diff += ["  * %s: %r != %r" % (k, dnew[k], dold[k])]
+        # Reading from JSON produces lists, but internally we typically
+        # use tuples. At this point these dictionary values can be
+        # immutable (and therefore the preference for tuple).
+        new = _uniformize(dnew[k])
+        old = _uniformize(dold[k])
+
+        if new != old:
+            diff += ["  * %s: %r != %r" % (k, _shorten(new), _shorten(old))]
 
     if len(diff) > diffkeys:
         diff.insert(diffkeys, "Some dictionary entries had differing values:")
 
-    return textwrap_indent("\n".join(diff), " " * indent)
+    return textwrap.indent("\n".join(diff), " " * indent)
 
 
 def rgetcwd(error=True):
diff --git a/nipype/utils/tests/test_misc.py b/nipype/utils/tests/test_misc.py
@@ -6,7 +6,13 @@
 
 import pytest
 
-from nipype.utils.misc import container_to_string, str2bool, flatten, unflatten
+from nipype.utils.misc import (
+    container_to_string,
+    str2bool,
+    flatten,
+    unflatten,
+    dict_diff,
+)
 
 
 def test_cont_to_str():
@@ -95,3 +101,43 @@ def test_rgetcwd(monkeypatch, tmpdir):
     monkeypatch.delenv("PWD")
     with pytest.raises(OSError):
         rgetcwd(error=False)
+
+
+def test_dict_diff():
+    abtuple = [("a", "b")]
+    abdict = dict(abtuple)
+
+    # Unchanged
+    assert dict_diff(abdict, abdict) == ""
+    assert dict_diff(abdict, abtuple) == ""
+    assert dict_diff(abtuple, abdict) == ""
+    assert dict_diff(abtuple, abtuple) == ""
+
+    # Changed keys
+    diff = dict_diff({"a": "b"}, {"b": "a"})
+    assert "Dictionaries had differing keys" in diff
+    assert "keys not previously seen: {'b'}" in diff
+    assert "keys not presently seen: {'a'}" in diff
+
+    # Trigger recursive uniformization
+    complicated_val1 = [{"a": ["b"], "c": ("d", "e")}]
+    complicated_val2 = [{"a": ["x"], "c": ("d", "e")}]
+    uniformized_val1 = ({"a": ("b",), "c": ("d", "e")},)
+    uniformized_val2 = ({"a": ("x",), "c": ("d", "e")},)
+
+    diff = dict_diff({"a": complicated_val1}, {"a": complicated_val2})
+    assert "Some dictionary entries had differing values:" in diff
+    assert "a: {!r} != {!r}".format(uniformized_val2, uniformized_val1) in diff
+
+    # Trigger shortening
+    diff = dict_diff({"a": "b" * 60}, {"a": "c" * 70})
+    assert "Some dictionary entries had differing values:" in diff
+    assert "a: 'cccccccccc...cccccccccc' != 'bbbbbbbbbb...bbbbbbbbbb'" in diff
+
+    # Fail the dict conversion
+    diff = dict_diff({}, "not a dict")
+    assert diff == (
+        "Diff between nipype inputs failed:\n"
+        "* Cached inputs: {}\n"
+        "* New inputs: not a dict"
+    )