codrsquad · thatch · Jul 15, 2025 · Jul 11, 2025 · Jul 15, 2025 · jag250
diff --git a/markdown/ick.toml b/markdown/ick.toml
@@ -0,0 +1,5 @@
+[[rule]]
+impl = "python"
+name = "long_links"
+deps = ["tree-sitter", "tree-sitter-markdown"]
+inputs = ["*.md"]
diff --git a/markdown/long_links.py b/markdown/long_links.py
@@ -0,0 +1,71 @@
+import sys
+from pathlib import Path
+
+from tree_sitter_markdown import language, inline_language
+from tree_sitter import Parser, Language
+
+# Block-level Markdown grammar: a Language built from tree_sitter_markdown's
+# language() and a parser bound to it.
+block_language = Language(language())
+block_parser = Parser(block_language)
+
+# Queries against the block grammar; each captures every node of the named
+# type under the capture name "node".
+link_reference_def = block_language.query("(link_reference_definition) @node")
+# NOTE(review): `section` is not referenced by the code visible in this file.
+section = block_language.query("(section) @node")
+inline = block_language.query("(inline) @node")
+
+# Inline grammar: main() re-parses the text of each `inline` node with this
+# parser to find links.
+# NOTE(review): the next assignment rebinds the imported `inline_language`
+# factory function to the Language object it returns, so the factory is no
+# longer reachable under this name afterwards.
+inline_language = Language(inline_language())
+inline_parser = Parser(inline_language)
+inline_link = inline_language.query("(inline_link) @node")
+
+def node_matches(query, node):
+    """Lazily yield, for each match of *query* under *node*, the first node captured as ``node``."""
+    yield from (captures["node"][0] for _pattern, captures in query.matches(node))
+
+def child_for_type(node, typ):
+    """Return the first direct child of *node* whose ``type`` equals *typ*.
+
+    Raises:
+        IndexError: if no direct child has that type; *typ* is the exception argument.
+    """
+    found = next((child for child in node.children if child.type == typ), None)
+    if found is None:
+        raise IndexError(typ)
+    return found
+
+# Inline links whose destination is longer than this many bytes are rewritten.
+_MAX_INLINE_DESTINATION_LEN = 30
+
+def _label_key(label: bytes) -> str:
+    """Return a key under which equivalent link labels compare equal.
+
+    Surrounding brackets are dropped, runs of ASCII whitespace collapse to a
+    single space and the result is case-folded, approximating the way
+    CommonMark matches link labels.
+    """
+    inner = label[1:-1] if label.startswith(b"[") and label.endswith(b"]") else label
+    return b" ".join(inner.split()).decode("utf-8", errors="replace").casefold()
+
+def _child_text(node, typ):
+    """Return the bytes of the first child of *node* with type *typ*, or None if there is none."""
+    try:
+        return child_for_type(node, typ).text
+    except IndexError:
+        return None
+
+def main(filenames: list[str]) -> int:
+    """Rewrite long inline links in each file as collapsed reference links.
+
+    For every file, each inline link ``[text](dest)`` whose destination is
+    longer than ``_MAX_INLINE_DESTINATION_LEN`` bytes is replaced by
+    ``[text][]`` and a ``[text]: dest`` definition is appended to the end of
+    the file (keeping the link title, if any). Files are rewritten in place
+    and only when at least one link was changed.
+
+    A link is left untouched when rewriting it would be unsafe:
+
+    * it has no text or no destination;
+    * its text contains a square bracket or is blank, so it cannot be used
+      as a link label;
+    * its label is already defined (in the file or by an earlier link) with
+      a different destination or title.
+
+    Args:
+        filenames: paths of the Markdown files to process.
+
+    Returns:
+        The process exit code; always 0.
+    """
+    exit_code = 0
+    for f in filenames:
+        path = Path(f)
+        buf = path.read_bytes()
+        tree = block_parser.parse(buf)
+
+        # Label key -> (destination, title) for every definition already in
+        # the file. setdefault keeps the first definition of a label, which
+        # is the one Markdown renderers honour.
+        link_references = {}
+        for node in node_matches(link_reference_def, tree.root_node):
+            label = _child_text(node, "link_label")
+            if label is None:
+                continue
+            target = (_child_text(node, "link_destination"), _child_text(node, "link_title"))
+            link_references.setdefault(_label_key(label), target)
+
+        edits = []
+        lines_to_add = []
+        for inline_node in node_matches(inline, tree.root_node):
+            inline_tree = inline_parser.parse(inline_node.text)
+            # Offsets in inline_tree are relative to the inline node's text.
+            offset = inline_node.start_byte
+
+            for link in node_matches(inline_link, inline_tree.root_node):
+                dest = _child_text(link, "link_destination")
+                text = _child_text(link, "link_text")
+                if dest is None or text is None:
+                    continue
+                if len(dest) <= _MAX_INLINE_DESTINATION_LEN:
+                    continue
+                # Brackets (e.g. an image used as link text) are not allowed
+                # unescaped inside a link label; leave such links inline.
+                if b"[" in text or b"]" in text:
+                    continue
+                key = _label_key(text)
+                if not key:
+                    continue
+
+                target = (dest, _child_text(link, "link_title"))
+                known = link_references.get(key)
+                if known is None:
+                    link_references[key] = target
+                    # Labels match with whitespace collapsed, so the
+                    # definition can always be written on a single line.
+                    definition = b"[" + b" ".join(text.split()) + b"]: " + dest
+                    if target[1] is not None:
+                        definition += b" " + target[1]
+                    lines_to_add.append(definition)
+                elif known != target:
+                    # Same label, different target: a reference link would
+                    # resolve to the wrong URL, so keep this link inline.
+                    continue
+                edits.append((link.start_byte + offset, link.end_byte + offset, b"[" + text + b"][]"))
+
+        if edits:
+            # Apply from the end of the file backwards so that earlier
+            # offsets stay valid while the buffer changes length.
+            for start, end, replacement in sorted(edits, reverse=True):
+                buf = buf[:start] + replacement + buf[end:]
+
+            if lines_to_add:
+                # Separate the new definitions from the body by a blank line.
+                while not buf.endswith(b"\n\n"):
+                    buf += b"\n"
+                buf += b"".join(line + b"\n" for line in lines_to_add)
+
+            path.write_bytes(buf)
+
+    return exit_code
+
+# Script entry point: pass the command-line arguments to main() and use its
+# return value as the process exit status.
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/markdown/tests/long_links/ast_types/input/README.md b/markdown/tests/long_links/ast_types/input/README.md
@@ -0,0 +1 @@
+See [ast-types](https://github.com/benjamn/ast-types) (especially the [def/core.js](https://github.com/benjamn/ast-types/blob/master/def/core.js)) module for a thorough overview of the `ast` api.
diff --git a/markdown/tests/long_links/ast_types/output/README.md b/markdown/tests/long_links/ast_types/output/README.md
@@ -0,0 +1,4 @@
+See [ast-types][] (especially the [def/core.js][]) module for a thorough overview of the `ast` api.
+
+[ast-types]: https://github.com/benjamn/ast-types
+[def/core.js]: https://github.com/benjamn/ast-types/blob/master/def/core.js
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		See [ast-types](https://github.com/benjamn/ast-types) (especially the [def/core.js](https://github.com/benjamn/ast-types/blob/master/def/core.js)) module for a thorough overview of the `ast` api.