From c5c6f0dcc872f9662047f14f86dcc8c7ac1bd9c5 Mon Sep 17 00:00:00 2001 From: Nellie McKesson Date: Sat, 25 Feb 2023 17:37:46 -0800 Subject: [PATCH] fixes for links --- jekyll-assets/css/style.css | 1 - scripts/doxygen_json_mappings/h1.json | 29 +-------- scripts/transform_doxygen_html.py | 89 ++++++++++++++++++++------- 3 files changed, 71 insertions(+), 48 deletions(-) diff --git a/jekyll-assets/css/style.css b/jekyll-assets/css/style.css index fd42c94cd4..902acb82d1 100644 --- a/jekyll-assets/css/style.css +++ b/jekyll-assets/css/style.css @@ -615,7 +615,6 @@ td.paramname { ul.memberdecls { list-style-type: none; padding-left: 0; - border-bottom: 1px solid black; } ul.memberdecls li.memitem { diff --git a/scripts/doxygen_json_mappings/h1.json b/scripts/doxygen_json_mappings/h1.json index 549fa73d93..6453e2f316 100644 --- a/scripts/doxygen_json_mappings/h1.json +++ b/scripts/doxygen_json_mappings/h1.json @@ -6,37 +6,14 @@ "attributes": [], "parents": [], "children": [], - "child_mappings": [ - { - "input": { - "description": "Link anchor", - "element": "a", - "attributes": [ - { - "name": "class", - "value": ["anchor"] - } - ], - "parents": [], - "children": [] - }, - "output": { - "tree": [] - } - } - ] + "child_mappings": [] }, "output": { "tree": [ { "position": 0, - "element": "p", - "attributes": [ - { - "name": "class", - "value": ["adoc-h2"] - } - ], + "element": "h2", + "attributes": [], "children": [] } ] diff --git a/scripts/transform_doxygen_html.py b/scripts/transform_doxygen_html.py index 3abccff248..72e8a5a2f7 100755 --- a/scripts/transform_doxygen_html.py +++ b/scripts/transform_doxygen_html.py @@ -7,6 +7,7 @@ import random import string import copy +import hashlib from lxml import etree @@ -33,12 +34,22 @@ def write_output(filepath, content): f.close() return -def add_ids(root): +def make_hash(string): + hash_object = hashlib.sha1(bytes(string, 'utf-8')) + new_hash = hash_object.hexdigest() + if len(new_hash) > 20: + new_hash = new_hash[:20] + return new_hash + +def add_ids(root, html_file): els = root.xpath(".//body//*[not(@id)]") + counter = 0 for el in els: - newid = ''.join([random.choice(string.ascii_letters + string.digits) for n in range(8)]) - newid = "p" + newid + hash_string = str(counter)+html_file+''.join(get_all_text(el)) + newid = make_hash(hash_string) + newid = "rpip" + newid el.set("id", newid) + counter += 1 return root def strip_attribute(att, root): @@ -212,6 +223,27 @@ def transform_element(item, root, is_child=False): print("ERROR: ", e, exc_tb.tb_lineno) return root +def fix_duplicate_ids(root, html_file): + try: + existing = [] + matches = root.xpath(".//*[contains(@id, 'rpip')]") + counter = 0 + for match in matches: + myid = match.get("id") + if myid in existing: + id_string = str(counter)+html_file+''.join(get_all_text(match)) + newid = make_hash(id_string) + newid = "rpip"+newid + match.set("id", newid) + existing.append(newid) + counter += 1 + else: + existing.append(myid) + except Exception as e: + exc_type, exc_obj, exc_tb = sys.exc_info() + print("ERROR: ", e, exc_tb.tb_lineno) + return root + def fix_internal_links(root, html_file, updated_links): try: # first let's make sure internal links are all unique @@ -220,7 +252,9 @@ def fix_internal_links(root, html_file, updated_links): match = matches[0] href = match.get("href") if re.match("^#", href) is not None and len(href) < 30: - newid = ''.join([random.choice(string.ascii_letters + string.digits) for n in range(30)]) + # make a new hash string + hash_string = html_file+''.join(get_all_text(match))+match.get("href") + newid = make_hash(hash_string) newid = "ga" + newid updated_links[html_file+href] = html_file+"#"+newid match.set("href", "#"+newid) @@ -413,24 +447,36 @@ def get_document_title(root): print("ERROR: ", e, exc_tb.tb_lineno) return title_text +def retag_heading(head, headtype): + try: + text = ''.join(get_all_text(head)) + newel = etree.Element("p") + newel.set("class", "adoc-"+headtype) + anchors = head.xpath("./a[@class='anchor' and @id]") + if len(anchors) > 0: + anchor = anchors[0] + else: + anchor = None + if anchor is not None and anchor.text is None: + newel.set("id", anchor.get("id")) + else: + newel.set("id", head.get("id")) + newel.text = text + head.addnext(newel) + head.getparent().remove(head) + except Exception as e: + exc_type, exc_obj, exc_tb = sys.exc_info() + print("ERROR: ", e, exc_tb.tb_lineno) + return + def prep_for_adoc(root): try: - h2s = root.findall(".//div[@class='contents']/h2") + h2s = root.xpath(".//div[@class='contents']/h2|.//div[@class='contents']/div[@class='textblock']/h2") for head in h2s: - text = ''.join(get_all_text(head)) - newel = etree.Element("p") - newel.set("class", "adoc-h2") - newel.text = text - head.addnext(newel) - head.getparent().remove(head) - h3s = root.findall(".//div[@class='contents']/h3") + retag_heading(head, "h2") + h3s = root.xpath(".//div[@class='contents']/h3|.//div[@class='contents']/div[@class='textblock']/h3") for head in h3s: - text = ''.join(get_all_text(head)) - newel = etree.Element("p") - newel.set("class", "adoc-h3") - newel.text = text - head.addnext(newel) - head.getparent().remove(head) + retag_heading(head, "h3") except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() print("ERROR: ", e, exc_tb.tb_lineno) @@ -442,8 +488,8 @@ def make_adoc(root_string, title_text, filename): root_string = re.sub("<\/div>\s*?$", "", root_string, flags=re.S) root_string = re.sub('
', "", root_string) root_string = "[#"+my_id+"]\n== " + title_text + "\n\n++++\n" + root_string - root_string = re.sub('(]+class="adoc-h2"[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n=== \\2\n\n++++\n', root_string, flags=re.S) - root_string = re.sub('(]+class="adoc-h3"[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n==== \\2\n\n++++\n', root_string, flags=re.S) + root_string = re.sub('(]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[#\\2]\n=== \\4\n\n++++\n', root_string, flags=re.S) + root_string = re.sub('(]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[#\\2]\n==== \\4\n\n++++\n', root_string, flags=re.S) root_string = root_string + "\n++++\n" except Exception as e: exc_type, exc_obj, exc_tb = sys.exc_info() @@ -642,7 +688,7 @@ def handler(html_path, output_path, header_path, doxyfile_path, site_config_path root = etree.HTML(html_content) # give everything an id - root = add_ids(root) + root = add_ids(root, html_file) # loop over each json file skip = ["table_memname.json"] for mapping in complete_json_mappings: @@ -661,6 +707,7 @@ def handler(html_path, output_path, header_path, doxyfile_path, site_config_path root = prep_for_adoc(root) # fix some heading levels root = fix_heading_levels(root) + root = fix_duplicate_ids(root, html_file) # cleanup root = strip_attribute("data-processed", root) # get the document title