forked from raspberrypi/documentation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: create_nav.py
executable file
·169 lines (155 loc) · 8.47 KB
/
create_nav.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
import sys
import os
import json
import re
def change_file_ext(filename, extension):
    """Return *filename* with its final extension replaced by *extension* (given without a leading dot)."""
    stem, _old_ext = os.path.splitext(filename)
    return '{}.{}'.format(stem, extension)
def strip_adoc(heading):
    """Strip inline AsciiDoc markup from a heading: backticks are removed,
    and word-bounded _emphasis_/*bold* pairs are unwrapped to their text."""
    without_code = heading.replace('`', '')
    return re.sub(r'\b(_|\*)(.+?)\1\b', r'\2', without_code)
# Per-file record of anchors already handed out, used to suffix duplicates.
file_headings = dict()
def heading_to_anchor(filepath, heading, anchor):
    """Return a unique anchor for *heading* within *filepath*.

    When *anchor* is None, derive one from the heading text the same way
    AsciiDoc does. A duplicate anchor within the same file gets a numeric
    suffix, starting at "-2".
    """
    if anchor is None:
        # The replace(' -- ', '') is needed because AsciiDoc transforms ' -- ' to ' — '
        # (narrow-space, em-dash, narrow-space) which then collapses down to '' when
        # calculating the anchor.
        slug = heading.lower().replace(' -- ', '').replace(' ', '-').replace('.', '-')
        anchor = re.sub(r'\-+', '-', re.sub(r'[^-\w]', '', slug))
    used = file_headings.setdefault(filepath, set())
    candidate = anchor
    suffix = 1  # this isn't a logic bug, the first duplicate anchor gets suffixed with "-2"
    while candidate in used:
        suffix += 1
        candidate = '{}-{}'.format(anchor, suffix)
    used.add(candidate)
    return candidate
# Maps each source file path to the list of internal links found in it.
needed_internal_links = dict()
def collect_xref_internal_inks(line, filepath, output_dir, adoc_dir):
    """Record every xref:target[...] link found on *line* under
    needed_internal_links[filepath].

    Each recorded entry is {'url': <path relative to adoc_dir>} with an
    optional 'anchor' key when the xref carries a #fragment. Raises if an
    xref points at anything other than a .adoc file.
    """
    for match in re.finditer(r'xref:(.+?)(?:#(.+?))?\[.*?\]', line):
        target = match.group(1)
        fragment = match.group(2)
        if not target.endswith('.adoc'):
            raise Exception("{} links to non-adoc file {}".format(filepath, target))
        resolved = os.path.normpath(os.path.join(output_dir, target))
        linkinfo = {'url': os.path.relpath(resolved, adoc_dir)}
        if fragment:
            linkinfo['anchor'] = fragment
        needed_internal_links[filepath].append(linkinfo)
def collect_simple_internal_links(line, filepath, mainfile, output_dir, adoc_dir):
    """Record every same-document '<<anchor,text>>' link found on *line*
    under needed_internal_links[filepath].

    The link target is always *mainfile* (the top-level document) expressed
    relative to *adoc_dir*; only the anchor varies per match. *output_dir*
    is accepted for signature parallelism with the xref collector but is
    unused here.

    Fix: removed the dead local `link_relpath` (it was computed and never
    used), and hoisted the match-invariant `link_path` computation out of
    the loop.
    """
    # looking for links like this: <<overlay_prefix,overlay_prefix>>
    # NOTE(review): re.sub treats adoc_dir as a regex pattern; fine for plain
    # directory names, but would misbehave if the path contained metachars.
    link_path = re.sub(adoc_dir, "", mainfile)
    link_path = re.sub("^/", "", link_path)
    link_path = os.path.normpath(link_path)
    for m in re.finditer(r'<<(.+?),(.+?)>>', line):
        anchor = m.group(1)
        linkinfo = {'url': link_path}
        if anchor:
            linkinfo['anchor'] = anchor
        needed_internal_links[filepath].append(linkinfo)
def collect_all_internal_links(line, filepath, mainfile, output_dir, adoc_dir):
    """Scan *line* for internal links of both supported forms — xref:...[...]
    and <<anchor,text>> — recording them in needed_internal_links."""
    collect_xref_internal_inks(line, filepath, output_dir, adoc_dir)
    collect_simple_internal_links(line, filepath, mainfile, output_dir, adoc_dir)
# The top-level (level-1) file path is threaded through as `mainfile` so that
# <<anchor,text>> links in included files resolve to the main document.
def read_file_with_includes(filepath, filelevel, mainfile, output_dir=None):
    """Recursively read an .adoc file, splicing in the contents of every
    'include::target[]' directive.

    Returns (content, filelevel): the fully expanded text, and the running
    include-nesting counter. When filelevel is 1 this call is the top-level
    document, so *mainfile* is reset to *filepath*. Every line is also fed
    to collect_all_internal_links (which uses the module-level adoc_dir set
    by the __main__ block).
    """
    if output_dir is None:
        output_dir = os.path.dirname(filepath)
    if filelevel == 1:
        mainfile = filepath
    parent_dir = os.path.dirname(filepath)
    content = ''
    with open(filepath) as adoc_fh:
        needed_internal_links.setdefault(filepath, [])
        for line in adoc_fh:
            collect_all_internal_links(line, filepath, mainfile, output_dir, adoc_dir)
            include_match = re.match(r'^include::(.*)\[\]\s*$', line)
            if include_match:
                filelevel += 1
                included, filelevel = read_file_with_includes(
                    os.path.join(parent_dir, include_match.group(1)),
                    filelevel, mainfile, output_dir)
                content += included
            else:
                content += line
    return content, filelevel
# Heading-depth bounds: only headings with min_level..max_level '='s become
# nav entries (level 2 -> sections, level 3 -> subsections in __main__).
min_level = 2 # this has to be 2
max_level = 3 # this can be 2 or 3
if __name__ == "__main__":
    # Usage: create_nav.py <index_json> <adoc_dir> <output_json>
    index_json = sys.argv[1]  # site index describing tabs and their subitems
    adoc_dir = sys.argv[2]    # root directory of the AsciiDoc sources (also read as a global by read_file_with_includes)
    output_json = sys.argv[3] # destination for the generated nav structure
    with open(index_json) as json_fh:
        data = json.load(json_fh)
    output_data = []
    # Maps "tab-path/subpath" -> set of anchors defined on that page; used for
    # duplicate detection here and for internal-link validation at the end.
    available_anchors = dict()
    for tab in data['tabs']:
        nav = []
        if 'path' in tab:
            for subitem in tab['subitems']:
                if 'subpath' in subitem:
                    fullpath = os.path.join(tab['path'], subitem['subpath'])
                    if fullpath in available_anchors:
                        raise Exception("{} occurs twice in {}".format(fullpath, index_json))
                    available_anchors[fullpath] = set()
                    nav.append({
                        'path': os.path.join('/', change_file_ext(fullpath, 'html')),
                        'title': subitem['title'],
                        'sections': [],
                    })
                    level = min_level
                    adjusted_path = re.sub("^/", "", fullpath)
                    top_level_file = os.path.join(adoc_dir, adjusted_path)
                    # Expand all include:: directives so headings in included
                    # files are scanned too (filelevel is unused hereafter).
                    adoc_content, filelevel = read_file_with_includes(top_level_file, 1, top_level_file)
                    last_line_was_discrete = False
                    header_id = None
                    for line in adoc_content.split('\n'):
                        # '[[id]]' line: remember the explicit anchor for the next heading.
                        m = re.match(r'^\[\[(.*)\]\]\s*$', line)
                        if m:
                            header_id = m.group(1)
                        else:
                            # '[attrs]' line: a 'discrete' attribute keeps the next heading out of the nav.
                            m = re.match(r'^\[(.*)\]\s*$', line)
                            if m:
                                attrs = m.group(1).split(',')
                                last_line_was_discrete = 'discrete' in attrs
                                header_id = None
                            else:
                                # '== Heading' line: the count of '='s is the heading level.
                                m = re.match(r'^(=+)\s+(.+?)\s*$', line)
                                if m:
                                    newlevel = len(m.group(1))
                                    # Need to compute anchors for *every* header (updates file_headings)
                                    heading = strip_adoc(m.group(2))
                                    anchor = heading_to_anchor(top_level_file, heading, header_id)
                                    if anchor in available_anchors[fullpath]:
                                        raise Exception("Anchor {} appears twice in {}".format(anchor, fullpath))
                                    available_anchors[fullpath].add(anchor)
                                    if min_level <= newlevel <= max_level and not last_line_was_discrete:
                                        entry = {'heading': heading, 'anchor': anchor}
                                        if newlevel > level:
                                            # Going deeper: open a subsection list on the newest section.
                                            # NOTE(review): assumes a level-2 heading precedes the first
                                            # level-3 one; otherwise sections[-1] raises IndexError — confirm
                                            # the source documents guarantee this.
                                            nav[-1]['sections'][-1]['subsections'] = []
                                        level = newlevel
                                        if level == 2:
                                            nav[-1]['sections'].append(entry)
                                        elif level == 3:
                                            nav[-1]['sections'][-1]['subsections'].append(entry)
                                # Any other (ordinary) line resets the per-heading state.
                                last_line_was_discrete = False
                                header_id = None
        elif 'from_json' in tab:
            tab_dir = os.path.join(adoc_dir, tab['directory'])
            if os.path.exists(tab_dir):
                # TODO: Need to do something here to create the appropriate nav entries for tab['from_json']
                pass
        else:
            raise Exception("Tab '{}' in '{}' has neither '{}' nor '{}'".format(tab['title'], index_json, 'path', 'from_json'))
        output_data.append({'title': tab['title'], 'path': '{}'.format(tab.get('path', tab.get('from_json'))), 'toc': nav})
    # Validate every internal link collected while reading against the anchors found above.
    for filepath in sorted(needed_internal_links):
        for linkinfo in needed_internal_links[filepath]:
            if not linkinfo['url'].startswith('pico-sdk/'): # these pages aren't created by a non-doxygen build
                adjusted_url = "/" + linkinfo['url']
                if adjusted_url not in available_anchors:
                    raise Exception("{} has an internal-link to {} but that destination doesn't exist".format(filepath, adjusted_url))
                if 'anchor' in linkinfo:
                    if linkinfo['anchor'] not in available_anchors[adjusted_url]:
                        raise Exception("{} has an internal-link to {}#{} but that anchor doesn't exist. Available anchors: {}".format(filepath, adjusted_url, linkinfo['anchor'], ', '.join(sorted(available_anchors[adjusted_url]))))
    with open(output_json, 'w') as out_fh:
        json.dump(output_data, out_fh, indent=4)