Skip to content

Commit 2852e19

Browse files
committed
Add GitHub Pages HTML report with interactive tabs
- Add HTML generation option to unified_analysis.py (--html flag)
- Create tabbed interface for all analysis sections
- Add CSS/JS files in docs/ folder for styling and functionality
- Generate docs/index.html compatible with GitHub Pages
- Preserve all explanatory text and data from Markdown version
1 parent ad03608 commit 2852e19

File tree

5 files changed

+912
-21
lines changed

5 files changed

+912
-21
lines changed

Graph Analysis/unified_analysis.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,238 @@ def ensure_iterable_records(data: Any) -> List[Any]:
338338
return []
339339

340340

341+
# ---------------- HTML Report Writer ----------------
342+
343+
def write_html_report(
    output_file: str,
    timestamp: str,
    summary: Dict[str, Any],
    attend_deg: Tuple[Dict[str, int], Counter],
    attend_top: List[Tuple[str, int]],
    attend_dist: List[Tuple[int, int]],
    field_deg: Tuple[Dict[str, int], Counter],
    field_top: List[Tuple[str, int]],
    field_dist: List[Tuple[int, int]],
    path_info: Dict[str, Any],
    parent_top: List[Tuple[str, int]],
    centrality: Dict[str, Dict[str, float]],
    clustering: Tuple[float, List[Tuple[str, float]]],
    components: Dict[str, Any],
) -> None:
    """Render the analysis results as a tabbed HTML page and write it to disk.

    The page references ``style.css`` and ``script.js`` by relative URL and
    calls a JavaScript function ``showTab(id)`` from the tab buttons; those
    assets are expected to sit next to ``output_file``.

    Every value taken from the arguments is HTML-escaped before it is placed
    in the markup, and the whole page is rendered in memory before the file
    is opened, so a failure while rendering does not leave a truncated file.

    Args:
        output_file: Destination path. Its parent directory is created when
            the path has one; a bare filename is written to the current
            working directory.
        timestamp: Text shown after "Generated on:".
        summary: Label -> value pairs listed on the Summary tab.
        attend_deg: Accepted for interface compatibility; not used here.
        attend_top: ``(node, degree)`` rows for the co-attendance top table.
        attend_dist: ``(degree, count)`` rows for the co-attendance
            distribution table.
        field_deg: Accepted for interface compatibility; not used here.
        field_top: ``(field, degree)`` rows for the field top table.
        field_dist: ``(degree, count)`` rows for the field distribution table.
        path_info: Must provide ``total_paths``, ``max_depth``, ``avg_depth``
            and ``deepest_paths`` (only the first 10 paths are shown).
        parent_top: ``(parent_path, count)`` rows.
        centrality: Must provide a ``degree`` mapping; ``betweenness``,
            ``closeness`` and ``eigenvector`` mappings are read for each of
            the 10 fields with the highest degree score.
        clustering: ``(average_coefficient, [(node, coefficient), ...])``.
        components: Must provide ``component_count``, ``component_sizes`` and
            ``largest_component_sample`` (first 10 entries of each list are
            shown).

    Raises:
        KeyError: If a required key is missing from ``path_info``,
            ``centrality`` or ``components``.
        OSError: If the directory or file cannot be created.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from html import escape

    pad = " " * 16  # indentation of markup nested inside a tab pane

    def esc(value: Any) -> str:
        """Return *value* as text that is safe to embed in HTML."""
        return escape(str(value))

    def intro(level: int, title: str, explanation: str = "") -> List[str]:
        """Build a heading line plus an optional explanation paragraph."""
        lines = [f"{pad}<h{level}>{title}</h{level}>"]
        if explanation:
            lines.append(f'{pad}<p class="explanation">{explanation}</p>')
        return lines

    def bullets(css_class: str, items: List[str]) -> List[str]:
        """Build a <ul> whose items are already-escaped HTML fragments."""
        lines = [f'{pad}<ul class="{css_class}">']
        lines.extend(f"{pad}    <li>{item}</li>" for item in items)
        lines.append(f"{pad}</ul>")
        return lines

    def table(headers: List[str], rows: List[List[str]]) -> List[str]:
        """Build a <table>; every cell is an already-escaped HTML fragment."""
        head_cells = "".join(f"<th>{header}</th>" for header in headers)
        lines = [
            f"{pad}<table>",
            f"{pad}    <thead>",
            f"{pad}        <tr>{head_cells}</tr>",
            f"{pad}    </thead>",
            f"{pad}    <tbody>",
        ]
        for row in rows:
            cells = "".join(f"<td>{cell}</td>" for cell in row)
            lines.append(f"{pad}        <tr>{cells}</tr>")
        lines.append(f"{pad}    </tbody>")
        lines.append(f"{pad}</table>")
        return lines

    def pane(
        pane_id: str, comment: str, body: List[str], active: bool = False
    ) -> List[str]:
        """Wrap *body* in the <div> that script.js shows or hides per tab."""
        css = "tab-pane active" if active else "tab-pane"
        return (
            [
                f"            <!-- {comment} -->",
                f'            <div id="{pane_id}" class="{css}">',
            ]
            + body
            + ["            </div>", ""]
        )

    def ranked(pairs: List[Tuple[str, int]]) -> List[List[str]]:
        """Rows of (rank, truncated label, degree) for the top-N tables."""
        # Truncate first, then escape, so an entity is never cut in half.
        return [
            [str(rank), esc(_truncate_label(node, 80)), esc(degree)]
            for rank, (node, degree) in enumerate(pairs, 1)
        ]

    def distribution(pairs: List[Tuple[int, int]]) -> List[List[str]]:
        """Rows of (degree, count) for the distribution tables."""
        return [[esc(degree), esc(count)] for degree, count in pairs]

    tabs = [
        ("summary", "Summary"),
        ("coattendance", "Co-attendance Degree"),
        ("field-degree", "Field Degree"),
        ("path-structure", "Path Structure"),
        ("centrality", "Centrality"),
        ("clustering", "Clustering"),
        ("components", "Components"),
    ]

    avg_clust, top_clust_nodes = clustering
    degree_scores = centrality["degree"]
    top_fields = sorted(degree_scores, key=degree_scores.get, reverse=True)[:10]
    centrality_metrics = ("degree", "betweenness", "closeness", "eigenvector")

    page: List[str] = [
        "<!DOCTYPE html>",
        '<html lang="en">',
        "<head>",
        '    <meta charset="UTF-8">',
        '    <meta name="viewport" content="width=device-width, initial-scale=1.0">',
        "    <title>Unified Graph Analysis Report</title>",
        '    <link rel="stylesheet" href="style.css">',
        "</head>",
        "<body>",
        '    <div class="container">',
        "        <header>",
        "            <h1>Unified Graph Analysis Report</h1>",
        '            <p class="timestamp">Generated on: <strong>'
        + esc(timestamp)
        + "</strong></p>",
        "        </header>",
        "",
        '        <div class="tabs">',
    ]
    for position, (tab_id, title) in enumerate(tabs):
        # Only the first tab starts out selected.
        css = "tab-button active" if position == 0 else "tab-button"
        page.append(
            f"""            <button class="{css}" onclick="showTab('{tab_id}')">{title}</button>"""
        )
    page += ["        </div>", "", '        <div class="tab-content">']

    page += pane(
        "summary",
        "Summary Tab",
        intro(
            2,
            "Summary",
            "These are high-level counts of nodes/edges for each graph "
            "constructed during analysis.",
        )
        + bullets(
            "summary-list",
            [
                f"<strong>{esc(key)}:</strong> {esc(value)}"
                for key, value in summary.items()
            ],
        ),
        active=True,
    )

    page += pane(
        "coattendance",
        "Co-attendance Degree Tab",
        intro(
            2,
            "Degree (Co-attendance) Analysis",
            "People are connected if they attend the same meeting; a person's "
            "degree is how many unique people they co-attended with.",
        )
        + intro(
            3,
            "Top Nodes by Degree",
            "These are the people connected to the most unique others across "
            "meetings.",
        )
        + table(["Rank", "Node", "Degree"], ranked(attend_top))
        + intro(
            3,
            "Degree Distribution",
            "How many people fall into each degree (number of unique "
            "co-attendees) bucket.",
        )
        + table(["Degree", "Count of Nodes"], distribution(attend_dist)),
    )

    page += pane(
        "field-degree",
        "Field Degree Tab",
        intro(
            2,
            "JSON Field Degree Analysis",
            "Fields are connected when they appear together inside the same "
            "JSON object; a field's degree is the number of distinct fields "
            "it co-occurs with.",
        )
        + intro(
            3,
            "Top Fields by Degree",
            "These fields co-occur with the largest variety of other fields.",
        )
        + table(["Rank", "Field", "Degree"], ranked(field_top))
        + intro(
            3,
            "Degree Distribution",
            "How many fields have each degree (number of distinct "
            "co-occurring fields).",
        )
        + table(["Degree", "Count of Fields"], distribution(field_dist)),
    )

    page += pane(
        "path-structure",
        "Path Structure Tab",
        intro(
            2,
            "JSON Path Structure Analysis",
            "Each JSON path represents a unique nested route (keys/array "
            "indices); depth shows how deeply information is nested.",
        )
        + bullets(
            "summary-list",
            [
                f"<strong>Total Unique Paths:</strong> {esc(path_info['total_paths'])}",
                f"<strong>Maximum Depth:</strong> {esc(path_info['max_depth'])}",
                f"<strong>Average Depth:</strong> {path_info['avg_depth']:.2f}",
            ],
        )
        + intro(
            3,
            "Deepest JSON Paths (sample)",
            "The deepest examples indicate where the data structure is most "
            "nested.",
        )
        + bullets(
            "path-list",
            [f"<code>{esc(path)}</code>" for path in path_info["deepest_paths"][:10]],
        )
        + intro(
            3,
            "Most Common Parent Paths",
            "Parents that appear most often, suggesting common structural hubs.",
        )
        + table(
            ["Rank", "Parent Path", "Count"],
            [
                [str(rank), f"<code>{esc(parent)}</code>", esc(count)]
                for rank, (parent, count) in enumerate(parent_top, 1)
            ],
        ),
    )

    page += pane(
        "centrality",
        "Centrality Tab",
        intro(
            2,
            "Field Centrality (Co-occurrence)",
            "Centrality scores highlight fields that are well-connected "
            "(degree), act as bridges (betweenness), are close to others "
            "(closeness), or connect to other influential fields "
            "(eigenvector).",
        )
        + table(
            ["Rank", "Field", "Degree", "Betweenness", "Closeness", "Eigenvector"],
            [
                [str(rank), esc(node)]
                + [f"{centrality[name].get(node, 0):.3f}" for name in centrality_metrics]
                for rank, node in enumerate(top_fields, 1)
            ],
        ),
    )

    page += pane(
        "clustering",
        "Clustering Tab",
        intro(
            2,
            "Clustering (Field Co-occurrence Graph)",
            "Clustering measures how tightly a field's neighbors are "
            "connected to each other (higher means more triads).",
        )
        + [
            f"{pad}<p><strong>Average Clustering Coefficient:</strong> "
            f"{avg_clust:.3f}</p>"
        ]
        + intro(
            3,
            "Top Nodes by Clustering Coefficient",
            "Fields whose immediate neighborhoods are most tightly interlinked.",
        )
        + table(
            ["Rank", "Field", "Clustering"],
            [
                [str(rank), esc(node), f"{value:.3f}"]
                for rank, (node, value) in enumerate(top_clust_nodes, 1)
            ],
        ),
    )

    page += pane(
        "components",
        "Connected Components Tab",
        intro(
            2,
            "Connected Components (Field Co-occurrence Graph)",
            "Components are groups of fields that are all reachable from each "
            "other; multiple components suggest separate substructures.",
        )
        + bullets(
            "summary-list",
            [
                "<strong>Number of Components:</strong> "
                + esc(components["component_count"]),
                "<strong>Component Sizes (top 10):</strong> "
                + esc(components["component_sizes"][:10]),
            ],
        )
        + intro(3, "Sample of Largest Component Nodes (top 10)")
        + bullets(
            "component-list",
            [esc(node) for node in components["largest_component_sample"][:10]],
        ),
    )

    page += [
        "        </div>",
        "    </div>",
        "",
        '    <script src="script.js"></script>',
        "</body>",
        "</html>",
    ]

    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(page) + "\n")
571+
572+
341573
def main() -> None:
342574
parser = argparse.ArgumentParser(description="Unified Graph Analysis")
343575
parser.add_argument(
@@ -356,6 +588,16 @@ def main() -> None:
356588
default=10,
357589
help="Top-N rows to include in tables",
358590
)
591+
parser.add_argument(
592+
"--html",
593+
action="store_true",
594+
help="Generate HTML report in addition to Markdown",
595+
)
596+
parser.add_argument(
597+
"--html-output",
598+
default="docs/index.html",
599+
help="HTML report output path",
600+
)
359601
args = parser.parse_args()
360602

361603
data = load_json(args.input)
@@ -414,6 +656,26 @@ def main() -> None:
414656
)
415657
print(f"✅ Unified report written to: {args.output}")
416658

659+
if args.html:
660+
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
661+
write_html_report(
662+
output_file=args.html_output,
663+
timestamp=timestamp,
664+
summary=summary,
665+
attend_deg=(attend_deg_dict, attend_deg_counts),
666+
attend_top=attend_top,
667+
attend_dist=attend_dist,
668+
field_deg=(fdeg_dict, fdeg_counts),
669+
field_top=field_top,
670+
field_dist=field_dist,
671+
path_info=pmetrics,
672+
parent_top=parent_top,
673+
centrality=centrality,
674+
clustering=(avg_clust, top_clust_nodes),
675+
components=components,
676+
)
677+
print(f"✅ HTML report written to: {args.html_output}")
678+
417679

418680
if __name__ == "__main__":
419681
main()

0 commit comments

Comments
 (0)