@@ -338,6 +338,238 @@ def ensure_iterable_records(data: Any) -> List[Any]:
338338 return []
339339
340340
341+ # ---------------- HTML Report Writer ----------------
342+
def write_html_report(
    output_file: str,
    timestamp: str,
    summary: Dict[str, Any],
    attend_deg: Tuple[Dict[str, int], Counter],
    attend_top: List[Tuple[str, int]],
    attend_dist: List[Tuple[int, int]],
    field_deg: Tuple[Dict[str, int], Counter],
    field_top: List[Tuple[str, int]],
    field_dist: List[Tuple[int, int]],
    path_info: Dict[str, Any],
    parent_top: List[Tuple[str, int]],
    centrality: Dict[str, Dict[str, float]],
    clustering: Tuple[float, List[Tuple[str, float]]],
    components: Dict[str, Any],
) -> None:
    """Write the tabbed HTML version of the unified graph analysis report.

    The page links to ``style.css`` and ``script.js`` by relative URL, so
    those assets must sit next to ``output_file``. The tab buttons call a
    ``showTab`` function (presumably provided by ``script.js`` -- verify).

    All data-derived text is HTML-escaped before being embedded, and the
    whole page is rendered in memory before the file is opened, so a missing
    key raises without leaving a truncated report behind.

    Args:
        output_file: Destination path. Its parent directory is created when
            the path has one; a bare filename is written to the current
            working directory.
        timestamp: Text shown after "Generated on:".
        summary: Label -> value pairs for the Summary tab.
        attend_deg: Accepted for call-site compatibility; not rendered.
        attend_top: ``(node, degree)`` rows for the co-attendance table.
        attend_dist: ``(degree, node_count)`` rows for the co-attendance
            distribution table.
        field_deg: Accepted for call-site compatibility; not rendered.
        field_top: ``(field, degree)`` rows for the field-degree table.
        field_dist: ``(degree, field_count)`` rows for the field-degree
            distribution table.
        path_info: Must contain ``total_paths``, ``max_depth``,
            ``avg_depth`` (a number) and ``deepest_paths`` (a sequence; only
            the first 10 entries are shown).
        parent_top: ``(parent_path, count)`` rows.
        centrality: Must contain ``degree``; ``betweenness``, ``closeness``
            and ``eigenvector`` are read only when ``degree`` is non-empty.
            The 10 fields with the highest degree score are listed.
        clustering: ``(average_coefficient, [(node, coefficient), ...])``.
        components: Must contain ``component_count``, ``component_sizes`` and
            ``largest_component_sample`` (first 10 entries of each sequence
            are shown).

    Raises:
        KeyError: If a required key is missing from ``path_info``,
            ``centrality`` or ``components``.
        OSError: If the directory or file cannot be created or written.
    """
    # Function-scope import keeps this block self-contained within the file.
    import html

    def esc(value: Any) -> str:
        """Return ``value`` as text that is safe to embed in HTML."""
        return html.escape(str(value))

    def table(headers: List[str], rows: List[List[str]]) -> str:
        """Render a table; header and cell strings must already be safe HTML."""
        head = "".join(f"<th>{h}</th>" for h in headers)
        body = "".join(
            "<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>\n"
            for cells in rows
        )
        return (
            "<table>\n<thead>\n"
            f"<tr>{head}</tr>\n"
            "</thead>\n<tbody>\n"
            f"{body}"
            "</tbody>\n</table>"
        )

    def bullets(css_class: str, items: List[str]) -> str:
        """Render a <ul>; item strings must already be safe HTML."""
        lis = "".join(f"<li>{item}</li>\n" for item in items)
        return f'<ul class="{css_class}">\n{lis}</ul>'

    def ranked_label_rows(pairs: List[Tuple[str, int]]) -> List[List[str]]:
        """Rank/label/value rows; labels are truncated to 80 chars, then escaped."""
        return [
            [str(rank), esc(_truncate_label(node, 80)), esc(value)]
            for rank, (node, value) in enumerate(pairs, 1)
        ]

    def pair_rows(pairs: List[Tuple[int, int]]) -> List[List[str]]:
        """Two-column rows for the degree-distribution tables."""
        return [[esc(left), esc(right)] for left, right in pairs]

    # ---- Summary ----
    summary_html = bullets(
        "summary-list",
        [f"<strong>{esc(k)}:</strong> {esc(v)}" for k, v in summary.items()],
    )

    # ---- Degree tabs ----
    attend_top_html = table(["Rank", "Node", "Degree"], ranked_label_rows(attend_top))
    attend_dist_html = table(["Degree", "Count of Nodes"], pair_rows(attend_dist))
    field_top_html = table(["Rank", "Field", "Degree"], ranked_label_rows(field_top))
    field_dist_html = table(["Degree", "Count of Fields"], pair_rows(field_dist))

    # ---- Path structure ----
    avg_depth = f"{path_info['avg_depth']:.2f}"
    path_summary_html = bullets(
        "summary-list",
        [
            f"<strong>Total Unique Paths:</strong> {esc(path_info['total_paths'])}",
            f"<strong>Maximum Depth:</strong> {esc(path_info['max_depth'])}",
            f"<strong>Average Depth:</strong> {avg_depth}",
        ],
    )
    deepest_html = bullets(
        "path-list",
        [f"<code>{esc(p)}</code>" for p in path_info["deepest_paths"][:10]],
    )
    parent_html = table(
        ["Rank", "Parent Path", "Count"],
        [
            [str(rank), f"<code>{esc(parent)}</code>", esc(cnt)]
            for rank, (parent, cnt) in enumerate(parent_top, 1)
        ],
    )

    # ---- Centrality: top 10 fields ordered by degree score ----
    degree_scores = centrality["degree"]
    top_fields = sorted(
        degree_scores, key=degree_scores.__getitem__, reverse=True
    )[:10]
    metric_names = ("degree", "betweenness", "closeness", "eigenvector")
    centrality_html = table(
        ["Rank", "Field", "Degree", "Betweenness", "Closeness", "Eigenvector"],
        [
            [str(rank), esc(node)]
            + [f"{centrality[name].get(node, 0):.3f}" for name in metric_names]
            for rank, node in enumerate(top_fields, 1)
        ],
    )

    # ---- Clustering ----
    avg_clust, top_clust_nodes = clustering
    avg_clust_text = f"{avg_clust:.3f}"
    clustering_html = table(
        ["Rank", "Field", "Clustering"],
        [
            [str(rank), esc(node), f"{val:.3f}"]
            for rank, (node, val) in enumerate(top_clust_nodes, 1)
        ],
    )

    # ---- Components ----
    component_summary_html = bullets(
        "summary-list",
        [
            f"<strong>Number of Components:</strong> {esc(components['component_count'])}",
            f"<strong>Component Sizes (top 10):</strong> {esc(components['component_sizes'][:10])}",
        ],
    )
    component_sample_html = bullets(
        "component-list",
        [esc(n) for n in components["largest_component_sample"][:10]],
    )

    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Unified Graph Analysis Report</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <div class="container">
        <header>
            <h1>Unified Graph Analysis Report</h1>
            <p class="timestamp">Generated on: <strong>{esc(timestamp)}</strong></p>
        </header>

        <div class="tabs">
            <button class="tab-button active" onclick="showTab('summary')">Summary</button>
            <button class="tab-button" onclick="showTab('coattendance')">Co-attendance Degree</button>
            <button class="tab-button" onclick="showTab('field-degree')">Field Degree</button>
            <button class="tab-button" onclick="showTab('path-structure')">Path Structure</button>
            <button class="tab-button" onclick="showTab('centrality')">Centrality</button>
            <button class="tab-button" onclick="showTab('clustering')">Clustering</button>
            <button class="tab-button" onclick="showTab('components')">Components</button>
        </div>

        <div class="tab-content">
            <!-- Summary Tab -->
            <div id="summary" class="tab-pane active">
                <h2>Summary</h2>
                <p class="explanation">These are high-level counts of nodes/edges for each graph constructed during analysis.</p>
{summary_html}
            </div>

            <!-- Co-attendance Degree Tab -->
            <div id="coattendance" class="tab-pane">
                <h2>Degree (Co-attendance) Analysis</h2>
                <p class="explanation">People are connected if they attend the same meeting; a person's degree is how many unique people they co-attended with.</p>

                <h3>Top Nodes by Degree</h3>
                <p class="explanation">These are the people connected to the most unique others across meetings.</p>
{attend_top_html}

                <h3>Degree Distribution</h3>
                <p class="explanation">How many people fall into each degree (number of unique co-attendees) bucket.</p>
{attend_dist_html}
            </div>

            <!-- Field Degree Tab -->
            <div id="field-degree" class="tab-pane">
                <h2>JSON Field Degree Analysis</h2>
                <p class="explanation">Fields are connected when they appear together inside the same JSON object; a field's degree is the number of distinct fields it co-occurs with.</p>

                <h3>Top Fields by Degree</h3>
                <p class="explanation">These fields co-occur with the largest variety of other fields.</p>
{field_top_html}

                <h3>Degree Distribution</h3>
                <p class="explanation">How many fields have each degree (number of distinct co-occurring fields).</p>
{field_dist_html}
            </div>

            <!-- Path Structure Tab -->
            <div id="path-structure" class="tab-pane">
                <h2>JSON Path Structure Analysis</h2>
                <p class="explanation">Each JSON path represents a unique nested route (keys/array indices); depth shows how deeply information is nested.</p>

{path_summary_html}

                <h3>Deepest JSON Paths (sample)</h3>
                <p class="explanation">The deepest examples indicate where the data structure is most nested.</p>
{deepest_html}

                <h3>Most Common Parent Paths</h3>
                <p class="explanation">Parents that appear most often, suggesting common structural hubs.</p>
{parent_html}
            </div>

            <!-- Centrality Tab -->
            <div id="centrality" class="tab-pane">
                <h2>Field Centrality (Co-occurrence)</h2>
                <p class="explanation">Centrality scores highlight fields that are well-connected (degree), act as bridges (betweenness), are close to others (closeness), or connect to other influential fields (eigenvector).</p>

{centrality_html}
            </div>

            <!-- Clustering Tab -->
            <div id="clustering" class="tab-pane">
                <h2>Clustering (Field Co-occurrence Graph)</h2>
                <p class="explanation">Clustering measures how tightly a field's neighbors are connected to each other (higher means more triads).</p>

                <p><strong>Average Clustering Coefficient:</strong> {avg_clust_text}</p>

                <h3>Top Nodes by Clustering Coefficient</h3>
                <p class="explanation">Fields whose immediate neighborhoods are most tightly interlinked.</p>
{clustering_html}
            </div>

            <!-- Connected Components Tab -->
            <div id="components" class="tab-pane">
                <h2>Connected Components (Field Co-occurrence Graph)</h2>
                <p class="explanation">Components are groups of fields that are all reachable from each other; multiple components suggest separate substructures.</p>

{component_summary_html}

                <h3>Sample of Largest Component Nodes (top 10)</h3>
{component_sample_html}
            </div>
        </div>
    </div>

    <script src="script.js"></script>
</body>
</html>
"""

    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when the path has one.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(page)
571+
572+
341573def main () -> None :
342574 parser = argparse .ArgumentParser (description = "Unified Graph Analysis" )
343575 parser .add_argument (
@@ -356,6 +588,16 @@ def main() -> None:
356588 default = 10 ,
357589 help = "Top-N rows to include in tables" ,
358590 )
591+ parser .add_argument (
592+ "--html" ,
593+ action = "store_true" ,
594+ help = "Generate HTML report in addition to Markdown" ,
595+ )
596+ parser .add_argument (
597+ "--html-output" ,
598+ default = "docs/index.html" ,
599+ help = "HTML report output path" ,
600+ )
359601 args = parser .parse_args ()
360602
361603 data = load_json (args .input )
@@ -414,6 +656,26 @@ def main() -> None:
414656 )
415657 print (f"✅ Unified report written to: { args .output } " )
416658
659+ if args .html :
660+ timestamp = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" )
661+ write_html_report (
662+ output_file = args .html_output ,
663+ timestamp = timestamp ,
664+ summary = summary ,
665+ attend_deg = (attend_deg_dict , attend_deg_counts ),
666+ attend_top = attend_top ,
667+ attend_dist = attend_dist ,
668+ field_deg = (fdeg_dict , fdeg_counts ),
669+ field_top = field_top ,
670+ field_dist = field_dist ,
671+ path_info = pmetrics ,
672+ parent_top = parent_top ,
673+ centrality = centrality ,
674+ clustering = (avg_clust , top_clust_nodes ),
675+ components = components ,
676+ )
677+ print (f"✅ HTML report written to: { args .html_output } " )
678+
417679
# Script entry point: call main() only when this file is run directly, not when it is imported.
418680if __name__ == "__main__" :
419681 main ()
0 commit comments