Skip to content

Commit ae39d8b

Browse files
committed
Add explanations to the unified report
1 parent d54570a commit ae39d8b

File tree

2 files changed

+230
-0
lines changed

2 files changed

+230
-0
lines changed

Graph Analysis/unified_analysis.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,55 +233,66 @@ def write_report(
233233

234234
# Summary
235235
f.write("## Summary\n")
236+
f.write("These are high-level counts of nodes/edges for each graph constructed during analysis.\n\n")
236237
for k, v in summary.items():
237238
f.write(f"- {k}: {v}\n")
238239
f.write("\n")
239240

240241
# Participant-only Degree (Co-attendance)
241242
f.write("## Degree (Co-attendance) Analysis\n")
243+
f.write("People are connected if they attend the same meeting; a person's degree is how many unique people they co-attended with.\n\n")
242244
f.write("### Top Nodes by Degree\n")
245+
f.write("These are the people connected to the most unique others across meetings.\n\n")
243246
f.write("| Rank | Node | Degree |\n|------|------|--------|\n")
244247
for i, (node, deg) in enumerate(attend_top, 1):
245248
label = _truncate_label(node, 80)
246249
f.write(f"| {i} | {label} | {deg} |\n")
247250
f.write("\n")
248251
f.write("### Degree Distribution\n")
252+
f.write("How many people fall into each degree (number of unique co-attendees) bucket.\n\n")
249253
f.write("| Degree | Count of Nodes |\n|--------|-----------------|\n")
250254
for d, c in attend_dist:
251255
f.write(f"| {d} | {c} |\n")
252256
f.write("\n")
253257

254258
# JSON Field Degree Analysis
255259
f.write("## JSON Field Degree Analysis\n")
260+
f.write("Fields are connected when they appear together inside the same JSON object; a field's degree is the number of distinct fields it co-occurs with.\n\n")
256261
f.write("### Top Fields by Degree\n")
262+
f.write("These fields co-occur with the largest variety of other fields.\n\n")
257263
f.write("| Rank | Field | Degree |\n|------|-------|--------|\n")
258264
for i, (node, deg) in enumerate(field_top, 1):
259265
label = _truncate_label(node, 80)
260266
f.write(f"| {i} | {label} | {deg} |\n")
261267
f.write("\n")
262268
f.write("### Degree Distribution\n")
269+
f.write("How many fields have each degree (number of distinct co-occurring fields).\n\n")
263270
f.write("| Degree | Count of Fields |\n|--------|------------------|\n")
264271
for d, c in field_dist:
265272
f.write(f"| {d} | {c} |\n")
266273
f.write("\n")
267274

268275
# Path Analysis
269276
f.write("## JSON Path Structure Analysis\n")
277+
f.write("Each JSON path represents a unique nested route (keys/array indices); depth shows how deeply information is nested.\n\n")
270278
f.write(f"- Total Unique Paths: {path_info['total_paths']}\n")
271279
f.write(f"- Maximum Depth: {path_info['max_depth']}\n")
272280
f.write(f"- Average Depth: {path_info['avg_depth']:.2f}\n\n")
273281
f.write("### Deepest JSON Paths (sample)\n")
282+
f.write("The deepest examples indicate where the data structure is most nested.\n\n")
274283
for p in path_info["deepest_paths"][:10]:
275284
f.write(f"- `{p}`\n")
276285
f.write("\n")
277286
f.write("### Most Common Parent Paths\n")
287+
f.write("Parents that appear most often, suggesting common structural hubs.\n\n")
278288
f.write("| Rank | Parent Path | Count |\n|------|-------------|-------|\n")
279289
for i, (parent, cnt) in enumerate(parent_top, 1):
280290
f.write(f"| {i} | `{parent}` | {cnt} |\n")
281291
f.write("\n")
282292

283293
# Centrality
284294
f.write("## Field Centrality (Co-occurrence)\n")
295+
f.write("Centrality scores highlight fields that are well-connected (degree), act as bridges (betweenness), are close to others (closeness), or connect to other influential fields (eigenvector).\n\n")
285296
metrics = centrality
286297
top_fields = sorted(metrics["degree"].keys(), key=lambda x: metrics["degree"][x], reverse=True)[:10]
287298
f.write("| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |\n")
@@ -299,15 +310,18 @@ def write_report(
299310
# Clustering
300311
avg_clust, top_clust_nodes = clustering
301312
f.write("## Clustering (Field Co-occurrence Graph)\n")
313+
f.write("Clustering measures how tightly a field's neighbors are connected to each other (higher means more triads).\n\n")
302314
f.write(f"- Average Clustering Coefficient: {avg_clust:.3f}\n\n")
303315
f.write("### Top Nodes by Clustering Coefficient\n")
316+
f.write("Fields whose immediate neighborhoods are most tightly interlinked.\n\n")
304317
f.write("| Rank | Field | Clustering |\n|------|-------|------------|\n")
305318
for i, (node, val) in enumerate(top_clust_nodes, 1):
306319
f.write(f"| {i} | {node} | {val:.3f} |\n")
307320
f.write("\n")
308321

309322
# Connected Components
310323
f.write("## Connected Components (Field Co-occurrence Graph)\n")
324+
f.write("Components are groups of fields that are all reachable from each other; multiple components suggest separate substructures.\n\n")
311325
f.write(f"- Number of Components: {components['component_count']}\n")
312326
f.write(f"- Component Sizes (top 10): {components['component_sizes'][:10]}\n")
313327
f.write("- Sample of Largest Component Nodes (top 10):\n")
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# Unified Graph Analysis Report
2+
**Generated on:** 2025-10-15 10:00:54
3+
4+
## Summary
5+
These are high-level counts of nodes/edges for each graph constructed during analysis.
6+
7+
- Co-attendance graph (nodes): 126
8+
- Co-attendance graph (edges): 1848
9+
- Path graph (nodes): 6833
10+
- Path graph (edges): 6832
11+
- Field graph (nodes): 44
12+
- Field graph (edges): 149
13+
14+
## Degree (Co-attendance) Analysis
15+
People are connected if they attend the same meeting; a person's degree is how many unique people they co-attended with.
16+
17+
### Top Nodes by Degree
18+
These are the people connected to the most unique others across meetings.
19+
20+
| Rank | Node | Degree |
21+
|------|------|--------|
22+
| 1 | AshleyDawn | 95 |
23+
| 2 | PeterE | 91 |
24+
| 3 | advanceameyaw | 87 |
25+
| 4 | CallyFromAuron | 81 |
26+
| 5 | Kateri | 81 |
27+
| 6 | UKnowZork | 79 |
28+
| 7 | Sucre n Spice | 78 |
29+
| 8 | esewilliams | 76 |
30+
| 9 | LordKizzy | 76 |
31+
| 10 | Jeffrey Ndarake | 73 |
32+
33+
### Degree Distribution
34+
Number of people at each degree value (degree = number of unique co-attendees).
35+
36+
| Degree | Count of Nodes |
37+
|--------|-----------------|
38+
| 4 | 1 |
39+
| 6 | 2 |
40+
| 8 | 4 |
41+
| 9 | 6 |
42+
| 10 | 6 |
43+
| 11 | 8 |
44+
| 12 | 5 |
45+
| 13 | 17 |
46+
| 15 | 2 |
47+
| 16 | 5 |
48+
| 17 | 4 |
49+
| 18 | 2 |
50+
| 19 | 3 |
51+
| 20 | 4 |
52+
| 21 | 4 |
53+
| 23 | 2 |
54+
| 24 | 1 |
55+
| 26 | 2 |
56+
| 27 | 2 |
57+
| 29 | 1 |
58+
| 30 | 3 |
59+
| 31 | 1 |
60+
| 33 | 1 |
61+
| 34 | 2 |
62+
| 35 | 1 |
63+
| 36 | 2 |
64+
| 40 | 1 |
65+
| 43 | 2 |
66+
| 44 | 1 |
67+
| 45 | 1 |
68+
| 46 | 3 |
69+
| 47 | 1 |
70+
| 48 | 1 |
71+
| 49 | 1 |
72+
| 51 | 1 |
73+
| 57 | 1 |
74+
| 58 | 1 |
75+
| 60 | 1 |
76+
| 62 | 4 |
77+
| 63 | 1 |
78+
| 65 | 1 |
79+
| 66 | 1 |
80+
| 71 | 2 |
81+
| 72 | 1 |
82+
| 73 | 1 |
83+
| 76 | 2 |
84+
| 78 | 1 |
85+
| 79 | 1 |
86+
| 81 | 2 |
87+
| 87 | 1 |
88+
| 91 | 1 |
89+
| 95 | 1 |
90+
91+
## JSON Field Degree Analysis
92+
Fields are connected when they appear together inside the same JSON object; a field's degree is the number of distinct fields it co-occurs with.
93+
94+
### Top Fields by Degree
95+
These fields co-occur with the largest variety of other fields.
96+
97+
| Rank | Field | Degree |
98+
|------|-------|--------|
99+
| 1 | documenter | 11 |
100+
| 2 | typeOfMeeting | 11 |
101+
| 3 | purpose | 11 |
102+
| 4 | workingDocs | 11 |
103+
| 5 | host | 11 |
104+
| 6 | date | 11 |
105+
| 7 | peoplePresent | 11 |
106+
| 8 | status | 11 |
107+
| 9 | meetingVideoLink | 10 |
108+
| 10 | meetingInfo | 9 |
109+
110+
### Degree Distribution
111+
How many fields have each degree (number of distinct co-occurring fields).
112+
113+
| Degree | Count of Fields |
114+
|--------|------------------|
115+
| 1 | 2 |
116+
| 2 | 2 |
117+
| 3 | 9 |
118+
| 4 | 4 |
119+
| 5 | 1 |
120+
| 7 | 2 |
121+
| 8 | 3 |
122+
| 9 | 12 |
123+
| 10 | 1 |
124+
| 11 | 8 |
125+
126+
## JSON Path Structure Analysis
127+
Each JSON path represents a unique nested route (keys/array indices); depth shows how deeply information is nested.
128+
129+
- Total Unique Paths: 6832
130+
- Maximum Depth: 6
131+
- Average Depth: 4.20
132+
133+
### Deepest JSON Paths (sample)
134+
The deepest examples indicate where the data structure is most nested.
135+
136+
- `[0].agendaItems[0].actionItems[0].text`
137+
- `[0].agendaItems[0].actionItems[0].assignee`
138+
- `[0].agendaItems[0].actionItems[0].dueDate`
139+
- `[0].agendaItems[0].actionItems[0].status`
140+
- `[0].agendaItems[0].decisionItems[0].decision`
141+
- `[0].agendaItems[0].decisionItems[0].effect`
142+
- `[0].agendaItems[0].decisionItems[1].decision`
143+
- `[0].agendaItems[0].decisionItems[1].rationale`
144+
- `[0].agendaItems[0].decisionItems[1].effect`
145+
- `[0].agendaItems[0].decisionItems[2].decision`
146+
147+
### Most Common Parent Paths
148+
These are the parent paths that appear most often, suggesting common structural hubs.
149+
150+
| Rank | Parent Path | Count |
151+
|------|-------------|-------|
152+
| 1 | `[12].agendaItems[0]` | 26 |
153+
| 2 | `[2].agendaItems[0]` | 21 |
154+
| 3 | `[10].agendaItems[0]` | 21 |
155+
| 4 | `[7].agendaItems[0]` | 19 |
156+
| 5 | `[17].agendaItems[0]` | 19 |
157+
| 6 | `[22].meetingInfo` | 19 |
158+
| 7 | `[23].meetingInfo` | 19 |
159+
| 8 | `[101].agendaItems[0]` | 19 |
160+
| 9 | `[11].agendaItems[0]` | 18 |
161+
| 10 | `[37].agendaItems[0]` | 18 |
162+
163+
## Field Centrality (Co-occurrence)
164+
Centrality scores highlight fields that are well-connected (degree), act as bridges (betweenness), are close to others (closeness), or connect to other influential fields (eigenvector).
165+
166+
| Rank | Field | Degree | Betweenness | Closeness | Eigenvector |
167+
|------|-------|--------|-------------|-----------|------------|
168+
| 1 | documenter | 0.256 | 0.001 | 0.256 | 0.309 |
169+
| 2 | typeOfMeeting | 0.256 | 0.001 | 0.256 | 0.309 |
170+
| 3 | purpose | 0.256 | 0.001 | 0.256 | 0.309 |
171+
| 4 | workingDocs | 0.256 | 0.001 | 0.256 | 0.309 |
172+
| 5 | host | 0.256 | 0.001 | 0.256 | 0.309 |
173+
| 6 | date | 0.256 | 0.001 | 0.256 | 0.309 |
174+
| 7 | peoplePresent | 0.256 | 0.001 | 0.256 | 0.309 |
175+
| 8 | status | 0.256 | 0.030 | 0.256 | 0.000 |
176+
| 9 | meetingVideoLink | 0.233 | 0.000 | 0.234 | 0.290 |
177+
| 10 | meetingInfo | 0.209 | 0.000 | 0.209 | 0.000 |
178+
179+
## Clustering (Field Co-occurrence Graph)
180+
Clustering measures how tightly a field's neighbors are connected to each other (higher means more of its neighbor pairs are themselves linked, forming triangles).
181+
182+
- Average Clustering Coefficient: 0.882
183+
184+
### Top Nodes by Clustering Coefficient
185+
Fields whose immediate neighborhoods are most tightly interlinked.
186+
187+
| Rank | Field | Clustering |
188+
|------|-------|------------|
189+
| 1 | meetingInfo | 1.000 |
190+
| 2 | tags | 1.000 |
191+
| 3 | canceledSummary | 1.000 |
192+
| 4 | workgroup_id | 1.000 |
193+
| 5 | workgroup | 1.000 |
194+
| 6 | agendaItems | 1.000 |
195+
| 7 | type | 1.000 |
196+
| 8 | noSummaryGiven | 1.000 |
197+
| 9 | timestampedVideo | 1.000 |
198+
| 10 | dueDate | 1.000 |
199+
200+
## Connected Components (Field Co-occurrence Graph)
201+
Components are groups of fields that are all reachable from each other; multiple components suggest separate substructures.
202+
203+
- Number of Components: 6
204+
- Component Sizes (top 10): [12, 12, 10, 4, 4, 2]
205+
- Sample of Largest Component Nodes (top 10):
206+
- timestampedVideo
207+
- documenter
208+
- otherMediaLink
209+
- mediaLink
210+
- typeOfMeeting
211+
- peoplePresent
212+
- workingDocs
213+
- host
214+
- date
215+
- miroBoardLink
216+

0 commit comments

Comments
 (0)