@@ -1,17 +1,23 @@
-import pandas as pd
-import re
 import json
+import logging
+import re
 import time
+
+import pandas as pd
+
 from graph_creator import llama3
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 def build_flattened_dataframe(entities_and_relations):
     """
     Flatten list of lists by adding a chunk_id attribute and convert to a pandas dataframe
 
     Parameters
     ----------
-    entity_and_relations : list
+    entities_and_relations : list
         List of lists of dictionaries
 
     Returns
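The module-level logging setup added above follows the standard pattern: configure the root handler once, then create a logger namespaced by module so downstream applications can filter or redirect this module's output. A minimal sketch of the behavior (the log message here is illustrative, not from the diff):

```python
import logging

logging.basicConfig(level=logging.INFO)  # one-time root handler configuration
logger = logging.getLogger(__name__)     # logger namespaced by this module

# Routed through the root handler; callers can silence it by
# raising the level on this module's logger.
logger.info("connector initialised")
```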
@@ -47,7 +53,7 @@ def connect_with_chunk_proximity(entity_and_relation_df):
     pandas.dataframe
         A table with given relations and chunk proximity relations between the nodes
     """
-    # seperate all nodes by chunk_id
+    # separate all nodes by chunk_id
     df_by_chunk_id = pd.melt(
         entity_and_relation_df,
         id_vars=["chunk_id"],
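The `pd.melt` call is truncated by the hunk, but the idea is to go from one row per relation to one row per (chunk_id, node) pair. A toy run on assumed column names (`node_1`, `node_2`, and the `value_name` are illustrative):

```python
import pandas as pd

# One row per extracted relation, tagged with its source chunk.
df = pd.DataFrame(
    {
        "chunk_id": [0, 0, 1],
        "node_1": ["einstein", "physics", "einstein"],
        "node_2": ["physics", "relativity", "berlin"],
    }
)

# Melting on chunk_id stacks node_1/node_2 into a single column:
# one row per (chunk_id, node) occurrence.
melted = pd.melt(df, id_vars=["chunk_id"], value_name="node")
print(melted[["chunk_id", "node"]].drop_duplicates())
```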
@@ -116,7 +122,7 @@ def index_entity_relation_table(entity_and_relation_df, entities):
         A List containing all relations as tuples of entity indexes
     """
     entities_dict = {}
-    # for reproducable results
+    # for reproducible results
     entities = sorted(entities)
     for i in range(len(entities)):
         entities_dict[entities[i]] = i
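Sorting before indexing matters because set iteration order varies between runs (hash randomization), so without it the entity-to-index mapping would not be reproducible. For illustration:

```python
# Entities may arrive in arbitrary order, e.g. from a set.
entities = {"physics", "einstein", "berlin"}

entities_dict = {}
for i, entity in enumerate(sorted(entities)):
    entities_dict[entity] = i

print(entities_dict)  # always {'berlin': 0, 'einstein': 1, 'physics': 2}
```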
@@ -178,7 +184,7 @@ def extract_components(relations_list):
         elif inserte["at"] >= 0:
             components[inserte["at"]].append(inserte["new_node"])
 
-    # remove empty componente
+    # remove empty components
     components.pop(len(components) - 1)
 
     return components
@@ -242,7 +248,6 @@ def get_shared_chunks_by_component(component1, component2, entity_chunks_list):
         chunk_entities = set(entity_chunks_list[keys[i]])
         intersection_c1 = chunk_entities.intersection(entities_component_1)
         intersection_c2 = chunk_entities.intersection(entities_component_2)
-        # print(f"{intersection_size_c1}, {intersection_size_c2}")
         if len(intersection_c1) > 0 and len(intersection_c2) > 0:
             shared_chunks.append(keys[i])
             intersections[keys[i]] = {"c1": intersection_c1, "c2": intersection_c2}
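To make the shared-chunk test concrete: a chunk qualifies when its entity set overlaps both components, since such a chunk can supply a relation that bridges them. A self-contained sketch with made-up entities:

```python
# Entities belonging to two disconnected graph components.
entities_component_1 = {"einstein", "physics"}
entities_component_2 = {"berlin", "prussia"}

# Entities mentioned in one text chunk.
chunk_entities = {"einstein", "berlin", "1914"}

intersection_c1 = chunk_entities.intersection(entities_component_1)  # {'einstein'}
intersection_c2 = chunk_entities.intersection(entities_component_2)  # {'berlin'}

# Non-empty overlap with both components -> this chunk can bridge them.
if len(intersection_c1) > 0 and len(intersection_c2) > 0:
    print("shared chunk found")
```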
@@ -344,6 +349,9 @@ def connect_with_llm(data, text_chunks, rate_limit):
         Table of nodes and relations between the nodes
     text_chunks : list
         A list of dictionaries containing the text chunks
+    rate_limit : int
+        The maximum number of requests that can be made to the LLM within a specified
+        timeframe.
 
     Returns
     -------
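The hunk documents `rate_limit` but does not show how it is enforced; that logic lives elsewhere in the function. As a rough illustration only, assuming the limit means requests per minute, throttling could look like the following (`throttled_calls` and `fake_llm_call` are hypothetical helpers, not from the diff):

```python
import time

def throttled_calls(prompts, rate_limit):
    # Hypothetical helper: spaces requests so at most `rate_limit`
    # LLM calls happen per minute (assumption about the unit).
    min_interval = 60.0 / rate_limit
    results = []
    for prompt in prompts:
        started = time.time()
        results.append(fake_llm_call(prompt))  # stand-in for the real request
        remaining = min_interval - (time.time() - started)
        if remaining > 0:
            time.sleep(remaining)
    return results

def fake_llm_call(prompt):
    return f"response to: {prompt}"  # placeholder
```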
@@ -356,7 +364,7 @@ def connect_with_llm(data, text_chunks, rate_limit):
     components = extract_components(relations_list)
     number_components = len(components)
 
-    print("Before connecting {} components".format(number_components))
+    logger.info(f"Before connecting {number_components} components")
 
     # get chunk information about contained entities
     entity_chunks_list = get_entities_by_chunk(data, entities_dict)
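One trade-off in the print-to-logger migration: an f-string argument is formatted eagerly even when INFO is disabled, whereas logging's native lazy style defers the interpolation. The equivalent lazy form, sketched with a placeholder value:

```python
import logging

logger = logging.getLogger(__name__)
number_components = 7  # placeholder value

# Lazy form: "%d" is interpolated only if INFO is enabled for this logger.
logger.info("Before connecting %d components", number_components)
```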
@@ -408,18 +416,17 @@ def connect_with_llm(data, text_chunks, rate_limit):
                 relation = extract_relation_from_llm_output(
                     connecting_relation, main_chunk_entities, current_chunk_entities
                 )
+
                 # if a relation is extracted, then a valid relation containing only existing entities can be added
-                # print(relation)
                 if relation is not None:
                     relation["chunk_id"] = key_shared_chunk
                     connecting_relations.append(relation)
                     connections += 1
                     break
 
-    print(
-        "Made {} new connections and thereby reduced the graph to {} components".format(
-            connections, number_components - connections
-        )
+    logger.info(
+        f"Made {connections} new connections and thereby reduced the graph "
+        f"to {number_components - connections} components"
     )
     data = add_relations_to_data(data, connecting_relations)
 