
Commit 850aaf3

Merge pull request #158 from amosproj/refactor/run-linting-and-fix-any-errors
Refactor/run linting and fix any errors
2 parents 584a69f + 1d8856a · commit 850aaf3

File tree

20 files changed: +919 -305 lines changed

Project/backend/codebase/graph_analysis/graph_analysis.py

+9 -6
```diff
@@ -1,6 +1,7 @@
-import networkx as nx
-import os
 import json
+import os
+
+import networkx as nx
 
 
 def analyze_graph_structure(G):
@@ -118,9 +119,11 @@ def analyze_graph_structure(G):
     # - Check if the graph is connected
     is_connected = nx.is_connected(G)
     # - Calculate diameter: Longest shortest path between any two nodes
-    diameter = nx.diameter(G) if is_connected else float('inf')
+    diameter = nx.diameter(G) if is_connected else float("inf")
     # - Average shortest path length: Average of all shortest paths in the graph
-    average_shortest_path_length = nx.average_shortest_path_length(G) if is_connected else float('inf')
+    average_shortest_path_length = (
+        nx.average_shortest_path_length(G) if is_connected else float("inf")
+    )
 
     # Clustering Coefficient
     # - Measures the degree to which nodes tend to cluster together
@@ -133,7 +136,7 @@ def analyze_graph_structure(G):
     # Graph Diameter and Radius
     # - Diameter: Longest shortest path in the graph
     # - Radius: Minimum eccentricity of any node
-    radius = nx.radius(G) if is_connected else float('inf')
+    radius = nx.radius(G) if is_connected else float("inf")
 
     # Graph Transitivity
     # - Measures the overall probability for the network to have adjacent nodes interconnected
@@ -158,7 +161,7 @@ def analyze_graph_structure(G):
         "average_clustering_coefficient": average_clustering_coefficient,
         "assortativity": assortativity,
         "radius": radius,
-        "transitivity": transitivity
+        "transitivity": transitivity,
     }
 
     return graph_info
```
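A note on the `float("inf")` fallback these hunks reformat: `nx.diameter`, `nx.average_shortest_path_length`, and `nx.radius` all raise `NetworkXError` on a disconnected graph, so the `is_connected` guard is what keeps the analysis from crashing. A minimal sketch (the toy graph is mine, not from the repo):

```python
import networkx as nx

# Two separate components, so the graph is not connected.
G = nx.Graph([(1, 2), (3, 4)])

is_connected = nx.is_connected(G)  # False
# Without the guard, nx.diameter(G) would raise NetworkXError here.
diameter = nx.diameter(G) if is_connected else float("inf")
print(diameter)  # inf
```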

Project/backend/codebase/graph_creator/gemini.py

+11 -6
```diff
@@ -1,6 +1,8 @@
 import os
 from datetime import datetime
+
 import google.generativeai as genai
+
 from graph_creator.services.json_handler import transform_llm_output_to_dict
 
 
@@ -69,16 +71,19 @@ def extract_entities_and_relations(chunk, genai_client):
 
 
 def check_for_connecting_relation(
-    chunk, entities_component_1, entities_component_2, genai_client
+        chunk, entities_component_1, entities_component_2, genai_client
 ):
     """
     Check for connecting relation between entities of two components.
     """
     SYS_PROMPT = (
         "Only answer in JSON format. \n"
-        "Your task is to help create a knowledge graph by extracting one more relation between any entity of list_1 with any entity of list_2.\n"
-        "We want to connect the subgraphs of nodes and relations that were extracted from the given text chunk (delimited by ```)."
-        "For this one more relation needs to be extracted from the given text chunk between any entity of list_1 and list_2:\n"
+        "Your task is to help create a knowledge graph by extracting one more relation between any entity of list_1 "
+        "with any entity of list_2.\n "
+        "We want to connect the subgraphs of nodes and relations that were extracted from the given text chunk ("
+        "delimited by ```). "
+        "For this one more relation needs to be extracted from the given text chunk between any entity of list_1 and "
+        "list_2:\n "
         f"list_1: {entities_component_1}\n"
         f"list_2: {entities_component_2}\n"
         "Only use the exact entities given in the lists."
@@ -99,7 +104,7 @@ def check_for_connecting_relation(
 
 
 def check_for_connecting_relation_(
-    text_chunk, entities_component_1, entities_component_2
+        text_chunk, entities_component_1, entities_component_2
 ):
     """
     Takes a text chunk, and two lists of entities (from each component in the graph)
@@ -112,7 +117,7 @@ def check_for_connecting_relation_(
         The text chunk to be proccessed
     entities_component_1 : list
         List of entities
-    entities_component_1 : list
+    entities_component_2 : list
         List of entities
 
     Returns
```
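One reviewer-style observation on the reflowed `SYS_PROMPT`: adjacent string literals inside parentheses are concatenated at compile time, so splitting the long lines still yields a single prompt string. The wrapped version is not byte-identical, though; segments like `"with any entity of list_2.\n "` carry a trailing space, so the reformat inserts spaces after some newlines, which an LLM prompt normally tolerates. A minimal sketch of the mechanism (example strings are illustrative):

```python
# Adjacent literals fuse into one string object at compile time.
prompt = (
    "Only answer in JSON format. \n"
    "Your task is to help create a knowledge graph.\n "
)
assert prompt == "Only answer in JSON format. \nYour task is to help create a knowledge graph.\n "
```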

Project/backend/codebase/graph_creator/graph_creator_main.py

+7 -5
```diff
@@ -1,11 +1,15 @@
+import logging
 import mimetypes
 
+from graph_creator import graph_handler
+from graph_creator import pdf_handler
 from graph_creator.llama3 import process_chunks as groq_process_chunks
 from graph_creator.models.graph_job import GraphJob
-from graph_creator import pdf_handler
-from graph_creator import graph_handler
 from graph_creator.services import netx_graphdb
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 def process_file_to_graph(g_job: GraphJob):
     """
@@ -57,11 +61,9 @@ def process_file_to_entities_and_relations(file: str):
         ]  # Assuming chunk has 'page_content' attribute
 
         # Generate response using LLM
-        # response_json = process_chunks(text_chunks, prompt_template)
         response_json = groq_process_chunks(text_chunks)
-        print(response_json)
     except Exception as e:
-        print(e)
+        logging.error(e)
         response_json = None
 
     return response_json, chunks
```
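The print-to-logging switch here configures the root logger at import time and defines a module-level `logger`, but the `except` branch then logs through `logging.error` (the root logger) rather than the `logger` it just created; both work, it is just an inconsistency worth noting. A minimal sketch of the two call styles (message text is mine):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.info("chunking finished")     # module logger, as defined in this diff
logging.error("LLM request failed")  # root logger, as used in the except block
```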

Project/backend/codebase/graph_creator/graph_handler.py

+20 -13
```diff
@@ -1,17 +1,23 @@
-import pandas as pd
-import re
 import json
+import logging
+import re
 import time
+
+import pandas as pd
+
 from graph_creator import llama3
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 def build_flattened_dataframe(entities_and_relations):
     """
     Flatten list of lists by adding chunk_id attribute convert to pandas dataframe
 
     Parameters
     ----------
-    entity_and_relations : list
+    entities_and_relations : list
         List of Lists of dictionaries
 
     Returns
@@ -47,7 +53,7 @@ def connect_with_chunk_proximity(entity_and_relation_df):
     pandas.dataframe
         A table with given relations and chunk proximity relations between the nodes
     """
-    # seperate all nodes by chunk_id
+    # separate all nodes by chunk_id
     df_by_chunk_id = pd.melt(
         entity_and_relation_df,
         id_vars=["chunk_id"],
@@ -116,7 +122,7 @@ def index_entity_relation_table(entity_and_relation_df, entities):
         A List containing all relations as tuples of entity indexes
     """
     entities_dict = {}
-    # for reproducable results
+    # for reproducible results
     entities = sorted(entities)
     for i in range(len(entities)):
         entities_dict[entities[i]] = i
@@ -178,7 +184,7 @@ def extract_components(relations_list):
         elif inserte["at"] >= 0:
             components[inserte["at"]].append(inserte["new_node"])
 
-    # remove empty componente
+    # remove empty components
     components.pop(len(components) - 1)
 
     return components
@@ -242,7 +248,6 @@ def get_shared_chunks_by_component(component1, component2, entity_chunks_list):
         chunk_entities = set(entity_chunks_list[keys[i]])
         intersection_c1 = chunk_entities.intersection(entities_component_1)
         intersection_c2 = chunk_entities.intersection(entities_component_2)
-        # print(f"{intersection_size_c1}, {intersection_size_c2}")
         if len(intersection_c1) > 0 and len(intersection_c2) > 0:
             shared_chunks.append(keys[i])
             intersections[keys[i]] = {"c1": intersection_c1, "c2": intersection_c2}
@@ -344,6 +349,9 @@ def connect_with_llm(data, text_chunks, rate_limit):
         Table of nodes and relations between the nodes
     text_chunks : list
         A list of dictionaries containing the text chunks
+    rate_limit : int
+        The maximum number of requests that can be made to the LLM within a specified
+        timeframe.
 
     Returns
     -------
@@ -356,7 +364,7 @@ def connect_with_llm(data, text_chunks, rate_limit):
     components = extract_components(relations_list)
     number_components = len(components)
 
-    print("Before connecting {} components".format(number_components))
+    logger.info(f"Before connecting {number_components} components")
 
     # get chunk information about contained entities
     entity_chunks_list = get_entities_by_chunk(data, entities_dict)
@@ -408,18 +416,17 @@ def connect_with_llm(data, text_chunks, rate_limit):
                 relation = extract_relation_from_llm_output(
                     connecting_relation, main_chunk_entities, current_chunk_entities
                 )
+
                 # if relation is extracted than a valid relation containing only existing entities can be added
-                # print(relation)
                 if relation is not None:
                     relation["chunk_id"] = key_shared_chunk
                     connecting_relations.append(relation)
                     connections += 1
                     break
 
-    print(
-        "Made {} new connections and thereby reduced the graph to {} components".format(
-            connections, number_components - connections
-        )
+    logger.info(
+        f"Made {connections} new connections and thereby reduced the graph "
+        f"to {number_components - connections} components "
     )
     data = add_relations_to_data(data, connecting_relations)
 
```
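The replaced `print`/`str.format` calls become eagerly formatted f-strings passed to `logger.info`. That is fine at INFO level; the stdlib also supports lazy `%`-style arguments, which defer formatting until a handler actually emits the record. A short sketch of both styles (the value is hypothetical):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

number_components = 7  # hypothetical value

# As in the diff: the f-string is built before logger.info is called.
logger.info(f"Before connecting {number_components} components")

# Lazy stdlib alternative: formatted only if INFO is enabled.
logger.info("Before connecting %d components", number_components)
```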

Project/backend/codebase/graph_creator/json_to_graphml.py

+2 -1
```diff
@@ -1,7 +1,8 @@
 import json
+import logging
+
 import networkx as nx
 import pandas as pd
-import logging
 
 
 def json_string_to_graph(json_string):
```

Project/backend/codebase/graph_creator/llama3.py

+11 -7
```diff
@@ -1,5 +1,6 @@
 import os
 from datetime import datetime
+
 from groq import Groq
 
 from graph_creator.services.json_handler import transform_llm_output_to_dict
@@ -9,7 +10,7 @@ def configure_groq():
     """
     Ensure the API key is set in the environment
     """
-    # load_dotenv("Project/backend/.env", override=True)
+
     api_key = os.getenv("GROQ_API_KEY")
     if not api_key:
         raise ValueError("API key not found in environment variables")
@@ -71,16 +72,19 @@ def extract_entities_and_relations(chunk, groq_client):
 
 
 def check_for_connecting_relation(
-    chunk, entities_component_1, entities_component_2, groq_client
+        chunk, entities_component_1, entities_component_2, groq_client
 ):
     """
     Check for connecting relation between entities of two components.
     """
     SYS_PROMPT = (
         "Only answer in JSON format. \n"
-        "Your task is to help create a knowledge graph by extracting one more relation between any entity of list_1 with any entity of list_2.\n"
-        "We want to connect the subgraphs of nodes and relations that were extracted from the given text chunk (delimited by ```)."
-        "For this one more relation needs to be extracted from the given text chunk between any entity of list_1 and list_2:\n"
+        "Your task is to help create a knowledge graph by extracting one more relation between any entity of list_1 "
+        "with any entity of list_2.\n "
+        "We want to connect the subgraphs of nodes and relations that were extracted from the given text chunk ("
+        "delimited by ```). "
+        "For this one more relation needs to be extracted from the given text chunk between any entity of list_1 and "
+        "list_2:\n "
         f"list_1: {entities_component_1}\n"
         f"list_2: {entities_component_2}\n"
         "Only use the exact entities given in the lists."
@@ -103,7 +107,7 @@ def check_for_connecting_relation(
 
 
 def check_for_connecting_relation_(
-    text_chunk, entities_component_1, entities_component_2
+        text_chunk, entities_component_1, entities_component_2
 ):
     """
     Takes a text chunk, and two lists of entities (from each component in the graph)
@@ -116,7 +120,7 @@ def check_for_connecting_relation_(
         The text chunk to be proccessed
     entities_component_1 : list
         List of entities
-    entities_component_1 : list
+    entities_component_2 : list
         List of entities
 
     Returns
```
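With the commented-out `load_dotenv(...)` line gone, `configure_groq` relies on `GROQ_API_KEY` already being present in the process environment, however the deployment injects it; the fail-fast check itself is unchanged. A minimal sketch of that check in isolation:

```python
import os

api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    # Fail fast instead of letting the Groq client error later.
    raise ValueError("API key not found in environment variables")
```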

Project/backend/codebase/graph_creator/pdf_handler.py

+3 -2
```diff
@@ -1,6 +1,7 @@
 import os
-from langchain_text_splitters import RecursiveCharacterTextSplitter
+
 from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
 def process_pdf_into_chunks(filename):
@@ -10,7 +11,7 @@ def process_pdf_into_chunks(filename):
     Parameters
     ----------
     filename : str
-        The name of the pdf file to be proccessed
+        The name of the pdf file to be processed
 
     Returns
     -------
```
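For context on the two reordered imports, a typical load-then-split pipeline with these classes looks like the sketch below. The file name and chunk parameters are assumptions for illustration, not values taken from this module:

```python
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = PyPDFLoader("example.pdf")  # hypothetical input file
pages = loader.load()  # one Document per PDF page

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(pages)

# Matches the attribute the diff's comment assumes on each chunk.
text_chunks = [chunk.page_content for chunk in chunks]
```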
