import configparser
import sys
from datetime import datetime

from dotenv import load_dotenv
from loguru import logger

import create_vision_json
import load_images
from equator_qa.charting import create_basic_charts
from equator_qa.utils import begin_benchmark, cleanup_chromadb
from equator_qa.vectordb import VectorDB_Controller


def validate_config(config):
    """
    Validate the presence of required sections and options in config.ini.

    Raises:
        ValueError: If any required section or option is missing.
    """
    required_sections = {
        "rounds": ["answer_rounds"],
        "BENCHMARK_NAME": ["benchmark_name"],
        "evaluator_models": ["GROQ_EVALUATOR_MODEL", "OLLAMA_EVALUATOR_MODEL"],
        "execution_steps": ["EXECUTION_STEPS"],
        "student_models": [
            "STUDENT_OPENROUTER_MODELS",
            "STUDENT_GROQ_MODELS",
            "STUDENT_OLLAMA_MODELS",
        ],
        "vision": ["VISION_DB"],
        "parquet": ["PARQUET"],
        "keep_vector_db": ["KEEP_VECTOR_DB"],
    }
    for section, keys in required_sections.items():
        if not config.has_section(section):
            logger.error(f"Missing required section: [{section}] in config.ini")
            raise ValueError(f"Missing required section: [{section}] in config.ini")
        for key in keys:
            if not config.has_option(section, key):
                logger.error(
                    f"Missing required option '{key}' in section [{section}] in config.ini"
                )
                raise ValueError(
                    f"Missing required option '{key}' in section [{section}] in config.ini"
                )

def main():
    """
    Run benchmarking and/or statistical analysis according to the execution
    steps configured in config.ini.

    All settings (models, rounds, steps, and feature flags) are read from
    config.ini; credentials and other environment variables are loaded from
    the .env file.
    """
    # Read settings from config.ini.
    config = configparser.ConfigParser()
    config.read("config.ini")

    # Load environment variables (e.g. API keys) from the .env file.
    load_dotenv()

    # Fail fast if any required section or option is missing.
    try:
        validate_config(config)
    except ValueError:
        # validate_config() has already logged the specific problem.
        sys.exit(1)
    # Capture the current date (YYYY-MM-DD) for logging and organizing results.
    date_now = datetime.now().strftime("%Y-%m-%d")

    # Evaluator models used to grade the student models' answers.
    GROQ_EVALUATOR_MODEL = config["evaluator_models"]["GROQ_EVALUATOR_MODEL"]
    OLLAMA_EVALUATOR_MODEL = config["evaluator_models"]["OLLAMA_EVALUATOR_MODEL"]

    # Use configparser's getboolean() for boolean options.
    try:
        VISION_DB = config.getboolean("vision", "VISION_DB")
        PARQUET = config.getboolean("parquet", "PARQUET")
        KEEP_VECTOR_DB = config.getboolean("keep_vector_db", "KEEP_VECTOR_DB")
    except ValueError as e:
        logger.error(f"Boolean conversion error: {e}")
        sys.exit(1)
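    # Note: getboolean() accepts "1"/"yes"/"true"/"on" and "0"/"no"/"false"/"off"
    # (case-insensitive); any other value raises the ValueError handled above.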

    # Remove any existing ChromaDB data unless the user asked to keep it.
    if not KEEP_VECTOR_DB:
        cleanup_chromadb()

    # Number of times each question is posed to the models.
    try:
        ANSWER_ROUNDS = config.getint("rounds", "answer_rounds")
    except ValueError:
        logger.error("Invalid value for 'answer_rounds'. It must be an integer.")
        sys.exit(1)

    # Identifier for the current benchmark run.
    BENCHMARK_NAME = config["BENCHMARK_NAME"]["benchmark_name"]

    # When both vision support and Parquet input are enabled, load the images
    # and build the vision benchmark JSON before running any steps.
    if VISION_DB and PARQUET:
        load_images.main()
        create_vision_json.create_vision_benchmark_json()
    # Parse the comma-separated execution steps into a list.
    EXECUTION_STEPS = [
        step.strip()
        for step in config["execution_steps"]["EXECUTION_STEPS"].split(",")
    ]

    # Parse the comma-separated student model lists, dropping empty entries.
    STUDENT_OPENROUTER_MODELS = [
        model.strip()
        for model in config["student_models"]["STUDENT_OPENROUTER_MODELS"].split(",")
        if model.strip()
    ]
    STUDENT_GROQ_MODELS = [
        model.strip()
        for model in config["student_models"]["STUDENT_GROQ_MODELS"].split(",")
        if model.strip()
    ]
    STUDENT_OLLAMA_MODELS = [
        model.strip()
        for model in config["student_models"]["STUDENT_OLLAMA_MODELS"].split(",")
        if model.strip()
    ]
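    # Example with illustrative values: "llama3-8b, mixtral," parses to
    # ["llama3-8b", "mixtral"]; the trailing empty entry is dropped. Note that
    # the EXECUTION_STEPS parse above does not filter empty entries this way.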

    # Flags tracking whether each kind of step actually ran.
    benchmark_executed = False
    statistics_generated = False

    # Dispatch each configured execution step.
    for step in EXECUTION_STEPS:
        if step != "generate_statistics":
            # Any step other than "generate_statistics" triggers a benchmark
            # run; begin_benchmark() receives the full step list and dispatches
            # internally.
            logger.info("Starting benchmark run.")
            vectordb_instance = VectorDB_Controller(
                keepVectorDB=KEEP_VECTOR_DB, VISION=VISION_DB
            )
            begin_benchmark(
                VISION_DB,
                EXECUTION_STEPS,            # list of execution steps
                STUDENT_OLLAMA_MODELS,      # Ollama student models
                STUDENT_GROQ_MODELS,        # Groq student models
                STUDENT_OPENROUTER_MODELS,  # OpenRouter student models
                OLLAMA_EVALUATOR_MODEL,     # Ollama evaluator model
                GROQ_EVALUATOR_MODEL,       # Groq evaluator model
                vectordb_instance,          # VectorDB_Controller instance
                BENCHMARK_NAME,             # benchmark run identifier
                date_now,                   # current date (YYYY-MM-DD)
                ANSWER_ROUNDS,              # number of answer rounds
            )
            logger.info("Benchmark run completed.")
            benchmark_executed = True
        else:
            # Generate basic statistical charts from the benchmark results.
            logger.info("Executing generate_statistics step.")
            create_basic_charts(EXECUTION_STEPS, ANSWER_ROUNDS, BENCHMARK_NAME, date_now)
            # Bind extra context ("equator") so log sinks can filter on it.
            logger.bind(equator=True).info("Statistics generated successfully.")
            print("-- DONE STATS --\n")
            statistics_generated = True

    # Warn if no recognized steps were executed.
    if not benchmark_executed and not statistics_generated:
        logger.warning("No execution steps were provided or recognized.")


# Entry point of the script.
if __name__ == "__main__":
    main()

# Deployment note:
# Do NOT run Ollama directly on the same machine as this script without Docker.
# Ollama can run on a separate machine, with or without Docker; to use a remote
# instance, update the Ollama URL and port in your configuration.
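#
# A hypothetical illustration only: the setting name OLLAMA_URL below is an
# assumption for this sketch, not necessarily the variable this project reads;
# 11434 is Ollama's default port.
#
#   # .env
#   OLLAMA_URL=http://192.168.1.50:11434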