utopia-group · RoManInv · Sep 2, 2024 · Sep 19, 2024 · Sep 24, 2024
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ models/*
 .cache/*
 **/__pycache__/*
 .idea/*
+**/benchmarks - Backup.csv
+eval_res/*.csv
diff --git a/Dockerfile b/Dockerfile
@@ -1,5 +1,5 @@
 # Using official ubuntu image as a parent image
-FROM ubuntu:latest
+FROM ubuntu:jammy
 
 # Setting the working directory to /app
 WORKDIR /Smore
@@ -16,7 +16,7 @@ RUN apt-get install --assume-yes --no-install-recommends --quiet \
     curl \
     gcc \
     build-essential \
-    python3-dev\
+    python3.10-dev\
     python3-pip \
     wget \
     unzip \

diff --git a/benchmarks/benchmarks.csv b/benchmarks/benchmarks.csv
diff --git a/lib/config/config.py b/lib/config/config.py
@@ -8,7 +8,8 @@
 random.seed(SEED)
 
 
-AE = True
+# AE = True
+AE = False
 
 
 # Credentials for Google Sheet API

diff --git a/lib/nlp/nlp.py b/lib/nlp/nlp.py
@@ -57,7 +57,8 @@ def __init__(self, disable=False, debug=False):
         self.sbert_cache: Dict[str, Any] = {}
         self.byte_tokenizer = ByteLevel()
         self.gpt3 = GPT(os.getenv("OPENAI_API_KEY"))
-        self.gpt3_entity_model = 'text-davinci-003'
+        # self.gpt3_entity_model = 'text-davinci-003'
+        self.gpt3_entity_model = 'gpt-3.5-turbo'
         self.datamuse = Datamuse()
         # self.wiki = wikipediaapi.Wikipedia('en')
         self.wiki = None
@@ -82,13 +83,13 @@ def write_cache(self):
     Following are gpt-3 related
     probably need to add something else 
     """
-    def call_gpt(self, prompt: str, temperature=0, max_token=256, stop=None, model='text-davinci-003') -> Dict:
+    def call_gpt(self, prompt: str, temperature=0, max_token=256, stop=None, model='gpt-3.5-turbo') -> Dict:
         if temperature == 0:
             return self.call_gpt_deterministic(prompt, max_token, stop, model)
         else:
             return self.gpt3.call(prompt, max_token, temperature, stop, model)
 
-    @cache(ignore_args=[0])
+    # @cache(ignore_args=[0])
     def call_gpt_deterministic(self, prompt: str, max_token: int, stop, model: str) -> Dict:
         if not config.AE:
             response = self.gpt3.call(prompt, max_token, 0, stop, model)
@@ -541,7 +542,12 @@ def get_entity(self, context: str) -> Dict[str, List[Tuple[str, MatchSpan]]]:
             if 'TIME' not in ents_dict and time_match:
                 if '-' in context and ':' in context:
                     split_string = context.split(' ')
-                    time_str = find_date(split_string[1])[0][0]
+                    idx = -1
+                    if len(split_string) >= 2:
+                        idx = 1
+                    else:
+                        idx = 0
+                    time_str = find_date(split_string[idx])[0][0]
                     time_start_idx = context.index(time_str)
                     time_span = (time_start_idx, time_start_idx + len(time_str))
                     matched_spans.append(time_span)

diff --git a/lib/sketch_gen/sketch_gen.py b/lib/sketch_gen/sketch_gen.py
@@ -30,8 +30,8 @@ def __init__(self, mode: str, executor: Executor, no_type: bool, no_decomp: bool
         self.executor = executor
         self.nlp_func = executor.nlp_engine
         self.gpt_func = {
-            'code-davinci-002': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'code-davinci-002'),
-            'text-davinci-003': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'text-davinci-003'),
+            # 'code-davinci-002': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'code-davinci-002'),
+            # 'text-davinci-003': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'text-davinci-003'),
             'gpt-3.5-turbo': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'gpt-3.5-turbo')
         }
 

diff --git a/lib/utils/csv_utils.py b/lib/utils/csv_utils.py
@@ -8,7 +8,9 @@
 import sys
 from typing import Dict, List
 
-csv.field_size_limit(sys.maxsize)
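+# sys.maxsize can overflow the C long used by csv.field_size_limit (e.g. on Windows),
+# so cap the limit at the largest signed 32-bit value, 2147483647, instead.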
+csv.field_size_limit(2147483647)
 
 
 def get_column_list_from_dict(data: List[Dict], col_name: str) -> List:

diff --git a/run_eval.py b/run_eval.py
@@ -67,7 +67,7 @@ def parse_args():
     print('args: {}'.format(args))
 
     # read benchmark here first
-    benchmark_engine = BenchmarkResultEngine(args.eval_sheet_name, args.eval_worksheet_name, benchmark_size=50)
+    benchmark_engine = BenchmarkResultEngine(args.eval_sheet_name, args.eval_worksheet_name, benchmark_size=8)
 
     if args.test_sketch_gen:
         # test sketch generator with different prompt version and model
@@ -226,8 +226,8 @@ def parse_args():
                 print('Evaluating benchmark {}'.format(benchmark.bid))
                 synthesizer = TopLevelSynthesizer(executor, args.depth, args.no_type, args.no_type_system, args.no_decomp, args.no_repair, args.prompt_version)
                 executor.context = {}
-                signal.signal(signal.SIGALRM, handle_timeout)
-                signal.alarm(args.timeout)
+                # signal.signal(signal.SIGALRM, handle_timeout)
+                if(not 'nt' in os.name): signal.alarm(args.timeout)
                 try:
                     if args.no_sketch:
                         res = synthesizer.synthesize_no_sketch(benchmark.task)
@@ -236,12 +236,13 @@ def parse_args():
                     res.bid = benchmark.bid
                 except TimeoutError:
                     res = EvalRes(benchmark.task, 'TIMEOUT', args.timeout, 0, '', '', benchmark.bid)
-                    signal.alarm(0)
+                    if(not 'nt' in os.name): signal.alarm(0)
                 except TimeOutException:
                     res = EvalRes(benchmark.task, 'TIMEOUT', args.timeout, 0, '', '', benchmark.bid)
-                    signal.alarm(0)
+                    if(not 'nt' in os.name): signal.alarm(0)
                 finally:
-                    signal.alarm(0)
+                    pass
+                    if(not 'nt' in os.name): signal.alarm(0)
 
                 results.append(res)
             except StopIteration: