Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ models/*
.cache/*
**/__pycache__/*
.idea/*
**/benchmarks - Backup.csv
eval_res/*.csv
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Using official ubuntu image as a parent image
FROM ubuntu:latest
FROM ubuntu:jammy

# Setting the working directory to /Smore
WORKDIR /Smore
Expand All @@ -16,7 +16,7 @@ RUN apt-get install --assume-yes --no-install-recommends --quiet \
curl \
gcc \
build-essential \
python3-dev\
python3.10-dev\
python3-pip \
wget \
unzip \
Expand Down
1,470 changes: 327 additions & 1,143 deletions benchmarks/benchmarks.csv

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion lib/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
random.seed(SEED)


AE = True
# AE = True
AE = False


# Credentials for Google Sheet API
Expand Down
14 changes: 10 additions & 4 deletions lib/nlp/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def __init__(self, disable=False, debug=False):
self.sbert_cache: Dict[str, Any] = {}
self.byte_tokenizer = ByteLevel()
self.gpt3 = GPT(os.getenv("OPENAI_API_KEY"))
self.gpt3_entity_model = 'text-davinci-003'
# self.gpt3_entity_model = 'text-davinci-003'
self.gpt3_entity_model = 'gpt-3.5-turbo'
self.datamuse = Datamuse()
# self.wiki = wikipediaapi.Wikipedia('en')
self.wiki = None
Expand All @@ -82,13 +83,13 @@ def write_cache(self):
Following are gpt-3 related
probably need to add something else
"""
def call_gpt(self, prompt: str, temperature=0, max_token=256, stop=None, model='text-davinci-003') -> Dict:
def call_gpt(self, prompt: str, temperature=0, max_token=256, stop=None, model='gpt-3.5-turbo') -> Dict:
if temperature == 0:
return self.call_gpt_deterministic(prompt, max_token, stop, model)
else:
return self.gpt3.call(prompt, max_token, temperature, stop, model)

@cache(ignore_args=[0])
# @cache(ignore_args=[0])
def call_gpt_deterministic(self, prompt: str, max_token: int, stop, model: str) -> Dict:
if not config.AE:
response = self.gpt3.call(prompt, max_token, 0, stop, model)
Expand Down Expand Up @@ -541,7 +542,12 @@ def get_entity(self, context: str) -> Dict[str, List[Tuple[str, MatchSpan]]]:
if 'TIME' not in ents_dict and time_match:
if '-' in context and ':' in context:
split_string = context.split(' ')
time_str = find_date(split_string[1])[0][0]
idx = -1
if len(split_string) >= 2:
idx = 1
else:
idx = 0
time_str = find_date(split_string[idx])[0][0]
time_start_idx = context.index(time_str)
time_span = (time_start_idx, time_start_idx + len(time_str))
matched_spans.append(time_span)
Expand Down
4 changes: 2 additions & 2 deletions lib/sketch_gen/sketch_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def __init__(self, mode: str, executor: Executor, no_type: bool, no_decomp: bool
self.executor = executor
self.nlp_func = executor.nlp_engine
self.gpt_func = {
'code-davinci-002': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'code-davinci-002'),
'text-davinci-003': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'text-davinci-003'),
# 'code-davinci-002': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'code-davinci-002'),
# 'text-davinci-003': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'text-davinci-003'),
'gpt-3.5-turbo': lambda x: self.nlp_func.call_gpt3_and_get_answer(x, self.mode, 'gpt-3.5-turbo')
}

Expand Down
4 changes: 3 additions & 1 deletion lib/utils/csv_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import sys
from typing import Dict, List

csv.field_size_limit(sys.maxsize)
# print(sys.maxsize)
# 2147483647
csv.field_size_limit(2147483647)


def get_column_list_from_dict(data: List[Dict], col_name: str) -> List:
Expand Down
13 changes: 7 additions & 6 deletions run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse_args():
print('args: {}'.format(args))

# read benchmark here first
benchmark_engine = BenchmarkResultEngine(args.eval_sheet_name, args.eval_worksheet_name, benchmark_size=50)
benchmark_engine = BenchmarkResultEngine(args.eval_sheet_name, args.eval_worksheet_name, benchmark_size=8)

if args.test_sketch_gen:
# test sketch generator with different prompt version and model
Expand Down Expand Up @@ -226,8 +226,8 @@ def parse_args():
print('Evaluating benchmark {}'.format(benchmark.bid))
synthesizer = TopLevelSynthesizer(executor, args.depth, args.no_type, args.no_type_system, args.no_decomp, args.no_repair, args.prompt_version)
executor.context = {}
signal.signal(signal.SIGALRM, handle_timeout)
signal.alarm(args.timeout)
# signal.signal(signal.SIGALRM, handle_timeout)
if(not 'nt' in os.name): signal.alarm(args.timeout)
try:
if args.no_sketch:
res = synthesizer.synthesize_no_sketch(benchmark.task)
Expand All @@ -236,12 +236,13 @@ def parse_args():
res.bid = benchmark.bid
except TimeoutError:
res = EvalRes(benchmark.task, 'TIMEOUT', args.timeout, 0, '', '', benchmark.bid)
signal.alarm(0)
if(not 'nt' in os.name): signal.alarm(0)
except TimeOutException:
res = EvalRes(benchmark.task, 'TIMEOUT', args.timeout, 0, '', '', benchmark.bid)
signal.alarm(0)
if(not 'nt' in os.name): signal.alarm(0)
finally:
signal.alarm(0)
pass
if(not 'nt' in os.name): signal.alarm(0)

results.append(res)
except StopIteration:
Expand Down