Skip to content

Commit 9a233f8

Browse files
committed
Fix adhoc function call figure
1 parent 7f0587f commit 9a233f8

7 files changed

+230
-96
lines changed

artifact/1_run_all.sh

+40-40
Original file line number | Diff line number | Diff line change
@@ -13,58 +13,58 @@ if [ -z "$n_runs" ]; then
1313
fi
1414

1515
# Translation
16-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
17-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
18-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100
16+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
17+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
18+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100
1919

20-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
21-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
22-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability --batch_size 1 2 5 10 20 50 100
20+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
21+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
22+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability --batch_size 1 2 5 10 20 50 100
2323

24-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
25-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
26-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability --batch_size 1 2 5 10 20 50 100
24+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
25+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
26+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model gpt-3.5-finetuned --policy_types reachability --batch_size 1 2 5 10 20 50 100
2727

28-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
29-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
30-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100
28+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
29+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
30+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-7b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100
3131

32-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-13b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
33-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-13b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
34-
python3 ../netconfeval/step_1_formal_spec_translation.py --n_run $n_runs --model codellama-13b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100
32+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-13b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
33+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-13b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
34+
python3 ../netconfeval/step_1_formal_spec_translation.py --results_path $(pwd)/results_spec_translation --n_run $n_runs --model codellama-13b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100
3535

3636
# Conflict Detection
37-
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
38-
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --model gpt-4 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
39-
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
40-
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --n_run $n_runs --combined --batch_size 1 3 11 33
37+
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --results_path $(pwd)/results_conflict_detection --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
38+
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --results_path $(pwd)/results_conflict_detection --model gpt-4 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
39+
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --results_path $(pwd)/results_conflict_detection --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --n_run $n_runs --batch_size 1 3 11 33
40+
python3 ../netconfeval/step_1_formal_spec_conflict_detection.py --results_path $(pwd)/results_conflict_detection --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --n_run $n_runs --combined --batch_size 1 3 11 33
4141

4242
# Conflict Distance
43-
python3 ../netconfeval/step_1_formal_spec_conflict_distance.py --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --batch_size 11
43+
python3 ../netconfeval/step_1_formal_spec_conflict_distance.py --results_path $(pwd)/results_conflict_distance --n_run $n_runs --model gpt-3.5-0613 --policy_types reachability waypoint loadbalancing --batch_size 11
4444

4545
# Function Call
46-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
47-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
48-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100
46+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33
47+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50
48+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100
4949

50-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
51-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
52-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
53-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
54-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
55-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
56-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
57-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
58-
python3 ../netconfeval/step_1_function_call.py --n_run $n_runs --model codellama-7b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
50+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
51+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
52+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-4-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
53+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
54+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
55+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model gpt-3.5-1106 --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
56+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint loadbalancing --batch_size 1 3 11 33 --adhoc
57+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model codellama-7b-instruct --policy_types reachability waypoint --batch_size 1 2 5 10 25 50 --adhoc
58+
python3 ../netconfeval/step_1_function_call.py --results_path $(pwd)/results_function_call --n_run $n_runs --model codellama-7b-instruct --policy_types reachability --batch_size 1 2 5 10 20 50 100 --adhoc
5959

6060
# Code Generation
61-
python3 ../netconfeval/step_2_code_gen.py --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10
62-
python3 ../netconfeval/step_2_code_gen.py --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --feedback
63-
python3 ../netconfeval/step_2_code_gen.py --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --prompts no_detail
64-
python3 ../netconfeval/step_2_code_gen.py --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --prompts no_detail --feedback
61+
python3 ../netconfeval/step_2_code_gen.py --results_path $(pwd)/results_code_gen --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10
62+
python3 ../netconfeval/step_2_code_gen.py --results_path $(pwd)/results_code_gen --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --feedback
63+
python3 ../netconfeval/step_2_code_gen.py --results_path $(pwd)/results_code_gen --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --prompts no_detail
64+
python3 ../netconfeval/step_2_code_gen.py --results_path $(pwd)/results_code_gen --model gpt-4-1106 --n_run $n_runs --policy_types shortest_path reachability waypoint loadbalancing --n_retries 10 --prompts no_detail --feedback
6565

6666
# Low-Level Configurations
67-
python3 ../netconfeval/step_3_low_level.py --n_run $n_runs --model gpt-4-1106 --mode none
68-
python3 ../netconfeval/step_3_low_level.py --n_run $n_runs --model gpt-4-1106 --mode full
69-
python3 ../netconfeval/step_3_low_level.py --n_run $n_runs --model gpt-4-1106 --mode idx
70-
python3 ../netconfeval/step_3_low_level.py --n_run $n_runs --model gpt-4-1106 --mode rag --rag_chunk_size 9000
67+
python3 ../netconfeval/step_3_low_level.py --results_path $(pwd)/results_low_level --n_run $n_runs --model gpt-4-1106 --mode none
68+
python3 ../netconfeval/step_3_low_level.py --results_path $(pwd)/results_low_level --n_run $n_runs --model gpt-4-1106 --mode full
69+
python3 ../netconfeval/step_3_low_level.py --results_path $(pwd)/results_low_level --n_run $n_runs --model gpt-4-1106 --mode idx
70+
python3 ../netconfeval/step_3_low_level.py --results_path $(pwd)/results_low_level --n_run $n_runs --model gpt-4-1106 --mode rag --rag_chunk_size 9000

artifact/2_plot_all.sh

+14-14
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,27 @@
11
#!/bin/bash
22

33
# Translation
4-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models gpt --policy_types reachability
5-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models gpt --policy_types reachability waypoint
6-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models gpt --policy_types reachability waypoint loadbalancing
7-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models codellama --policy_types reachability
8-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models codellama --policy_types reachability waypoint
9-
python3 plot/step_1_plot_formal_spec_translation.py --results_path ../results_spec_translation --figures_path figures --models codellama --policy_types reachability loadbalancing
4+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models gpt --policy_types reachability
5+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models gpt --policy_types reachability waypoint
6+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models gpt --policy_types reachability waypoint loadbalancing
7+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models codellama --policy_types reachability
8+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models codellama --policy_types reachability waypoint
9+
python3 plot/step_1_plot_formal_spec_translation.py --results_path results_spec_translation --figures_path figures --models codellama --policy_types reachability loadbalancing
1010

1111
# Conflict Detection
12-
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path ../results_conflict_detection --figures_path figures --metric f1_score --policy_types reachability waypoint loadbalancing
13-
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path ../results_conflict_detection --figures_path figures --metric recall --policy_types reachability waypoint loadbalancing
14-
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path ../results_conflict_detection --figures_path figures --combined --metric f1_score --policy_types reachability waypoint loadbalancing
12+
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path results_conflict_detection --figures_path figures --metric f1_score --policy_types reachability waypoint loadbalancing
13+
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path results_conflict_detection --figures_path figures --metric recall --policy_types reachability waypoint loadbalancing
14+
python3 plot/step_1_plot_formal_spec_conflict_detection.py --results_path results_conflict_detection --figures_path figures --combined --metric f1_score --policy_types reachability waypoint loadbalancing
1515

1616
# Conflict Distance
17-
python3 plot/step_1_plot_formal_spec_conflict_heatmap.py --results_path ../results_conflict_distance --figures_path figures --policy_types reachability waypoint loadbalancing --model gpt-3.5-0613
17+
python3 plot/step_1_plot_formal_spec_conflict_heatmap.py --results_path results_conflict_distance --figures_path figures --policy_types reachability waypoint loadbalancing --model gpt-3.5-0613
1818

1919
# Function Call
20-
python3 plot/step_1_plot_function_call.py --results_path ../results_function_call --figures_path figures --model gpt-4-1106 --type native
21-
python3 plot/step_1_plot_function_call.py --results_path ../results_function_call --figures_path figures --model gpt-4-1106 --type adhoc
20+
python3 plot/step_1_plot_function_call_native.py --results_path results_function_call --figures_path figures --model gpt-4-1106
21+
python3 plot/step_1_plot_function_call_adhoc.py --results_path results_function_call --figures_path figures
2222

2323
# Code Generation
24-
python3 plot/step_2_plot_code_gen.py --results_path ../results_code_gen --figures_path figures --model gpt-4-1106
24+
python3 plot/step_2_plot_code_gen.py --results_path results_code_gen --figures_path figures --model gpt-4-1106
2525

2626
# Low-Level Configurations
27-
python3 plot/step_3_plot_low_level.py --results_path ../results_low_level --figures_path figures --model gpt-4-1106
27+
python3 plot/step_3_plot_low_level.py --results_path results_low_level --figures_path figures --model gpt-4-1106

artifact/plot/step_1_plot_formal_spec_conflict_heatmap.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@
1717
def _plot(results_path: str, figures_path: str, requirements: SortedSet, model: str) -> None:
1818
requirements_str = "_".join(requirements)
1919

20+
corr = np.corrcoef([[0] * 34 for _ in range(34)])
2021
results = step_1_conflict_distance_extract(results_path, requirements, model)
21-
corr = np.corrcoef(results)
2222
mask = np.tril(np.full_like(corr, 0))
2323

2424
ax = sns.heatmap(

0 commit comments

Comments
 (0)