Skip to content

Commit b50d163

Browse files
authored
[Fix] Refactor Needlebench Configs for CLI Testing Support (#1020)
* add needlebench datasets suffix
* fix import
* update run.py args for summarizer key and dataset suffix
* update utils/run.py
1 parent 2d4e559 commit b50d163

33 files changed, with 287 additions and 276 deletions

Diff for: configs/datasets/needlebench/needlebench.py

-11
This file was deleted.

Diff for: configs/datasets/needlebench/needlebench_1000k/needlebench.py

-18
This file was deleted.
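(New file added by this commit, per the `@@ -0,0 +1,18 @@` hunk header below; its "Diff for:" heading was not captured.)
Original file line number | Diff line number | Diff line change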
@@ -0,0 +1,18 @@
1+
from mmengine.config import read_base
2+
3+
with read_base():
4+
from .needlebench_multi_reasoning_1000k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
5+
from .needlebench_multi_reasoning_1000k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
6+
from .needlebench_multi_reasoning_1000k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
7+
from .needlebench_multi_reasoning_1000k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
8+
from .needlebench_multi_reasoning_1000k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
9+
from .needlebench_multi_reasoning_1000k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
10+
from .needlebench_multi_reasoning_1000k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
11+
from .needlebench_multi_reasoning_1000k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
12+
13+
from .needlebench_single_1000k import needlebench_en_datasets as needlebench_origin_en_datasets
14+
from .needlebench_single_1000k import needlebench_zh_datasets as needlebench_origin_zh_datasets
15+
from .needlebench_multi_retrieval_1000k import needlebench_en_datasets as needlebench_parallel_en_datasets
16+
from .needlebench_multi_retrieval_1000k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
17+
18+
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

Diff for: configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning.py renamed to configs/datasets/needlebench/needlebench_1000k/needlebench_multi_reasoning_1000k.py

+16 -16
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def generate_depth_percents(intervals, interval_type):
6262
needle_file_name = 'multi_needle_reasoning_en.json'
6363
diff = 10
6464
num_needles = 2
65-
needlebench_datasets_2needle_en = []
65+
needlebench_2needle_en_datasets = []
6666
language = 'English'
6767

6868
for original_context_length in context_lengths:
@@ -87,10 +87,10 @@ def generate_depth_percents(intervals, interval_type):
8787
'infer_cfg': needlebench_infer_cfg,
8888
'eval_cfg': needlebench_eval_cfg
8989
}
90-
needlebench_datasets_2needle_en.append(dataset_dict)
90+
needlebench_2needle_en_datasets.append(dataset_dict)
9191

9292
num_needles = 3
93-
needlebench_datasets_3needle_en = []
93+
needlebench_3needle_en_datasets = []
9494

9595
for original_context_length in context_lengths:
9696
for depth_percent in depths_list:
@@ -114,10 +114,10 @@ def generate_depth_percents(intervals, interval_type):
114114
'infer_cfg': needlebench_infer_cfg,
115115
'eval_cfg': needlebench_eval_cfg
116116
}
117-
needlebench_datasets_3needle_en.append(dataset_dict)
117+
needlebench_3needle_en_datasets.append(dataset_dict)
118118

119119
num_needles = 4
120-
needlebench_datasets_4needle_en = []
120+
needlebench_4needle_en_datasets = []
121121

122122
for original_context_length in context_lengths:
123123
for depth_percent in depths_list:
@@ -141,10 +141,10 @@ def generate_depth_percents(intervals, interval_type):
141141
'infer_cfg': needlebench_infer_cfg,
142142
'eval_cfg': needlebench_eval_cfg
143143
}
144-
needlebench_datasets_4needle_en.append(dataset_dict)
144+
needlebench_4needle_en_datasets.append(dataset_dict)
145145

146146
num_needles = 5
147-
needlebench_datasets_5needle_en = []
147+
needlebench_5needle_en_datasets = []
148148

149149
for original_context_length in context_lengths:
150150
for depth_percent in depths_list:
@@ -168,7 +168,7 @@ def generate_depth_percents(intervals, interval_type):
168168
'infer_cfg': needlebench_infer_cfg,
169169
'eval_cfg': needlebench_eval_cfg
170170
}
171-
needlebench_datasets_5needle_en.append(dataset_dict)
171+
needlebench_5needle_en_datasets.append(dataset_dict)
172172

173173
# ----------Chinese Version----------
174174
base_path = './data/needlebench'
@@ -177,7 +177,7 @@ def generate_depth_percents(intervals, interval_type):
177177
needle_file_name = 'multi_needle_reasoning_zh.json'
178178
diff = 10
179179
num_needles = 2
180-
needlebench_datasets_2needle_zh = []
180+
needlebench_2needle_zh_datasets = []
181181
language = 'Chinese'
182182

183183
for original_context_length in context_lengths:
@@ -202,10 +202,10 @@ def generate_depth_percents(intervals, interval_type):
202202
'infer_cfg': needlebench_infer_cfg,
203203
'eval_cfg': needlebench_eval_cfg
204204
}
205-
needlebench_datasets_2needle_zh.append(dataset_dict)
205+
needlebench_2needle_zh_datasets.append(dataset_dict)
206206

207207
num_needles = 3
208-
needlebench_datasets_3needle_zh = []
208+
needlebench_3needle_zh_datasets = []
209209

210210
for original_context_length in context_lengths:
211211
for depth_percent in depths_list:
@@ -229,10 +229,10 @@ def generate_depth_percents(intervals, interval_type):
229229
'infer_cfg': needlebench_infer_cfg,
230230
'eval_cfg': needlebench_eval_cfg
231231
}
232-
needlebench_datasets_3needle_zh.append(dataset_dict)
232+
needlebench_3needle_zh_datasets.append(dataset_dict)
233233

234234
num_needles = 4
235-
needlebench_datasets_4needle_zh = []
235+
needlebench_4needle_zh_datasets = []
236236

237237
for original_context_length in context_lengths:
238238
for depth_percent in depths_list:
@@ -256,10 +256,10 @@ def generate_depth_percents(intervals, interval_type):
256256
'infer_cfg': needlebench_infer_cfg,
257257
'eval_cfg': needlebench_eval_cfg
258258
}
259-
needlebench_datasets_4needle_zh.append(dataset_dict)
259+
needlebench_4needle_zh_datasets.append(dataset_dict)
260260

261261
num_needles = 5
262-
needlebench_datasets_5needle_zh = []
262+
needlebench_5needle_zh_datasets = []
263263

264264
for original_context_length in context_lengths:
265265
for depth_percent in depths_list:
@@ -283,4 +283,4 @@ def generate_depth_percents(intervals, interval_type):
283283
'infer_cfg': needlebench_infer_cfg,
284284
'eval_cfg': needlebench_eval_cfg
285285
}
286-
needlebench_datasets_5needle_zh.append(dataset_dict)
286+
needlebench_5needle_zh_datasets.append(dataset_dict)

Diff for: configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval.py renamed to configs/datasets/needlebench/needlebench_1000k/needlebench_multi_retrieval_1000k.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def generate_depth_percents(intervals, interval_type):
5858

5959
base_path = './data/needlebench'
6060
file_list = ['PaulGrahamEssays.jsonl']
61-
needlebench_datasets_en = []
61+
needlebench_en_datasets = []
6262
needle_file_name = 'needles.jsonl'
6363
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
6464

@@ -81,10 +81,10 @@ def generate_depth_percents(intervals, interval_type):
8181
'infer_cfg': needlebench_infer_cfg,
8282
'eval_cfg': needlebench_eval_cfg
8383
}
84-
needlebench_datasets_en.append(dataset_dict)
84+
needlebench_en_datasets.append(dataset_dict)
8585

8686
file_list = ['zh_finance.jsonl']
87-
needlebench_datasets_zh = []
87+
needlebench_zh_datasets = []
8888

8989
for original_context_length in context_lengths:
9090
dataset_dict = {
@@ -105,4 +105,4 @@ def generate_depth_percents(intervals, interval_type):
105105
'infer_cfg': needlebench_infer_cfg,
106106
'eval_cfg': needlebench_eval_cfg
107107
}
108-
needlebench_datasets_zh.append(dataset_dict)
108+
needlebench_zh_datasets.append(dataset_dict)

Diff for: configs/datasets/needlebench/needlebench_1000k/needlebench_single.py renamed to configs/datasets/needlebench/needlebench_1000k/needlebench_single_1000k.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def generate_depth_percents(intervals, interval_type):
5757

5858
base_path = './data/needlebench'
5959
file_list = ['PaulGrahamEssays.jsonl']
60-
needlebench_datasets_en = []
60+
needlebench_en_datasets = []
6161
needle_file_name = 'needles.jsonl'
6262

6363
for original_context_length in context_lengths:
@@ -80,10 +80,10 @@ def generate_depth_percents(intervals, interval_type):
8080
'infer_cfg': needlebench_infer_cfg,
8181
'eval_cfg': needlebench_eval_cfg
8282
}
83-
needlebench_datasets_en.append(dataset_dict)
83+
needlebench_en_datasets.append(dataset_dict)
8484

8585
file_list = ['zh_finance.jsonl']
86-
needlebench_datasets_zh = []
86+
needlebench_zh_datasets = []
8787
needle_file_name = 'needles.jsonl'
8888

8989
for original_context_length in context_lengths:
@@ -106,4 +106,4 @@ def generate_depth_percents(intervals, interval_type):
106106
'infer_cfg': needlebench_infer_cfg,
107107
'eval_cfg': needlebench_eval_cfg
108108
}
109-
needlebench_datasets_zh.append(dataset_dict)
109+
needlebench_zh_datasets.append(dataset_dict)

Diff for: configs/datasets/needlebench/needlebench_128k/needlebench.py

-18
This file was deleted.
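(New file added by this commit, per the `@@ -0,0 +1,18 @@` hunk header below; its "Diff for:" heading was not captured.)
Original file line number | Diff line number | Diff line change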
@@ -0,0 +1,18 @@
1+
from mmengine.config import read_base
2+
3+
with read_base():
4+
from .needlebench_multi_reasoning_128k import needlebench_2needle_en_datasets as needlebench_multi_2needle_en_datasets
5+
from .needlebench_multi_reasoning_128k import needlebench_3needle_en_datasets as needlebench_multi_3needle_en_datasets
6+
from .needlebench_multi_reasoning_128k import needlebench_4needle_en_datasets as needlebench_multi_4needle_en_datasets
7+
from .needlebench_multi_reasoning_128k import needlebench_5needle_en_datasets as needlebench_multi_5needle_en_datasets
8+
from .needlebench_multi_reasoning_128k import needlebench_2needle_zh_datasets as needlebench_multi_2needle_zh_datasets
9+
from .needlebench_multi_reasoning_128k import needlebench_3needle_zh_datasets as needlebench_multi_3needle_zh_datasets
10+
from .needlebench_multi_reasoning_128k import needlebench_4needle_zh_datasets as needlebench_multi_4needle_zh_datasets
11+
from .needlebench_multi_reasoning_128k import needlebench_5needle_zh_datasets as needlebench_multi_5needle_zh_datasets
12+
13+
from .needlebench_single_128k import needlebench_en_datasets as needlebench_origin_en_datasets
14+
from .needlebench_single_128k import needlebench_zh_datasets as needlebench_origin_zh_datasets
15+
from .needlebench_multi_retrieval_128k import needlebench_en_datasets as needlebench_parallel_en_datasets
16+
from .needlebench_multi_retrieval_128k import needlebench_zh_datasets as needlebench_parallel_zh_datasets
17+
18+
needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

Diff for: configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning.py renamed to configs/datasets/needlebench/needlebench_128k/needlebench_multi_reasoning_128k.py

+16 -16
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def generate_depth_percents(intervals, interval_type):
6464
needle_file_name = 'multi_needle_reasoning_en.json'
6565
diff = 10
6666
num_needles = 2
67-
needlebench_datasets_2needle_en = []
67+
needlebench_2needle_en_datasets = []
6868
language = 'English'
6969

7070
for original_context_length in context_lengths:
@@ -89,10 +89,10 @@ def generate_depth_percents(intervals, interval_type):
8989
'infer_cfg': needlebench_infer_cfg,
9090
'eval_cfg': needlebench_eval_cfg
9191
}
92-
needlebench_datasets_2needle_en.append(dataset_dict)
92+
needlebench_2needle_en_datasets.append(dataset_dict)
9393

9494
num_needles = 3
95-
needlebench_datasets_3needle_en = []
95+
needlebench_3needle_en_datasets = []
9696

9797
for original_context_length in context_lengths:
9898
for depth_percent in depths_list:
@@ -116,10 +116,10 @@ def generate_depth_percents(intervals, interval_type):
116116
'infer_cfg': needlebench_infer_cfg,
117117
'eval_cfg': needlebench_eval_cfg
118118
}
119-
needlebench_datasets_3needle_en.append(dataset_dict)
119+
needlebench_3needle_en_datasets.append(dataset_dict)
120120

121121
num_needles = 4
122-
needlebench_datasets_4needle_en = []
122+
needlebench_4needle_en_datasets = []
123123

124124
for original_context_length in context_lengths:
125125
for depth_percent in depths_list:
@@ -143,10 +143,10 @@ def generate_depth_percents(intervals, interval_type):
143143
'infer_cfg': needlebench_infer_cfg,
144144
'eval_cfg': needlebench_eval_cfg
145145
}
146-
needlebench_datasets_4needle_en.append(dataset_dict)
146+
needlebench_4needle_en_datasets.append(dataset_dict)
147147

148148
num_needles = 5
149-
needlebench_datasets_5needle_en = []
149+
needlebench_5needle_en_datasets = []
150150

151151
for original_context_length in context_lengths:
152152
for depth_percent in depths_list:
@@ -170,7 +170,7 @@ def generate_depth_percents(intervals, interval_type):
170170
'infer_cfg': needlebench_infer_cfg,
171171
'eval_cfg': needlebench_eval_cfg
172172
}
173-
needlebench_datasets_5needle_en.append(dataset_dict)
173+
needlebench_5needle_en_datasets.append(dataset_dict)
174174

175175
# ----------Chinese Version----------
176176
base_path = './data/needlebench'
@@ -179,7 +179,7 @@ def generate_depth_percents(intervals, interval_type):
179179
needle_file_name = 'multi_needle_reasoning_zh.json'
180180
diff = 10
181181
num_needles = 2
182-
needlebench_datasets_2needle_zh = []
182+
needlebench_2needle_zh_datasets = []
183183
language = 'Chinese'
184184

185185
for original_context_length in context_lengths:
@@ -204,10 +204,10 @@ def generate_depth_percents(intervals, interval_type):
204204
'infer_cfg': needlebench_infer_cfg,
205205
'eval_cfg': needlebench_eval_cfg
206206
}
207-
needlebench_datasets_2needle_zh.append(dataset_dict)
207+
needlebench_2needle_zh_datasets.append(dataset_dict)
208208

209209
num_needles = 3
210-
needlebench_datasets_3needle_zh = []
210+
needlebench_3needle_zh_datasets = []
211211

212212
for original_context_length in context_lengths:
213213
for depth_percent in depths_list:
@@ -231,10 +231,10 @@ def generate_depth_percents(intervals, interval_type):
231231
'infer_cfg': needlebench_infer_cfg,
232232
'eval_cfg': needlebench_eval_cfg
233233
}
234-
needlebench_datasets_3needle_zh.append(dataset_dict)
234+
needlebench_3needle_zh_datasets.append(dataset_dict)
235235

236236
num_needles = 4
237-
needlebench_datasets_4needle_zh = []
237+
needlebench_4needle_zh_datasets = []
238238

239239
for original_context_length in context_lengths:
240240
for depth_percent in depths_list:
@@ -258,10 +258,10 @@ def generate_depth_percents(intervals, interval_type):
258258
'infer_cfg': needlebench_infer_cfg,
259259
'eval_cfg': needlebench_eval_cfg
260260
}
261-
needlebench_datasets_4needle_zh.append(dataset_dict)
261+
needlebench_4needle_zh_datasets.append(dataset_dict)
262262

263263
num_needles = 5
264-
needlebench_datasets_5needle_zh = []
264+
needlebench_5needle_zh_datasets = []
265265

266266
for original_context_length in context_lengths:
267267
for depth_percent in depths_list:
@@ -285,4 +285,4 @@ def generate_depth_percents(intervals, interval_type):
285285
'infer_cfg': needlebench_infer_cfg,
286286
'eval_cfg': needlebench_eval_cfg
287287
}
288-
needlebench_datasets_5needle_zh.append(dataset_dict)
288+
needlebench_5needle_zh_datasets.append(dataset_dict)

Diff for: configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval.py renamed to configs/datasets/needlebench/needlebench_128k/needlebench_multi_retrieval_128k.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def generate_depth_percents(intervals, interval_type):
5858

5959
base_path = './data/needlebench'
6060
file_list = ['PaulGrahamEssays.jsonl']
61-
needlebench_datasets_en = []
61+
needlebench_en_datasets = []
6262
needle_file_name = 'needles.jsonl'
6363
depths = [0, 10, 21, 31, 42, 52, 63, 73, 84, 94, 100]
6464

@@ -81,10 +81,10 @@ def generate_depth_percents(intervals, interval_type):
8181
'infer_cfg': needlebench_infer_cfg,
8282
'eval_cfg': needlebench_eval_cfg
8383
}
84-
needlebench_datasets_en.append(dataset_dict)
84+
needlebench_en_datasets.append(dataset_dict)
8585

8686
file_list = ['zh_finance.jsonl']
87-
needlebench_datasets_zh = []
87+
needlebench_zh_datasets = []
8888

8989
for original_context_length in context_lengths:
9090
dataset_dict = {
@@ -105,4 +105,4 @@ def generate_depth_percents(intervals, interval_type):
105105
'infer_cfg': needlebench_infer_cfg,
106106
'eval_cfg': needlebench_eval_cfg
107107
}
108-
needlebench_datasets_zh.append(dataset_dict)
108+
needlebench_zh_datasets.append(dataset_dict)

0 commit comments

Comments
 (0)