4
4
"open-data" : " None" ,
5
5
"pass@1" : {
6
6
"instruct" : null ,
7
- "complete" : 41.7
7
+ "complete" : 38.73
8
8
},
9
- "prompted" : false ,
9
+ "prompted" : true ,
10
10
"size" : 34 ,
11
- "direct_complete" : true ,
11
+ "direct_complete" : false ,
12
12
"lazy" : false ,
13
13
"elo_mle" : 942
14
14
},
15
- "CodeLlama-13B-Python " : {
16
- "link" : " https://huggingface.co/codellama/CodeLlama-13b-hf " ,
15
+ "Meta-Llama-3-70B " : {
16
+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B " ,
17
17
"open-data" : " None" ,
18
18
"pass@1" : {
19
19
"instruct" : null ,
20
- "complete" : 40.0
20
+ "complete" : 48.98
21
21
},
22
22
"prompted" : false ,
23
- "size" : 13 ,
24
- "direct_complete" : true ,
23
+ "size" : 70 ,
24
+ "direct_complete" : false ,
25
+ "lazy" : false ,
26
+ "elo_mle" : 874
27
+ },
28
+ "Meta-Llama-3-70B-Instruct" : {
29
+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" ,
30
+ "open-data" : " None" ,
31
+ "pass@1" : {
32
+ "instruct" : null ,
33
+ "complete" : 62.45
34
+ },
35
+ "prompted" : true ,
36
+ "size" : 70 ,
37
+ "direct_complete" : false ,
38
+ "lazy" : false ,
39
+ "elo_mle" : 874
40
+ },
41
+ "Meta-Llama-3.1-70B-Instruct" : {
42
+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct" ,
43
+ "open-data" : " None" ,
44
+ "pass@1" : {
45
+ "instruct" : null ,
46
+ "complete" : 60
47
+ },
48
+ "prompted" : true ,
49
+ "size" : 70 ,
50
+ "direct_complete" : false ,
25
51
"lazy" : false ,
26
52
"elo_mle" : 874
27
53
},
28
- "CodeQwen1.5-7B " : {
29
- "link" : " https://huggingface.co/Qwen/CodeQwen1.5-7B " ,
54
+ "Meta-Llama-3.1-70B " : {
55
+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B " ,
30
56
"open-data" : " None" ,
31
57
"pass@1" : {
32
58
"instruct" : null ,
33
- "complete" : 31.8
59
+ "complete" : 37.56
34
60
},
35
61
"prompted" : false ,
62
+ "size" : 70 ,
63
+ "direct_complete" : false ,
64
+ "lazy" : false ,
65
+ "elo_mle" : 874
66
+ },
67
+ "Mistral-7B-Instruct-v0.3" : {
68
+ "link" : " https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" ,
69
+ "open-data" : " None" ,
70
+ "pass@1" : {
71
+ "instruct" : null ,
72
+ "complete" : 43.33
73
+ },
74
+ "prompted" : true ,
36
75
"size" : 7 ,
37
- "direct_complete" : true ,
76
+ "direct_complete" : false ,
38
77
"lazy" : false ,
39
- "elo_mle" : 1056
78
+ "elo_mle" : 874
40
79
},
41
- "DeepSeek-Coder-33B-Base " : {
42
- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-base " ,
80
+ "Mixtral-8x7B-Instruct-v0.1 " : {
81
+ "link" : " https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1 " ,
43
82
"open-data" : " None" ,
44
83
"pass@1" : {
45
84
"instruct" : null ,
46
- "complete" : 33.5
85
+ "complete" : 42.96
47
86
},
48
- "prompted" : false ,
49
- "size" : 33 ,
50
- "direct_complete" : true ,
87
+ "prompted" : true ,
88
+ "size" : 7 ,
89
+ "direct_complete" : false ,
90
+ "lazy" : false ,
91
+ "elo_mle" : 874
92
+ },
93
+ "Codestral-22B-v0.1" : {
94
+ "link" : " https://huggingface.co/mistralai/Codestral-22B-v0.1" ,
95
+ "open-data" : " None" ,
96
+ "pass@1" : {
97
+ "instruct" : null ,
98
+ "complete" : 47.6
99
+ },
100
+ "prompted" : true ,
101
+ "size" : 22 ,
102
+ "direct_complete" : false ,
51
103
"lazy" : false ,
52
- "elo_mle" : 1064
104
+ "elo_mle" : 874
53
105
},
54
- "StarCoder2-15B " : {
55
- "link" : " https://huggingface.co/bigcode/starcoder2-15b " ,
56
- "open-data" : " Full " ,
106
+ "Phi-3-medium-128k-instruct " : {
107
+ "link" : " https://huggingface.co/microsoft/Phi-3-medium-128k-instruct " ,
108
+ "open-data" : " None " ,
57
109
"pass@1" : {
58
110
"instruct" : null ,
59
- "complete" : 28.2
111
+ "complete" : 48.03
60
112
},
61
- "prompted" : false ,
62
- "size" : 15 ,
63
- "direct_complete" : true ,
113
+ "prompted" : true ,
114
+ "size" : 14 ,
115
+ "direct_complete" : false ,
64
116
"lazy" : false ,
65
- "elo_mle" : 960
117
+ "elo_mle" : 874
66
118
},
67
- "DeepSeek-Coder-6.7B-Base " : {
68
- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base " ,
119
+ "Phi-3-mini-128k-instruct " : {
120
+ "link" : " https://huggingface.co/microsoft/Phi-3-mini-128k-instruct " ,
69
121
"open-data" : " None" ,
70
122
"pass@1" : {
71
123
"instruct" : null ,
72
- "complete" : 28.4
124
+ "complete" : 37.93
73
125
},
74
- "prompted" : false ,
75
- "size" : 6.7 ,
76
- "direct_complete" : true ,
126
+ "prompted" : true ,
127
+ "size" : 3.8 ,
128
+ "direct_complete" : false ,
77
129
"lazy" : false ,
78
- "elo_mle" : 1002
130
+ "elo_mle" : 874
79
131
},
80
- "DeepSeek-Coder-33B -Instruct" : {
81
- "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " ,
132
+ "Qwen2-57B-A14B -Instruct" : {
133
+ "link" : " https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct " ,
82
134
"open-data" : " None" ,
83
135
"pass@1" : {
84
136
"instruct" : null ,
85
- "complete" : 33.5
137
+ "complete" : 46.34
86
138
},
87
139
"prompted" : true ,
88
- "size" : 33 ,
140
+ "size" : 57 ,
89
141
"direct_complete" : false ,
90
142
"lazy" : false ,
91
- "elo_mle" : 1129
143
+ "elo_mle" : 874
92
144
},
93
- "Yi-1 .5-34B " : {
94
- "link" : " https://huggingface.co/01-ai/Yi-1 .5-34B " ,
145
+ "CodeQwen1 .5-7B-Chat " : {
146
+ "link" : " https://huggingface.co/Qwen/CodeQwen1 .5-7B-Chat " ,
95
147
"open-data" : " None" ,
96
148
"pass@1" : {
97
149
"instruct" : null ,
98
- "complete" : 34.9
150
+ "complete" : 49.82
99
151
},
100
- "prompted" : false ,
152
+ "prompted" : true ,
153
+ "size" : 7 ,
154
+ "direct_complete" : false ,
155
+ "lazy" : false ,
156
+ "elo_mle" : 874
157
+ },
158
+ "Yi-1.5-34B-Chat" : {
159
+ "link" : " https://huggingface.co/01-ai/Yi-1.5-34B-Chat" ,
160
+ "open-data" : " None" ,
161
+ "pass@1" : {
162
+ "instruct" : null ,
163
+ "complete" : 49.39
164
+ },
165
+ "prompted" : true ,
101
166
"size" : 34 ,
102
- "direct_complete" : true ,
167
+ "direct_complete" : false ,
168
+ "lazy" : false ,
169
+ "elo_mle" : 874
170
+ },
171
+ "Yi-1.5-9B-Chat" : {
172
+ "link" : " https://huggingface.co/01-ai/Yi-1.5-9B-Chat" ,
173
+ "open-data" : " None" ,
174
+ "pass@1" : {
175
+ "instruct" : null ,
176
+ "complete" : 47.23
177
+ },
178
+ "prompted" : true ,
179
+ "size" : 9 ,
180
+ "direct_complete" : false ,
181
+ "lazy" : false ,
182
+ "elo_mle" : 874
183
+ },
184
+ "DeepSeek-coder-7b-instruct-v1.5" : {
185
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5" ,
186
+ "open-data" : " None" ,
187
+ "pass@1" : {
188
+ "instruct" : null ,
189
+ "complete" : 41.21
190
+ },
191
+ "prompted" : true ,
192
+ "size" : 7 ,
193
+ "direct_complete" : false ,
103
194
"lazy" : false ,
104
- "elo_mle" : 978
195
+ "elo_mle" : 874
105
196
},
106
- "OpenCodeInterpreter-DS-33B " : {
107
- "link" : " https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-33B " ,
108
- "open-data" : " Partial " ,
197
+ "DeepSeek-coder-33b-instruct " : {
198
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct " ,
199
+ "open-data" : " None " ,
109
200
"pass@1" : {
110
201
"instruct" : null ,
111
- "complete" : 31.0
202
+ "complete" : 36.6
112
203
},
113
204
"prompted" : true ,
114
205
"size" : 33 ,
115
- "direct_complete" : true ,
206
+ "direct_complete" : false ,
116
207
"lazy" : false ,
117
- "elo_mle" : 1131
208
+ "elo_mle" : 874
118
209
},
119
- "To be updated " : {
120
- "link" : " " ,
210
+ "DeepSeek-moe-16b-chat " : {
211
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat " ,
121
212
"open-data" : " None" ,
122
213
"pass@1" : {
123
214
"instruct" : null ,
124
- "complete" : 0
215
+ "complete" : 31.01
125
216
},
126
- "prompted" : false ,
217
+ "prompted" : true ,
218
+ "size" : 16.4 ,
219
+ "direct_complete" : false ,
220
+ "lazy" : false ,
221
+ "elo_mle" : 874
222
+ },
223
+ "DeepSeek-Coder-V2-Lite-Instruct" : {
224
+ "link" : " https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" ,
225
+ "open-data" : " None" ,
226
+ "pass@1" : {
227
+ "instruct" : null ,
228
+ "complete" : 46.51
229
+ },
230
+ "prompted" : true ,
231
+ "size" : 16 ,
232
+ "direct_complete" : false ,
233
+ "lazy" : false ,
234
+ "elo_mle" : 874
235
+ },
236
+ "InternLM2-5-20b-chat" : {
237
+ "link" : " https://huggingface.co/internlm/internlm2_5-20b-chat" ,
238
+ "open-data" : " None" ,
239
+ "pass@1" : {
240
+ "instruct" : null ,
241
+ "complete" : 44.89
242
+ },
243
+ "prompted" : true ,
244
+ "size" : 20 ,
245
+ "direct_complete" : false ,
246
+ "lazy" : false ,
247
+ "elo_mle" : 874
248
+ },
249
+ "StarCoder2-15b-instruct-v0.1" : {
250
+ "link" : " https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1" ,
251
+ "open-data" : " None" ,
252
+ "pass@1" : {
253
+ "instruct" : null ,
254
+ "complete" : 47.94
255
+ },
256
+ "prompted" : true ,
127
257
"size" : 15 ,
128
- "direct_complete" : true ,
258
+ "direct_complete" : false ,
259
+ "lazy" : false ,
260
+ "elo_mle" : 874
261
+ },
262
+ "Claude-3-sonnet@20240229" : {
263
+ "link" : " " ,
264
+ "open-data" : " None" ,
265
+ "pass@1" : {
266
+ "instruct" : null ,
267
+ "complete" : 53.97
268
+ },
269
+ "prompted" : true ,
270
+ "size" : " None" ,
271
+ "direct_complete" : false ,
272
+ "lazy" : false ,
273
+ "elo_mle" : 874
274
+ },
275
+ "GPT-4o-2024-05-13" : {
276
+ "link" : " " ,
277
+ "open-data" : " None" ,
278
+ "pass@1" : {
279
+ "instruct" : null ,
280
+ "complete" : 67
281
+ },
282
+ "prompted" : true ,
283
+ "size" : " None" ,
284
+ "direct_complete" : false ,
285
+ "lazy" : false ,
286
+ "elo_mle" : 874
287
+ },
288
+ "GPT-3.5-turbo-0613" : {
289
+ "link" : " " ,
290
+ "open-data" : " None" ,
291
+ "pass@1" : {
292
+ "instruct" : null ,
293
+ "complete" : 51.7
294
+ },
295
+ "prompted" : true ,
296
+ "size" : " None" ,
297
+ "direct_complete" : false ,
129
298
"lazy" : false ,
130
- "elo_mle" : 960
299
+ "elo_mle" : 874
131
300
}
132
301
}
0 commit comments