@@ -188,10 +188,61 @@ def make_byte_array_copier(dst, src):
188
188
# batch copy the bytearray (including length word) using copy_bytes
189
189
len_ = add_ofst (get_bytearray_length (src ), 32 )
190
190
max_bytes = src .typ .maxlen + 32
191
+
192
+ if _prefer_copy_maxbound_heuristic (dst , src , item_size = 1 ):
193
+ len_ = max_bytes
194
+
195
+ # batch copy the entire dynarray, including length word
191
196
ret = copy_bytes (dst , src , len_ , max_bytes )
192
197
return b1 .resolve (ret )
193
198
194
199
200
+ # heuristic to choose
201
+ def _prefer_copy_maxbound_heuristic (dst , src , item_size ):
202
+ if dst .location != MEMORY :
203
+ return False
204
+
205
+ # a heuristic - it's cheaper to just copy the extra buffer bytes
206
+ # than calculate the number of bytes
207
+ # copy(dst, src, 32 + itemsize*load(src))
208
+ # =>
209
+ # copy(dst, src, bound)
210
+ # (32 + itemsize*(load(src))) costs 4*3 + 8 - 3 gas over just `bound`
211
+ length_calc_cost = 4 * 3 - 3
212
+ length_calc_cost += 8 * (item_size != 1 ) # PUSH MUL
213
+
214
+ # NOTE: there is an opportunity for more optimization if this
215
+ # is one in a sequence of copies, since doing copy(dst, src, maxbound)
216
+ # allows us to fuse copies together, further saving gas (each copy
217
+ # costs at least 15 gas).
218
+
219
+ if _opt_codesize ():
220
+ # if we are optimizing for codesize, we are ok with a higher
221
+ # gas cost before switching to copy(dst, src, <precise length>).
222
+ # +45 is based on vibes -- it says we are willing to burn 45
223
+ # gas (additional 15 words in the copy operation) at runtime to
224
+ # save these 5-8 bytes (depending on if itemsize is 1 or not)
225
+ # (DUP<src> MLOAD PUSH1 ITEMSIZE MUL PUSH1 32 ADD)
226
+ length_calc_cost += 45
227
+
228
+ src_bound = src .typ .memory_bytes_required
229
+ # 3 gas per word, minus the cost of the length word
230
+ # (since it is always copied, we don't include it in the marginal
231
+ # cost difference)
232
+ copy_cost = ceil32 (src_bound - 32 ) * 3 // 32
233
+ if src .location in (CALLDATA , MEMORY ) and copy_cost <= length_calc_cost :
234
+ return True
235
+ # threshold is 6 words of data (+ 1 length word that we need to copy anyway)
236
+ # dload(src) costs additional 14-20 gas depending on if `src` is a literal
237
+ # or not.
238
+ # (dload(src) expands to `codecopy(0, add(CODE_END, src), 32); mload(0)`,
239
+ # and we have already accounted for an `mload(ptr)`).
240
+ # for simplicity, skip the 14 case.
241
+ if src .location == DATA and copy_cost <= (20 + length_calc_cost ):
242
+ return True
243
+ return False
244
+
245
+
195
246
def bytes_data_ptr (ptr ):
196
247
if ptr .location is None : # pragma: nocover
197
248
raise CompilerPanic ("tried to modify non-pointer type" )
@@ -287,6 +338,9 @@ def _dynarray_make_setter(dst, src, hi=None):
287
338
n_bytes = add_ofst (_mul (count , element_size ), 32 )
288
339
max_bytes = 32 + src .typ .count * element_size
289
340
341
+ if _prefer_copy_maxbound_heuristic (dst , src , element_size ):
342
+ n_bytes = max_bytes
343
+
290
344
# batch copy the entire dynarray, including length word
291
345
ret .append (copy_bytes (dst , src , n_bytes , max_bytes ))
292
346
@@ -1049,7 +1103,18 @@ def _complex_make_setter(left, right, hi=None):
1049
1103
assert is_tuple_like (left .typ )
1050
1104
keys = left .typ .tuple_keys ()
1051
1105
1052
- if left .is_pointer and right .is_pointer and right .encoding == Encoding .VYPER :
1106
+ # performance: if there is any dynamic data, there might be
1107
+ # unused space between the end of the dynarray and the end of the buffer.
1108
+ # for instance DynArray[uint256, 100] with runtime length of 5.
1109
+ # in these cases, we recurse to dynarray make_setter which has its own
1110
+ # heuristic for when to copy all data.
1111
+
1112
+ # use abi_type.is_dynamic since it is identical to the query "do any children
1113
+ # have dynamic size"
1114
+ has_dynamic_data = right .typ .abi_type .is_dynamic ()
1115
+ simple_encoding = right .encoding == Encoding .VYPER
1116
+
1117
+ if left .is_pointer and right .is_pointer and simple_encoding and not has_dynamic_data :
1053
1118
# both left and right are pointers, see if we want to batch copy
1054
1119
# instead of unrolling the loop.
1055
1120
assert left .encoding == Encoding .VYPER
0 commit comments