Skip to content

Commit 9486a41

Browse files
fix[codegen]: fix overcopying of bytes in make_setter (#4419)
in `_complex_make_setter`, when a dynamic type (e.g. `Bytes` or `DynArray`) is contained within a tuple, the current code generation copies the entire buffer without checking how large it is. note this represents a gas issue since more bytes might be copied than is necessary, but not a correctness issue. in comparison, the `make_setter` implementations for `Bytes` and `DynArray` limit the copy length to the runtime sizes of the `Bytes`/`DynArray`. this commit disables the full buffer copy and adds a heuristic to the `make_setter` implementations for `DynArray` and `Bytes` so that in certain cases, they copy the full buffer instead of checking the length.
1 parent 2d515d3 commit 9486a41

File tree

2 files changed

+81
-1
lines changed

2 files changed

+81
-1
lines changed

tests/unit/compiler/venom/test_memmerging.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,21 @@ def test_memmerging_double_use():
920920
_check_pre_post(pre, post)
921921

922922

923+
def test_existing_mcopy_overlap_nochange():
924+
"""
925+
Check that mcopy which already contains an overlap does not get optimized
926+
"""
927+
if not version_check(begin="cancun"):
928+
return
929+
930+
pre = """
931+
_global:
932+
mcopy 32, 33, 2
933+
return %1
934+
"""
935+
_check_no_change(pre)
936+
937+
923938
@pytest.mark.parametrize("load_opcode,copy_opcode", LOAD_COPY)
924939
def test_memmerging_load(load_opcode, copy_opcode):
925940
"""

vyper/codegen/core.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,61 @@ def make_byte_array_copier(dst, src):
188188
# batch copy the bytearray (including length word) using copy_bytes
189189
len_ = add_ofst(get_bytearray_length(src), 32)
190190
max_bytes = src.typ.maxlen + 32
191+
192+
if _prefer_copy_maxbound_heuristic(dst, src, item_size=1):
193+
len_ = max_bytes
194+
195+
# batch copy the entire dynarray, including length word
191196
ret = copy_bytes(dst, src, len_, max_bytes)
192197
return b1.resolve(ret)
193198

194199

200+
# heuristic to choose
201+
def _prefer_copy_maxbound_heuristic(dst, src, item_size):
202+
if dst.location != MEMORY:
203+
return False
204+
205+
# a heuristic - it's cheaper to just copy the extra buffer bytes
206+
# than calculate the number of bytes
207+
# copy(dst, src, 32 + itemsize*load(src))
208+
# =>
209+
# copy(dst, src, bound)
210+
# (32 + itemsize*(load(src))) costs 4*3 + 8 - 3 gas over just `bound`
211+
length_calc_cost = 4 * 3 - 3
212+
length_calc_cost += 8 * (item_size != 1) # PUSH MUL
213+
214+
# NOTE: there is an opportunity for more optimization if this
215+
# is one in a sequence of copies, since doing copy(dst, src, maxbound)
216+
# allows us to fuse copies together, further saving gas (each copy
217+
# costs at least 15 gas).
218+
219+
if _opt_codesize():
220+
# if we are optimizing for codesize, we are ok with a higher
221+
# gas cost before switching to copy(dst, src, <precise length>).
222+
# +45 is based on vibes -- it says we are willing to burn 45
223+
# gas (additional 15 words in the copy operation) at runtime to
224+
# save these 5-8 bytes (depending on if itemsize is 1 or not)
225+
# (DUP<src> MLOAD PUSH1 ITEMSIZE MUL PUSH1 32 ADD)
226+
length_calc_cost += 45
227+
228+
src_bound = src.typ.memory_bytes_required
229+
# 3 gas per word, minus the cost of the length word
230+
# (since it is always copied, we don't include it in the marginal
231+
# cost difference)
232+
copy_cost = ceil32(src_bound - 32) * 3 // 32
233+
if src.location in (CALLDATA, MEMORY) and copy_cost <= length_calc_cost:
234+
return True
235+
# threshold is 6 words of data (+ 1 length word that we need to copy anyway)
236+
# dload(src) costs additional 14-20 gas depending on if `src` is a literal
237+
# or not.
238+
# (dload(src) expands to `codecopy(0, add(CODE_END, src), 32); mload(0)`,
239+
# and we have already accounted for an `mload(ptr)`).
240+
# for simplicity, skip the 14 case.
241+
if src.location == DATA and copy_cost <= (20 + length_calc_cost):
242+
return True
243+
return False
244+
245+
195246
def bytes_data_ptr(ptr):
196247
if ptr.location is None: # pragma: nocover
197248
raise CompilerPanic("tried to modify non-pointer type")
@@ -287,6 +338,9 @@ def _dynarray_make_setter(dst, src, hi=None):
287338
n_bytes = add_ofst(_mul(count, element_size), 32)
288339
max_bytes = 32 + src.typ.count * element_size
289340

341+
if _prefer_copy_maxbound_heuristic(dst, src, element_size):
342+
n_bytes = max_bytes
343+
290344
# batch copy the entire dynarray, including length word
291345
ret.append(copy_bytes(dst, src, n_bytes, max_bytes))
292346

@@ -1049,7 +1103,18 @@ def _complex_make_setter(left, right, hi=None):
10491103
assert is_tuple_like(left.typ)
10501104
keys = left.typ.tuple_keys()
10511105

1052-
if left.is_pointer and right.is_pointer and right.encoding == Encoding.VYPER:
1106+
# performance: if there is any dynamic data, there might be
1107+
# unused space between the end of the dynarray and the end of the buffer.
1108+
# for instance DynArray[uint256, 100] with runtime length of 5.
1109+
# in these cases, we recurse to dynarray make_setter which has its own
1110+
# heuristic for when to copy all data.
1111+
1112+
# use abi_type.is_dynamic since it is identical to the query "do any children
1113+
# have dynamic size"
1114+
has_dynamic_data = right.typ.abi_type.is_dynamic()
1115+
simple_encoding = right.encoding == Encoding.VYPER
1116+
1117+
if left.is_pointer and right.is_pointer and simple_encoding and not has_dynamic_data:
10531118
# both left and right are pointers, see if we want to batch copy
10541119
# instead of unrolling the loop.
10551120
assert left.encoding == Encoding.VYPER

0 commit comments

Comments
 (0)