Commit fa24213

[MemCpyOpt] Forward memcpy based on the actual copy memory location. (#87190)
Fixes #85560. We can forward `memcpy` as long as the actual memory location being copied has not been altered. alive2: https://alive2.llvm.org/ce/z/q9JaHV
1 parent 840e857 · commit fa24213
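To illustrate the transform this enables, here is a minimal sketch distilled from the new @forward_offset test added in this commit (%src and %dest stand for arbitrary ptr arguments): a memcpy that reads at a positive offset from the destination of an earlier memcpy can instead read from the original source at the same offset, provided the first copy covers the whole forwarded range and that memory is not modified in between.

    ; Before: the second copy reads from %tmp, which was filled from %src.
    %tmp = alloca [9 x i8], align 1
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %tmp, ptr align 1 %src, i64 7, i1 false)
    %tmp.off = getelementptr inbounds i8, ptr %tmp, i64 1
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %tmp.off, i64 6, i1 false)

    ; After memcpyopt: the copy is forwarded to read from %src + 1. It becomes a
    ; memmove because %dest and %src may overlap.
    %tmp = alloca [9 x i8], align 1
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %tmp, ptr align 1 %src, i64 7, i1 false)
    %tmp.off = getelementptr inbounds i8, ptr %tmp, i64 1   ; now dead, cleaned up by later passes
    %src.off = getelementptr inbounds i8, ptr %src, i64 1
    call void @llvm.memmove.p0.p0.i64(ptr align 1 %dest, ptr align 1 %src.off, i64 6, i1 false)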

File tree: 4 files changed, +306 -26 lines

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (+74 -25)

@@ -14,6 +14,7 @@
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
@@ -1124,28 +1125,79 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                   MemCpyInst *MDep,
                                                   BatchAAResults &BAA) {
-  // We can only transforms memcpy's where the dest of one is the source of the
-  // other.
-  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
-    return false;
-
   // If dep instruction is reading from our current input, then it is a noop
-  // transfer and substituting the input won't change this instruction.  Just
-  // ignore the input and let someone else zap MDep.  This handles cases like:
+  // transfer and substituting the input won't change this instruction. Just
+  // ignore the input and let someone else zap MDep. This handles cases like:
   //    memcpy(a <- a)
   //    memcpy(b <- a)
   if (M->getSource() == MDep->getSource())
     return false;
 
-  // Second, the length of the memcpy's must be the same, or the preceding one
+  // We can only optimize non-volatile memcpy's.
+  if (MDep->isVolatile())
+    return false;
+
+  int64_t MForwardOffset = 0;
+  const DataLayout &DL = M->getModule()->getDataLayout();
+  // We can only transforms memcpy's where the dest of one is the source of the
+  // other, or they have an offset in a range.
+  if (M->getSource() != MDep->getDest()) {
+    std::optional<int64_t> Offset =
+        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
+    if (!Offset || *Offset < 0)
+      return false;
+    MForwardOffset = *Offset;
+  }
+
+  // The length of the memcpy's must be the same, or the preceding one
   // must be larger than the following one.
-  if (MDep->getLength() != M->getLength()) {
+  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
     auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
     auto *MLen = dyn_cast<ConstantInt>(M->getLength());
-    if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+    if (!MDepLen || !MLen ||
+        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
       return false;
   }
 
+  IRBuilder<> Builder(M);
+  auto *CopySource = MDep->getSource();
+  Instruction *NewCopySource = nullptr;
+  auto CleanupOnRet = llvm::make_scope_exit([&NewCopySource] {
+    if (NewCopySource && NewCopySource->use_empty())
+      // Safety: It's safe here because we will only allocate more instructions
+      // after finishing all BatchAA queries, but we have to be careful if we
+      // want to do something like this in another place. Then we'd probably
+      // have to delay instruction removal until all transforms on an
+      // instruction finished.
+      NewCopySource->eraseFromParent();
+  });
+  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
+  // We just need to calculate the actual size of the copy.
+  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
+      MemoryLocation::getForSource(M).Size);
+
+  // When the forwarding offset is greater than 0, we transform
+  //    memcpy(d1 <- s1)
+  //    memcpy(d2 <- d1+o)
+  // to
+  //    memcpy(d2 <- s1+o)
+  if (MForwardOffset > 0) {
+    // The copy destination of `M` maybe can serve as the source of copying.
+    std::optional<int64_t> MDestOffset =
+        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
+    if (MDestOffset == MForwardOffset)
+      CopySource = M->getDest();
+    else {
+      NewCopySource = cast<Instruction>(Builder.CreateInBoundsPtrAdd(
+          CopySource, Builder.getInt64(MForwardOffset)));
+      CopySource = NewCopySource;
+    }
+    // We need to update `MCopyLoc` if an offset exists.
+    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
+    if (CopySourceAlign)
+      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
+  }
+
   // Verify that the copied-from memory doesn't change in between the two
   // transfers. For example, in:
   //    memcpy(a <- b)
@@ -1155,14 +1207,12 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
   //
   // TODO: If the code between M and MDep is transparent to the destination "c",
   // then we could still perform the xform by moving M up to the first memcpy.
-  // TODO: It would be sufficient to check the MDep source up to the memcpy
-  // size of M, rather than MDep.
-  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
-                     MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
+                     MSSA->getMemoryAccess(M)))
     return false;
 
   // No need to create `memcpy(a <- a)`.
-  if (BAA.isMustAlias(M->getDest(), MDep->getSource())) {
+  if (BAA.isMustAlias(M->getDest(), CopySource)) {
     // Remove the instruction we're replacing.
     eraseInstruction(M);
     ++NumMemCpyInstr;
@@ -1191,23 +1241,22 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
 
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
-  IRBuilder<> Builder(M);
   Instruction *NewM;
   if (UseMemMove)
-    NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
-                                 MDep->getRawSource(), MDep->getSourceAlign(),
-                                 M->getLength(), M->isVolatile());
+    NewM =
+        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
+                              CopySourceAlign, M->getLength(), M->isVolatile());
   else if (isa<MemCpyInlineInst>(M)) {
     // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
     // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
-    NewM = Builder.CreateMemCpyInline(
-        M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
-        MDep->getSourceAlign(), M->getLength(), M->isVolatile());
+    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
+                                      CopySource, CopySourceAlign,
+                                      M->getLength(), M->isVolatile());
   } else
-    NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
-                                MDep->getRawSource(), MDep->getSourceAlign(),
-                                M->getLength(), M->isVolatile());
+    NewM =
+        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
+                             CopySourceAlign, M->getLength(), M->isVolatile());
   NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
 
   assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
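Two details of the rewritten check are worth calling out, shown in a small IR illustration based on the @forward_offset_and_store and @do_not_forward_oversize_offset tests below (%src and %dest are assumed ptr arguments). First, writtenBetween is now queried with MCopyLoc, i.e. only the source bytes that are actually forwarded (shifted by the offset and narrowed to M's size), so writes outside that window no longer block the transform. Second, the length check requires MDepLen >= MLen + MForwardOffset, so the forwarded read can never extend past what the first memcpy wrote.

    ; Forwarding is still legal here: the store clobbers only %src[0], outside
    ; the forwarded window %src[1..5] (offset 1, length 5 of a 7-byte copy).
    %tmp = alloca [9 x i8], align 1
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %tmp, ptr align 1 %src, i64 7, i1 false)
    store i8 1, ptr %src, align 1
    %tmp.off = getelementptr inbounds i8, ptr %tmp, i64 1
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %tmp.off, i64 5, i1 false)

    ; Rejected by the length check: copying 6 bytes at offset 1 from a 6-byte
    ; dependence would read a byte the first memcpy never wrote (6 < 6 + 1).
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %tmp, ptr align 1 %src, i64 6, i1 false)
    call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %tmp.off, i64 6, i1 false)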

llvm/test/Transforms/MemCpyOpt/lifetime.ll (+1 -1)

@@ -124,7 +124,7 @@ define void @call_slot_lifetime_bitcast(ptr %ptr) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 4 [[PTR:%.*]], i64 4, i1 false)
 ; CHECK-NEXT:    [[TMP1_CAST:%.*]] = bitcast ptr [[TMP1]] to ptr
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1_CAST]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1_CAST]], ptr align 4 [[PTR]], i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[PTR]], i64 4, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %tmp1 = alloca i32
New test file (+203)

@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s
+
+%buf = type [9 x i8]
+
+; We can forward `memcpy` because the copy location are the same,
+define void @forward_offset(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[SRC_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  ret void
+}
+
+; We need to update the align value of the source of `memcpy` when forwarding.
+define void @forward_offset_align(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset_align(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 3
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
+  ret void
+}
+
+; We can change the align value to 2 when forwarding.
+define void @forward_offset_align_2(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset_align_2(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 2 [[TMP1]], i64 6, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 2
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  ret void
+}
+
+; If the copy destination can be used as the copy source, we don't need to create a GEP instruction.
+define void @forward_offset_without_gep(ptr %src) {
+; CHECK-LABEL: define void @forward_offset_without_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  %dest = getelementptr inbounds i8, ptr %src, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  ret void
+}
+
+; We need to create a GEP instruction when forwarding.
+define void @forward_offset_with_gep(ptr %src) {
+; CHECK-LABEL: define void @forward_offset_with_gep(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  %dest = getelementptr inbounds i8, ptr %src, i64 2
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  ret void
+}
+
+; Make sure we pass the right parameters when calling `memcpy`.
+define void @forward_offset_memcpy(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset_memcpy(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
+; CHECK-NEXT:    call void @use(ptr [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  call void @use(ptr %dest)
+  ret void
+}
+
+; Make sure we pass the right parameters when calling `memcpy.inline`.
+define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset_memcpy_inline(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
+; CHECK-NEXT:    call void @use(ptr [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  call void @use(ptr %dest)
+  ret void
+}
+
+; We cannot forward `memcpy` because it exceeds the size of `memcpy` it depends on.
+define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @do_not_forward_oversize_offset(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 6, i1 false)
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
+  ret void
+}
+
+; We can forward `memcpy` because the write operation does not corrupt the location to be copied.
+define void @forward_offset_and_store(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_offset_and_store(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    store i8 1, ptr [[SRC]], align 1
+; CHECK-NEXT:    [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
+; CHECK-NEXT:    store i8 1, ptr [[DEP_SRC_END]], align 1
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  store i8 1, ptr %src, align 1
+  %src_end = getelementptr inbounds i8, ptr %src, i64 6
+  store i8 1, ptr %src_end, align 1
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
+  ret void
+}
+
+; We cannot forward `memcpy` because the write operation alters the location to be copied.
+; Also, make sure we have removed the GEP instruction that was created temporarily.
+define void @do_not_forward_offset_and_store(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @do_not_forward_offset_and_store(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
+; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
+; CHECK-NEXT:    [[DEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    store i8 1, ptr [[DEP]], align 1
+; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 5, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cpy_tmp = alloca %buf, align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
+  %src_offset = getelementptr inbounds i8, ptr %src, i64 1
+  store i8 1, ptr %src_offset, align 1
+  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
+  ret void
+}
+
+declare void @use(ptr)
+
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
+declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
New test file (+28)

@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=memcpyopt,instcombine -S -verify-memoryssa | FileCheck --check-prefix=CUSTOM %s
+; RUN: opt < %s -O2 -S | FileCheck --check-prefix=O2 %s
+
+; Check that we eliminate all `memcpy` calls in this function.
+define void @memcpy_forward_back_with_offset(ptr %arg) {
+; CUSTOM-LABEL: define void @memcpy_forward_back_with_offset(
+; CUSTOM-SAME: ptr [[ARG:%.*]]) {
+; CUSTOM-NEXT:    store i8 1, ptr [[ARG]], align 1
+; CUSTOM-NEXT:    ret void
+;
+; O2-LABEL: define void @memcpy_forward_back_with_offset(
+; O2-SAME: ptr nocapture writeonly [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O2-NEXT:    store i8 1, ptr [[ARG]], align 1
+; O2-NEXT:    ret void
+;
+  %i = alloca [753 x i8], align 1
+  %i1 = alloca [754 x i8], align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %i1, ptr %arg, i64 754, i1 false)
+  %i2 = getelementptr inbounds i8, ptr %i1, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %i, ptr %i2, i64 753, i1 false)
+  store i8 1, ptr %arg, align 1
+  %i3 = getelementptr inbounds i8, ptr %arg, i64 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %i3, ptr %i, i64 753, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
