[LLD][COFF] Add basic ARM64X dynamic relocations support #118035
Conversation
@llvm/pr-subscribers-platform-windows @llvm/pr-subscribers-lld-coff

Author: Jacek Caban (cjacek)

Changes

This modifies the machine field in the hybrid view to be AMD64, aligning it with expectations from ARM64EC modules. While this provides initial support, additional relocations will be necessary for full functionality. Many of these cases depend on implementing separate namespace support first.

Full diff: https://github.com/llvm/llvm-project/pull/118035.diff

5 Files Affected:
diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h
index 059d4aeddc6e56..5d89e97a7f7761 100644
--- a/lld/COFF/COFFLinkerContext.h
+++ b/lld/COFF/COFFLinkerContext.h
@@ -88,6 +88,8 @@ class COFFLinkerContext : public CommonLinkerContext {
Timer diskCommitTimer;
Configuration config;
+
+ DynamicRelocsChunk *dynamicRelocs = nullptr;
};
} // namespace lld::coff
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index a82e37ff052303..a4bd6df6c0ec0d 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -25,6 +25,7 @@
using namespace llvm;
using namespace llvm::object;
+using namespace llvm::support;
using namespace llvm::support::endian;
using namespace llvm::COFF;
using llvm::support::ulittle32_t;
@@ -1147,4 +1148,85 @@ uint32_t ImportThunkChunkARM64EC::extendRanges() {
return sizeof(arm64Thunk) - sizeof(uint32_t);
}
+size_t Arm64XDynamicRelocEntry::getSize() const {
+ switch (type) {
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE:
+ return sizeof(uint16_t) + size; // A header and a payload.
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA:
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
+ llvm_unreachable("unsupported type");
+ }
+}
+
+void Arm64XDynamicRelocEntry::writeTo(uint8_t *buf) const {
+ auto out = reinterpret_cast<ulittle16_t *>(buf);
+ *out = (offset & 0xfff) | (type << 12);
+
+ switch (type) {
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE:
+ *out |= ((bit_width(size) - 1) << 14); // Encode the size.
+ switch (size) {
+ case 2:
+ out[1] = value;
+ break;
+ case 4:
+ *reinterpret_cast<ulittle32_t *>(out + 1) = value;
+ break;
+ case 8:
+ *reinterpret_cast<ulittle64_t *>(out + 1) = value;
+ break;
+ default:
+ llvm_unreachable("invalid size");
+ }
+ break;
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA:
+ case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
+ llvm_unreachable("unsupported type");
+ }
+}
+
+void DynamicRelocsChunk::finalize() {
+ llvm::stable_sort(arm64xRelocs, [=](const Arm64XDynamicRelocEntry &a,
+ const Arm64XDynamicRelocEntry &b) {
+ return a.offset < b.offset;
+ });
+
+ size = sizeof(coff_dynamic_reloc_table) + sizeof(coff_dynamic_relocation64) +
+ sizeof(coff_base_reloc_block_header);
+
+ for (const Arm64XDynamicRelocEntry &entry : arm64xRelocs) {
+ assert(!(entry.offset & ~0xfff)); // Not yet supported.
+ size += entry.getSize();
+ }
+
+ size = alignTo(size, sizeof(uint32_t));
+}
+
+void DynamicRelocsChunk::writeTo(uint8_t *buf) const {
+ auto table = reinterpret_cast<coff_dynamic_reloc_table *>(buf);
+ table->Version = 1;
+ table->Size = sizeof(coff_dynamic_relocation64);
+ buf += sizeof(*table);
+
+ auto header = reinterpret_cast<coff_dynamic_relocation64 *>(buf);
+ header->Symbol = IMAGE_DYNAMIC_RELOCATION_ARM64X;
+ buf += sizeof(*header);
+
+ auto pageHeader = reinterpret_cast<coff_base_reloc_block_header *>(buf);
+ pageHeader->BlockSize = sizeof(*pageHeader);
+ size_t relocSize = sizeof(*pageHeader);
+ for (const Arm64XDynamicRelocEntry &entry : arm64xRelocs) {
+ entry.writeTo(buf + relocSize);
+ size_t entrySize = entry.getSize();
+ pageHeader->BlockSize += entrySize;
+ relocSize += entrySize;
+ }
+ pageHeader->BlockSize = alignTo(pageHeader->BlockSize, sizeof(uint32_t));
+ relocSize = alignTo(relocSize, sizeof(uint32_t));
+
+ header->BaseRelocSize = relocSize;
+ table->Size += relocSize;
+ assert(size == sizeof(*table) + sizeof(*header) + relocSize);
+}
+
} // namespace lld::coff
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 42284f485e5c07..1c219f2365f669 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -835,6 +835,43 @@ class ECExportThunkChunk : public NonSectionCodeChunk {
Defined *target;
};
+// ARM64X entry for dynamic relocations.
+class Arm64XDynamicRelocEntry {
+public:
+ Arm64XDynamicRelocEntry(llvm::COFF::Arm64XFixupType type, uint8_t size,
+ uint32_t offset, uint64_t value)
+ : offset(offset), value(value), type(type), size(size) {}
+
+ size_t getSize() const;
+ void writeTo(uint8_t *buf) const;
+
+ uint32_t offset;
+ uint64_t value;
+
+private:
+ llvm::COFF::Arm64XFixupType type;
+ uint8_t size;
+};
+
+// Dynamic relocation chunk containing ARM64X relocations for the hybrid image.
+class DynamicRelocsChunk : public NonSectionChunk {
+public:
+ DynamicRelocsChunk() {}
+ size_t getSize() const override { return size; }
+ void writeTo(uint8_t *buf) const override;
+ void finalize();
+
+ uint32_t add(llvm::COFF::Arm64XFixupType type, uint8_t size, uint32_t offset,
+ uint64_t value) {
+ arm64xRelocs.emplace_back(type, size, offset, value);
+ return arm64xRelocs.size() - 1;
+ }
+
+private:
+ std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
+ size_t size;
+};
+
// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index d3e326378ed2d4..3226b3a050ad21 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -272,12 +272,12 @@ class Writer {
OutputSection *findSection(StringRef name);
void addBaserels();
void addBaserelBlocks(std::vector<Baserel> &v);
+ void createDynamicRelocs();
uint32_t getSizeOfInitializedData();
void prepareLoadConfig();
template <typename T> void prepareLoadConfig(T *loadConfig);
- template <typename T> void checkLoadConfigGuardData(const T *loadConfig);
std::unique_ptr<FileOutputBuffer> &buffer;
std::map<PartialSectionKey, PartialSection *> partialSections;
@@ -754,6 +754,8 @@ void Writer::run() {
llvm::TimeTraceScope timeScope("Write PE");
ScopedTimer t1(ctx.codeLayoutTimer);
+ if (ctx.config.machine == ARM64X)
+ ctx.dynamicRelocs = make<DynamicRelocsChunk>();
createImportTables();
createSections();
appendImportThunks();
@@ -764,6 +766,7 @@ void Writer::run() {
mergeSections();
sortECChunks();
appendECImportTables();
+ createDynamicRelocs();
removeUnusedSections();
finalizeAddresses();
removeEmptySections();
@@ -1596,8 +1599,13 @@ void Writer::assignAddresses() {
for (OutputSection *sec : ctx.outputSections) {
llvm::TimeTraceScope timeScope("Section: ", sec->name);
- if (sec == relocSec)
+ if (sec == relocSec) {
addBaserels();
+ if (ctx.dynamicRelocs) {
+ ctx.dynamicRelocs->finalize();
+ relocSec->addChunk(ctx.dynamicRelocs);
+ }
+ }
uint64_t rawSize = 0, virtualSize = 0;
sec->header.VirtualAddress = rva;
@@ -1798,9 +1806,12 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
exceptionTable.last->getSize() -
exceptionTable.first->getRVA();
}
- if (relocSec->getVirtualSize()) {
+ size_t relocSize = relocSec->getVirtualSize();
+ if (ctx.dynamicRelocs)
+ relocSize -= ctx.dynamicRelocs->getSize();
+ if (relocSize) {
dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA();
- dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize();
+ dir[BASE_RELOCATION_TABLE].Size = relocSize;
}
if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) {
if (Defined *b = dyn_cast<Defined>(sym)) {
@@ -2555,6 +2566,33 @@ void Writer::addBaserelBlocks(std::vector<Baserel> &v) {
relocSec->addChunk(make<BaserelChunk>(page, &v[i], &v[0] + j));
}
+void Writer::createDynamicRelocs() {
+ if (!ctx.dynamicRelocs)
+ return;
+
+ const uint32_t coffHeaderOffset = dosStubSize + sizeof(PEMagic);
+ const uint32_t peHeaderOffset = coffHeaderOffset + sizeof(coff_file_header);
+ const uint32_t dataDirOffset = peHeaderOffset + sizeof(pe32plus_header);
+
+ // Adjust the Machine field in the COFF header to AMD64.
+ ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint16_t),
+ coffHeaderOffset + offsetof(coff_file_header, Machine),
+ AMD64);
+
+ // Adjust the load config directory.
+ // FIXME: Use the hybrid load config value instead.
+ ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+ dataDirOffset +
+ LOAD_CONFIG_TABLE * sizeof(data_directory) +
+ offsetof(data_directory, RelativeVirtualAddress),
+ 0);
+ ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t),
+ dataDirOffset +
+ LOAD_CONFIG_TABLE * sizeof(data_directory) +
+ offsetof(data_directory, Size),
+ 0);
+}
+
PartialSection *Writer::createPartialSection(StringRef name,
uint32_t outChars) {
PartialSection *&pSec = partialSections[{name, outChars}];
@@ -2634,11 +2672,6 @@ template <typename T> void Writer::prepareLoadConfig(T *loadConfig) {
if (ctx.config.dependentLoadFlags)
loadConfig->DependentLoadFlags = ctx.config.dependentLoadFlags;
- checkLoadConfigGuardData(loadConfig);
-}
-
-template <typename T>
-void Writer::checkLoadConfigGuardData(const T *loadConfig) {
size_t loadConfigSize = loadConfig->Size;
#define RETURN_IF_NOT_CONTAINS(field) \
@@ -2660,6 +2693,16 @@ void Writer::checkLoadConfigGuardData(const T *loadConfig) {
if (loadConfig->field != s->getVA()) \
warn(#field " not set correctly in '_load_config_used'");
+ if (ctx.dynamicRelocs) {
+ IF_CONTAINS(DynamicValueRelocTableSection) {
+ loadConfig->DynamicValueRelocTableSection = relocSec->sectionIndex;
+ loadConfig->DynamicValueRelocTableOffset =
+ ctx.dynamicRelocs->getRVA() - relocSec->getRVA();
+ } else {
+ warn("'_load_config_used' structure too small to include dynamic relocations");
+ }
+ }
+
if (ctx.config.guardCF == GuardCFLevel::Off)
return;
RETURN_IF_NOT_CONTAINS(GuardFlags)
diff --git a/lld/test/COFF/arm64x-loadconfig.s b/lld/test/COFF/arm64x-loadconfig.s
new file mode 100644
index 00000000000000..53f42b8f6e680d
--- /dev/null
+++ b/lld/test/COFF/arm64x-loadconfig.s
@@ -0,0 +1,68 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows test.s -o test.obj
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig.s -o loadconfig.obj
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig-short.s -o loadconfig-short.obj
+
+// RUN: lld-link -machine:arm64x -out:out.dll -dll -noentry loadconfig.obj test.obj
+
+// RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=DYNRELOCS %s
+// DYNRELOCS: DynamicValueRelocTableOffset: 0xC
+// DYNRELOCS-NEXT: DynamicValueRelocTableSection: 4
+// DYNRELOCS: DynamicRelocations [
+// DYNRELOCS-NEXT: Version: 0x1
+// DYNRELOCS-NEXT: Arm64X [
+// DYNRELOCS-NEXT: Entry [
+// DYNRELOCS-NEXT: RVA: 0x7C
+// DYNRELOCS-NEXT: Type: VALUE
+// DYNRELOCS-NEXT: Size: 0x2
+// DYNRELOCS-NEXT: Value: 0x8664
+// DYNRELOCS-NEXT: ]
+// DYNRELOCS-NEXT: Entry [
+// DYNRELOCS-NEXT: RVA: 0x150
+// DYNRELOCS-NEXT: Type: VALUE
+// DYNRELOCS-NEXT: Size: 0x4
+// DYNRELOCS-NEXT: Value: 0x0
+// DYNRELOCS-NEXT: ]
+// DYNRELOCS-NEXT: Entry [
+// DYNRELOCS-NEXT: RVA: 0x154
+// DYNRELOCS-NEXT: Type: VALUE
+// DYNRELOCS-NEXT: Size: 0x4
+// DYNRELOCS-NEXT: Value: 0x0
+// DYNRELOCS-NEXT: ]
+// DYNRELOCS-NEXT: ]
+// DYNRELOCS-NEXT: ]
+
+// RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s
+// HEADERS: BaseRelocationTableRVA: 0x4000
+// HEADERS-NEXT: BaseRelocationTableSize: 0xC
+// HEADERS: LoadConfigTableRVA: 0x1000
+// HEADERS-NEXT: LoadConfigTableSize: 0x140
+// HEADERS: Name: .reloc (2E 72 65 6C 6F 63 00 00)
+// HEADERS-NEXT: VirtualSize: 0x38
+
+// RUN: lld-link -machine:arm64x -out:out-short.dll -dll -noentry loadconfig-short.obj 2>&1 | FileCheck --check-prefix=WARN-RELOC-SIZE %s
+// WARN-RELOC-SIZE: lld-link: warning: '_load_config_used' structure too small to include dynamic relocations
+
+#--- test.s
+ .data
+sym:
+ // Emit a basereloc to make the loadconfig test more meaningful.
+ .xword sym
+
+#--- loadconfig.s
+ .section .rdata,"dr"
+ .globl _load_config_used
+ .p2align 3, 0
+_load_config_used:
+ .word 0x140
+ .fill 0x13c,1,0
+
+#--- loadconfig-short.s
+ .section .rdata,"dr"
+ .globl _load_config_used
+ .p2align 3, 0
+_load_config_used:
+ .word 0xe4
+ .fill 0xe0,1,0
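(Aside: the RVAs checked by the DYNRELOCS prefixes above follow directly from the header layout the patch assumes. A rough back-of-the-envelope, assuming lld's 0x78-byte DOS header plus stub program and the struct sizes from llvm/Object/COFF.h; the `k...` constants are illustrative, not from the patch:)

```cpp
// Where the three fixups land, assuming sequentially packed headers.
constexpr unsigned kCoffHeaderOffset = 0x78 /*DOS stub*/ + 4 /*PEMagic*/; // 0x7C
// Machine is the first field of coff_file_header, so the first fixup's
// RVA is 0x7C, matching the test.
constexpr unsigned kDataDirOffset =
    kCoffHeaderOffset + 20 /*coff_file_header*/ + 112 /*pe32plus_header*/; // 0x100
// LOAD_CONFIG_TABLE is data directory index 10; each entry is 8 bytes.
constexpr unsigned kLoadConfigDir = kDataDirOffset + 10 * 8; // 0x150 (RVA), 0x154 (Size)
```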
This commit introduces an initial implementation of ARM64X relocations. While this is a foundational step, further ARM64X support will require additional relocation types. The relocations included here represent the minimal set found in all ARM64X binaries. The implementation omits untestable elements for now, such as additional relocation types and relocations to non-header pages (e.g., those relative to symbols or chunks). These can be easily added later. The ability to create these relocations will be useful elsewhere in the codebase, particularly when constructing import tables. As such, the relocations are stored in `COFFLinkerContext`.
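(Aside: for readers unfamiliar with the DVRT format, a minimal sketch of the per-entry encoding that `Arm64XDynamicRelocEntry::writeTo()` produces, derived from the diff above. The helper name is hypothetical, and the enum's home header is assumed to be llvm/BinaryFormat/COFF.h.)

```cpp
#include <cassert>
#include <cstdint>
#include "llvm/BinaryFormat/COFF.h" // assumed location of Arm64XFixupType

// Header word of one ARM64X fixup record:
//   bits  0-11  offset of the fixup within its 4 KB page
//   bits 12-13  fixup type (VALUE, DELTA or ZEROFILL)
//   bits 14-15  log2 of the payload size (VALUE fixups only)
// A VALUE record is followed by a little-endian payload of 2, 4 or 8 bytes.
uint16_t encodeValueFixupHeader(uint32_t offset, uint8_t size) {
  assert(size == 2 || size == 4 || size == 8);
  unsigned log2Size = size == 2 ? 1 : size == 4 ? 2 : 3; // bit_width(size) - 1
  return static_cast<uint16_t>(
      (offset & 0xfff) |
      (llvm::COFF::IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE << 12) | (log2Size << 14));
}
```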
Looking good overall, but more than a few things to wrap my head around. :-)
    @@ -2660,6 +2693,18 @@ void Writer::checkLoadConfigGuardData(const T *loadConfig) {
         if (loadConfig->field != s->getVA()) \
           warn(#field " not set correctly in '_load_config_used'");

    +  if (ctx.dynamicRelocs) {
    +    IF_CONTAINS(DynamicValueRelocTableSection) {
    +      loadConfig->DynamicValueRelocTableSection = relocSec->sectionIndex;
I see that you're merging the `checkLoadConfigGuardData` function into the caller, since you're reusing this context here, to not only check member fields but also use the same `IF_CONTAINS()` to set fields - is that a correct observation?

Are there other fields that we currently set in load config, which could benefit from `IF_CONTAINS()` in the same way? Such a refactoring, either moving the `IF_CONTAINS()` macro elsewhere, or merging `checkLoadConfigGuardData` into the caller to ease reusing these, feels like it's somewhat orthogonal to this change. Or if it's not possible to neatly separate out that step, I guess that aspect at least should be mentioned and explained in the commit message.
Yes, that's a correct observation. Using the existing helpers felt appropriate here.

For ARM64X, we will also need to modify the `CHPEMetadataPointer` field (although that's earlier than dynamic relocs in the load config, so we may use the existing check and warning). Unlike the ARM64EC load config, the native ARM64 load config doesn't reference it in the CRT. This allows the same load config to work for the native target. On ARM64X, CHPE metadata exists only in EC code, so it wouldn't be possible to reference it from the native load config anyway. Instead, the linker reads it from the EC load config and sets it in the native variant.

We should technically have such a check for the existing `dependentLoadFlags` too. I'll use that to split off that part.
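(For context, the helpers discussed in this thread are local macros in Writer.cpp; their approximate shape, reproduced from memory rather than verbatim:)

```cpp
// Both macros guard field accesses against a '_load_config_used' symbol
// whose leading Size field declares a structure smaller than the full
// load config definition.
#define RETURN_IF_NOT_CONTAINS(field)                                          \
  if (loadConfigSize < offsetof(T, field) + sizeof(T::field)) {                \
    warn("'_load_config_used' structure too small to include " #field);       \
    return;                                                                    \
  }

#define IF_CONTAINS(field)                                                     \
  if (loadConfigSize >= offsetof(T, field) + sizeof(T::field))
```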
      addBaserels();
      if (ctx.dynamicRelocs) {
        ctx.dynamicRelocs->finalize();
        relocSec->addChunk(ctx.dynamicRelocs);
The added code here, `->finalize()` and `->addChunk()`, how does this behave if `assignAddresses()` gets called multiple times? If we need to add range extension thunks, we'll end up calling this method multiple times.

Ok, I see that `addBaserels()` starts out with `relocSec->chunks.clear();` so I guess the `->addChunk()` part is fine at least.

I'm pondering if there are other ways of making this clearer to the reader - either moving the newly added code here into `addBaserels()`, but that's probably not entirely right either, or moving the `relocSec->chunks.clear()` out here, to make it more visible that first we clear the section, then we add one kind of relocations, then we add another kind of relocations.
Agreed, that would be cleaner and more correct with respect to `ctx.config.relocatable`. I'll move it.
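(The follow-up commit described at the end of this review does exactly that; a sketch of the resulting flow, reconstructed from the commit message rather than copied from the final code:)

```cpp
// In Writer::assignAddresses(): clear the section once, then append both
// kinds of relocation chunks, so the loop stays correct when it is re-run
// after range extension thunks are added.
if (sec == relocSec) {
  relocSec->chunks.clear(); // moved here from addBaserels()
  addBaserels();
  if (ctx.dynamicRelocs) {
    ctx.dynamicRelocs->finalize(); // recompute size after layout changes
    relocSec->addChunk(ctx.dynamicRelocs);
  }
}
```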
lld/COFF/Writer.cpp (Outdated)

      const uint32_t coffHeaderOffset = dosStubSize + sizeof(PEMagic);
      const uint32_t peHeaderOffset = coffHeaderOffset + sizeof(coff_file_header);
      const uint32_t dataDirOffset = peHeaderOffset + sizeof(pe32plus_header);
These constants are pretty straightforward as such, but I wonder, if we'd move them somewhere outside of this function, would it be possible to have asserts checking that they actually are correct, e.g. at the point where we actually write them?

From my memory offhand about these headers: isn't it the case that some of them point to each other, so that there in principle could be some padding/gaps in between them? Or do any readers/users assume the same kind of sequential packing?
Yes, the format allows padding, and we will use that padding for the DOS stub program. lld-link differs from MSVC, which doesn't emit the program for ARM targets. What matters here is matching the behavior of `writeHeader()`. Asserts sound good to me; I'll add them.
Oh, skipping the DOS stub for ARM targets sounds like a nice cleanup that we probably could/should do as well. Then it's indeed good to have such asserts in place to make sure that whatever changes we do (including making the offset runtime calculated), this stays in sync.
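(A sketch of what such asserts could look like at the write site, using `buf` and `buffer` as in `Writer::writeHeader()`; placement and wording are illustrative only:)

```cpp
// Verify that the offsets assumed by createDynamicRelocs() match where
// the headers are actually written.
uint8_t *base = buffer->getBufferStart();
assert(buf - base == dosStubSize && "PE magic must follow the DOS stub");
memcpy(buf, PEMagic, sizeof(PEMagic));
buf += sizeof(PEMagic);
// buf now sits at coffHeaderOffset == dosStubSize + sizeof(PEMagic).
assert(buf - base == dosStubSize + sizeof(PEMagic));
```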
lld/COFF/Writer.cpp (Outdated)

                             coffHeaderOffset + offsetof(coff_file_header, Machine),
                             AMD64);

      // Adjust the load config directory.
"Adjust", as in setting the values to 0, if I understand the code correctly? Can you adjust (no pun intended ;-) ) the comment to be more clear about what adjustment it does?
Then again, I guess that the FIXME below means that clearing it isn't the final intended behaviour, but an initial stepping stone?
Ultimately, on ARM64X, we should define two separate load configs: native and EC. The native load config is written to the header, and ARM64X relocations "adjust" it to reference the EC load config. If the EC load config is missing, as in the test from this PR, MSVC emits a warning (this part is not yet implemented) and resets the header value so that the EC view has no load config.

Testing and supporting two load configs isn't currently possible in lld-link due to duplicated `_load_config_used` symbols. We need support for separated namespaces first.
lld/COFF/Chunks.h (Outdated)

      uint32_t add(llvm::COFF::Arm64XFixupType type, uint8_t size, uint32_t offset,
                   uint64_t value) {
        arm64xRelocs.emplace_back(type, size, offset, value);
        return arm64xRelocs.size() - 1;
This return value doesn't seem to be used in this commit yet; it's not entirely obvious to me how it's going to be used, but I guess it's fine to keep the return value here, as it probably becomes understandable in later patches.
I needed it in a previous version and forgot to remove it; I will fix that.
    void DynamicRelocsChunk::finalize() {
      llvm::stable_sort(arm64xRelocs, [=](const Arm64XDynamicRelocEntry &a,
                                          const Arm64XDynamicRelocEntry &b) {
        return a.offset < b.offset;
This `finalize()` method gets called multiple times, if we need to add range extension thunks - while I don't think anything, at least at this point within the function, really needs to be recalculated after the layout gets redone. But perhaps some future dynamic relocations may need to be re-sorted? (Technically, if we're inserting thunks and have relocations based on symbols, the relative ordering of relocs probably still is the same, but we may need to recalculate offsets. At that point, it's probably fine to keep this in the part of the code that gets rerun, for clarity.)
The sorting itself won't change, but the size might once we allow offsets to chunks. Adding thunks could shift offsets between pages, which might add or remove `coff_base_reloc_block_header`s. While we could avoid re-sorting in such cases, keeping it together felt cleaner.
Ok, sounds reasonable - no need to move the sorting out in that case; keeping it together probably makes more sense indeed than to microoptimize such things at this stage.
             sizeof(coff_base_reloc_block_header);

      for (const Arm64XDynamicRelocEntry &entry : arm64xRelocs) {
        assert(!(entry.offset & ~0xfff)); // Not yet supported.
So essentially, at the point of this patch, we can only do dynamic relocations within the first 4 KB of the linked image, i.e. in the basics of the image headers?
Yes, adding support for other offsets is straightforward, but it would remain dead code until we actually emit them. Since it’s only a few lines of code, I can add it if you think it’s better. In general, testing is tricky without separate namespace support, and namespace support depends on ARM64X relocs. So, I’m trying to find a way to keep things incremental.
Ok, good, then I'm understanding it correctly. No need to add support for other offsets yet if it's not easy to test.
lld/COFF/Chunks.cpp (Outdated)

        relocSize += entrySize;
      }
      pageHeader->BlockSize = alignTo(pageHeader->BlockSize, sizeof(uint32_t));
      relocSize = alignTo(relocSize, sizeof(uint32_t));
All throughout this point, we have `relocSize` being identical to `pageHeader->BlockSize`, which feels a bit redundant. But I guess that a separate variable will be needed in some future version?
Once we allow relocs for other pages, the plan is for `relocSize` to include all blocks, while `pageHeader->BlockSize` will account for only the single block. See my draft.
I will skip `relocSize` for now to keep this PR cleaner.
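(To make the distinction concrete, a rough sketch of what multi-page support might eventually look like, following the plan described above; the per-page grouping container is hypothetical, not taken from the draft:)

```cpp
// One coff_base_reloc_block_header per 4 KB page, each block sized and
// aligned individually, with relocSize accumulating across all blocks.
uint32_t relocSize = 0;
for (auto &[pageRVA, entries] : entriesByPage) { // hypothetical grouping
  auto *pageHeader =
      reinterpret_cast<coff_base_reloc_block_header *>(buf + relocSize);
  pageHeader->PageRVA = pageRVA;
  pageHeader->BlockSize = sizeof(*pageHeader);
  for (const Arm64XDynamicRelocEntry &entry : entries) {
    entry.writeTo(buf + relocSize + pageHeader->BlockSize);
    pageHeader->BlockSize += entry.getSize();
  }
  pageHeader->BlockSize = alignTo(pageHeader->BlockSize, sizeof(uint32_t));
  relocSize += pageHeader->BlockSize;
}
```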
    // RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig.s -o loadconfig.obj
    // RUN: llvm-mc -filetype=obj -triple=aarch64-windows loadconfig-short.s -o loadconfig-short.obj

    // RUN: lld-link -machine:arm64x -out:out.dll -dll -noentry loadconfig.obj test.obj
So we're testing writing an arm64x image, but where we only have plain aarch64 chunks? And if changing to the arm64ec form at runtime, it changes the machine field and clears the load config, but nothing else?
Yes, I think that's the simplest possible ARM64X relocs test (and testing more complex cases will require namespaces).

I created #118535 to separate the
LGTM, thanks!

Please mention and explain why the `.clear()` call is moved from `addBaserels()` in the commit message.
This update modifies the machine field in the hybrid view to be AMD64, aligning it with expectations from ARM64EC modules. While this provides initial support, additional relocations will be necessary for full functionality. Many of these cases depend on implementing separate namespace support first.

Move clearing of the .reloc section from addBaserels to assignAddresses to ensure it is always cleared, regardless of the relocatable configuration. This change also clarifies the reasoning for adding the dynamic relocations chunk in that location.
      case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
        llvm_unreachable("unsupported type");
      }
    }
This causes warnings when building with GCC:

    ../../lld/COFF/Chunks.cpp: In member function 'size_t lld::coff::Arm64XDynamicRelocEntry::getSize() const':
    ../../lld/COFF/Chunks.cpp:1177:1: warning: control reaches end of non-void function [-Wreturn-type]
     1177 | }
          | ^

This case is mentioned in the LLVM coding style guide, see https://llvm.org/docs/CodingStandards.html#don-t-use-default-labels-in-fully-covered-switches-over-enumerations - the default action to silence this warning with GCC is to add an `llvm_unreachable("")` after the switch; see e.g. b21de9b. (In this case, it feels a bit repetitive when some cases already end with `llvm_unreachable()` too, but they're different cases.)
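(Applied here, that convention would look roughly like this; the trailing `llvm_unreachable` only silences GCC's -Wreturn-type, while the per-case one keeps its message:)

```cpp
size_t Arm64XDynamicRelocEntry::getSize() const {
  switch (type) {
  case IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE:
    return sizeof(uint16_t) + size; // A header and a payload.
  case IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA:
  case IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL:
    llvm_unreachable("unsupported type");
  }
  llvm_unreachable(""); // Fully covered switch; quiets GCC -Wreturn-type.
}
```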
I created #122382 for a fix, sorry about that. The current `llvm_unreachable()` will be removed once ARM64X support is more complete. Those two reloc types are needed for the shared IAT.