Skip to content

Commit 42fa39e

Browse files
authored
containertool: Add basic ELF file type detection (#63)
Motivation ---------- The container's architecture metadata field should match the architecture of the executable it contains. The plugin can't currently tell which architecture the SDK targets, but `containertool` can infer the architecture by reading the ELF headers. Modifications ------------- * Adds basic ELF header reader. Reading the whole ELF header is not required - the necessary information is provided by the first few fields of the header. * Changes `containertool` to use the detected architecture unless overridden by command line flags or environment variables. Result ------ Adding ELF detection reduces the risk of building a mismatched container image, where the architecture of the packaged binary does not match the architecture of the underlying Linux distribution. Test Plan --------- * New unit tests exercise ELF header parsing * A new integration test checks that `containertool` selects the correct base image architecture for different cross-compiled binaries * All previous tests continue to pass Fixes #49
1 parent 196a7ce commit 42fa39e

File tree

5 files changed

+554
-3
lines changed

5 files changed

+554
-3
lines changed

.github/workflows/interop_tests.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,32 @@ jobs:
4343
docker create --name second --pull always localhost:5000/layering_test
4444
docker cp second:/payload second.payload
4545
grep second second.payload
46+
47+
elf-detection-test:
48+
name: ELF detection test
49+
runs-on: ubuntu-latest
50+
services:
51+
registry:
52+
image: registry:2
53+
ports:
54+
- 5000:5000
55+
steps:
56+
- name: Checkout repository
57+
uses: actions/checkout@v4
58+
with:
59+
persist-credentials: false
60+
61+
- name: Mark the workspace as safe
62+
# https://github.com/actions/checkout/issues/766
63+
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
64+
65+
- name: Install the static SDK
66+
run: |
67+
swift sdk install \
68+
https://download.swift.org/swift-6.0.2-release/static-sdk/swift-6.0.2-RELEASE/swift-6.0.2-RELEASE_static-linux-0.0.1.artifactbundle.tar.gz \
69+
--checksum aa5515476a403797223fc2aad4ca0c3bf83995d5427fb297cab1d93c68cee075
70+
71+
# Run the test script
72+
- name: Test ELF detection
73+
run: |
74+
scripts/test-elf-detection.sh

Sources/containertool/ELFDetect.swift

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftContainerPlugin open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the SwiftContainerPlugin project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftContainerPlugin project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
/// Describes a fixed-length run of bytes inside a byte buffer.
///
/// `T` is a phantom type parameter: no value of type `T` is stored. It only
/// tags the descriptor with the byte collection type the field is read as.
struct ArrayField<T> where T: Collection, T.Element == UInt8 {
    /// Offset of the first byte of the field.
    var start: Int

    /// Number of bytes in the field.
    var count: Int
}
19+
20+
/// Describes an integer-valued field inside a byte buffer.
///
/// `T` is a phantom type parameter recording the integer type of the field;
/// no value of type `T` is stored.
struct IntField<T> where T: BinaryInteger {
    /// Offset of the first byte of the field.
    var start: Int
}
23+
24+
extension Array where Element == UInt8 {
    /// Returns a copy of the `idx.count` bytes beginning at offset `idx.start`.
    ///
    /// The range is passed straight to the array's own subscript, so the
    /// caller must make sure the whole field lies inside the array.
    subscript(idx: ArrayField<[UInt8]>) -> [UInt8] {
        let end = idx.start + idx.count
        return Array(self[idx.start..<end])
    }

    /// Returns the single byte at offset `idx.start`.
    subscript(idx: IntField<UInt8>) -> UInt8 {
        self[idx.start]
    }

    /// Returns the 16-bit value held in the two bytes starting at `idx.start`,
    /// assembled according to `endianness`.
    subscript(idx: IntField<UInt16>, endianness endianness: ELF.Endianness) -> UInt16 {
        let first = UInt16(self[idx.start])
        let second = UInt16(self[idx.start + 1])

        switch endianness {
        case .littleEndian:
            // Least significant byte is stored first.
            return (second << 8) | first
        case .bigEndian:
            // Most significant byte is stored first.
            return (first << 8) | second
        }
    }
}
44+
45+
/// A minimal model of an ELF file header.
///
/// - https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
/// - https://refspecs.linuxbase.org/elf/elf.pdf
///
/// Only the fields needed to identify a valid ELF file are modelled:
/// the type of object the file contains, plus the processor architecture
/// and operating system ABI for which that object was created.
struct ELF: Equatable {
    /// Byte order of the objects in the file.
    ///
    /// Multibyte ELF fields are stored in the native endianness of the
    /// target system.
    enum Endianness: UInt8 {
        case littleEndian = 0x01
        case bigEndian = 0x02
    }

    /// Offset (address) size used by the objects in the file.
    ///
    /// Offsets are stored as 32-bit or 64-bit integers, which means some
    /// header fields are found at different positions in 32-bit and 64-bit
    /// ELF files.
    enum Encoding: UInt8 {
        case bits32 = 0x01
        case bits64 = 0x02
    }

    /// The type of object held in the file.
    ///
    /// The standard defines a number of fixed types and also reserves
    /// ranges of type numbers to be used by specific operating systems
    /// and processors.
    enum Object: Equatable {
        case none
        case relocatable
        case executable
        case shared
        case core
        case reservedOS(UInt16)
        case reservedCPU(UInt16)
        case unknown(UInt16)

        /// Maps a raw object type number to a case.
        ///
        /// Returns `nil` for any value outside the fixed types and the two
        /// reserved ranges; this initializer never produces `.unknown`.
        init?(rawValue: UInt16) {
            switch rawValue {
            case 0: self = .none
            case 1: self = .relocatable
            case 2: self = .executable
            case 3: self = .shared
            case 4: self = .core

            case 0xfe00..<0xff00:
                // Reserved for OS-specific use
                self = .reservedOS(rawValue)

            case 0xff00...:
                // Reserved for CPU-specific use
                self = .reservedCPU(rawValue)

            default:
                return nil
            }
        }
    }

    /// The ABI used by the object in the file.
    ///
    /// The standard reserves values for a variety of ABIs and operating
    /// systems; only a few are implemented here. Every other value is
    /// preserved in `.unknown`.
    enum ABI: Equatable {
        case SysV
        case Linux
        case unknown(UInt8)

        init(rawValue: UInt8) {
            if rawValue == 0x00 {
                self = .SysV
            } else if rawValue == 0x03 {
                self = .Linux
            } else {
                self = .unknown(rawValue)
            }
        }
    }

    /// The processor architecture used by the object in the file.
    ///
    /// Values are reserved for many ISAs; this enum includes cases for the
    /// linux-* host types for which Swift can currently be built:
    ///
    /// https://github.com/swiftlang/swift/blob/c6d1060778f35631000911372d7645dbd5cade0a/utils/build-script-impl#L458
    enum ISA: Equatable {
        case x86
        case powerpc
        case powerpc64
        case s390  // including s390x
        case arm  // up to armv7
        case x86_64
        case aarch64  // armv8 onwards
        case riscv
        case unknown(UInt16)

        /// Maps a raw machine number to a case; unrecognised values are
        /// preserved in `.unknown`.
        init(rawValue: UInt16) {
            let recognised: [UInt16: Self] = [
                0x0003: .x86,
                0x0014: .powerpc,
                0x0015: .powerpc64,
                0x0016: .s390,
                0x0028: .arm,
                0x003e: .x86_64,
                0x00b7: .aarch64,
                0x00f3: .riscv,
            ]
            self = recognised[rawValue] ?? .unknown(rawValue)
        }
    }

    var encoding: Encoding
    var endianness: Endianness
    var ABI: ABI
    var object: Object
    var ISA: ISA
}
156+
157+
extension ELF {
    /// ELF header field addresses
    ///
    /// The ELF format can store binaries for 32-bit and 64-bit systems,
    /// using little-endian and big-endian data encoding.
    ///
    /// All multibyte fields are stored using the endianness of the target
    /// system. Read the EI_DATA field to find the endianness of the file.
    ///
    /// Some fields are different sizes in 32-bit and 64-bit ELF files, but
    /// these occur after all the fields we need to read for basic file type
    /// identification, so all our offsets are the same on 32-bit and 64-bit systems.
    enum Field {
        /// ELF magic number: a string of 4 bytes, not a UInt32; no endianness
        static let EI_MAGIC: ArrayField<[UInt8]> = .init(start: 0x00, count: 4)

        /// ELF class (word size): 1 byte
        static let EI_CLASS: IntField<UInt8> = .init(start: 0x04)

        /// Data encoding (endianness): 1 byte
        static let EI_DATA: IntField<UInt8> = .init(start: 0x05)

        /// ELF version: 1 byte
        static let EI_VERSION: IntField<UInt8> = .init(start: 0x06)

        /// Operating system/ABI identification: 1 byte
        static let EI_OSABI: IntField<UInt8> = .init(start: 0x07)

        // The following fields are multibyte, so endianness must be considered.
        // All the fields we need are the same length in 32-bit and 64-bit
        // ELF files, so their offsets do not change.

        /// Object type: 2 bytes
        static let EI_TYPE: IntField<UInt16> = .init(start: 0x10)

        /// Machine ISA (processor architecture): 2 bytes
        static let EI_MACHINE: IntField<UInt16> = .init(start: 0x12)
    }

    /// The initial magic number (4 bytes) which identifies an ELF file.
    ///
    /// The ELF magic number is *not* a multibyte integer. It is defined as a
    /// string of 4 individual bytes and is the same for little-endian and
    /// big-endian ELF files.
    static let ELFMagic: [UInt8] = [UInt8]("\u{7f}ELF".utf8)

    /// Reads enough of an ELF header from `bytes` to discover the object
    /// type, processor architecture and operating system ABI.
    ///
    /// - Parameter bytes: The leading bytes of the candidate file.
    /// - Returns: The decoded header, or `nil` when `bytes` is too short,
    ///   does not start with the ELF magic number, has a version other
    ///   than 1, or carries an unrecognised class, data encoding or
    ///   object type.
    static func read(_ bytes: [UInt8]) -> ELF? {
        // The last byte consulted is the second byte of EI_MACHINE at
        // offset 0x13, so it must exist before any field is subscripted.
        guard bytes.count > 0x13 else { return nil }

        // An ELF file starts with a magic number which is the same in
        // either endianness.
        guard bytes[Field.EI_MAGIC] == ELFMagic else { return nil }

        // The only defined ELF header version is 1.
        guard bytes[Field.EI_VERSION] == 1 else { return nil }

        guard let encoding = Encoding(rawValue: bytes[Field.EI_CLASS]) else { return nil }
        guard let endianness = Endianness(rawValue: bytes[Field.EI_DATA]) else { return nil }

        // Multibyte fields can only be decoded once the endianness is known.
        let rawObject = bytes[Field.EI_TYPE, endianness: endianness]
        guard let object = Object(rawValue: rawObject) else { return nil }

        let rawMachine = bytes[Field.EI_MACHINE, endianness: endianness]

        return ELF(
            encoding: encoding,
            endianness: endianness,
            ABI: .init(rawValue: bytes[Field.EI_OSABI]),
            object: object,
            ISA: .init(rawValue: rawMachine)
        )
    }
}

Sources/containertool/containertool.swift

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ enum AllowHTTP: String, ExpressibleByArgument, CaseIterable { case source, desti
5151
var allowInsecureHttp: AllowHTTP?
5252

5353
@Option(help: "CPU architecture")
54-
private var architecture: String = ProcessInfo.processInfo.environment["CONTAINERTOOL_ARCHITECTURE"] ?? "amd64"
54+
private var architecture: String?
5555

5656
@Option(help: "Base image reference")
5757
private var from: String = ProcessInfo.processInfo.environment["CONTAINERTOOL_BASE_IMAGE"] ?? "swift:slim"
@@ -72,6 +72,9 @@ enum AllowHTTP: String, ExpressibleByArgument, CaseIterable { case source, desti
7272
let baseimage = try ImageReference(fromString: from, defaultRegistry: defaultRegistry)
7373
var destination_image = try ImageReference(fromString: repository, defaultRegistry: defaultRegistry)
7474

75+
let executableURL = URL(fileURLWithPath: executable)
76+
let payload = try Data(contentsOf: executableURL)
77+
7578
let authProvider: AuthorizationProvider?
7679
if !netrc {
7780
authProvider = nil
@@ -110,6 +113,14 @@ enum AllowHTTP: String, ExpressibleByArgument, CaseIterable { case source, desti
110113

111114
// MARK: Find the base image
112115

116+
let elfheader = ELF.read([UInt8](payload))
117+
let architecture =
118+
architecture
119+
?? ProcessInfo.processInfo.environment["CONTAINERTOOL_ARCHITECTURE"]
120+
?? elfheader?.ISA.containerArchitecture
121+
?? "amd64"
122+
if verbose { log("Base image architecture: \(architecture)") }
123+
113124
let baseimage_manifest: ImageManifest
114125
let baseimage_config: ImageConfiguration
115126
if let source {
@@ -137,8 +148,6 @@ enum AllowHTTP: String, ExpressibleByArgument, CaseIterable { case source, desti
137148

138149
// MARK: Build the application layer
139150

140-
let executableURL = URL(fileURLWithPath: executable)
141-
let payload = try Data(contentsOf: executableURL)
142151
let payload_name = executableURL.lastPathComponent
143152
let tardiff = tar(payload, filename: payload_name)
144153
log("Built application layer")
@@ -228,3 +237,13 @@ enum AllowHTTP: String, ExpressibleByArgument, CaseIterable { case source, desti
228237
print(destination_image)
229238
}
230239
}
240+
241+
extension ELF.ISA {
    /// The container image architecture name for this ISA, or `nil` when
    /// no mapping is defined for it.
    var containerArchitecture: String? {
        switch self {
        case .x86_64:
            return "amd64"
        case .aarch64:
            return "arm64"
        case .x86, .powerpc, .powerpc64, .s390, .arm, .riscv, .unknown:
            // No container architecture name is mapped for these ISAs.
            return nil
        }
    }
}

0 commit comments

Comments
 (0)