Skip to content

Commit 2746c4c

Browse files
committed
Optimize init?(_:appending:) using direct bit manipulation
1 parent bf6694d commit 2746c4c

File tree

1 file changed

+93
-10
lines changed

1 file changed

+93
-10
lines changed

stdlib/public/core/SmallString.swift

Lines changed: 93 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -323,21 +323,104 @@ extension _SmallString {
323323

324324
/// Creates a small string holding the code units of `base` followed by the
/// code units of `other`.
///
/// Returns `nil` when the combined count exceeds `_SmallString.capacity`.
///
/// The concatenation is done with word-sized shifts and ORs rather than a
/// byte-by-byte copy. Both inputs are first normalised with `.littleEndian`
/// so that code unit `i` of a word always sits at bits `8*i ..< 8*i + 8`,
/// which lets a single code path serve both little- and big-endian targets.
///
/// - Parameters:
///   - base: The small string supplying the leading code units.
///   - other: The small string whose code units are appended after `base`.
@usableFromInline // @testable
internal init?(_ base: _SmallString, appending other: _SmallString) {
  let otherCount = other.count

  // Fast path: nothing to append.
  if otherCount == 0 {
    self = base
    return
  }

  let baseCount = base.count
  let totalCount = baseCount + otherCount
  guard totalCount <= _SmallString.capacity else { return nil }

  // Mask selecting the lowest `byteCount` bytes of a 64-bit word.
  // The empty and full-word cases are handled explicitly: `&<<` wraps its
  // shift amount modulo 64, so `(1 &<< 64) &- 1` would yield 0, not all ones.
  func lowBytesMask(_ byteCount: Int) -> UInt64 {
    if byteCount <= 0 { return 0 }
    if byteCount >= 8 { return ~UInt64(0) }
    return (UInt64(1) &<< (UInt64(byteCount) &* 8)) &- 1
  }

  let (baseLeading, baseTrailing) = base.zeroTerminatedRawCodeUnits
  let (otherLeading, otherTrailing) = other.zeroTerminatedRawCodeUnits

  // Byte-order independent view: code unit 0 is the least significant byte.
  // `.littleEndian` is a no-op on little-endian hosts and a byte swap on
  // big-endian ones.
  var low = baseLeading.littleEndian
  var high = baseTrailing.littleEndian

  // Keep only the bytes that belong to `other`, so the result depends solely
  // on its first `otherCount` code units.
  let otherLow = otherLeading.littleEndian & lowBytesMask(otherCount)
  let otherHigh = otherTrailing.littleEndian & lowBytesMask(otherCount - 8)

  // Shift the 128-bit value (otherHigh:otherLow) left by `baseCount` bytes and
  // merge it into (high:low). Because `totalCount <= capacity`, no set bit is
  // shifted out of the top of `high`.
  if baseCount == 0 {
    // No shift needed; handled separately because the carry below would
    // require a right shift by 64, which `&>>` wraps to 0.
    low |= otherLow
    high |= otherHigh
  } else if baseCount < 8 {
    let shift = UInt64(baseCount) &* 8 // 8...56
    low |= otherLow &<< shift
    // Bytes pushed out of the low word carry into the bottom of the high word.
    high |= (otherLow &>> (64 &- shift)) | (otherHigh &<< shift)
  } else {
    // `base` already fills the low word, so `other` has at most 7 code units
    // and lives entirely in `otherLow`.
    let shift = UInt64(baseCount &- 8) &* 8 // 0...48
    high |= otherLow &<< shift
  }

  // Convert back to the raw (memory-order) representation.
  self.init(
    leading: UInt64(littleEndian: low),
    trailing: UInt64(littleEndian: high),
    count: totalCount)
}
342425
}
343426

0 commit comments

Comments
 (0)