Skip to content

Commit 6d4f4a4

Browse files
authored
Improve RangeSet union performance (swiftlang#74963)
1 parent 255a941 commit 6d4f4a4

File tree

4 files changed

+127
-7
lines changed

4 files changed

+127
-7
lines changed

stdlib/public/core/RangeSet.swift

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -251,10 +251,11 @@ extension RangeSet {
251251
/// Adds the contents of the given range set to this range set.
252252
///
253253
/// - Parameter other: A range set to merge with this one.
254+
///
255+
/// - Complexity: O(*m* + *n*), where *m* and *n* are the number of ranges in
256+
/// this and the other range set.
254257
public mutating func formUnion(_ other: __owned RangeSet<Bound>) {
255-
for range in other._ranges {
256-
insert(contentsOf: range)
257-
}
258+
self = self.union(other)
258259
}
259260

260261
/// Removes the contents of this range set that aren't also in the given
@@ -293,9 +294,7 @@ extension RangeSet {
293294
public __consuming func union(
294295
_ other: __owned RangeSet<Bound>
295296
) -> RangeSet<Bound> {
296-
var result = self
297-
result.formUnion(other)
298-
return result
297+
return RangeSet(_ranges: _ranges._union(other._ranges))
299298
}
300299

301300
/// Returns a new range set containing the contents of both this set and the

stdlib/public/core/RangeSetRanges.swift

Lines changed: 72 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,77 @@ extension RangeSet.Ranges {
265265

266266
return Self(_ranges: result)
267267
}
268+
269+
@usableFromInline
270+
internal func _union(_ other: Self) -> Self {
271+
// Empty cases
272+
if other.isEmpty {
273+
return self
274+
} else if self.isEmpty {
275+
return other
276+
}
277+
278+
// Instead of naively inserting the ranges of `other` into `self`,
279+
// which can cause reshuffling with every insertion, this approach
280+
// uses the guarantees that each array of ranges is non-overlapping and in
281+
// increasing order to directly derive the union.
282+
//
283+
// Each range in the resulting range set is found by:
284+
//
285+
// 1. Finding the current lowest bound of the two range sets.
286+
// 2. Searching for the first upper bound that is outside the merged
287+
// boundaries of the two range sets.
288+
289+
// Use temporaries so that we can swap a/b, to simplify the logic below
290+
var a = self._storage
291+
var b = other._storage
292+
var aIndex = a.startIndex
293+
var bIndex = b.startIndex
294+
295+
var result: [Range<Bound>] = []
296+
while aIndex < a.endIndex, bIndex < b.endIndex {
297+
// Make sure that `a` is the source of the lower bound and `b` is the
298+
// potential source for the upper bound.
299+
if b[bIndex].lowerBound < a[aIndex].lowerBound {
300+
swap(&a, &b)
301+
swap(&aIndex, &bIndex)
302+
}
303+
304+
var candidateRange = a[aIndex]
305+
aIndex += 1
306+
307+
// Look for the correct upper bound, which is the first upper bound that
308+
// isn't contained in the next range of the "other" ranges array.
309+
while bIndex < b.endIndex, candidateRange.upperBound >= b[bIndex].lowerBound {
310+
if candidateRange.upperBound >= b[bIndex].upperBound {
311+
// The range `b[bIndex]` is entirely contained by `candidateRange`,
312+
// so we need to advance and look at the next range in `b`.
313+
bIndex += 1
314+
} else {
315+
// The range `b[bIndex]` extends past `candidateRange`, so:
316+
//
317+
// 1. We grow `candidateRange` to the upper bound of `b[bIndex]`
318+
// 2. We swap the two range arrays, so that we're looking for the
319+
// new upper bound in the other array.
320+
candidateRange = candidateRange.lowerBound ..< b[bIndex].upperBound
321+
bIndex += 1
322+
swap(&a, &b)
323+
swap(&aIndex, &bIndex)
324+
}
325+
}
326+
327+
result.append(candidateRange)
328+
}
329+
330+
// Collect any remaining ranges without needing to merge.
331+
if aIndex < a.endIndex {
332+
result.append(contentsOf: a[aIndex...])
333+
} else if bIndex < b.endIndex {
334+
result.append(contentsOf: b[bIndex...])
335+
}
336+
337+
return Self(_ranges: result)
338+
}
268339
}
269340

270341
@available(SwiftStdlib 6.0, *)
@@ -344,4 +415,4 @@ internal struct _Pair<Element>: RandomAccessCollection {
344415
}
345416
}
346417
}
347-
}
418+
}

test/abi/macOS/x86_64/stdlib.swift

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ Added: _$ss8RangeSetV6RangesV10startIndexSivpMV
163163
Added: _$ss8RangeSetV6RangesV11descriptionSSvg
164164
Added: _$ss8RangeSetV6RangesV11descriptionSSvpMV
165165
Added: _$ss8RangeSetV6RangesV13_intersectionyADyx_GAFF
166+
Added: _$ss8RangeSetV6RangesV6_unionyADyx_GAFF
166167
Added: _$ss8RangeSetV6RangesV2eeoiySbADyx_G_AFtFZ
167168
Added: _$ss8RangeSetV6RangesV5_gaps9boundedByADyx_GSnyxG_tF
168169
Added: _$ss8RangeSetV6RangesV5countSivg

validation-test/stdlib/RangeSet.swift

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -174,6 +174,55 @@ if #available(SwiftStdlib 6.0, *) {
174174
}
175175
}
176176

177+
RangeSetTests.test("union") {
178+
func unionViaSet(
179+
_ s1: RangeSet<Int>,
180+
_ s2: RangeSet<Int>
181+
) -> RangeSet<Int> {
182+
let set1 = Set(parent.indices[s1])
183+
let set2 = Set(parent.indices[s2])
184+
return RangeSet(set1.union(set2), within: parent)
185+
}
186+
187+
func testUnion(
188+
_ set1: RangeSet<Int>,
189+
_ set2: RangeSet<Int>,
190+
expect union: RangeSet<Int>
191+
) {
192+
expectEqual(set1.union(set2), union)
193+
expectEqual(set2.union(set1), union)
194+
195+
var set3 = set1
196+
set3.formUnion(set2)
197+
expectEqual(set3, union)
198+
199+
set3 = set2
200+
set3.formUnion(set1)
201+
expectEqual(set3, union)
202+
}
203+
204+
// Simple tests
205+
testUnion([0..<5, 9..<14],
206+
[1..<3, 4..<6, 8..<12],
207+
expect: [0..<6, 8..<14])
208+
209+
testUnion([10..<20, 50..<60],
210+
[15..<55, 58..<65],
211+
expect: [10..<65])
212+
213+
// Test with upper bound / lower bound equality
214+
testUnion([10..<20, 30..<40],
215+
[15..<30, 40..<50],
216+
expect: [10..<50])
217+
218+
for _ in 0..<100 {
219+
let set1 = buildRandomRangeSet()
220+
let set2 = buildRandomRangeSet()
221+
testUnion(set1, set2,
222+
expect: unionViaSet(set1, set2))
223+
}
224+
}
225+
177226
RangeSetTests.test("intersection") {
178227
func intersectionViaSet(
179228
_ s1: RangeSet<Int>,

0 commit comments

Comments
 (0)