Skip to content

Commit 96b338d

Browse files
committed
Speed up processor initialization
Moves register initialization to the program where it can be done once. Speeds up use cases where the same regex is applied to many small inputs.
1 parent 43b9032 commit 96b338d

File tree

5 files changed

+69
-101
lines changed

5 files changed

+69
-101
lines changed

Sources/_StringProcessing/Engine/MEBuilder.swift

+21-22
Original file line numberDiff line numberDiff line change
@@ -419,34 +419,33 @@ extension MEProgram.Builder {
419419
inst.opcode, payload)
420420
}
421421

422-
var regInfo = MEProgram.RegisterInfo()
423-
regInfo.elements = elements.count
424-
regInfo.utf8Contents = utf8Contents.count
425-
regInfo.ints = nextIntRegister.rawValue
426-
regInfo.values = nextValueRegister.rawValue
427-
regInfo.positions = nextPositionRegister.rawValue
428-
regInfo.bitsets = asciiBitsets.count
429-
regInfo.consumeFunctions = consumeFunctions.count
430-
regInfo.transformFunctions = transformFunctions.count
431-
regInfo.matcherFunctions = matcherFunctions.count
432-
regInfo.captures = nextCaptureRegister.rawValue
433-
regInfo.wholeMatchValue = wholeMatchValue?.rawValue
434-
435-
return MEProgram(
422+
let regs = Processor.Registers(
423+
elements: elements.stored,
424+
utf8Contents: utf8Contents.stored,
425+
bitsets: asciiBitsets,
426+
consumeFunctions: consumeFunctions,
427+
transformFunctions: transformFunctions,
428+
matcherFunctions: matcherFunctions,
429+
numInts: nextIntRegister.rawValue,
430+
numValues: nextValueRegister.rawValue,
431+
numPositions: nextPositionRegister.rawValue
432+
)
433+
434+
let storedCaps = Array(
435+
repeating: Processor._StoredCapture(), count: nextCaptureRegister.rawValue)
436+
437+
let meProgram = MEProgram(
436438
instructions: InstructionList(instructions),
437-
staticElements: elements.stored,
438-
staticUTF8Contents: utf8Contents.stored,
439-
staticBitsets: asciiBitsets,
440-
staticConsumeFunctions: consumeFunctions,
441-
staticTransformFunctions: transformFunctions,
442-
staticMatcherFunctions: matcherFunctions,
443-
registerInfo: regInfo,
439+
wholeMatchValueRegister: wholeMatchValue,
444440
enableTracing: enableTracing,
445441
enableMetrics: enableMetrics,
446442
captureList: captureList,
447443
referencedCaptureOffsets: referencedCaptureOffsets,
448444
initialOptions: initialOptions,
449-
canOnlyMatchAtStart: canOnlyMatchAtStart)
445+
canOnlyMatchAtStart: canOnlyMatchAtStart,
446+
registers: regs,
447+
storedCaptures: storedCaps)
448+
return meProgram
450449
}
451450

452451
mutating func reset() { self = Self() }

Sources/_StringProcessing/Engine/MEProgram.swift

+10-14
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,7 @@ struct MEProgram {
2121
(Input, Input.Index, Range<Input.Index>) throws -> (Input.Index, Any)?
2222

2323
var instructions: InstructionList<Instruction>
24-
25-
var staticElements: [Input.Element]
26-
var staticUTF8Contents: [[UInt8]]
27-
var staticBitsets: [DSLTree.CustomCharacterClass.AsciiBitset]
28-
var staticConsumeFunctions: [ConsumeFunction]
29-
var staticTransformFunctions: [TransformFunction]
30-
var staticMatcherFunctions: [MatcherFunction]
31-
32-
var registerInfo: RegisterInfo
24+
var wholeMatchValueRegister: ValueRegister?
3325

3426
var enableTracing: Bool
3527
var enableMetrics: Bool
@@ -39,18 +31,22 @@ struct MEProgram {
3931

4032
var initialOptions: MatchingOptions
4133
var canOnlyMatchAtStart: Bool
34+
35+
// We store the initial register state in the program, so that
36+
// processors can be spun up more quickly (useful for running the same regex
37+
// over many, many smaller inputs).
38+
var registers: Processor.Registers
39+
var storedCaptures: [Processor._StoredCapture]
40+
4241
}
4342

4443
extension MEProgram: CustomStringConvertible {
4544
var description: String {
45+
// TODO: Reinstate better pretty-printing functionality
46+
4647
var result = """
47-
Elements: \(staticElements)
4848
4949
"""
50-
if !staticConsumeFunctions.isEmpty {
51-
result += "Consume functions: \(staticConsumeFunctions)"
52-
}
53-
5450
// TODO: Extract into formatting code
5551

5652
for idx in instructions.indices {

Sources/_StringProcessing/Engine/Processor.swift

+8-4
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ struct Processor {
4949
let subjectBounds: Range<Position>
5050

5151
let matchMode: MatchMode
52+
5253
let instructions: InstructionList<Instruction>
5354

5455
// MARK: Update-only state
@@ -100,6 +101,9 @@ extension Processor {
100101
}
101102

102103
extension Processor {
104+
// TODO: This has lots of retain/release traffic. We really just
105+
// want to borrow the program and most of its static stuff. The only
106+
// thing we need an actual copy of is the modifiable, resettable state
103107
init(
104108
program: MEProgram,
105109
input: Input,
@@ -120,10 +124,10 @@ extension Processor {
120124

121125
self.currentPosition = searchBounds.lowerBound
122126

123-
// Initialize registers with end of search bounds
124-
self.registers = Registers(program, searchBounds.upperBound)
125-
self.storedCaptures = Array(
126-
repeating: .init(), count: program.registerInfo.captures)
127+
// Initialize registers from stored starting state
128+
self.registers = program.registers
129+
130+
self.storedCaptures = program.storedCaptures
127131

128132
_checkInvariants()
129133
}

Sources/_StringProcessing/Engine/Registers.swift

+28-59
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,31 @@ extension Processor {
4747
var values: [Any]
4848

4949
var positions: [Input.Index]
50+
51+
init(
52+
elements: [Element],
53+
utf8Contents: [[UInt8]],
54+
bitsets: [DSLTree.CustomCharacterClass.AsciiBitset],
55+
consumeFunctions: [MEProgram.ConsumeFunction],
56+
transformFunctions: [MEProgram.TransformFunction],
57+
matcherFunctions: [MEProgram.MatcherFunction],
58+
isDirty: Bool = false,
59+
numInts: Int,
60+
numValues: Int,
61+
numPositions: Int
62+
) {
63+
self.elements = elements
64+
self.utf8Contents = utf8Contents
65+
self.bitsets = bitsets
66+
self.consumeFunctions = consumeFunctions
67+
self.transformFunctions = transformFunctions
68+
self.matcherFunctions = matcherFunctions
69+
self.isDirty = isDirty
70+
self.ints = Array(repeating: 0, count: numInts)
71+
self.values = Array(repeating: SentinelValue(), count: numValues)
72+
self.positions = Array(
73+
repeating: Self.sentinelIndex, count: numPositions)
74+
}
5075
}
5176
}
5277

@@ -97,39 +122,9 @@ extension Processor.Registers {
97122
}
98123

99124
extension Processor.Registers {
100-
static let sentinelIndex = "".startIndex
101-
102-
init(
103-
_ program: MEProgram,
104-
_ sentinel: String.Index
105-
) {
106-
let info = program.registerInfo
107-
108-
self.elements = program.staticElements
109-
assert(elements.count == info.elements)
110-
111-
self.utf8Contents = program.staticUTF8Contents
112-
assert(utf8Contents.count == info.utf8Contents)
113-
114-
self.bitsets = program.staticBitsets
115-
assert(bitsets.count == info.bitsets)
116-
117-
self.consumeFunctions = program.staticConsumeFunctions
118-
assert(consumeFunctions.count == info.consumeFunctions)
119-
120-
self.transformFunctions = program.staticTransformFunctions
121-
assert(transformFunctions.count == info.transformFunctions)
122-
123-
self.matcherFunctions = program.staticMatcherFunctions
124-
assert(matcherFunctions.count == info.matcherFunctions)
125-
126-
self.ints = Array(repeating: 0, count: info.ints)
127-
128-
self.values = Array(
129-
repeating: SentinelValue(), count: info.values)
130-
self.positions = Array(
131-
repeating: Processor.Registers.sentinelIndex,
132-
count: info.positions)
125+
static var sentinelIndex: String.Index {
126+
let maxPos = 0x0000_FFFF_FFFF_FFFF
127+
return String.Index(encodedOffset: maxPos)
133128
}
134129

135130
mutating func reset(sentinel: Input.Index) {
@@ -151,32 +146,6 @@ extension MutableCollection {
151146
}
152147
}
153148

154-
extension MEProgram {
155-
struct RegisterInfo {
156-
var elements = 0
157-
var utf8Contents = 0
158-
var bools = 0
159-
var strings = 0
160-
var bitsets = 0
161-
var consumeFunctions = 0
162-
var transformFunctions = 0
163-
var matcherFunctions = 0
164-
var ints = 0
165-
var floats = 0
166-
var positions = 0
167-
var values = 0
168-
var instructionAddresses = 0
169-
var classStackAddresses = 0
170-
var positionStackAddresses = 0
171-
var savePointAddresses = 0
172-
var captures = 0
173-
174-
// The value register holding the whole-match value, if there
175-
// is one
176-
var wholeMatchValue: Int? = nil
177-
}
178-
}
179-
180149
extension Processor.Registers: CustomStringConvertible {
181150
var description: String {
182151
func formatRegisters<T>(

Sources/_StringProcessing/Executor.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,8 @@ extension Executor {
193193
let range = startPosition..<endIdx
194194

195195
let wholeMatchValue: Any?
196-
if let val = program.registerInfo.wholeMatchValue {
197-
wholeMatchValue = cpu.registers.values[val]
196+
if let reg = program.wholeMatchValueRegister {
197+
wholeMatchValue = cpu.registers[reg]
198198
} else {
199199
wholeMatchValue = nil
200200
}

0 commit comments

Comments
 (0)