Skip to content

Speed up processor initialization #789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 14, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 21 additions & 22 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
@@ -419,34 +419,33 @@ extension MEProgram.Builder {
inst.opcode, payload)
}

var regInfo = MEProgram.RegisterInfo()
regInfo.elements = elements.count
regInfo.utf8Contents = utf8Contents.count
regInfo.ints = nextIntRegister.rawValue
regInfo.values = nextValueRegister.rawValue
regInfo.positions = nextPositionRegister.rawValue
regInfo.bitsets = asciiBitsets.count
regInfo.consumeFunctions = consumeFunctions.count
regInfo.transformFunctions = transformFunctions.count
regInfo.matcherFunctions = matcherFunctions.count
regInfo.captures = nextCaptureRegister.rawValue
regInfo.wholeMatchValue = wholeMatchValue?.rawValue

return MEProgram(
let regs = Processor.Registers(
elements: elements.stored,
utf8Contents: utf8Contents.stored,
bitsets: asciiBitsets,
consumeFunctions: consumeFunctions,
transformFunctions: transformFunctions,
matcherFunctions: matcherFunctions,
numInts: nextIntRegister.rawValue,
numValues: nextValueRegister.rawValue,
numPositions: nextPositionRegister.rawValue
)

let storedCaps = Array(
repeating: Processor._StoredCapture(), count: nextCaptureRegister.rawValue)

let meProgram = MEProgram(
instructions: InstructionList(instructions),
staticElements: elements.stored,
staticUTF8Contents: utf8Contents.stored,
staticBitsets: asciiBitsets,
staticConsumeFunctions: consumeFunctions,
staticTransformFunctions: transformFunctions,
staticMatcherFunctions: matcherFunctions,
registerInfo: regInfo,
wholeMatchValueRegister: wholeMatchValue,
enableTracing: enableTracing,
enableMetrics: enableMetrics,
captureList: captureList,
referencedCaptureOffsets: referencedCaptureOffsets,
initialOptions: initialOptions,
canOnlyMatchAtStart: canOnlyMatchAtStart)
canOnlyMatchAtStart: canOnlyMatchAtStart,
registers: regs,
storedCaptures: storedCaps)
return meProgram
}

mutating func reset() { self = Self() }
24 changes: 10 additions & 14 deletions Sources/_StringProcessing/Engine/MEProgram.swift
Original file line number Diff line number Diff line change
@@ -21,15 +21,7 @@ struct MEProgram {
(Input, Input.Index, Range<Input.Index>) throws -> (Input.Index, Any)?

var instructions: InstructionList<Instruction>

var staticElements: [Input.Element]
var staticUTF8Contents: [[UInt8]]
var staticBitsets: [DSLTree.CustomCharacterClass.AsciiBitset]
var staticConsumeFunctions: [ConsumeFunction]
var staticTransformFunctions: [TransformFunction]
var staticMatcherFunctions: [MatcherFunction]

var registerInfo: RegisterInfo
var wholeMatchValueRegister: ValueRegister?

var enableTracing: Bool
var enableMetrics: Bool
@@ -39,18 +31,22 @@ struct MEProgram {

var initialOptions: MatchingOptions
var canOnlyMatchAtStart: Bool

// We store the initial register state in the program, so that
// processors can be spun up quicker (useful for running same regex
// over many, many smaller inputs).
var registers: Processor.Registers
var storedCaptures: [Processor._StoredCapture]

}

extension MEProgram: CustomStringConvertible {
var description: String {
// TODO: Re-instate better pretty-printing functionality

var result = """
Elements: \(staticElements)
"""
if !staticConsumeFunctions.isEmpty {
result += "Consume functions: \(staticConsumeFunctions)"
}

// TODO: Extract into formatting code

for idx in instructions.indices {
14 changes: 9 additions & 5 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ struct Processor {
let subjectBounds: Range<Position>

let matchMode: MatchMode

let instructions: InstructionList<Instruction>

// MARK: Update-only state
@@ -100,6 +101,9 @@ extension Processor {
}

extension Processor {
// TODO: This has lots of retain/release traffic. We really just
// want to borrow the program and most of its static stuff. The only
// thing we need an actual copy of is the modifiable, resettable state.
init(
program: MEProgram,
input: Input,
@@ -120,10 +124,10 @@ extension Processor {

self.currentPosition = searchBounds.lowerBound

// Initialize registers with end of search bounds
self.registers = Registers(program, searchBounds.upperBound)
self.storedCaptures = Array(
repeating: .init(), count: program.registerInfo.captures)
// Initialize registers from stored starting state
self.registers = program.registers

self.storedCaptures = program.storedCaptures

_checkInvariants()
}
@@ -137,7 +141,7 @@ extension Processor {

self.controller = Controller(pc: 0)

self.registers.reset(sentinel: searchBounds.upperBound)
self.registers.reset()

if !self.savePoints.isEmpty {
self.savePoints.removeAll(keepingCapacity: true)
88 changes: 28 additions & 60 deletions Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
@@ -47,6 +47,31 @@ extension Processor {
var values: [Any]

var positions: [Input.Index]

/// Builds a register file from a program's static tables and register counts.
///
/// The table arguments and `isDirty` are stored exactly as given. The `ints`,
/// `values`, and `positions` banks are allocated with the requested number of
/// slots and filled with `0`, `SentinelValue()`, and `Self.sentinelIndex`
/// respectively.
///
/// - Parameters:
///   - elements: Stored into `elements`.
///   - utf8Contents: Stored into `utf8Contents`.
///   - bitsets: Stored into `bitsets`.
///   - consumeFunctions: Stored into `consumeFunctions`.
///   - transformFunctions: Stored into `transformFunctions`.
///   - matcherFunctions: Stored into `matcherFunctions`.
///   - isDirty: Initial value of the `isDirty` flag; defaults to `false`.
///   - numInts: Number of slots to allocate in `ints`.
///   - numValues: Number of slots to allocate in `values`.
///   - numPositions: Number of slots to allocate in `positions`.
init(
  elements: [Element],
  utf8Contents: [[UInt8]],
  bitsets: [DSLTree.CustomCharacterClass.AsciiBitset],
  consumeFunctions: [MEProgram.ConsumeFunction],
  transformFunctions: [MEProgram.TransformFunction],
  matcherFunctions: [MEProgram.MatcherFunction],
  isDirty: Bool = false,
  numInts: Int,
  numValues: Int,
  numPositions: Int
) {
  // Register banks: sized from the counts and pre-filled with placeholders.
  ints = Array(repeating: 0, count: numInts)
  values = Array(repeating: SentinelValue(), count: numValues)
  positions = Array(repeating: Self.sentinelIndex, count: numPositions)
  self.isDirty = isDirty

  // Tables handed in by the caller, stored unchanged.
  self.matcherFunctions = matcherFunctions
  self.transformFunctions = transformFunctions
  self.consumeFunctions = consumeFunctions
  self.bitsets = bitsets
  self.utf8Contents = utf8Contents
  self.elements = elements
}
}
}

@@ -97,42 +122,11 @@ extension Processor.Registers {
}

extension Processor.Registers {
static let sentinelIndex = "".startIndex

init(
_ program: MEProgram,
_ sentinel: String.Index
) {
let info = program.registerInfo

self.elements = program.staticElements
assert(elements.count == info.elements)

self.utf8Contents = program.staticUTF8Contents
assert(utf8Contents.count == info.utf8Contents)

self.bitsets = program.staticBitsets
assert(bitsets.count == info.bitsets)

self.consumeFunctions = program.staticConsumeFunctions
assert(consumeFunctions.count == info.consumeFunctions)

self.transformFunctions = program.staticTransformFunctions
assert(transformFunctions.count == info.transformFunctions)

self.matcherFunctions = program.staticMatcherFunctions
assert(matcherFunctions.count == info.matcherFunctions)

self.ints = Array(repeating: 0, count: info.ints)

self.values = Array(
repeating: SentinelValue(), count: info.values)
self.positions = Array(
repeating: Processor.Registers.sentinelIndex,
count: info.positions)
static var sentinelIndex: String.Index {
"".startIndex
}

mutating func reset(sentinel: Input.Index) {
mutating func reset() {
guard isDirty else {
return
}
@@ -151,32 +145,6 @@ extension MutableCollection {
}
}

extension MEProgram {
  /// Per-kind counts describing a program's registers and tables.
  ///
  /// Every count starts at zero, and `wholeMatchValue` starts as `nil`.
  struct RegisterInfo {
    var elements: Int = 0
    var utf8Contents: Int = 0
    var bools: Int = 0
    var strings: Int = 0
    var bitsets: Int = 0
    var consumeFunctions: Int = 0
    var transformFunctions: Int = 0
    var matcherFunctions: Int = 0
    var ints: Int = 0
    var floats: Int = 0
    var positions: Int = 0
    var values: Int = 0
    var instructionAddresses: Int = 0
    var classStackAddresses: Int = 0
    var positionStackAddresses: Int = 0
    var savePointAddresses: Int = 0
    var captures: Int = 0

    /// The value register that holds the whole-match value, or `nil`
    /// when the program has none.
    var wholeMatchValue: Int?
  }
}

extension Processor.Registers: CustomStringConvertible {
var description: String {
func formatRegisters<T>(
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
@@ -193,8 +193,8 @@ extension Executor {
let range = startPosition..<endIdx

let wholeMatchValue: Any?
if let val = program.registerInfo.wholeMatchValue {
wholeMatchValue = cpu.registers.values[val]
if let reg = program.wholeMatchValueRegister {
wholeMatchValue = cpu.registers[reg]
} else {
wholeMatchValue = nil
}