Skip to content

Speed up processor initialization #789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 21 additions & 22 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -419,34 +419,33 @@ extension MEProgram.Builder {
inst.opcode, payload)
}

var regInfo = MEProgram.RegisterInfo()
regInfo.elements = elements.count
regInfo.utf8Contents = utf8Contents.count
regInfo.ints = nextIntRegister.rawValue
regInfo.values = nextValueRegister.rawValue
regInfo.positions = nextPositionRegister.rawValue
regInfo.bitsets = asciiBitsets.count
regInfo.consumeFunctions = consumeFunctions.count
regInfo.transformFunctions = transformFunctions.count
regInfo.matcherFunctions = matcherFunctions.count
regInfo.captures = nextCaptureRegister.rawValue
regInfo.wholeMatchValue = wholeMatchValue?.rawValue

return MEProgram(
let regs = Processor.Registers(
elements: elements.stored,
utf8Contents: utf8Contents.stored,
bitsets: asciiBitsets,
consumeFunctions: consumeFunctions,
transformFunctions: transformFunctions,
matcherFunctions: matcherFunctions,
numInts: nextIntRegister.rawValue,
numValues: nextValueRegister.rawValue,
numPositions: nextPositionRegister.rawValue
)

let storedCaps = Array(
repeating: Processor._StoredCapture(), count: nextCaptureRegister.rawValue)

let meProgram = MEProgram(
instructions: InstructionList(instructions),
staticElements: elements.stored,
staticUTF8Contents: utf8Contents.stored,
staticBitsets: asciiBitsets,
staticConsumeFunctions: consumeFunctions,
staticTransformFunctions: transformFunctions,
staticMatcherFunctions: matcherFunctions,
registerInfo: regInfo,
wholeMatchValueRegister: wholeMatchValue,
enableTracing: enableTracing,
enableMetrics: enableMetrics,
captureList: captureList,
referencedCaptureOffsets: referencedCaptureOffsets,
initialOptions: initialOptions,
canOnlyMatchAtStart: canOnlyMatchAtStart)
canOnlyMatchAtStart: canOnlyMatchAtStart,
registers: regs,
storedCaptures: storedCaps)
return meProgram
}

mutating func reset() { self = Self() }
Expand Down
24 changes: 10 additions & 14 deletions Sources/_StringProcessing/Engine/MEProgram.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,7 @@ struct MEProgram {
(Input, Input.Index, Range<Input.Index>) throws -> (Input.Index, Any)?

var instructions: InstructionList<Instruction>

var staticElements: [Input.Element]
var staticUTF8Contents: [[UInt8]]
var staticBitsets: [DSLTree.CustomCharacterClass.AsciiBitset]
var staticConsumeFunctions: [ConsumeFunction]
var staticTransformFunctions: [TransformFunction]
var staticMatcherFunctions: [MatcherFunction]

var registerInfo: RegisterInfo
var wholeMatchValueRegister: ValueRegister?

var enableTracing: Bool
var enableMetrics: Bool
Expand All @@ -39,18 +31,22 @@ struct MEProgram {

var initialOptions: MatchingOptions
var canOnlyMatchAtStart: Bool

// We store the initial register state in the program so that
// processors can be spun up more quickly (useful for running the same
// regex over many, many smaller inputs).
var registers: Processor.Registers
var storedCaptures: [Processor._StoredCapture]

}

extension MEProgram: CustomStringConvertible {
var description: String {
// TODO: Re-instate better pretty-printing functionality

var result = """
Elements: \(staticElements)

"""
if !staticConsumeFunctions.isEmpty {
result += "Consume functions: \(staticConsumeFunctions)"
}

// TODO: Extract into formatting code

for idx in instructions.indices {
Expand Down
14 changes: 9 additions & 5 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct Processor {
let subjectBounds: Range<Position>

let matchMode: MatchMode

let instructions: InstructionList<Instruction>

// MARK: Update-only state
Expand Down Expand Up @@ -100,6 +101,9 @@ extension Processor {
}

extension Processor {
// TODO: This has lots of retain/release traffic. We really just
// want to borrow the program and most of its static stuff. The only
// thing we need an actual copy of is the modifiable, resettable state.
init(
program: MEProgram,
input: Input,
Expand All @@ -120,10 +124,10 @@ extension Processor {

self.currentPosition = searchBounds.lowerBound

// Initialize registers with end of search bounds
self.registers = Registers(program, searchBounds.upperBound)
self.storedCaptures = Array(
repeating: .init(), count: program.registerInfo.captures)
// Initialize registers from stored starting state
self.registers = program.registers

self.storedCaptures = program.storedCaptures

_checkInvariants()
}
Expand All @@ -137,7 +141,7 @@ extension Processor {

self.controller = Controller(pc: 0)

self.registers.reset(sentinel: searchBounds.upperBound)
self.registers.reset()

if !self.savePoints.isEmpty {
self.savePoints.removeAll(keepingCapacity: true)
Expand Down
88 changes: 28 additions & 60 deletions Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ extension Processor {
var values: [Any]

var positions: [Input.Index]

/// Creates a register file from static tables and register counts.
///
/// The static tables (`elements`, `utf8Contents`, `bitsets`, and the
/// consume/transform/matcher function tables) are stored exactly as given.
/// The mutable register arrays are sized from the supplied counts and
/// pre-filled with their starting values: `0` for `ints`,
/// `SentinelValue()` for `values`, and `Self.sentinelIndex` for
/// `positions`.
///
/// - Parameters:
///   - isDirty: Initial value of the `isDirty` flag; defaults to `false`.
///   - numInts: Number of entries to allocate in `ints`.
///   - numValues: Number of entries to allocate in `values`.
///   - numPositions: Number of entries to allocate in `positions`.
init(
  elements: [Element],
  utf8Contents: [[UInt8]],
  bitsets: [DSLTree.CustomCharacterClass.AsciiBitset],
  consumeFunctions: [MEProgram.ConsumeFunction],
  transformFunctions: [MEProgram.TransformFunction],
  matcherFunctions: [MEProgram.MatcherFunction],
  isDirty: Bool = false,
  numInts: Int,
  numValues: Int,
  numPositions: Int
) {
  // Mutable registers, each filled with its starting value.
  self.positions = .init(repeating: Self.sentinelIndex, count: numPositions)
  self.values = .init(repeating: SentinelValue(), count: numValues)
  self.ints = .init(repeating: 0, count: numInts)
  self.isDirty = isDirty

  // Static tables, stored as passed in.
  self.matcherFunctions = matcherFunctions
  self.transformFunctions = transformFunctions
  self.consumeFunctions = consumeFunctions
  self.bitsets = bitsets
  self.utf8Contents = utf8Contents
  self.elements = elements
}
}
}

Expand Down Expand Up @@ -97,42 +122,11 @@ extension Processor.Registers {
}

extension Processor.Registers {
static let sentinelIndex = "".startIndex

init(
_ program: MEProgram,
_ sentinel: String.Index
) {
let info = program.registerInfo

self.elements = program.staticElements
assert(elements.count == info.elements)

self.utf8Contents = program.staticUTF8Contents
assert(utf8Contents.count == info.utf8Contents)

self.bitsets = program.staticBitsets
assert(bitsets.count == info.bitsets)

self.consumeFunctions = program.staticConsumeFunctions
assert(consumeFunctions.count == info.consumeFunctions)

self.transformFunctions = program.staticTransformFunctions
assert(transformFunctions.count == info.transformFunctions)

self.matcherFunctions = program.staticMatcherFunctions
assert(matcherFunctions.count == info.matcherFunctions)

self.ints = Array(repeating: 0, count: info.ints)

self.values = Array(
repeating: SentinelValue(), count: info.values)
self.positions = Array(
repeating: Processor.Registers.sentinelIndex,
count: info.positions)
static var sentinelIndex: String.Index {
"".startIndex
}

mutating func reset(sentinel: Input.Index) {
mutating func reset() {
guard isDirty else {
return
}
Expand All @@ -151,32 +145,6 @@ extension MutableCollection {
}
}

extension MEProgram {
  /// Per-program counts describing how many entries of each register
  /// or static-table kind a processor needs.
  ///
  /// Every count defaults to `0`. The builder fills in the static-table
  /// counts from the corresponding table sizes and the register counts
  /// from the next unallocated register number of each kind.
  struct RegisterInfo {
    // Static tables and registers, in their original declaration order
    // so the memberwise initializer is unchanged.
    var elements: Int = 0
    var utf8Contents: Int = 0
    // NOTE(review): `bools` and `strings` are not assigned anywhere in the
    // visible code; presumably reserved or unused — confirm.
    var bools: Int = 0
    var strings: Int = 0
    var bitsets: Int = 0
    var consumeFunctions: Int = 0
    var transformFunctions: Int = 0
    var matcherFunctions: Int = 0
    var ints: Int = 0
    // NOTE(review): `floats` is not assigned in the visible code — confirm use.
    var floats: Int = 0
    var positions: Int = 0
    var values: Int = 0
    // NOTE(review): the four address counts below are not assigned in the
    // visible code — confirm whether they are still needed.
    var instructionAddresses: Int = 0
    var classStackAddresses: Int = 0
    var positionStackAddresses: Int = 0
    var savePointAddresses: Int = 0
    // Number of capture slots; used to size the stored-capture array.
    var captures: Int = 0

    /// Raw number of the value register that holds the whole-match
    /// value, or `nil` when there is none.
    var wholeMatchValue: Int? = nil
  }
}

extension Processor.Registers: CustomStringConvertible {
var description: String {
func formatRegisters<T>(
Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ extension Executor {
let range = startPosition..<endIdx

let wholeMatchValue: Any?
if let val = program.registerInfo.wholeMatchValue {
wholeMatchValue = cpu.registers.values[val]
if let reg = program.wholeMatchValueRegister {
wholeMatchValue = cpu.registers[reg]
} else {
wholeMatchValue = nil
}
Expand Down