Skip to content

Commit cefdc80

Browse files
authored
[Multipart] Introduce a bytes -> frames parser (#72)
[Multipart] Introduce a bytes -> frames parser ### Motivation Start landing the parts of the multipart machinery that are unlikely to change as part of the multipart proposal that's finishing review tomorrow. ### Modifications Introduce a bytes -> frames parser and an async sequence that wraps it. A "frame" is either the full header fields section or a single chunk of a part body. ### Result We can now frame bytes of a multipart body. ### Test Plan Added unit tests for the state machine, the parser, and the async sequence. Reviewed by: simonjbeaumont Builds: ✔︎ pull request validation (5.10) - Build finished. ✔︎ pull request validation (5.8) - Build finished. ✔︎ pull request validation (5.9) - Build finished. ✔︎ pull request validation (api breakage) - Build finished. ✔︎ pull request validation (docc test) - Build finished. ✔︎ pull request validation (integration test) - Build finished. ✔︎ pull request validation (nightly) - Build finished. ✔︎ pull request validation (soundness) - Build finished. #72
1 parent 9da9ad6 commit cefdc80

8 files changed

+799
-0
lines changed

NOTICE.txt

+9
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,12 @@ This product contains coder implementations inspired by swift-http-structured-he
4141
* https://www.apache.org/licenses/LICENSE-2.0
4242
* HOMEPAGE:
4343
* https://github.com/apple/swift-http-structured-headers
44+
45+
---
46+
47+
This product contains header character set validation logic inspired by swift-http-types.
48+
49+
* LICENSE (Apache License 2.0):
50+
* https://www.apache.org/licenses/LICENSE-2.0
51+
* HOMEPAGE:
52+
* https://github.com/apple/swift-http-types
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftOpenAPIGenerator open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
/// A namespace of ASCII byte constants and predicates used by byte parsers and serializers.
enum ASCII {

    /// The dash `-` byte (0x2d).
    static let dash = UInt8(ascii: "-")

    /// The carriage return `<CR>` byte (0x0d).
    static let cr = UInt8(ascii: "\r")

    /// The line feed `<LF>` byte (0x0a).
    static let lf = UInt8(ascii: "\n")

    /// The colon `:` byte (0x3a).
    static let colon = UInt8(ascii: ":")

    /// The space ` ` byte (0x20).
    static let space = UInt8(ascii: " ")

    /// The horizontal tab `<TAB>` byte (0x09).
    static let tab = UInt8(ascii: "\t")

    /// Two consecutive dash bytes.
    static let dashes: [UInt8] = [dash, dash]

    /// The `<CR>` byte followed by the `<LF>` byte.
    static let crlf: [UInt8] = [cr, lf]

    /// The bytes that count as optional whitespace (OWS): space and horizontal tab.
    static let optionalWhitespace: Set<UInt8> = [space, tab]

    /// Reports whether a byte is allowed to appear in a header field name.
    /// - Parameter byte: The byte to check.
    /// - Returns: `true` if the byte is a digit, an ASCII letter, or one of
    ///   ``!#$%&'*+-.^_`|~``; `false` for every other byte.
    static func isValidHeaderFieldNameByte(_ byte: UInt8) -> Bool {
        // Copied from swift-http-types, because we create HTTPField.Name from these anyway later.
        switch byte {
        case 0x30...0x39, 0x41...0x5A, 0x61...0x7A:  // DIGIT, ALPHA
            return true
        case 0x21, 0x23...0x27, 0x2A, 0x2B, 0x2D, 0x2E, 0x5E...0x60, 0x7C, 0x7E:
            // ! # $ % & ' * + - . ^ _ ` | ~
            return true
        default:
            return false
        }
    }
}
59+
60+
/// The outcome of matching a collection's leading elements against an expected
/// sequence, as returned by `firstIndexAfterPrefix`.
enum FirstIndexAfterPrefixResult<C: RandomAccessCollection> {

    /// Every expected element matched; the payload is the index just past the matched prefix.
    case index(C.Index)

    /// Every element compared so far matched, but the collection ended before
    /// all expected elements were seen. With more data this could still become a full match.
    case reachedEndOfSelf

    /// The element at the provided index differs from the expected element.
    case unexpectedPrefix(C.Index)
}

extension RandomAccessCollection where Element: Equatable {

    /// Checks that the collection starts with the provided elements and reports where the match ends.
    /// - Parameter expectedElements: The elements the collection is expected to start with.
    /// - Returns: `.index` with the first index past the prefix on a full match,
    ///   `.reachedEndOfSelf` if the collection ran out while still matching, or
    ///   `.unexpectedPrefix` with the index of the first differing element.
    func firstIndexAfterPrefix(_ expectedElements: some Sequence<Element>) -> FirstIndexAfterPrefixResult<Self> {
        var cursor = startIndex
        var remainingExpected = expectedElements.makeIterator()
        while let expected = remainingExpected.next() {
            // Running out of our own elements is checked before comparing.
            if cursor >= endIndex { return .reachedEndOfSelf }
            if self[cursor] != expected { return .unexpectedPrefix(cursor) }
            cursor = index(after: cursor)
        }
        return .index(cursor)
    }
}
89+
90+
/// A value returned by the `longestMatch` method.
enum LongestMatchResult<C: RandomAccessCollection> {

    /// No match found at any position in self.
    case noMatch

    /// Found a prefix match but reached the end of self.
    /// Provides the index of the first matching character.
    /// When more data is fetched, this might become a full match.
    case prefixMatch(fromIndex: C.Index)

    /// Found a full match within self at the provided range.
    case fullMatch(Range<C.Index>)
}

extension RandomAccessCollection where Element: Equatable {

    /// Searches the collection for the provided elements.
    ///
    /// Candidate start positions are tried from `startIndex` forward, and the
    /// scan stops at the first position that yields either a full match or a
    /// partial match that runs into the end of the collection.
    ///
    /// - Parameter expectedElements: The elements to match in the collection.
    /// - Returns: `.fullMatch` with the matched range, `.prefixMatch` with the
    ///   start of a partial match cut off by the end of the collection, or
    ///   `.noMatch` if no position matches (including when the collection is empty).
    func longestMatch(_ expectedElements: some Sequence<Element>) -> LongestMatchResult<Self> {
        // The pattern is compared once per candidate position, but `Sequence`
        // does not guarantee that it can be iterated more than once. Capture
        // it in an array up front so every attempt sees the full pattern.
        let pattern = Array(expectedElements)
        var index = startIndex
        while index < endIndex {
            switch self[index...].firstIndexAfterPrefix(pattern) {
            case .index(let end): return .fullMatch(index..<end)
            case .reachedEndOfSelf: return .prefixMatch(fromIndex: index)
            case .unexpectedPrefix: formIndex(after: &index)
            }
        }
        return .noMatch
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftOpenAPIGenerator open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
import HTTPTypes
16+
17+
/// A sequence that parses multipart frames out of an upstream sequence of byte chunks.
struct MultipartBytesToFramesSequence<Upstream>: Sendable
where Upstream: AsyncSequence & Sendable, Upstream.Element == ArraySlice<UInt8> {

    /// The upstream sequence that supplies the byte chunks.
    var upstream: Upstream

    /// The boundary string that separates the multipart parts.
    var boundary: String
}
27+
28+
extension MultipartBytesToFramesSequence: AsyncSequence {

    /// The element type of this asynchronous sequence: a multipart frame.
    typealias Element = MultipartFrame

    /// Creates an iterator over the frames of this sequence.
    ///
    /// - Returns: An iterator built from a fresh upstream iterator and this
    ///   sequence's boundary.
    func makeAsyncIterator() -> Iterator<Upstream.AsyncIterator> {
        let byteChunks = upstream.makeAsyncIterator()
        return Iterator(upstream: byteChunks, boundary: boundary)
    }

    /// An iterator that draws byte chunks from an upstream iterator and hands
    /// back the frames produced by the multipart parser.
    struct Iterator<UpstreamIterator: AsyncIteratorProtocol>: AsyncIteratorProtocol
    where UpstreamIterator.Element == ArraySlice<UInt8> {

        /// The source of byte chunks.
        private var upstream: UpstreamIterator

        /// The parser that turns byte chunks into frames.
        private var parser: MultipartParser

        /// Creates an iterator from a source of byte chunks and a boundary string.
        /// - Parameters:
        ///   - upstream: The iterator that provides the byte chunks.
        ///   - boundary: The boundary separating the multipart parts.
        init(upstream: UpstreamIterator, boundary: String) {
            self.upstream = upstream
            self.parser = MultipartParser(boundary: boundary)
        }

        /// Asks the parser for the next frame, giving it a closure that fetches
        /// the next byte chunk from the upstream iterator.
        ///
        /// - Returns: The value returned by the parser: the next frame, or `nil`
        ///   to signal the end of the sequence.
        /// - Throws: Any error thrown by the parser or by the upstream iterator.
        mutating func next() async throws -> MultipartFrame? {
            try await parser.next {
                try await upstream.next()
            }
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftOpenAPIGenerator open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
import HTTPTypes
16+
17+
/// One frame of a multipart message: either a part's complete header fields
/// section, or a single chunk of a part's body bytes.
enum MultipartFrame: Hashable, Sendable {

    /// The whole header fields section.
    case headerFields(HTTPFields)

    /// A single chunk of bytes from the part's body.
    case bodyChunk(ArraySlice<UInt8>)
}

0 commit comments

Comments
 (0)