Skip to content

Commit 5df006a

Browse files
allow custom cmark options and extensions when converting Markdown
updated from: swiftlang#23
1 parent c211079 commit 5df006a

File tree

6 files changed

+202
-30
lines changed

6 files changed

+202
-30
lines changed

Sources/Markdown/Base/Document.swift

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,31 +38,49 @@ public extension Document {
3838
/// Parse a string into a `Document`.
3939
///
4040
/// - parameter string: the input Markdown text to parse.
41-
/// - parameter options: options for parsing Markdown text.
42-
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
41+
/// - parameter options: options for parsing Markdown text, including
42+
/// Commonmark-specific options and extensions.
4343
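/// - parameter source: an explicit source URL from which the input `string` came for marking source locations. This need not be a file URL.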
44-
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
45-
if options.contains(.parseBlockDirectives) {
44+
init(parsing string: String, source: URL? = nil, convertOptions options: ConvertOptions) {
45+
if options.parseOptions.contains(.parseBlockDirectives) {
4646
self = BlockDirectiveParser.parse(string, source: source,
4747
options: options)
4848
} else {
4949
self = MarkupParser.parseString(string, source: source, options: options)
5050
}
5151
}
52+
53+
/// Parse a string into a `Document`.
54+
///
55+
/// - parameter string: the input Markdown text to parse.
56+
/// - parameter options: options for parsing Markdown text.
57+
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
58+
/// This need not be a file URL.
59+
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
60+
self.init(parsing: string, source: source, convertOptions: .init(fromParseOptions: options))
61+
}
5262

5363
/// Parse a file's contents into a `Document`.
5464
///
55-
/// - parameter file: a file URL from which to load Markdown text to parse.
56-
/// - parameter options: options for parsing Markdown text.
57-
init(parsing file: URL, options: ParseOptions = []) throws {
65+
/// - parameter file: a file URL from which to load Markdown text to parse.
/// - parameter options: options for parsing Markdown text, including
66+
/// Commonmark-specific options and extensions.
67+
init(parsing file: URL, convertOptions options: ConvertOptions) throws {
5868
let string = try String(contentsOf: file)
59-
if options.contains(.parseBlockDirectives) {
69+
if options.parseOptions.contains(.parseBlockDirectives) {
6070
self = BlockDirectiveParser.parse(string, source: file,
6171
options: options)
6272
} else {
6373
self = MarkupParser.parseString(string, source: file, options: options)
6474
}
6575
}
76+
77+
/// Parse a file's contents into a `Document`.
78+
///
79+
/// - parameter file: a file URL from which to load Markdown text to parse.
80+
/// - parameter options: options for parsing Markdown text.
81+
init(parsing file: URL, options: ParseOptions = []) throws {
82+
try self.init(parsing: file, convertOptions: .init(fromParseOptions: options))
83+
}
6684

6785
/// Create a document from a sequence of block markup elements.
6886
init<Children: Sequence>(_ children: Children) where Children.Element == BlockMarkup {

Sources/Markdown/Markdown.docc/Markdown/FormatterAndOptions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,7 @@
99
### Options
1010

1111
- ``MarkupDumpOptions``
12+
- ``ParseOptions``
13+
- ``ConvertOptions``
1214

1315
<!-- Copyright (c) 2021-2022 Apple Inc and the Swift Project authors. All Rights Reserved. -->

Sources/Markdown/Parser/BlockDirectiveParser.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ private enum ParseContainer: CustomStringConvertible {
491491
/// A Doxygen command, which can contain arbitrary markup (but not block directives).
492492
case doxygenCommand(PendingDoxygenCommand, [TrimmedLine])
493493

494-
init<TrimmedLines: Sequence>(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ParseOptions) where TrimmedLines.Element == TrimmedLine {
494+
init<TrimmedLines: Sequence>(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ConvertOptions) where TrimmedLines.Element == TrimmedLine {
495495
self = ParseContainerStack(parsingHierarchyFrom: trimmedLines, options: options).top
496496
}
497497

@@ -663,7 +663,7 @@ private enum ParseContainer: CustomStringConvertible {
663663
/// Convert this container to the corresponding ``RawMarkup`` node.
664664
func convertToRawMarkup(ranges: inout RangeTracker,
665665
parent: ParseContainer?,
666-
options: ParseOptions) -> [RawMarkup] {
666+
options: ConvertOptions) -> [RawMarkup] {
667667
switch self {
668668
case let .root(children):
669669
let rawChildren = children.flatMap {
@@ -749,9 +749,9 @@ struct ParseContainerStack {
749749
/// The stack of containers to be incrementally folded into a hierarchy.
750750
private var stack: [ParseContainer]
751751

752-
private let options: ParseOptions
752+
private let options: ConvertOptions
753753

754-
init<TrimmedLines: Sequence>(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ParseOptions) where TrimmedLines.Element == TrimmedLine {
754+
init<TrimmedLines: Sequence>(parsingHierarchyFrom trimmedLines: TrimmedLines, options: ConvertOptions) where TrimmedLines.Element == TrimmedLine {
755755
self.stack = [.root([])]
756756
self.options = options
757757
for line in trimmedLines {
@@ -772,7 +772,7 @@ struct ParseContainerStack {
772772
}
773773

774774
private var canParseDoxygenCommand: Bool {
775-
guard options.contains(.parseMinimalDoxygen) else { return false }
775+
guard options.parseOptions.contains(.parseMinimalDoxygen) else { return false }
776776

777777
guard !isInBlockDirective else { return false }
778778

@@ -1105,7 +1105,7 @@ extension Document {
11051105
///
11061106
/// - Precondition: The `rootContainer` must be the `.root` case.
11071107
fileprivate init(converting rootContainer: ParseContainer, from source: URL?,
1108-
options: ParseOptions) {
1108+
options: ConvertOptions) {
11091109
guard case .root = rootContainer else {
11101110
fatalError("Tried to convert a non-root container to a `Document`")
11111111
}
@@ -1128,14 +1128,14 @@ extension Document {
11281128
}
11291129

11301130
struct BlockDirectiveParser {
1131-
static func parse(_ input: URL, options: ParseOptions = []) throws -> Document {
1131+
static func parse(_ input: URL, options: ConvertOptions = .init()) throws -> Document {
11321132
let string = try String(contentsOf: input, encoding: .utf8)
11331133
return parse(string, source: input, options: options)
11341134
}
11351135

11361136
/// Parse the input.
11371137
static func parse(_ input: String, source: URL?,
1138-
options: ParseOptions = []) -> Document {
1138+
options: ConvertOptions = .init()) -> Document {
11391139
// Phase 0: Split the input into lines lazily, keeping track of
11401140
// line numbers, consecutive blank lines, and start positions on each line where indentation ends.
11411141
// These trim points may be used to adjust the indentation seen by the CommonMark parser when

Sources/Markdown/Parser/CommonMarkConverter.swift

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -608,25 +608,18 @@ struct MarkupParser {
608608
return MarkupConversion(state: childConversion.state.next(), result: .inlineAttributes(attributes: attributes, parsedRange: parsedRange, childConversion.result))
609609
}
610610

611-
static func parseString(_ string: String, source: URL?, options: ParseOptions) -> Document {
611+
static func parseString(_ string: String, source: URL?, options: ConvertOptions) -> Document {
612612
cmark_gfm_core_extensions_ensure_registered()
613-
614-
var cmarkOptions = CMARK_OPT_TABLE_SPANS
615-
if !options.contains(.disableSmartOpts) {
616-
cmarkOptions |= CMARK_OPT_SMART
617-
}
618-
if !options.contains(.disableSourcePosOpts) {
619-
cmarkOptions |= CMARK_OPT_SOURCEPOS
620-
}
621613

622-
let parser = cmark_parser_new(cmarkOptions)
614+
let parser = cmark_parser_new(options.commonmarkOptions.rawValue)
615+
616+
for ext in options.commonmarkExtensions {
617+
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension(ext))
618+
}
623619

624-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("table"))
625-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough"))
626-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("tasklist"))
627620
cmark_parser_feed(parser, string, string.utf8.count)
628621
let rawDocument = cmark_parser_finish(parser)
629-
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options, headerSeen: false, pendingTableBody: nil).next()
622+
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options.parseOptions, headerSeen: false, pendingTableBody: nil).next()
630623
precondition(initialState.event == CMARK_EVENT_ENTER)
631624
precondition(initialState.nodeType == .document)
632625
let conversion = convertAnyElement(initialState)
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2021 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import cmark_gfm
12+
13+
/// Options to use when converting Markdown.
14+
public struct ConvertOptions {
15+
public let parseOptions: ParseOptions
16+
public let commonmarkOptions: CommonmarkOptions
17+
public let commonmarkExtensions: [String]
18+
19+
public init(parseOptions: ParseOptions, commonmarkOptions: CommonmarkOptions, extensions: [String]) {
20+
self.parseOptions = parseOptions
21+
self.commonmarkOptions = commonmarkOptions
22+
self.commonmarkExtensions = extensions
23+
}
24+
25+
public init(fromParseOptions options: ParseOptions) {
26+
var commonmarkOptions = ConvertOptions.defaultCommonmarkOptions
27+
if options.contains(.disableSmartOpts) {
28+
commonmarkOptions.remove(.smart)
29+
}
30+
if options.contains(.disableSourcePosOpts) {
31+
commonmarkOptions.remove(.sourcepos)
32+
}
33+
self.init(
34+
parseOptions: options,
35+
commonmarkOptions: commonmarkOptions,
36+
extensions: ConvertOptions.defaultCommonmarkExtensions
37+
)
38+
}
39+
40+
public init() {
41+
self.init(fromParseOptions: ConvertOptions.defaultParseOptions)
42+
}
43+
44+
public static let defaultParseOptions: ParseOptions = []
45+
public static let defaultCommonmarkOptions: CommonmarkOptions = [
46+
.smart,
47+
.tableSpans,
48+
.sourcepos
49+
]
50+
public static let defaultCommonmarkExtensions: [String] = [
51+
"table",
52+
"strikethrough",
53+
"tasklist",
54+
]
55+
}
56+
57+
/// Options given to the Commonmark converter.
58+
public struct CommonmarkOptions: OptionSet {
59+
public var rawValue: Int32
60+
61+
public init(rawValue: Int32) {
62+
self.rawValue = rawValue
63+
}
64+
65+
/// The default Commonmark behavior, no special options.
66+
public static let `default` = CommonmarkOptions(rawValue: CMARK_OPT_DEFAULT)
67+
68+
/// Include a `data-sourcepos` attribute on all block elements.
69+
public static let sourcepos = CommonmarkOptions(rawValue: CMARK_OPT_SOURCEPOS)
70+
71+
/// Render `softbreak` elements as hard line breaks.
72+
public static let hardBreaks = CommonmarkOptions(rawValue: CMARK_OPT_HARDBREAKS)
73+
74+
/// Render raw HTML and unsafe links.
75+
///
76+
/// Unsafe links are `javascript:`, `vbscript:`, `file:`, and
77+
/// `data:`, except for `image/png`, `image/gif`, `image/jpeg`
78+
/// or `image/webp` MIME types. Without this option, raw HTML
79+
/// is replaced by a placeholder HTML comment. Unsafe links
80+
/// are replaced by empty strings.
81+
public static let unsafe = CommonmarkOptions(rawValue: CMARK_OPT_UNSAFE)
82+
83+
/// Render `softbreak` elements as spaces.
84+
public static let noBreaks = CommonmarkOptions(rawValue: CMARK_OPT_NOBREAKS)
85+
86+
/// Validate UTF-8 in the input before parsing, replacing illegal
87+
/// sequences with the replacement character `U+FFFD`.
88+
public static let validateUtf8 = CommonmarkOptions(rawValue: CMARK_OPT_VALIDATE_UTF8)
89+
90+
/// Convert straight quotes to curly, `---` to em dashes, `--` to en dashes.
91+
public static let smart = CommonmarkOptions(rawValue: CMARK_OPT_SMART)
92+
93+
/// Use GitHub-style `<pre lang="x">` tags for code blocks instead of
94+
/// `<pre><code class="language-x">`.
95+
public static let githubPreLang = CommonmarkOptions(rawValue: CMARK_OPT_GITHUB_PRE_LANG)
96+
97+
/// Be liberal in interpreting inline HTML tags.
98+
public static let liberalHtmlTag = CommonmarkOptions(rawValue: CMARK_OPT_LIBERAL_HTML_TAG)
99+
100+
/// Parse footnotes.
101+
public static let footnotes = CommonmarkOptions(rawValue: CMARK_OPT_FOOTNOTES)
102+
103+
/// Only parse strikethroughs if surrounded by exactly 2 tildes.
104+
///
105+
/// Strikethroughs are still only parsed when the `"strikethrough"`
106+
/// extension is enabled.
107+
public static let strikethroughDoubleTilde = CommonmarkOptions(rawValue: CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE)
108+
109+
/// Use style attributes to align table cells instead of align attributes.
110+
public static let tablePreferStyleAttributes = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES)
111+
112+
/// Include the remainder of the info string in code blocks in
113+
/// a separate attribute.
114+
public static let fullInfoString = CommonmarkOptions(rawValue: CMARK_OPT_FULL_INFO_STRING)
115+
116+
/// Parse only inline markdown directives. Block directives will not be
117+
/// parsed (their literal representations will remain in the output).
118+
public static let inlineOnly = CommonmarkOptions(rawValue: CMARK_OPT_INLINE_ONLY)
119+
120+
/// Parse the markdown input without removing preceding/trailing whitespace and
121+
/// without converting newline characters to breaks.
122+
///
123+
/// Using this option also enables the `CMARK_OPT_INLINE_ONLY` option.
124+
// FIXME: `CMARK_OPT_PRESERVE_WHITESPACE` does not seem to be visible to the Swift compiler, so its value is spelled out by hand below; confirm it matches the C header.
125+
public static let preserveWhitespace = CommonmarkOptions(rawValue: (1 << 19) | CMARK_OPT_INLINE_ONLY)
126+
127+
/// Enable the row- and column-span syntax in the tables extension.
128+
public static let tableSpans = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_SPANS)
129+
130+
/// Use a "ditto mark" (`"`) instead of a caret (`^`) to indicate row-spans in the tables extension.
131+
public static let tableRowspanDitto = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_ROWSPAN_DITTO)
132+
}

Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,31 @@ class CommonMarkConverterTests: XCTestCase {
3333
let document = Document(parsing: text, source: nil, options: [.parseBlockDirectives, .parseSymbolLinks])
3434
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
3535
}
36+
37+
/// Test using a custom set of Commonmark options to convert Markdown.
38+
func testCustomOpts() {
39+
let text = "~This is not strikethrough~ -- but ~~this is strikethrough~~."
40+
41+
// Because the "smart" option is not set, the `--` should not be converted
42+
// to an en-dash.
43+
let expectedDump = """
44+
Document @1:1-1:62
45+
└─ Paragraph @1:1-1:62
46+
├─ Text @1:1-1:36 "~This is not strikethrough~ -- but "
47+
├─ Strikethrough @1:36-1:61
48+
│ └─ Text @1:38-1:59 "this is strikethrough"
49+
└─ Text @1:61-1:62 "."
50+
"""
51+
52+
let document = Document(
53+
parsing: text,
54+
source: nil,
55+
convertOptions: .init(
56+
parseOptions: ConvertOptions.defaultParseOptions,
57+
commonmarkOptions: .strikethroughDoubleTilde,
58+
extensions: ConvertOptions.defaultCommonmarkExtensions
59+
)
60+
)
61+
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
62+
}
3663
}

0 commit comments

Comments
 (0)