Skip to content

Improve sphinx reStructuredText parsing #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 137 additions & 9 deletions docstring_to_markdown/rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,153 @@


class Directive:
def __init__(self, pattern: str, replacement: str, name: Union[str, None] = None):
def __init__(
self, pattern: str, replacement: str,
name: Union[str, None] = None,
flags: int = 0
):
self.pattern = pattern
self.replacement = replacement
self.name = name

self.flags = flags


# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#cross-referencing-python-objects
# Python-domain role names; SPHINX_RULES joins them with '|' inside the
# optional-prefix `(:py)?:(...)` cross-reference pattern.
SPHINX_CROSS_REF_PYTHON = (
    'mod', 'func', 'data', 'const', 'class',
    'meth', 'attr', 'exc', 'obj',
)

# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#cross-referencing-c-constructs
# C-domain role names; SPHINX_RULES joins them with '|' inside the
# `:c:(...)` cross-reference pattern.
SPHINX_CROSS_REF_C = (
    'member', 'data', 'func', 'macro', 'struct',
    'union', 'enum', 'enumerator', 'type',
)

# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#cross-referencing
# C++-domain role names; SPHINX_RULES joins them with '|' inside the
# `:cpp:(...)` cross-reference pattern.
SPHINX_CROSS_REF_CPP = (
    'any', 'class', 'struct', 'func', 'member',
    'var', 'type', 'concept', 'enum', 'enumerator',
)

# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#the-javascript-domain
# JavaScript-domain role names; SPHINX_RULES joins them with '|' inside the
# `:js:(...)` cross-reference pattern.
SPHINX_CROSS_REF_JS = (
    'mod', 'func', 'meth',
    'class', 'data', 'attr',
)

# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#the-restructuredtext-domain
# reStructuredText-domain role names; SPHINX_RULES joins them with '|' inside
# the optional-prefix `(:rst)?:(...)` cross-reference pattern.
SPHINX_CROSS_REF_RST = ('dir', 'role')

# https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
# Role names used without a domain prefix; SPHINX_RULES joins them with '|'
# inside the bare `:(...):` cross-reference pattern.
SPHINX_CROSS_REF_OTHER = (
    'any',
    # https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#cross-referencing-other-items-of-interest
    'envvar', 'token', 'keyword', 'option', 'term',
)

# Field names accepted as parameter markers; SPHINX_RULES joins them with '|'
# at the start of each parameter pattern (`^\s*:(...) ...`).
SPHINX_PARAM = (
    'param', 'parameter',
    'arg', 'argument',
    'key', 'keyword',
)

SPHINX_RULES: List[Directive] = [
Directive(
pattern=r':(func|meth|class|obj|term):`\.?(?P<name>[^`]+?)`',
pattern=r':c:({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_C)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r'^:param (?P<param>\S+):',
replacement=r'- `\g<param>`:'
pattern=r':cpp:({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_CPP)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r'^:return:',
replacement=r'Returns:'
)
pattern=r':js:({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_JS)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r'(:py)?:({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_PYTHON)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r'(:rst)?:({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_RST)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r':({}):`\.?(?P<name>[^`]+?)`'.format('|'.join(SPHINX_CROSS_REF_OTHER)),
replacement=r'`\g<name>`'
),
Directive(
pattern=r'^\s*:({}) (?P<type>\S+) (?P<param>\S+):'.format('|'.join(SPHINX_PARAM)),
replacement=r'- `\g<param>` (`\g<type>`):',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:({}) (?P<param>\S+): (?P<desc>.*)(\n|\r\n?):type \2: (?P<type>.*)$'.format('|'.join(SPHINX_PARAM)),
replacement=r'- `\g<param>` (\g<type>): \g<desc>',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:({}) (?P<param>\S+):'.format('|'.join(SPHINX_PARAM)),
replacement=r'- `\g<param>`:',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:type (?P<param>\S+):',
replacement=r' . Type: `\g<param>`:',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:(return|returns):',
replacement=r'- returns:',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:rtype: (?P<type>\S+)',
replacement=r'- return type: `\g<type>`',
flags=re.MULTILINE
),
Directive(
pattern=r'^\s*:(raises|raise|except|exception) (?P<exception>\S+):',
replacement=r'- raises `\g<exception>`:',
flags=re.MULTILINE
),
]


Expand Down Expand Up @@ -555,7 +683,7 @@ def flush_buffer():
lines = '\n'.join(lines_buffer)
# rst markup handling
for directive in DIRECTIVES:
lines = re.sub(directive.pattern, directive.replacement, lines)
lines = re.sub(directive.pattern, directive.replacement, lines, flags=directive.flags)

for (section, header) in RST_SECTIONS.items():
lines = lines.replace(header, '\n#### ' + section + '\n')
Expand Down
67 changes: 63 additions & 4 deletions tests/test_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,41 @@ def func(): pass
"""


# https://www.sphinx-doc.org/en/master/usage/restructuredtext/domains.html#info-field-lists
SPHINX_SIGNATURE = """
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there code that handles any code blocks before this? Otherwise something like

.. code-block:: python
    def foo():
        """:param  str message_body: blah blah"""

could cause problems. I'll admit it's an edge case that would likely only come up in docstring parsing code.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, one other thing that comes to mind! If the programmer put these directives in a random order for some reason, it would be nice if all directives of a certain type were grouped together. This is also definitely a bit of an edge case and not required, but there's plenty of less-than-finely-crafted code out there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it will not cause problems. Code blocks are properly parsed out at an earlier stage. I added a test case in 1771df2 to demonstrate this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree about the ordering. That would require a proper parser, though, so we cannot just use regular expressions here. It is definitely in scope, but I don't have more time this weekend. Would you mind opening an issue so we can track this as a future improvement?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resolved with #14

:param str sender: The person sending the message
:param str recipient: The recipient of the message
:param str message_body: The body of the message
:param priority: The priority of the message, can be a number 1-5
:type priority: integer or None
:return: the message id
:rtype: int
:raises ValueError: if the message_body exceeds 160 characters
"""

# Expected Markdown for SPHINX_SIGNATURE; the two are paired in the
# 'converts sphinx signatures' entry of RST_CASES.
SPHINX_SIGNATURE_MARKDOWN = """\
- `sender` (`str`): The person sending the message
- `recipient` (`str`): The recipient of the message
- `message_body` (`str`): The body of the message
- `priority` (integer or None): The priority of the message, can be a number 1-5
- returns: the message id
- return type: `int`
- raises `ValueError`: if the message_body exceeds 160 characters
"""

# reST input for the 'keeps params intact in code blocks' entry of RST_CASES:
# a `:param` field that appears inside a `code-block` directive.
SPHINX_NESTED = """\
.. code-block:: python
def foo():
''':param str message_body: blah blah'''
"""

# Expected Markdown for SPHINX_NESTED: the `:param` text is left verbatim
# inside the fenced python block rather than rewritten as a list item.
SPHINX_NESTED_MARKDOWN = """\
```python
def foo():
''':param str message_body: blah blah'''
```
"""

RST_CASES = {
'handles prompt continuation and multi-line output': {
'rst': CODE_MULTI_LINE_CODE_OUTPUT,
Expand Down Expand Up @@ -715,17 +750,33 @@ def func(): pass
'rst': REFERENCES,
'md': REFERENCES_MARKDOWN
},
'converts sphinx func, meth, and class': {
'converts sphinx cross-references to func, meth, class, etc.': {
'rst': ':func:`function1`, :meth:`.Script.inline`, :class:`.Environment`',
'md': '`function1`, `Script.inline`, `Environment`'
},
'converts sphinx cross-references in Python domain': {
'rst': ':py:func:`function1`, :py:meth:`.Script.inline`, :py:class:`.Environment`',
'md': '`function1`, `Script.inline`, `Environment`'
},
'converts sphinx cross-references in C domain': {
'rst': ':c:func:`function1`, :c:struct:`Data`',
'md': '`function1`, `Data`'
},
'converts sphinx cross-references in C++ domain': {
'rst': ':cpp:func:`function1`, :cpp:var:`data`',
'md': '`function1`, `data`'
},
'converts sphinx cross-references in JS domain': {
'rst': ':js:func:`function1`, :js:class:`Math`',
'md': '`function1`, `Math`'
},
'converts sphinx params': {
'rst': ':param x: test arg',
'md': '- `x`: test arg'
},
'converts sphinx return': {
'rst': ':return: return description',
'md': 'Returns: return description'
'converts indented sphinx params': {
'rst': '\t:param x: test arg',
'md': '- `x`: test arg'
},
'converts non-standard simple table': {
'rst': SIMPLE_TABLE,
Expand All @@ -746,6 +797,14 @@ def func(): pass
'converts nested parameter lists': {
'rst': NESTED_PARAMETERS,
'md': NESTED_PARAMETERS_MARKDOWN
},
'converts sphinx signatures': {
'rst': SPHINX_SIGNATURE,
'md': SPHINX_SIGNATURE_MARKDOWN
},
'keeps params intact in code blocks': {
'rst': SPHINX_NESTED,
'md': SPHINX_NESTED_MARKDOWN
}
}

Expand Down