Skip to content

Use antsibull-docutils 1.2.0 to simplify rst-yamllint checker #2745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
365 changes: 111 additions & 254 deletions tests/checkers/rst-yamllint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,9 @@
import pathlib
import sys
import traceback
import typing as t

from docutils import nodes
from docutils.core import Publisher
from docutils.io import StringInput
from docutils.parsers.rst import Directive
from docutils.parsers.rst.directives import register_directive
from docutils.parsers.rst.directives import unchanged as directive_param_unchanged
from docutils.utils import Reporter, SystemMessage
from antsibull_docutils.rst_code_finder import find_code_blocks
from yamllint import linter
from yamllint.config import YamlLintConfig
from yamllint.linter import PROBLEM_LEVELS
Expand Down Expand Up @@ -44,215 +39,47 @@
}


class IgnoreDirective(Directive):
has_content = True

def run(self) -> list:
return []


class CodeBlockDirective(Directive):
has_content = True
optional_arguments = 1

# These are all options Sphinx allows for code blocks.
# We need to have them here so that docutils successfully parses this extension.
option_spec = {
"caption": directive_param_unchanged,
"class": directive_param_unchanged,
"dedent": directive_param_unchanged,
"emphasize-lines": directive_param_unchanged,
"name": directive_param_unchanged,
"force": directive_param_unchanged,
"linenos": directive_param_unchanged,
"lineno-start": directive_param_unchanged,
}

def run(self) -> list[nodes.literal_block]:
code = "\n".join(self.content)
literal = nodes.literal_block(code, code)
literal["classes"].append("code-block")
literal["ansible-code-language"] = self.arguments[0] if self.arguments else None
literal["ansible-code-block"] = True
literal["ansible-code-lineno"] = self.lineno
return [literal]


class YamlLintVisitor(nodes.SparseNodeVisitor):
def __init__(
self,
document: nodes.document,
path: str,
results: list[dict],
content: str,
yamllint_config: YamlLintConfig,
):
super().__init__(document)
self.__path = path
self.__results = results
self.__content_lines = content.splitlines()
self.__yamllint_config = yamllint_config

def visit_system_message(self, node: nodes.system_message) -> None:
raise nodes.SkipNode

def visit_error(self, node: nodes.error) -> None:
raise nodes.SkipNode

def visit_literal_block(self, node: nodes.literal_block) -> None:
if "ansible-code-block" not in node.attributes:
if node.attributes["classes"]:
self.__results.append(
{
"path": self.__path,
"line": node.line or "unknown",
"col": 0,
"message": (
"Warning: found unknown literal block! Check for double colons '::'."
" If that is not the cause, please report this warning."
" It might indicate a bug in the checker or an unsupported Sphinx directive."
f" Node: {node!r}; attributes: {node.attributes}; content: {node.rawsource!r}"
),
}
)
else:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
self.__results.append(
{
"path": self.__path,
"line": node.line or "unknown",
"col": 0,
"message": (
"Warning: literal block (check for double colons '::')."
" Please convert this to a regular code block with an appropriate language."
f" Allowed languages: {allowed_languages}"
),
}
)
raise nodes.SkipNode

language = node.attributes["ansible-code-language"]
lineno = node.attributes["ansible-code-lineno"]

# Ok, we have to find both the row and the column offset for the actual code content
row_offset = lineno
found_empty_line = False
found_content_lines = False
content_lines = node.rawsource.count("\n") + 1
min_indent = None
for offset, line in enumerate(self.__content_lines[lineno:]):
stripped_line = line.strip()
if not stripped_line:
if not found_empty_line:
row_offset = lineno + offset + 1
found_empty_line = True
elif not found_content_lines:
found_content_lines = True
row_offset = lineno + offset

if found_content_lines and content_lines > 0:
if stripped_line:
indent = len(line) - len(line.lstrip())
if min_indent is None or min_indent > indent:
min_indent = indent
content_lines -= 1
elif not content_lines:
break

min_source_indent = None
for line in node.rawsource.split("\n"):
stripped_line = line.lstrip()
if stripped_line:
indent = len(line) - len(line.lstrip())
if min_source_indent is None or min_source_indent > indent:
min_source_indent = indent

col_offset = max(0, (min_indent or 0) - (min_source_indent or 0))

# Now that we have the offsets, we can actually do some processing...
if language not in {"YAML", "yaml", "yaml+jinja", "YAML+Jinja"}:
if language is None:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
self.__results.append(
{
"path": self.__path,
"line": row_offset + 1,
"col": col_offset + 1,
"message": (
"Literal block without language!"
f" Allowed languages are: {allowed_languages}."
),
}
)
return
if language not in ALLOWED_LANGUAGES:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
self.__results.append(
{
"path": self.__path,
"line": row_offset + 1,
"col": col_offset + 1,
"message": (
f"Warning: literal block with disallowed language: {language}."
" If the language should be allowed, the checker needs to be updated."
f" Currently allowed languages are: {allowed_languages}."
),
}
)
raise nodes.SkipNode

# So we have YAML. Let's lint it!
try:
problems = linter.run(
io.StringIO(node.rawsource.rstrip() + "\n"),
self.__yamllint_config,
self.__path,
def create_warn_unknown_block(
results: list[dict[str, t.Any]], path: str
) -> t.Callable[[int | str, int, str, bool], None]:
def warn_unknown_block(
line: int | str, col: int, content: str, unknown_directive: bool
) -> None:
if unknown_directive:
results.append(
{
"path": path,
"line": line,
"col": col,
"message": (
"Warning: found unknown literal block! Check for double colons '::'."
" If that is not the cause, please report this warning."
" It might indicate a bug in the checker or an unsupported Sphinx directive."
f" Content: {content!r}"
),
}
)
for problem in problems:
if problem.level not in REPORT_LEVELS:
continue
msg = f"{problem.level}: {problem.desc}"
if problem.rule:
msg += f" ({problem.rule})"
self.__results.append(
{
"path": self.__path,
"line": row_offset + problem.line,
"col": col_offset + problem.column,
"message": msg,
}
)
except Exception as exc:
error = str(exc).replace("\n", " / ")
self.__results.append(
else:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
results.append(
{
"path": self.__path,
"line": row_offset + 1,
"col": col_offset + 1,
"path": path,
"line": line,
"col": 0,
"message": (
f"Internal error while linting YAML: exception {type(exc)}:"
f" {error}; traceback: {traceback.format_exc()!r}"
"Warning: literal block (check for double colons '::')."
" Please convert this to a regular code block with an appropriate language."
f" Allowed languages: {allowed_languages}"
),
}
)

raise nodes.SkipNode
return warn_unknown_block


def main():
def main() -> None:
paths = sys.argv[1:] or sys.stdin.read().splitlines()
results = []

for directive in (
"code",
"code-block",
"sourcecode",
):
register_directive(directive, CodeBlockDirective)

# The following docutils directives should better be ignored:
for directive in ("parsed-literal",):
register_directive(directive, IgnoreDirective)
results: list[dict[str, t.Any]] = []

# TODO: should we handle the 'literalinclude' directive? maybe check file directly if right extension?
# (https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude)
Expand All @@ -267,56 +94,86 @@ def main():
with open(path, "rt", encoding="utf-8") as f:
content = f.read()

# We create a Publisher only to have a mechanism which gives us the settings object.
# Doing this more explicit is a bad idea since the classes used are deprecated and will
# eventually get replaced. Publisher.get_settings() looks like a stable enough API that
# we can 'just use'.
publisher = Publisher(source_class=StringInput)
publisher.set_components("standalone", "restructuredtext", "pseudoxml")
override = {
"root_prefix": docs_root,
"input_encoding": "utf-8",
"file_insertion_enabled": False,
"raw_enabled": False,
"_disable_config": True,
"report_level": Reporter.ERROR_LEVEL,
"warning_stream": io.StringIO(),
}
publisher.process_programmatic_settings(None, override, None)
publisher.set_source(content, path)

# Parse the document
try:
doc = publisher.reader.read(
publisher.source, publisher.parser, publisher.settings
)
except SystemMessage as exc:
error = str(exc).replace("\n", " / ")
results.append(
{
"path": path,
"line": 0,
"col": 0,
"message": f"Cannot parse document: {error}",
}
)
continue
except Exception as exc:
error = str(exc).replace("\n", " / ")
results.append(
{
"path": path,
"line": 0,
"col": 0,
"message": f"Cannot parse document, unexpected error {type(exc)}: {error}; traceback: {traceback.format_exc()!r}",
}
)
continue
for code_block in find_code_blocks(
content,
path=path,
root_prefix=docs_root,
warn_unknown_block_w_unknown_info=create_warn_unknown_block(
results, path
),
):
# Now that we have the offsets, we can actually do some processing...
if code_block.language not in {
"YAML",
"yaml",
"yaml+jinja",
"YAML+Jinja",
}:
if code_block.language is None:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
results.append(
{
"path": path,
"line": code_block.row_offset + 1,
"col": code_block.col_offset + 1,
"message": (
"Literal block without language!"
f" Allowed languages are: {allowed_languages}."
),
}
)
return
if code_block.language not in ALLOWED_LANGUAGES:
allowed_languages = ", ".join(sorted(ALLOWED_LANGUAGES))
results.append(
{
"path": path,
"line": code_block.row_offset + 1,
"col": code_block.col_offset + 1,
"message": (
f"Warning: literal block with disallowed language: {code_block.language}."
" If the language should be allowed, the checker needs to be updated."
f" Currently allowed languages are: {allowed_languages}."
),
}
)
continue

# Process the document
try:
visitor = YamlLintVisitor(doc, path, results, content, yamllint_config)
doc.walk(visitor)
# So we have YAML. Let's lint it!
try:
problems = linter.run(
io.StringIO(code_block.content),
yamllint_config,
path,
)
for problem in problems:
if problem.level not in REPORT_LEVELS:
continue
msg = f"{problem.level}: {problem.desc}"
if problem.rule:
msg += f" ({problem.rule})"
results.append(
{
"path": path,
"line": code_block.row_offset + problem.line,
"col": code_block.col_offset + problem.column,
"message": msg,
}
)
except Exception as exc:
error = str(exc).replace("\n", " / ")
results.append(
{
"path": path,
"line": code_block.row_offset + 1,
"col": code_block.col_offset + 1,
"message": (
f"Internal error while linting YAML: exception {type(exc)}:"
f" {error}; traceback: {traceback.format_exc()!r}"
),
}
)
except Exception as exc:
error = str(exc).replace("\n", " / ")
results.append(
Expand Down
Loading