Skip to content

Commit b2d637e

Browse files
vdusekclaude
andcommitted
refactor: replace redbaron with griffe in async docstring scripts
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6185d55 commit b2d637e

File tree

4 files changed

+172
-144
lines changed

4 files changed

+172
-144
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ dev = [
5858
"pytest-timeout<3.0.0",
5959
"pytest-xdist<4.0.0",
6060
"pytest<9.0.0",
61-
"redbaron<1.0.0",
6261
"ruff~=0.15.0",
6362
"setuptools", # setuptools are used by pytest but not explicitly required
6463
"types-colorama<0.5.0",

scripts/check_async_docstrings.py

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
"""Check if async docstrings are the same as sync."""
44

5-
import re
65
import sys
6+
from collections.abc import Generator
77
from pathlib import Path
88

9-
from redbaron import RedBaron
9+
from griffe import Module, load
1010
from utils import sync_to_async_docstring
1111

1212
found_issues = False
@@ -17,50 +17,56 @@
1717
'with_custom_http_client',
1818
}
1919

20-
# Get the directory of the source files
21-
clients_path = Path(__file__).parent.resolve() / '../src/apify_client'
20+
# Load the apify_client package
21+
src_path = Path(__file__).parent.resolve() / '../src'
22+
package = load('apify_client', search_paths=[str(src_path)])
2223

23-
# Go through every Python file in that directory
24-
for client_source_path in clients_path.glob('**/*.py'):
25-
with open(client_source_path, encoding='utf-8') as source_file:
26-
# Read the source file and parse the code using Red Baron
27-
red = RedBaron(source_code=source_file.read())
2824

29-
# Find all classes which end with "ClientAsync" (there should be at most 1 per file)
30-
async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
31-
if not async_class:
25+
def walk_modules(module: Module) -> Generator[Module]:
    """Yield `module` followed by every module nested beneath it.

    Traversal is depth-first and pre-order: a module is always yielded before its submodules, and submodules are
    visited in the order of `module.modules`.
    """
    # Explicit stack instead of recursion; children are pushed reversed so they pop in their original order.
    pending = [module]
    while pending:
        current = pending.pop()
        yield current
        pending.extend(reversed(list(current.modules.values())))
30+
31+
32+
# Go through every module in the package
33+
if not isinstance(package, Module):
34+
raise TypeError('Expected griffe to load a Module')
35+
for module in walk_modules(package):
36+
for async_class in module.classes.values():
37+
if not async_class.name.endswith('ClientAsync'):
3238
continue
3339

34-
# Find the corresponding sync classes (same name, but without -Async)
35-
sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
40+
# Find the corresponding sync class (same name, but without Async)
41+
sync_class = module.classes.get(async_class.name.replace('ClientAsync', 'Client'))
42+
if not sync_class:
43+
continue
3644

3745
# Go through all methods in the async class
38-
for async_method in async_class.find_all('DefNode'):
39-
# Find corresponding sync method in the sync class
40-
sync_method = sync_class.find('DefNode', name=async_method.name)
41-
46+
for async_method in async_class.functions.values():
4247
# Skip methods with @ignore_docs decorator
43-
if len(async_method.decorators) and str(async_method.decorators[0].value) == 'ignore_docs':
48+
if any(str(d.value) == 'ignore_docs' for d in async_method.decorators):
4449
continue
4550

4651
# Skip methods whose docstrings are intentionally different
4752
if async_method.name in SKIPPED_METHODS:
4853
continue
4954

50-
# If the sync method has a docstring, check if it matches the async dostring
51-
if sync_method and isinstance(sync_method.value[0].value, str):
52-
sync_docstring = sync_method.value[0].value
53-
async_docstring = async_method.value[0].value
54-
expected_docstring = sync_to_async_docstring(sync_docstring)
55-
56-
if not isinstance(async_docstring, str):
57-
print(f'Missing docstring for "{async_class.name}.{async_method.name}"!')
58-
found_issues = True
59-
elif expected_docstring != async_docstring:
60-
print(
61-
f'Docstring for "{async_class.name}.{async_method.name}" is out of sync with "{sync_class.name}.{sync_method.name}"!' # noqa: E501
62-
)
63-
found_issues = True
55+
# Find corresponding sync method in the sync class
56+
sync_method = sync_class.functions.get(async_method.name)
57+
if not sync_method or not sync_method.docstring:
58+
continue
59+
60+
expected_docstring = sync_to_async_docstring(sync_method.docstring.value)
61+
62+
if not async_method.docstring:
63+
print(f'Missing docstring for "{async_class.name}.{async_method.name}"!')
64+
found_issues = True
65+
elif async_method.docstring.value != expected_docstring:
66+
print(
67+
f'Docstring for "{async_class.name}.{async_method.name}" is out of sync with "{sync_class.name}.{sync_method.name}"!' # noqa: E501
68+
)
69+
found_issues = True
6470

6571
if found_issues:
6672
print()

scripts/fix_async_docstrings.py

Lines changed: 133 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
#!/usr/bin/env python3
22

3-
import re
3+
import ast
4+
from collections.abc import Generator
45
from pathlib import Path
56

6-
from redbaron import RedBaron
7+
from griffe import Module, load
78
from utils import sync_to_async_docstring
89

910
# Methods where the async docstring is intentionally different from the sync one
@@ -12,78 +13,147 @@
1213
'with_custom_http_client',
1314
}
1415

15-
# Get the directory of the source files
16-
clients_path = Path(__file__).parent.resolve() / '../src/apify_client'
17-
18-
# Go through every Python file in that directory
19-
for client_source_path in clients_path.glob('**/*.py'):
20-
with open(client_source_path, 'r+', encoding='utf-8') as source_file:
21-
# Read the source file and parse the code using Red Baron
22-
red = RedBaron(source_code=source_file.read())
23-
24-
# Find all classes which end with "ClientAsync" (there should be at most 1 per file)
25-
async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
26-
27-
if async_class is None:
28-
# No async client class in this file, nothing to fix
16+
# Load the apify_client package
17+
src_path = Path(__file__).parent.resolve() / '../src'
18+
package = load('apify_client', search_paths=[str(src_path)])
19+
20+
21+
def walk_modules(module: Module) -> Generator[Module]:
    """Yield the given module first, then each of its submodules and their descendants in turn."""
    yield module
    for child in module.modules.values():
        # Re-yield everything produced for the child's own subtree.
        for descendant in walk_modules(child):
            yield descendant
26+
27+
28+
def _escape_for_triple_quotes(content: str) -> str:
    """Escape docstring text so it keeps its value when written between triple double quotes."""
    # Backslashes go first, so the escapes added below are not themselves doubled.
    escaped = content.replace('\\', '\\\\')
    # A literal triple quote inside the text would otherwise terminate the docstring early.
    return escaped.replace('"""', '\\"\\"\\"')


def format_docstring(content: str, indent: str) -> str:
    """Format a docstring with proper indentation and triple quotes.

    The text is escaped first, so that backslashes, embedded triple quotes and a trailing double quote
    do not change the value of the generated string literal or make it unparsable.

    Args:
        content: The docstring text, without surrounding quotes and without leading indentation.
        indent: Whitespace placed before the opening quotes, every non-blank line and the closing quotes.

    Returns:
        The source text of the docstring statement, always ending with a newline. A single-line docstring is
        emitted on one line; a multi-line docstring gets its closing quotes on a separate line.
    """
    lines = _escape_for_triple_quotes(content).split('\n')

    if len(lines) == 1:
        summary = lines[0]
        if summary.endswith('"'):
            # A bare quote right before the closing delimiter would merge with it. Escape it, unless it is
            # already escaped (i.e. preceded by an odd number of backslashes).
            stem = summary[:-1]
            trailing_backslashes = len(stem) - len(stem.rstrip('\\'))
            if trailing_backslashes % 2 == 0:
                summary = f'{stem}\\"'
        return f'{indent}"""{summary}"""\n'

    # Blank lines stay completely empty, so no trailing whitespace is introduced.
    body = [f'{indent}{line}' if line.strip() else '' for line in lines[1:]]
    return '\n'.join([f'{indent}"""{lines[0]}', *body, f'{indent}"""']) + '\n'
42+
43+
44+
def find_docstring_range(tree: ast.AST, class_name: str, method_name: str) -> tuple[int, int | None, int] | None:
45+
"""Find the line range of a method's docstring using ast.
46+
47+
Returns (start_line, end_line, col_offset) 1-indexed, or None.
48+
"""
49+
for node in ast.walk(tree):
50+
if isinstance(node, ast.ClassDef) and node.name == class_name:
51+
for item in node.body:
52+
if (
53+
isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
54+
and item.name == method_name
55+
and item.body
56+
and isinstance(item.body[0], ast.Expr)
57+
and isinstance(item.body[0].value, ast.Constant)
58+
and isinstance(item.body[0].value.value, str)
59+
):
60+
expr = item.body[0]
61+
return expr.lineno, expr.end_lineno, expr.col_offset
62+
return None
63+
64+
65+
def find_method_body_start(tree: ast.AST, class_name: str, method_name: str) -> tuple[int, int] | None:
66+
"""Find where a method's body starts (for inserting a missing docstring).
67+
68+
Returns (line_number, col_offset) 1-indexed, or None.
69+
"""
70+
for node in ast.walk(tree):
71+
if isinstance(node, ast.ClassDef) and node.name == class_name:
72+
for item in node.body:
73+
if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)) and item.name == method_name and item.body:
74+
first_stmt = item.body[0]
75+
return first_stmt.lineno, first_stmt.col_offset
76+
return None
77+
78+
79+
# Go through every module in the package
80+
if not isinstance(package, Module):
81+
raise TypeError('Expected griffe to load a Module')
82+
for module in walk_modules(package):
83+
replacements = []
84+
85+
for async_class in module.classes.values():
86+
if not async_class.name.endswith('ClientAsync'):
2987
continue
3088

31-
# Find the corresponding sync classes (same name, but without -Async)
32-
sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
89+
# Find the corresponding sync class (same name, but without Async)
90+
sync_class = module.classes.get(async_class.name.replace('ClientAsync', 'Client'))
91+
if not sync_class:
92+
continue
3393

3494
# Go through all methods in the async class
35-
for async_method in async_class.find_all('DefNode'):
36-
# Find corresponding sync method in the sync class
37-
sync_method = sync_class.find('DefNode', name=async_method.name)
38-
95+
for async_method in async_class.functions.values():
3996
# Skip methods with @ignore_docs decorator
40-
if len(async_method.decorators) and str(async_method.decorators[0].value) == 'ignore_docs':
97+
if any(str(d.value) == 'ignore_docs' for d in async_method.decorators):
4198
continue
4299

43100
# Skip methods whose docstrings are intentionally different
44101
if async_method.name in SKIPPED_METHODS:
45102
continue
46103

47-
# Skip methods that don't exist in the sync class
48-
if sync_method is None:
104+
# Find corresponding sync method in the sync class
105+
sync_method = sync_class.functions.get(async_method.name)
106+
if not sync_method or not sync_method.docstring:
49107
continue
50108

51-
# If the sync method has a docstring, copy it to the async method (with adjustments)
52-
if isinstance(sync_method.value[0].value, str):
53-
sync_docstring = sync_method.value[0].value
54-
async_docstring = async_method.value[0].value
55-
56-
correct_async_docstring = sync_to_async_docstring(sync_docstring)
57-
if async_docstring == correct_async_docstring:
109+
correct_docstring = sync_to_async_docstring(sync_method.docstring.value)
110+
111+
if not async_method.docstring:
112+
print(f'Fixing missing docstring for "{async_class.name}.{async_method.name}"...')
113+
replacements.append((async_class.name, async_method.name, correct_docstring, False))
114+
elif async_method.docstring.value != correct_docstring:
115+
replacements.append((async_class.name, async_method.name, correct_docstring, True))
116+
117+
if not replacements:
118+
continue
119+
120+
# Read the source file and parse with ast for precise locations
121+
filepath = module.filepath
122+
if not isinstance(filepath, Path):
123+
continue
124+
source = filepath.read_text(encoding='utf-8')
125+
source_lines = source.splitlines(keepends=True)
126+
tree = ast.parse(source)
127+
128+
# Collect replacement operations with line numbers
129+
ops = []
130+
for class_name, method_name, correct_docstring, has_existing in replacements:
131+
if has_existing:
132+
result = find_docstring_range(tree, class_name, method_name)
133+
if result:
134+
start_line, end_line, col_offset = result
135+
if end_line is None:
58136
continue
59-
60-
# Work around a bug in Red Baron, which indents docstrings too much when you insert them,
61-
# so we have to un-indent it one level first.
62-
correct_async_docstring = re.sub('^ ', '', correct_async_docstring, flags=re.MULTILINE)
63-
64-
if not isinstance(async_docstring, str):
65-
print(f'Fixing missing docstring for "{async_class.name}.{async_method.name}"...')
66-
async_method.value.insert(0, correct_async_docstring)
67-
else:
68-
async_method.value[0] = correct_async_docstring
69-
70-
updated_source_code = red.dumps()
71-
72-
# Work around a bug in Red Baron, which adds indents to docstrings when you insert them (including empty lines),
73-
# so we have to remove the extra whitespace
74-
updated_source_code = re.sub('^ $', '', updated_source_code, flags=re.MULTILINE)
75-
76-
# Work around a bug in Red Baron, which indents `except` and `finally` statements wrong
77-
# so we have to add some extra whitespace
78-
updated_source_code = re.sub('^except', ' except', updated_source_code, flags=re.MULTILINE)
79-
updated_source_code = re.sub('^ except', ' except', updated_source_code, flags=re.MULTILINE)
80-
updated_source_code = re.sub('^finally', ' finally', updated_source_code, flags=re.MULTILINE)
81-
updated_source_code = re.sub('^ finally', ' finally', updated_source_code, flags=re.MULTILINE)
82-
83-
# Work around a bug in Red Baron, which sometimes adds an extra new line to the end of a file
84-
updated_source_code = updated_source_code.rstrip() + '\n'
85-
86-
# Save the updated source code back to the file
87-
source_file.seek(0)
88-
source_file.write(updated_source_code)
89-
source_file.truncate()
137+
indent = ' ' * col_offset
138+
formatted = format_docstring(correct_docstring, indent)
139+
ops.append(('replace', start_line, end_line, formatted))
140+
else:
141+
result = find_method_body_start(tree, class_name, method_name)
142+
if result:
143+
insert_line, col_offset = result
144+
indent = ' ' * col_offset
145+
formatted = format_docstring(correct_docstring, indent)
146+
ops.append(('insert', insert_line, None, formatted))
147+
148+
# Sort by start line descending (process bottom-up to preserve line numbers)
149+
ops.sort(key=lambda x: x[1], reverse=True)
150+
151+
for op_type, start_line, end_line, formatted in ops:
152+
formatted_lines = formatted.splitlines(keepends=True)
153+
if op_type == 'replace':
154+
source_lines[start_line - 1 : end_line] = formatted_lines
155+
elif op_type == 'insert':
156+
source_lines[start_line - 1 : start_line - 1] = formatted_lines
157+
158+
# Save the updated source code back to the file
159+
filepath.write_text(''.join(source_lines), encoding='utf-8')

0 commit comments

Comments
 (0)