Automated Spellcheck & PR Creation #11
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Automated Spellcheck & PR Creation | |
on: | |
workflow_dispatch: # Runs only when manually triggered | |
permissions: | |
contents: write # Needed to push changes after approval | |
jobs: | |
spellcheck_and_apply: | |
runs-on: ubuntu-latest | |
steps: | |
- name: Checkout Repository | |
uses: actions/checkout@v4 | |
- name: Install Dependencies | |
run: | | |
pip install codespell | |
pip install fuzzywuzzy[speedup] | |
pip install sentence-splitter | |
- name: Verify Spellcheck Ignore List Exists | |
run: | | |
if [ ! -f .github/spellcheck-ignore.txt ]; then | |
echo "❌ Error: spellcheck-ignore.txt not found!" | |
exit 1 | |
fi | |
- name: Run Spellcheck and Apply Corrections | |
run: | | |
set -e # Exit on error | |
python3 <<EOF | |
import re | |
import os | |
import subprocess | |
from fuzzywuzzy import process | |
from sentence_splitter import SentenceSplitter | |
# Load the ignore list as a mapping of lowercase word -> word exactly as written in the file,
# so later lookups are case-insensitive while the preferred capitalization is kept.
ignore_file = ".github/spellcheck-ignore.txt"
ignore_list = {}
if os.path.exists(ignore_file):
    with open(ignore_file, "r", encoding="utf-8") as handle:
        for raw_entry in handle:
            entry = raw_entry.strip()
            ignore_list[entry.lower()] = entry

# Known-good phrases mapped to misspelled variants of the same phrase.
# The correction loop uses these to pick the right word when a variant appears on a line.
context_phrases = {
    "identity provider": [
        "identiy provider",
        "identify provider",
    ],
    "access token": [
        "access toekn",
        "acess token",
    ],
    "user authentication": [
        "user authentification",
        "user authenthication",
    ],
    "API gateway": [
        "API getway",
        "API gatway",
    ],
}
# Check whether a word occurs inside the target (the parenthesised part) of a Markdown link.
def is_inside_markdown_link(line, original):
    """Return True if original appears inside the (...) target of a Markdown link on line.

    Args:
        line: The text line to inspect.
        original: The word to look for; it is matched literally (regex-escaped).

    Returns:
        True when some "[text](target)" on the line has original within its target,
        otherwise False. A word that only appears in the link text, or after the
        link, does not count.
    """
    # The target part must not run past the closing parenthesis of the link; otherwise a
    # word that merely follows a link and precedes any later ")" would be reported as
    # being inside the link target.
    pattern = r'\[.*?\]\([^)]*' + re.escape(original) + r'[^)]*\)'
    return re.search(pattern, line) is not None
# Detect lines that contain a "{% tab title=" or "{% hint style=" template tag.
def is_in_title_or_hint(line):
    """Return True when line contains a '{% tab title=' or '{% hint style=' tag opener."""
    tag_match = re.search(r'{% (tab title|hint style)=', line)
    return tag_match is not None
# Case-insensitive whole-string comparison of a flagged word against a candidate.
def is_full_word_match(misspelled, correct):
    """Return True when both words are equal after lowercasing (no partial matches)."""
    lowered_misspelled = misspelled.lower()
    lowered_correct = correct.lower()
    return lowered_misspelled == lowered_correct
# Run codespell and capture its report. No paths are passed, so codespell falls back to its
# default target (presumably the current directory; confirm against the codespell docs).
codespell_command = [
    "codespell",
    "--ignore-words=.github/spellcheck-ignore.txt",
    "--skip=.git,*.lock,*.json,*.yaml,*.yml,*.css,*.html",
    "--quiet-level=2",
]
# check=False: the exit status is inspected below instead of raising CalledProcessError.
result = subprocess.run(codespell_command, capture_output=True, text=True, check=False)
spellcheck_output = result.stdout.strip()
if not spellcheck_output:
    if result.returncode != 0:
        # A failing run with an empty report means codespell itself went wrong (bad option,
        # unreadable ignore file, ...). Surface that instead of claiming a clean result.
        print(f"❌ codespell exited with status {result.returncode} without reporting any corrections:")
        print(result.stderr.strip())
        raise SystemExit(result.returncode)
    print("✅ No spelling corrections found. Exiting.")
    # SystemExit is raised directly; the exit() helper is injected by the site module and is
    # intended for interactive sessions.
    raise SystemExit(0)
# Process the codespell report and apply each correction directly to the affected file.


def replace_first_instance(text, old, new):
    """Replace the first whole-word occurrence of old in text with new.

    Whitespace in front of the word is captured and written back unchanged, so indentation
    is preserved. The replacement is built by a callable so that new is inserted literally:
    as a template string, a suggestion starting with a digit would merge with the group
    reference (group 1 followed by "2nd" reads as group 12) and a backslash would be
    treated as an escape sequence.
    """
    pattern = r"(\s*)(\b" + re.escape(old) + r"\b)"
    return re.sub(pattern, lambda found: found.group(1) + new, text, count=1)


for line in spellcheck_output.splitlines():
    # Report lines are expected to look like: <path>:<line>: <word> ==> <suggestion>
    match = re.match(r"(.*):(\d+): (\S+) ==> (\S+)", line)
    if not match:
        continue
    file_path, line_number, original, suggestion = match.groups()
    line_number = int(line_number) - 1  # Adjust to zero-based index

    # The file is re-read for every report line so that earlier fixes to the same file
    # are kept when this one is written back.
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    if not 0 <= line_number < len(lines):
        continue  # Reported line does not exist in the file; nothing to correct
    context_line = lines[line_number]

    # Skip every correction on a line that contains a Markdown link with an http(s) URL.
    if re.search(r"\[.*?\]\(https?://.*?\)", context_line):
        continue

    # A flagged word that is itself on the ignore list is rewritten to the list's capitalization.
    original_key = original.lower()
    if original_key in ignore_list:
        suggestion = ignore_list[original_key]
    else:
        # NOTE(review): is_full_word_match is a case-insensitive equality test, so this branch
        # appears unable to fire for a word that missed the exact lookup above. Kept as-is.
        best_match, score = process.extractOne(original, ignore_list.keys()) if ignore_list else (None, 0)
        if best_match and score >= 90 and is_full_word_match(original, best_match):
            suggestion = ignore_list[best_match]

    # If the suggestion is an ignore-list word, enforce the capitalization from the list.
    suggestion = ignore_list.get(suggestion.lower(), suggestion)

    # When a known wrong phrase is on the line, take the word at the same position in the
    # correct phrase. Only the flagged word is replaced, never the whole phrase.
    for correct_phrase, wrong_variants in context_phrases.items():
        for wrong_phrase in wrong_variants:
            if wrong_phrase not in context_line:
                continue
            for wrong_word, correct_word in zip(wrong_phrase.split(), correct_phrase.split()):
                if wrong_word == original:
                    suggestion = correct_word
                    break

    # The suggestion pattern captures non-space runs, so it can carry trailing punctuation
    # (for example the comma of a multi-candidate list); drop it unless the original had it.
    if suggestion.endswith((",", ".", ";")) and not original.endswith((",", ".", ";")):
        suggestion = suggestion.rstrip(",.;")

    updated_line = replace_first_instance(context_line, original, suggestion)
    # Write back only when the line actually changed.
    if updated_line != context_line:
        lines[line_number] = updated_line
        with open(file_path, "w", encoding="utf-8") as f:
            f.writelines(lines)
        print(f"✅ Corrected '{original}' to '{suggestion}' in {file_path} (line {line_number + 1})")

print("✅ All corrections applied successfully.")
EOF | |
- name: Commit and Push Changes
  env:
    # Exported for the gh CLI calls below
    GITHUB_TOKEN: ${{ secrets.PAT_GITHUB_ACTIONS }}
  run: |
    git config --global user.name "github-actions[bot]"
    git config --global user.email "github-actions[bot]@users.noreply.github.com"
    if git diff --quiet; then
      echo "✅ No changes detected after spellcheck. Exiting."
      exit 0
    fi
    git checkout -b spellcheck-auto-fixes
    git add .
    git commit -m "✅ Automated Spellcheck & Grammar Fixes"
    # Ensure local branch is up to date before pushing
    git pull --rebase origin spellcheck-auto-fixes || echo "No existing remote branch, continuing..."
    git push origin spellcheck-auto-fixes --force # Force push to update remote branch
    # gh pr create fails when an open pull request already exists for this head branch,
    # which would fail every re-run of the workflow. The push above has already updated
    # such a pull request, so only create one when none is open.
    open_pr_count=$(gh pr list --base main --head spellcheck-auto-fixes --state open --json number --jq 'length')
    if [ "$open_pr_count" = "0" ]; then
      gh pr create --base main --head spellcheck-auto-fixes --title "Automated Spellcheck & Grammar Fixes" --body "This PR contains automated spellcheck and grammar fixes."
    else
      echo "✅ Open pull request for spellcheck-auto-fixes already exists; it was updated by the push."
    fi