# Automated Spellcheck & PR Creation #11
# (Workflow definition captured from the GitHub Actions run page.)

# Manually triggered workflow that runs codespell over the repository,
# applies the first suggested correction for every reported typo, and
# publishes the result on the `spellcheck-auto-fixes` branch as a pull
# request against `main`.
name: Automated Spellcheck & PR Creation

on:
  workflow_dispatch:  # Runs only when manually triggered

permissions:
  contents: write  # Needed to push the branch that carries the fixes

jobs:
  spellcheck_and_apply:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # Only codespell is required: the script below uses nothing but the
      # Python standard library besides the codespell executable.
      - name: Install Dependencies
        run: pip install codespell

      # Fail fast with a clear message; codespell is invoked with
      # --ignore-words pointing at this file.
      - name: Verify Spellcheck Ignore List Exists
        run: |
          if [ ! -f .github/spellcheck-ignore.txt ]; then
            echo "❌ Error: spellcheck-ignore.txt not found!"
            exit 1
          fi

      - name: Run Spellcheck and Apply Corrections
        run: |
          set -e # Exit on error
          # The heredoc delimiter is quoted so the shell never expands `$`
          # or backticks that appear inside the Python source.
          python3 <<'EOF'
          import os
          import re
          import subprocess
          import sys

          IGNORE_FILE = ".github/spellcheck-ignore.txt"

          # Map each ignore word (lowercased) to the spelling exactly as it
          # is written in the ignore file, so corrections can be forced to
          # that capitalization.
          ignore_list = {}
          if os.path.exists(IGNORE_FILE):
              with open(IGNORE_FILE, "r", encoding="utf-8") as f:
                  for raw in f:
                      word = raw.strip()
                      if word:  # a blank line must not create an "" entry
                          ignore_list[word.lower()] = word

          # Correct phrase -> known misspelled variants. When a variant
          # occurs on the reported line, the word at the same position in
          # the correct phrase overrides codespell's own suggestion.
          context_phrases = {
              "identity provider": ["identiy provider", "identify provider"],
              "access token": ["access toekn", "acess token"],
              "user authentication": ["user authentification", "user authenthication"],
              "API gateway": ["API getway", "API gatway"]
          }

          # One codespell report line: "<path>:<line>: <word> ==> <fix>".
          # With several candidates only the first one is captured (with a
          # trailing comma that is stripped further down).
          REPORT_LINE = re.compile(r"(.*):(\d+): (\S+) ==> (\S+)")

          # A Markdown link whose target is an http(s) URL.
          MARKDOWN_URL = re.compile(r"\[.*?\]\(https?://.*?\)")


          def replace_first_instance(text, old, new):
              """Replace the first whole-word occurrence of old in text.

              A function is used as the replacement so that `new` is
              inserted literally; passing it as a template string would
              misread backslashes or leading digits as group references.
              """
              pattern = r"\b" + re.escape(old) + r"\b"
              return re.sub(pattern, lambda _m: new, text, count=1)


          # Run codespell and capture its report.
          result = subprocess.run(
              [
                  "codespell",
                  "--ignore-words=" + IGNORE_FILE,
                  "--skip=.git,*.lock,*.json,*.yaml,*.yml,*.css,*.html",
                  "--quiet-level=2",
              ],
              capture_output=True,
              text=True,
              check=False,  # codespell exits non-zero when it finds typos
          )
          spellcheck_output = result.stdout.strip()

          if not spellcheck_output:
              if result.returncode != 0:
                  # Non-zero exit without any report means codespell itself
                  # failed; surface that instead of claiming success.
                  print(result.stderr.strip(), file=sys.stderr)
                  sys.exit(result.returncode)
              print("✅ No spelling corrections found. Exiting.")
              sys.exit(0)

          # Process and apply corrections one report line at a time. The
          # file is re-read for every correction so earlier fixes in the
          # same file are preserved.
          for report in spellcheck_output.splitlines():
              match = REPORT_LINE.match(report)
              if not match:
                  continue
              file_path, line_no, original, suggestion = match.groups()
              index = int(line_no) - 1  # zero-based index into the file

              # newline="" keeps the file's own line endings (LF or CRLF)
              # untouched when the lines are written back.
              try:
                  with open(file_path, "r", encoding="utf-8", newline="") as src:
                      lines = src.readlines()
              except (OSError, UnicodeDecodeError) as err:
                  print(f"⚠️ Skipping {file_path}: {err}")
                  continue

              if not 0 <= index < len(lines):
                  continue
              context_line = lines[index]

              # Deliberately conservative: a line containing a Markdown
              # link to an http(s) URL is skipped entirely so that a URL
              # is never rewritten.
              if MARKDOWN_URL.search(context_line):
                  continue

              # Always enforce the exact capitalization of ignore words,
              # whether the reported word or the proposed fix is one.
              if original.lower() in ignore_list:
                  suggestion = ignore_list[original.lower()]
              if suggestion.lower() in ignore_list:
                  suggestion = ignore_list[suggestion.lower()]

              # Replace only the misspelled word, not the full phrase.
              for correct_phrase, wrong_variants in context_phrases.items():
                  for wrong_phrase in wrong_variants:
                      if wrong_phrase in context_line:
                          correct_words = correct_phrase.split()
                          wrong_words = wrong_phrase.split()
                          for i in range(len(wrong_words)):
                              if wrong_words[i] == original:
                                  suggestion = correct_words[i]
                                  break

              # Never introduce punctuation the original word did not have
              # (this also drops the comma after the first of several
              # codespell candidates).
              if suggestion.endswith((",", ".", ";")) and not original.endswith((",", ".", ";")):
                  suggestion = suggestion.rstrip(",.;")

              updated_line = replace_first_instance(context_line, original, suggestion)

              # Write back only when the line actually changed.
              if updated_line != context_line:
                  lines[index] = updated_line
                  with open(file_path, "w", encoding="utf-8", newline="") as dst:
                      dst.writelines(lines)
                  print(f"✅ Corrected '{original}' to '{suggestion}' in {file_path} (line {index + 1})")

          print("✅ All corrections applied successfully.")
          EOF

      - name: Commit and Push Changes
        env:
          # Personal access token picked up by the `gh` CLI.
          GITHUB_TOKEN: ${{ secrets.PAT_GITHUB_ACTIONS }}
        run: |
          set -e
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          if git diff --quiet; then
            echo "✅ No changes detected after spellcheck. Exiting."
            exit 0
          fi
          # -B (re)creates the branch from the current checkout even if a
          # branch of that name already exists locally.
          git checkout -B spellcheck-auto-fixes
          # Stage tracked modifications only, matching the `git diff` check
          # above; stray untracked files are never committed.
          git add --update
          git commit -m "✅ Automated Spellcheck & Grammar Fixes"
          # The branch is regenerated from the current checkout on every
          # run, so the remote copy is simply overwritten. No rebase onto
          # the old remote branch is attempted: it could stop on conflicts
          # and leave the repository in a half-rebased state.
          git push origin spellcheck-auto-fixes --force
          # `gh pr create` fails when a PR for this head branch is already
          # open; in that case the force-push above has updated it.
          if [ -n "$(gh pr list --head spellcheck-auto-fixes --state open --json number --jq '.[].number')" ]; then
            echo "Pull request for spellcheck-auto-fixes already open; branch updated."
          else
            gh pr create --base main --head spellcheck-auto-fixes \
              --title "Automated Spellcheck & Grammar Fixes" \
              --body "This PR contains automated spellcheck and grammar fixes."
          fi