|
#!/bin/bash
# Spell-check markdown files with aspell.
#
# Requires apt packages: aspell, aspell-en (perl, git and column are also used).
# Adapted from https://github.com/eleven-labs/eleven-labs.github.io/blob/master/bin/check-spelling.sh
#
# Usage:
#   ./ci/spellcheck.sh [file ...]
#
# File selection, in order of precedence:
#   1. the files given as arguments (one file per argument);
#   2. the markdown files touched in $TRAVIS_COMMIT_RANGE, when it is set;
#   3. every markdown file tracked at HEAD.
#
# Words listed in ./.aspell.en.pws (resolved against the current directory)
# are accepted as correctly spelled.
#
# Exit status: 0 when no suspicious word was found, 1 otherwise (including a
# listed file that does not exist, or aspell not being installed).

# Real escape bytes rather than the literal text "\033": they can be emitted
# with printf '%s', so backslashes coming from file names or file content are
# never interpreted as escape sequences.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly BLUE=$'\033[0;36m'
readonly GREY=$'\033[0;90m'
readonly NC=$'\033[0m' # No Color

#######################################
# Removes the parts of a markdown document that must not be spell-checked.
# Every filter is line-oriented, so the input has to keep its line breaks.
# Steps, in order:
#   1. drop front-matter metadata lines (layout:, permalink:, date:, ...)
#   2. drop {: ... } attribute lists
#   3. drop HTML tags
#   4. drop fenced code blocks (from a line starting with ``` to the next one)
#   5. drop http(s) links
#   6. drop what is probably a domain or file name (something.ext)
# Inputs:  markdown on stdin
# Outputs: filtered text on stdout
#######################################
strip_markup() {
  grep -v -E '^(layout:|permalink:|date:|date_gmt:|authors:|categories:|tags:|cover:)(.*)' \
    | sed -E \
      -e 's/\{:([^\}]+)\}//g' \
      -e 's/<([^<]+)>//g' \
      -e '/^```/,/^```/d' \
      -e 's/http(s)?:\/\/([^ ]+)//g' \
    | perl -pe 's/\W\w*\.\w{2,}//g'
}

#######################################
# Prints an aligned table "file:line  word  context" with one row for every
# line of the file that contains one of the misspelled words.
# Arguments: $1 - path of the checked file
#            $2 - newline-separated list of misspelled words
# Outputs:   the table on stdout
#######################################
report_misspellings() {
  local file=$1 misspelled=$2
  local word matches location text context rows=''

  while IFS= read -r word; do
    # -F: the word is data, not a regular expression.
    matches=$(grep -n -F -- "$word" "$file")

    # Each match is "<line number>:<line text>". When the word cannot be
    # located in the raw file the here-string still yields one empty line, so
    # the word is reported with an empty location instead of being dropped.
    while IFS=: read -r location text; do
      # Up to 15 characters on either side of the word; only the first
      # occurrence is kept so that every table row stays on a single line.
      context=$(grep -o -E -- ".{0,15}${word}.{0,15}" <<< "$text" | head -n 1)
      rows+="${NC}${file}:${location}${NC}${RED}"$'\t'"${word}${NC}"$'\t'"${GREY}${context}${NC}"$'\n'
    done <<< "$matches"
  done <<< "$misspelled"

  printf '%s' "$rows" | column -ts $'\t'
}

#######################################
# Selects the files to check, spell-checks each of them and exits with the
# overall status.
# Globals:   TRAVIS_COMMIT_RANGE (read, optional)
# Arguments: optional list of files to check
#######################################
main() {
  local files file misspelled status=0

  if [[ -n "${1:-}" ]]; then
    # One file per argument, so names containing spaces stay intact.
    files=$(printf '%s\n' "$@")

    printf '%s\n' "${BLUE}>> Following markdown files are being checked:${NC}"
    printf '%s\n' "$files"
  elif [[ -n "${TRAVIS_COMMIT_RANGE:-}" ]]; then
    printf '%s\n' "${BLUE}>> Checking all files modified between ${TRAVIS_COMMIT_RANGE} ${NC}"
    # --diff-filter=ACMRT leaves out deleted files: they cannot be checked and
    # would otherwise be reported as missing and fail the run.
    files=$(git diff --name-only --diff-filter=ACMRT "$TRAVIS_COMMIT_RANGE" | grep '\.md$')

    printf '%s\n' "${BLUE}>> Following markdown files were changed in this pull request (commit range: ${TRAVIS_COMMIT_RANGE}):${NC}"
    printf '%s\n' "$files"
  else
    printf '%s\n' "${BLUE}>> Checking all .md files ${NC}"
    files=$(git ls-tree --full-tree --name-only -r HEAD | grep '\.md$')

    printf '%s\n' "${BLUE}>> Following markdown files were found in this repository:${NC}"
    printf '%s\n' "$files"
  fi

  if [[ -z "$files" ]]; then
    printf '%s\n' "${GREEN}>> No markdown file to check ${NC}"
    exit 0
  fi

  # Without aspell the word list would be empty and every file would be
  # reported as clean, so fail loudly instead.
  if ! command -v aspell > /dev/null; then
    printf '%s\n' "${RED}>> aspell is not installed ${NC}" >&2
    exit 1
  fi

  printf '%s\n' "${BLUE}>> Assuming language is 'en'. ${NC}"

  while IFS= read -r file; do
    printf '%s\n' "${BLUE}>> Checking file: ${file} ${NC}"

    if [[ ! -f "$file" ]]; then
      printf '%s\n' "${RED}>> File ${file} does not exist ${NC}"
      status=1
      continue
    fi

    misspelled=$(strip_markup < "$file" \
      | aspell --lang=en --encoding=utf-8 --personal=./.aspell.en.pws list \
      | sort -u)

    if [[ -z "$misspelled" ]]; then
      printf '%s\n' "${GREEN}>> No spelling errors were found ${NC}"
    else
      printf '%s\n' "${RED}>> Words that might be misspelled, please check:${NC}"
      report_misspellings "$file" "$misspelled"
      status=1
    fi
  done <<< "$files"

  exit "$status"
}

main "$@"
0 commit comments