Skip to content

Commit 8a99cee

Browse files
authored
Merge pull request #195 from alexbowers/spellchecker
Spellchecker
2 parents 2472dd3 + 22d6a78 commit 8a99cee

File tree

3 files changed

+131
-0
lines changed

3 files changed

+131
-0
lines changed

.travis.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Travis CI configuration: runs ci/spellcheck.sh for every branch.
language: generic  # no language-specific toolchain is requested

addons:
  apt:
    packages:
      - aspell
      - aspell-en
      - ack-grep  # installed as /usr/bin/ack-grep; symlinked to `ack` below

before_install:
  - chmod +x ci/spellcheck.sh
  # Make the ack-grep binary available under the shorter name `ack`.
  - sudo ln -sf /usr/bin/ack-grep /usr/local/bin/ack

script: ./ci/spellcheck.sh

branches:
  only:
    - gh-pages
    - /.*/  # regular expression that matches every branch name

ci/.aspell.en.pws

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
personal_ws-1.1 en 2
2+
github
3+
metadata

ci/spellcheck.sh

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/bin/bash
2+
# requires apt packages: aspell, aspell-en
3+
# Adapted from https://github.com/eleven-labs/eleven-labs.github.io/blob/master/bin/check-spelling.sh
4+
5+
# Usages:
6+
# Ensure you have aspell installed.
7+
# ./ci/spellcheck.sh [file,]
8+
9+
RED='\033[0;31m'
10+
GREEN='\033[0;32m'
11+
BLUE='\033[0;36m'
12+
LIGHT_GREY='\033[0;37m'
13+
GREY='\033[0;90m'
14+
NC='\033[0m' # No Color
15+
16+
if [ -n "$1" ]; then
17+
MARKDOWN_FILES_CHANGED=`echo "${@:1}" | tr " " "\n"`
18+
19+
echo -e "$BLUE>> Following markdown files are being checked:$NC"
20+
echo -e "$MARKDOWN_FILES_CHANGED"
21+
elif [ -n "$TRAVIS_COMMIT_RANGE" ]; then
22+
echo -e "$BLUE>> Checking all files modified between $MARKDOWN_FILES_CHANGED $NC"
23+
MARKDOWN_FILES_CHANGED=`(git diff --name-only $TRAVIS_COMMIT_RANGE || true) | grep .md`
24+
25+
echo -e "$BLUE>> Following markdown files were changed in this pull request (commit range: $TRAVIS_COMMIT_RANGE):$NC"
26+
echo -e "$MARKDOWN_FILES_CHANGED"
27+
else
28+
echo -e "$BLUE>> Checking all .md files $NC"
29+
MARKDOWN_FILES_CHANGED=`git ls-tree --full-tree --name-only -r HEAD | grep .md`
30+
31+
echo -e "$BLUE>> Following markdown files were changed in this repository:$NC"
32+
echo -e "$MARKDOWN_FILES_CHANGED"
33+
fi
34+
35+
36+
if [ -z "$MARKDOWN_FILES_CHANGED" ]
37+
then
38+
echo -e "$GREEN>> No markdown file to check $NC"
39+
40+
exit 0;
41+
fi
42+
43+
echo -e "$BLUE>> Assuming language is 'en'. $NC"
44+
45+
STATUS=0
46+
47+
while read -r file; do
48+
echo -e "$BLUE>> Checking file: $file $NC"
49+
50+
if [ ! -f $file ]; then
51+
echo -e "$RED>> File $file does not exist $NC"
52+
STATUS=1
53+
continue
54+
fi
55+
56+
# cat all markdown files that changed
57+
TEXT_CONTENT_WITHOUT_METADATA=`sed -E ':a;N;$!ba;s/\n/ /g' $file`
58+
59+
echo $TEXT_CONTENT_WITHOUT_METADATA >> before
60+
61+
# remove metadata tags
62+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | grep -v -E '^(layout:|permalink:|date:|date_gmt:|authors:|categories:|tags:|cover:)(.*)'`
63+
64+
# remove { } attributes
65+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | sed -E 's/\{:([^\}]+)\}//g'`
66+
67+
# remove html
68+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | sed -E 's/<([^<]+)>//g'`
69+
70+
# remove code blocks
71+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | sed -n '/^\`\`\`/,/^\`\`\`/ !p'`
72+
73+
# remove links
74+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | sed -E 's/http(s)?:\/\/([^ ]+)//g'`
75+
76+
# remove what is probably a domain
77+
TEXT_CONTENT_WITHOUT_METADATA=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | perl -pe 's/\W\w*\.\w{2,}//g'`
78+
79+
echo $TEXT_CONTENT_WITHOUT_METADATA >> after
80+
81+
MISSPELLED=`echo "$TEXT_CONTENT_WITHOUT_METADATA" | aspell --lang=en --encoding=utf-8 --personal=./.aspell.en.pws list | sort -u`
82+
83+
OUTPUT=""
84+
85+
if [ -z "$MISSPELLED" ]; then
86+
NB_MISSPELLED=0
87+
COMMENT="No spelling errors were found"
88+
echo -e "$GREEN>> $COMMENT $NC"
89+
else
90+
echo -e "$RED>> Words that might be misspelled, please check:$NC"
91+
92+
while read -r word; do
93+
line=`grep -n "$word" $file | awk -F ":" '{print $1}'`
94+
95+
while read -r location; do
96+
context=`awk -v word="$word" -v location="$location" 'word && NR == location' "$file"`
97+
context=`echo "$context" | ack -o ".{0,15}$word.{0,15}"`
98+
MSG="$NC$file:$location$NC$RED\t$word$NC\t$GREY$context$NC\n"
99+
OUTPUT="$OUTPUT$MSG\n"
100+
done <<< "$line"
101+
done <<< "$MISSPELLED"
102+
103+
echo -ne $OUTPUT | column -ts $'\t'
104+
105+
STATUS=1
106+
fi
107+
done <<< "$MARKDOWN_FILES_CHANGED"
108+
109+
exit $STATUS

0 commit comments

Comments
 (0)