Skip to content

Commit 53aa839

Browse files
authored
Merge pull request #209 from Dennis40816/clear-where-the-non-american-english-appears
Clear where non-American English appears
2 parents bb50402 + d39d920 commit 53aa839

File tree

1 file changed

+48
-7
lines changed

1 file changed

+48
-7
lines changed

scripts/commit-msg.hook

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,41 @@ read_commit_message() {
124124
done < $COMMIT_MSG_FILE
125125
}
126126

127+
#######################################
# Report the line on which each target word occurs in a multiline string.
#
# Targets are processed in the order given. Each search resumes just after
# the start of the previous match, so repeated targets resolve to successive
# occurrences and targets are expected in order of appearance in the text.
#
# A whole-word occurrence (no ASCII letter directly before or after) is
# preferred, so a target is not attributed to an earlier line where it only
# occurs inside a longer word. If no whole-word occurrence exists, the first
# plain substring occurrence is used instead.
#
# Arguments:
#   $1 - text to search (may span multiple lines)
#   $2 - newline-separated list of target words
# Outputs:
#   One "target: line" entry per target found, written to stdout.
#   Targets that cannot be found (and empty target lines) are skipped.
#######################################
get_all_match_positions() {
    local text="$1"
    local targets="$2"
    local start_line=1
    local start_col=1
    local target result line col

    while IFS= read -r target; do
        # An empty target (e.g. an empty list fed through a here-string)
        # can never be a meaningful match; awk implementations disagree on
        # what index(s, "") returns, so skip it before calling awk.
        if [ -z "$target" ]; then
            continue
        fi

        # Search from (start_line, start_col) onward. Prints "line col" of
        # the chosen match, or nothing when the target does not occur.
        result=$(
            awk -v t="$target" -v sl="$start_line" -v sc="$start_col" '
            {
                if (NR < sl) next
                # Only the first searched line starts mid-line.
                off = (NR == sl) ? sc : 1
                while ((p = index(substr($0, off), t)) > 0) {
                    c = off + p - 1
                    # Remember the first substring hit as a fallback.
                    if (!fb_line) { fb_line = NR; fb_col = c }
                    before = (c > 1) ? substr($0, c - 1, 1) : ""
                    after = substr($0, c + length(t), 1)
                    if (before !~ /[A-Za-z]/ && after !~ /[A-Za-z]/) {
                        print NR, c
                        found = 1
                        exit
                    }
                    off = c + 1
                }
            }
            END {
                if (!found && fb_line) print fb_line, fb_col
            }' <<< "$text"
        )

        # skip if the target is not found
        if [ -z "$result" ]; then
            continue
        fi

        # output and update states
        read -r line col <<< "$result"
        printf '%s: %s\n' "$target" "$line"
        start_line="$line"
        start_col=$((col + 1))
    done <<< "$targets"
}
161+
127162
#
128163
# Validate the contents of the commit msg against the good commit guidelines.
129164
#
@@ -348,8 +383,10 @@ done
348383
# 12. Avoid abusive language in commit message content
349384
# ------------------------------------------------------------------------------
350385

351-
FULL_COMMIT_MSG=$(sed '/^#/d;/^[[:space:]]*$/d;/^[[:space:]]*Change-Id:/d' "$COMMIT_MSG_FILE" | \
352-
sed -E "s@${URL_REGEX#^}@@g")
386+
FULL_COMMIT_MSG_WITH_SPACE=$(sed '/^#/d;/^[[:space:]]*Change-Id:/d' "$COMMIT_MSG_FILE" | \
387+
sed -E "s@${URL_REGEX#^}@@g")
388+
FULL_COMMIT_MSG=$(echo "$FULL_COMMIT_MSG_WITH_SPACE" | sed '/^[[:space:]]*$/d')
389+
353390
# Extended list of abusive words (case-insensitive).
354391
# Adjust the list as needed.
355392
ABUSIVE_WORDS_REGEX='\b(fuck|fucking|dick|shit|bitch|asshole|cunt|motherfucker|damn|crap|dumbass|piss)\b'
@@ -367,16 +404,20 @@ done
367404
add_warning 1 "Commit message appears to be written in Chinese: $MISSPELLED_WORDS"
368405
fi
369406

370-
# Remove quoted text and commit hashes from $FULL_COMMIT_MSG for spell checking.
371-
# Handles commit references like "commit 7d05741" (short) or full 40-char hashes.
372-
MSG_FOR_SPELLCHECK=$(echo "$FULL_COMMIT_MSG" | sed -E \
407+
MSG_FOR_SPELLCHECK_LINE_FINDING=$(echo "$FULL_COMMIT_MSG_WITH_SPACE" | sed -E \
373408
-e "s/(['\"][^'\"]*['\"])//g" \
374409
-e "s/\bcommit[[:space:]]+[0-9a-fA-F]{7,40}\b/commit/g")
375-
410+
MSG_FOR_SPELLCHECK=$(echo "$MSG_FOR_SPELLCHECK_LINE_FINDING" | sed '/^[[:space:]]*$/d')
411+
412+
376413
# Use aspell to list misspelled words according to American English, ignoring quoted text.
377414
MISSPELLED_WORDS=$(echo "$MSG_FOR_SPELLCHECK" | $ASPELL --lang=en --list --home-dir=scripts --personal=aspell-pws)
378415
if [ -n "$MISSPELLED_WORDS" ]; then
379-
add_warning 1 "Avoid using non-American English words"
416+
results=$(get_all_match_positions "$MSG_FOR_SPELLCHECK_LINE_FINDING" "$MISSPELLED_WORDS")
417+
418+
while read -r result; do
419+
add_warning "${result#*:}" "Avoid using non-American English words: ${result%%:*}"
420+
done <<< "$results"
380421
fi
381422
}
382423

0 commit comments

Comments
 (0)