Skip to content

Commit 6f809e6

Browse files
authored
Update spellcheck.yml
1 parent 16bc9de commit 6f809e6

File tree

1 file changed

+18
-11
lines changed

1 file changed

+18
-11
lines changed

.github/workflows/spellcheck.yml

+18 -11
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ jobs:
1919
run: |
2020
pip install codespell
2121
pip install fuzzywuzzy[speedup]
22+
pip install nltk
2223
2324
- name: Verify Spellcheck Ignore List Exists
2425
run: |
@@ -38,7 +39,10 @@ jobs:
3839
# Process corrections with Python
3940
python3 <<EOF
4041
import re
42+
import nltk
4143
from fuzzywuzzy import process
44+
from nltk.tokenize import word_tokenize
45+
nltk.download('punkt')
4246
4347
# Load spellcheck ignore list with case sensitivity
4448
ignore_list = {}
@@ -109,16 +113,21 @@ jobs:
109113
if original.lower() in ignore_list and any(c.isupper() for c in original):
110114
corrected_word = ignore_list[original.lower()]
111115
112-
# Use English dictionary for weak matches
113-
elif should_use_ignore_list(original, suggestion, context_line):
114-
best_match, _ = process.extractOne(original, ignore_list.keys())
115-
if not is_code_or_url_or_file(context_line) and not is_markdown_link(context_line, original):
116-
corrected_word = ignore_list[best_match]
117-
else:
118-
corrected_word = best_match.lower() # Keep it lowercase in URLs/links/files
116+
# Tokenize line to avoid replacing partial words
117+
words = word_tokenize(context_line)
119118
120-
# Apply context-based correction
121-
corrected_line = apply_context_based_correction(context_line, original, corrected_word)
119+
# Ensure weak matches use the English dictionary
120+
if not should_use_ignore_list(original, suggestion, context_line):
121+
corrected_word = suggestion # Use dictionary match
122+
123+
# Apply corrections **only** to the first exact word match, not substrings
124+
for i, word in enumerate(words):
125+
if word.lower() == original.lower():
126+
words[i] = corrected_word
127+
break
128+
129+
# Rebuild corrected line
130+
corrected_line = " ".join(words)
122131
123132
# Write final output
124133
outfile.write(f"{file_path}:{line_number}: {original} ==> {corrected_word}\n")
@@ -147,13 +156,11 @@ jobs:
147156
BRANCH_NAME="spellcheck-fixes-$(date +%s)"
148157
git checkout -b $BRANCH_NAME
149158
150-
# Commit the changes if there are any
151159
if [ -n "$(git status --porcelain)" ]; then
152160
git add .
153161
git commit -m "Spellcheck: Automatically fixed detected misspellings"
154162
git push origin $BRANCH_NAME
155163
156-
# Create PR using GitHub CLI
157164
gh pr create \
158165
--base main \
159166
--head $BRANCH_NAME \

0 commit comments

Comments
 (0)