Skip to content

Commit bd9ed2a

Browse files
authored
Update spellcheck.yml
1 parent 9125424 commit bd9ed2a

File tree

1 file changed

+16
-17
lines changed

1 file changed

+16
-17
lines changed

.github/workflows/spellcheck.yml

+16 -17
Original file line number | Diff line number | Diff line change
@@ -57,29 +57,28 @@ jobs:
5757
5858
# Function to check if a word is inside a code block, backticks, URL, or file reference
5959
def is_code_or_url_or_file(line):
60-
return re.search(r'`.*?`|https?://\S+|www\.\S+|/[\w./-]+', line)
60+
return bool(re.search(r'`.*?`|https?://\S+|www\.\S+|/[\w./-]+', line))
6161
6262
# Function to check if a word is part of a Markdown link
6363
def is_markdown_link(line, original):
64-
return re.search(r'\[.*?\]\(.*' + re.escape(original) + r'.*\)', line)
64+
return bool(re.search(r'\[.*?\]\(.*' + re.escape(original) + r'.*\)', line))
6565
6666
# Function to determine if an ignore list word should be used
6767
def should_use_ignore_list(original, suggestion, line):
68-
best_match, score = process.extractOne(original, ignore_list.keys())
68+
best_match, score = process.extractOne(original, ignore_list.keys(), scorer=lambda x, y: sum(c1 == c2 for c1, c2 in zip(x, y)))
6969
7070
# Must be at least 90% similar to be considered a match
7171
if score < 90:
7272
return False
7373
74-
# Reject if original contains best_match as a substring (e.g., "certifcate" vs "CE")
74+
# Reject if original contains best_match as a substring
7575
if best_match in original and len(original) > len(best_match):
7676
return False
7777
78-
# Enforce case-sensitive corrections for regular text, but NOT for file references/URLs/links
78+
# Enforce case-sensitive corrections for regular text, but lowercase for files/URLs
7979
if not is_code_or_url_or_file(line) and not is_markdown_link(line, original):
8080
return best_match in ignore_list
8181
82-
# Allow case-insensitive corrections for code blocks, backticks, URLs, and markdown links
8382
return best_match.lower() in ignore_list
8483
8584
# Function to apply context-based correction
@@ -88,7 +87,7 @@ jobs:
8887
for wrong_phrase in wrong_variants:
8988
if wrong_phrase in line:
9089
return line.replace(wrong_phrase, correct_phrase)
91-
return line.replace(original, suggestion)
90+
return re.sub(r'\b' + re.escape(original) + r'\b', suggestion, line, count=1)
9291
9392
# Process spellcheck output and apply fixes
9493
with open("spellcheck_report_raw.txt", "r", encoding="utf-8") as infile, open("spellcheck_report.txt", "w", encoding="utf-8") as outfile:
@@ -103,24 +102,24 @@ jobs:
103102
content_lines = file.readlines()
104103
context_line = content_lines[int(line_number) - 1].strip()
105104
106-
# **Fix #1: Preserve case-sensitive ignored terms exactly**
107-
if original in ignore_list.values():
108-
corrected_word = original # Use exact case from ignore list
105+
# Fix #1: Ensure case-sensitive corrections match exactly
106+
if original.lower() in ignore_list:
107+
corrected_word = ignore_list[original.lower()]
109108
110-
# **Fix #2: Use English dictionary for weak matches**
109+
# Fix #2: Use English dictionary for weak matches
111110
elif should_use_ignore_list(original, suggestion, context_line):
112111
best_match, _ = process.extractOne(original, ignore_list.keys())
113-
if not is_code_or_url_or_file(context_line) and not is_markdown_link(context_line, original):
114-
corrected_word = ignore_list[best_match]
115-
else:
116-
corrected_word = best_match.lower() # Keep it lowercase in URLs/links/files
112+
corrected_word = ignore_list[best_match] if not is_code_or_url_or_file(context_line) else best_match.lower()
117113
118-
# **Fix #3: Apply context-based correction**
114+
# Fix #3: Apply context-based correction
119115
corrected_line = apply_context_based_correction(context_line, original, corrected_word)
120116
121-
# **Fix #4: Replace only the first occurrence of the word**
117+
# Fix #4: Replace only the first occurrence of the word
122118
corrected_line = re.sub(r'\b' + re.escape(original) + r'\b', corrected_word, corrected_line, count=1)
123119
120+
# Debugging Output
121+
print(f"🔍 Correction: {original} -> {corrected_word} in {file_path}:{line_number}")
122+
124123
# Write final output
125124
outfile.write(f"{file_path}:{line_number}: {original} ==> {corrected_word}\n")
126125

0 commit comments

Comments
 (0)