52
52
"identity provider": ["identiy provider", "identify provider"],
53
53
"access token": ["access toekn", "acess token"],
54
54
"user authentication": ["user authentification", "user authenthication"],
55
- "API gateway": ["API getway", "API gatway"]
55
+ "API gateway": ["API getway", "API gatway"],
56
+ "default reporter": ["defaul reporter"],
57
+ "default identity provider": ["defaul identity provider"]
56
58
}
57
59
58
60
# Function to check if a word is inside a code block, backticks, URL, or file reference
88
90
for wrong_phrase in wrong_variants:
89
91
if wrong_phrase in line:
90
92
return line.replace(wrong_phrase, correct_phrase)
91
- return line.replace(original, suggestion)
93
+ return line.replace(original, suggestion, 1) # Replace only the first occurrence
92
94
93
95
# Process spellcheck output and apply fixes
94
96
with open("spellcheck_report_raw.txt", "r", encoding="utf-8") as infile, open("spellcheck_report.txt", "w", encoding="utf-8") as outfile:
@@ -103,24 +105,21 @@ jobs:
103
105
content_lines = file.readlines()
104
106
context_line = content_lines[int(line_number) - 1].strip()
105
107
106
- # **Fix #1: Preserve case-sensitive ignored terms exactly**
107
- if original in ignore_list.values( ):
108
- corrected_word = original # Use exact case from original text
108
+ # Preserve case-sensitive ignored terms exactly
109
+ if original.lower() in ignore_list and any(c.isupper() for c in original ):
110
+ corrected_word = ignore_list[ original.lower()]
109
111
110
- # **Fix #2: Use English dictionary for weak matches**
112
+ # Use English dictionary for weak matches
111
113
elif should_use_ignore_list(original, suggestion, context_line):
112
114
best_match, _ = process.extractOne(original, ignore_list.keys())
113
115
if not is_code_or_url_or_file(context_line) and not is_markdown_link(context_line, original):
114
116
corrected_word = ignore_list[best_match]
115
117
else:
116
118
corrected_word = best_match.lower() # Keep it lowercase in URLs/links/files
117
119
118
- # **Fix #3: Apply context-based correction**
120
+ # Apply context-based correction
119
121
corrected_line = apply_context_based_correction(context_line, original, corrected_word)
120
122
121
- # **Fix #4: Replace only the first occurrence of the word**
122
- corrected_line = re.sub(r'\b' + re.escape(original) + r'\b', corrected_word, corrected_line, count=1)
123
-
124
123
# Write final output
125
124
outfile.write(f"{file_path}:{line_number}: {original} ==> {corrected_word}\n")
126
125
0 commit comments