Skip to content

Commit 9661cc4

Browse files
2992: Use unicode character classes to keep unicode letters and numbers
1 parent 49a5832 commit 9661cc4

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

shared/utils/__tests__/normalizeString.spec.ts

+11
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,15 @@ describe('normalizeString', () => {
1515
expect(normalizeString('Bergstraße')).toBe('bergstrasse')
1616
expect(normalizeString('äßoßsß')).toBe('assosssss')
1717
})
18+
19+
it('should keep characters in other alphabets', () => {
20+
expect(normalizeString('اللغة')).toBe('اللغة')
21+
expect(normalizeString('በጀርመን')).toBe('በጀርመን')
22+
expect(normalizeString('त')).toBe('त')
23+
expect(normalizeString('本页内容来自')).toBe('本页内容来自')
24+
})
25+
26+
it('should keep numbers', () => {
27+
expect(normalizeString('غ12te57ة')).toBe('غ12te57ة')
28+
})
1829
})

shared/utils/normalizeString.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// eslint-disable-next-line no-control-regex
2-
const nonAsciiRegex = /[^\x00-\x7F\xDF]/g
1+
// Matches every character that is neither a Unicode letter nor a Unicode number.
// Despite its name, this pattern keeps non-ASCII letters and digits.
// No `|` between the property escapes: inside a character class it would be a
// literal pipe and would exempt '|' from removal.
const nonAsciiRegex = /[^\p{Letter}\p{Number}]/gu
32

43
// Applies NFKD normalization, removes every character matched by nonAsciiRegex,
// and finally rewrites each remaining 'ß' as 'ss'.
const normalizeToAscii = (str: string): string => {
  const decomposed = str.normalize('NFKD')
  const stripped = decomposed.replace(nonAsciiRegex, '')
  return stripped.replace(/ß/g, 'ss')
}
54

0 commit comments

Comments
 (0)