-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
enhance(frontend): 文字列比較のためのローマナイズを強化(設定の検索) (#15632)
* enhance(frontend): 文字列比較のためのローマナイズを強化 * docs * fix * fix * fix * comment * wanakanaの初回ロードをコンポーネント内に移動 * comment * fix * add tests --------- Co-authored-by: syuilo <4439005+syuilo@users.noreply.github.com>
- Loading branch information
1 parent
bdb7453
commit f35eb0f
Showing
5 changed files
with
279 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* SPDX-FileCopyrightText: syuilo and misskey-project | ||
* SPDX-License-Identifier: AGPL-3.0-only | ||
*/ | ||
|
||
import { versatileLang } from '@@/js/intl-const.js'; | ||
import type { toHiragana as toHiraganaType } from 'wanakana'; | ||
|
||
let toHiragana: typeof toHiraganaType = (str?: string) => str ?? ''; | ||
let isWanakanaLoaded = false; | ||
|
||
/**
 * Sets up romaji conversion by lazily importing `wanakana`.
 *
 * The library is loaded on demand because it is not needed outside
 * Japanese-language environments. Call (and await) this before using the
 * comparison helpers in this module.
 *
 * @param forceWanakana - Load `wanakana` even when `versatileLang` does not include `'ja'`.
 */
export async function initIntlString(forceWanakana = false) {
	// Already loaded: nothing left to do.
	if (isWanakanaLoaded) return;

	// Only load for Japanese, unless the caller explicitly asks for it.
	const shouldLoad = forceWanakana || versatileLang.includes('ja');
	if (!shouldLoad) return;

	const wanakana = await import('wanakana');
	toHiragana = wanakana.toHiragana;
	isWanakanaLoaded = true;
}
|
||
/**
 * Normalizes a string for comparison:
 * - fullwidth alphanumerics become halfwidth
 * - halfwidth katakana become fullwidth
 * - hiragana/katakana followed by a separate (han)dakuten mark (e.g. `か` + U+3099) are composed
 * - variant forms are normalized
 * - everything is lowercased
 * - leading and trailing whitespace is trimmed
 */
export function normalizeString(str: string) {
	const graphemeSegmenter = new Intl.Segmenter(versatileLang, { granularity: 'grapheme' });

	// Apply NFKC to each grapheme cluster and stitch the results back together.
	let normalized = '';
	for (const { segment } of graphemeSegmenter.segment(str)) {
		normalized += segment.normalize('NFKC');
	}

	return normalized.toLowerCase().trim();
}
|
||
// Code points of characters that look like a hyphen/dash.
// List taken from https://qiita.com/non-caffeine/items/77360dda05c8ce510084
const hyphens = [
	0x002d, // hyphen-minus
	0x02d7, // modifier letter minus sign
	0x1173, // hangul jungseong eu
	0x1680, // ogham space mark
	0x1b78, // balinese musical symbol left-hand open pang
	0x2010, // hyphen
	0x2011, // non-breaking hyphen
	0x2012, // figure dash
	0x2013, // en dash
	0x2014, // em dash
	0x2015, // horizontal bar
	0x2043, // hyphen bullet
	0x207b, // superscript minus
	0x2212, // minus sign
	0x25ac, // black rectangle
	0x2500, // box drawings light horizontal
	0x2501, // box drawings heavy horizontal
	0x2796, // heavy minus sign
	0x30fc, // katakana-hiragana prolonged sound mark
	0x3161, // hangul letter eu
	0xfe58, // small em dash
	0xfe63, // small hyphen-minus
	0xff0d, // fullwidth hyphen-minus
	0xff70, // halfwidth katakana-hiragana prolonged sound mark
	0x10110, // aegean number ten
	0x10191, // roman uncia sign
];

// Each code point as a `\u{...}` escape, usable inside a `u`-flag character class.
const hyphensCodePoints = hyphens.map(code => `\\u{${code.toString(16).padStart(4, '0')}}`);

// Compiled once at module load rather than on every call.
// Sharing a global regex is safe here: String.prototype.replace resets
// `lastIndex` to 0 before matching with a global pattern.
const hyphensRegExp = new RegExp(`[${hyphensCodePoints.join('')}]`, 'ug');

/**
 * Replaces every hyphen-like character with an ASCII hyphen-minus (U+002D).
 *
 * Works around `ー` and `-` not being distinguishable when romaji is typed
 * with halfwidth input.
 *
 * @param str - Text to process.
 * @returns `str` with all characters from the hyphen list replaced by `-`.
 */
export function normalizeHyphens(str: string): string {
	return str.replace(hyphensRegExp, '\u002d');
}
|
||
/**
 * Applies `normalizeString`, then converts katakana and romaji to hiragana
 * and unifies hyphen-like characters.
 *
 * Text that is not actually romaji is still treated as romaji and converted,
 * so string comparisons should also check the plain `normalizeString` result.
 */
export function normalizeStringWithHiragana(str: string) {
	const base = normalizeString(str);
	// Long vowel marks are left untouched here; normalizeHyphens handles them afterwards.
	const kana = toHiragana(base, { convertLongVowelMark: false });
	return normalizeHyphens(kana);
}
|
||
/**
 * Whether `a` and `b` are considered the same string.
 *
 * They match if either their `normalizeString` forms or their
 * `normalizeStringWithHiragana` forms are equal.
 */
export function compareStringEquals(a: string, b: string) {
	// Plain normalized comparison first.
	if (normalizeString(a) === normalizeString(b)) return true;

	// Fall back to the hiragana-folded comparison.
	return normalizeStringWithHiragana(a) === normalizeStringWithHiragana(b);
}
|
||
/**
 * Whether `base` contains `query`.
 *
 * It matches if the containment holds for either the `normalizeString` forms
 * or the `normalizeStringWithHiragana` forms of both strings.
 */
export function compareStringIncludes(base: string, query: string) {
	// Plain normalized containment first.
	if (normalizeString(base).includes(normalizeString(query))) return true;

	// Fall back to the hiragana-folded containment check.
	return normalizeStringWithHiragana(base).includes(normalizeStringWithHiragana(query));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
/* | ||
* SPDX-FileCopyrightText: syuilo and misskey-project | ||
* SPDX-License-Identifier: AGPL-3.0-only | ||
*/ | ||
|
||
import { assert, beforeEach, describe, test } from 'vitest'; | ||
import { | ||
normalizeString, | ||
initIntlString, | ||
normalizeStringWithHiragana, | ||
compareStringEquals, | ||
compareStringIncludes, | ||
} from '@/scripts/intl-string.js'; | ||
|
||
/**
 * Registers the test cases shared by every normalizer under test.
 *
 * @param normalizeFn - The normalizer to exercise.
 */
const runCommonTests = (normalizeFn: (str: string) => string) => {
	test('全角英数字が半角の小文字になる', () => {
		// Fullwidth "B123" (U+FF22 U+FF11 U+FF12 U+FF13), written as escapes so the
		// fullwidth input cannot be silently folded to ASCII by tooling.
		// Chosen so that it is not treated as romaji.
		const input = '\uff22\uff11\uff12\uff13';
		const expected = 'b123';
		assert.strictEqual(normalizeFn(input), expected);
	});
	test('濁点・半濁点が正しく結合される', () => {
		const input = 'か\u3099';
		const expected = 'が';
		assert.strictEqual(normalizeFn(input), expected);
	});
	test('小文字に揃う', () => {
		// Chosen so that it is not treated as romaji.
		const input = 'tSt';
		const expected = 'tst';
		assert.strictEqual(normalizeFn(input), expected);
	});
	test('文字列の前後の空白が削除される', () => {
		const input = ' tst ';
		const expected = 'tst';
		assert.strictEqual(normalizeFn(input), expected);
	});
};
|
||
// Tests for normalizeString
describe('normalize string', () => {
	runCommonTests(normalizeString);

	test('異体字の正規化 (ligature)', () => {
		// U+FB01 LATIN SMALL LIGATURE FI, escaped so the input stays a ligature.
		const input = '\ufb01';
		const expected = 'fi';
		assert.strictEqual(normalizeString(input), expected);
	});

	test('半角カタカナは全角に変換される', () => {
		// Halfwidth katakana (U+FF76 U+FF80 U+FF76 U+FF85), escaped so the input stays halfwidth.
		const input = '\uff76\uff80\uff76\uff85';
		const expected = 'カタカナ';
		assert.strictEqual(normalizeString(input), expected);
	});
});
|
||
// Tests for normalizeStringWithHiragana
describe('normalize string with hiragana', () => {
	// Force-load wanakana regardless of the configured language.
	beforeEach(async () => {
		await initIntlString(true);
	});

	// Shared test cases
	describe('共通のnormalizeStringテスト', () => {
		runCommonTests(normalizeStringWithHiragana);
	});

	test('半角カタカナがひらがなに変換される', () => {
		// Halfwidth katakana (U+FF76 U+FF80 U+FF76 U+FF85), escaped so the input stays halfwidth.
		const input = '\uff76\uff80\uff76\uff85';
		const expected = 'かたかな';
		assert.strictEqual(normalizeStringWithHiragana(input), expected);
	});

	// Cases specific to normalizeStringWithHiragana
	test('カタカナがひらがなに変換される・伸ばし棒はハイフンに変換される', () => {
		const input = 'カタカナひーらがーな';
		const expected = 'かたかなひ-らが-な';
		assert.strictEqual(normalizeStringWithHiragana(input), expected);
	});

	test('ローマ字がひらがなに変換される', () => {
		const input = 'ro-majimohiragananinarimasu';
		const expected = 'ろ-まじもひらがなになります';
		assert.strictEqual(normalizeStringWithHiragana(input), expected);
	});
});
|
||
// Tests for compareStringEquals
describe('compareStringEquals', () => {
	// Force-load wanakana regardless of the configured language.
	beforeEach(async () => {
		await initIntlString(true);
	});

	test('完全一致ならtrue', () => {
		assert.isTrue(compareStringEquals('テスト', 'テスト'));
	});

	test('大文字・小文字の違いを無視', () => {
		assert.isTrue(compareStringEquals('TeSt', 'test'));
	});

	test('全角・半角の違いを無視', () => {
		// Fullwidth "ABC" (U+FF21 U+FF22 U+FF23), escaped so the input stays fullwidth.
		assert.isTrue(compareStringEquals('\uff21\uff22\uff23', 'abc'));
	});

	test('カタカナとひらがなの違いを無視', () => {
		assert.isTrue(compareStringEquals('カタカナ', 'かたかな'));
	});

	test('ローマ字をひらがなと比較可能', () => {
		assert.isTrue(compareStringEquals('hiragana', 'ひらがな'));
	});

	test('異なる文字列はfalse', () => {
		assert.isFalse(compareStringEquals('テスト', 'サンプル'));
	});
});
|
||
// Tests for compareStringIncludes
describe('compareStringIncludes', () => {
	// The kana and romaji cases below need wanakana. Load it here so this suite
	// does not depend on another suite having run first (e.g. when filtered
	// with `-t` or `.only`).
	beforeEach(async () => {
		await initIntlString(true);
	});

	test('部分一致ならtrue', () => {
		assert.isTrue(compareStringIncludes('これはテストです', 'テスト'));
	});

	test('大文字・小文字の違いを無視', () => {
		assert.isTrue(compareStringIncludes('This is a Test', 'test'));
	});

	test('全角・半角の違いを無視', () => {
		// Fullwidth "ABCDE" (U+FF21..U+FF25), escaped so the input stays fullwidth.
		assert.isTrue(compareStringIncludes('\uff21\uff22\uff23\uff24\uff25', 'abc'));
	});

	test('カタカナとひらがなの違いを無視', () => {
		assert.isTrue(compareStringIncludes('カタカナのテスト', 'かたかな'));
	});

	test('ローマ字をひらがなと比較可能', () => {
		assert.isTrue(compareStringIncludes('これはhiraganaのテスト', 'ひらがな'));
	});

	test('異なる文字列はfalse', () => {
		assert.isFalse(compareStringIncludes('これはテストです', 'サンプル'));
	});
});
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
f35eb0f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Chromatic detects changes. Please review the changes on Chromatic.