Skip to content

Commit

Permalink
Bugfix: fix error where digits are categorized as emoji
Browse files Browse the repository at this point in the history
This is a bug in `unic_emoji_char`; see open-i18n/rust-unic#280 for details. I made this change because ASCII characters should never be categorized as emoji.
  • Loading branch information
kkew3 committed Feb 15, 2025
1 parent 4994982 commit 22c4d8a
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions pythonx/jieba_vim_rs_core/src/token/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ pub fn categorize_char(c: char, word_predicate: &WordPredicate) -> CharType {
match c {
SPACE!() => CharType::Space,
COMBINING_DIACRITICAL_MARK!() => CharType::CombiningDiacriticalMark,
c if unic_emoji_char::is_emoji(c) => CharType::Emoji,
c => match ascii_or(c) {
Ok(ascii) => {
if word_predicate.is_ascii_word(ascii) {
Expand All @@ -214,7 +213,9 @@ pub fn categorize_char(c: char, word_predicate: &WordPredicate) -> CharType {
CharType::Word(WordCharType::Other)
}
c => {
if c.is_alphabetic()
if unic_emoji_char::is_emoji(c) {
CharType::Emoji
} else if c.is_alphabetic()
&& word_predicate.is_unicode_alphabet_word()
{
CharType::Word(WordCharType::Other)
Expand Down

0 comments on commit 22c4d8a

Please sign in to comment.