Skip to content

Commit 0e62c0e

Browse files
authored
Improve mixed CJK/Latin linebreaking. (#1986)
1 parent: 5500895 · commit: 0e62c0e

File tree

1 file changed

+62
-18
lines changed

1 file changed

+62
-18
lines changed

crates/epaint/src/text/text_layout.rs

+62 -18
Original file line number | Diff line number | Diff line change
@@ -719,11 +719,11 @@ struct RowBreakCandidates {
719719
/// is always the primary candidate.
720720
space: Option<usize>,
721721

722-
/// Logograms (single character representing a whole word) are good candidates for line break.
723-
logogram: Option<usize>,
722+
/// Logograms (single character representing a whole word) or kana (Japanese hiragana and katakana) are good candidates for line break.
723+
cjk: Option<usize>,
724724

725-
/// Kana (Japanese hiragana and katakana) may be line broken unless before a gyōtō kinsoku character.
726-
kana: Option<usize>,
725+
/// Breaking anywhere before a CJK character is acceptable too.
726+
pre_cjk: Option<usize>,
727727

728728
/// Breaking at a dash is a super-
729729
/// good idea.
@@ -744,37 +744,38 @@ impl RowBreakCandidates {
744744
const NON_BREAKING_SPACE: char = '\u{A0}';
745745
if chr.is_whitespace() && chr != NON_BREAKING_SPACE {
746746
self.space = Some(index);
747-
} else if is_cjk_ideograph(chr) {
748-
self.logogram = Some(index);
747+
} else if is_cjk(chr) && (glyphs.len() == 1 || is_cjk_break_allowed(glyphs[1].chr)) {
748+
self.cjk = Some(index);
749749
} else if chr == '-' {
750750
self.dash = Some(index);
751751
} else if chr.is_ascii_punctuation() {
752752
self.punctuation = Some(index);
753-
} else if is_kana(chr) && (glyphs.len() == 1 || !is_gyoto_kinsoku(glyphs[1].chr)) {
754-
self.kana = Some(index);
753+
} else if glyphs.len() > 1 && is_cjk(glyphs[1].chr) {
754+
self.pre_cjk = Some(index);
755755
}
756756
self.any = Some(index);
757757
}
758758

759-
fn has_word_boundary(&self) -> bool {
760-
self.space.is_some() || self.logogram.is_some()
759+
fn word_boundary(&self) -> Option<usize> {
760+
[self.space, self.cjk, self.pre_cjk]
761+
.into_iter()
762+
.max()
763+
.flatten()
761764
}
762765

763766
fn has_good_candidate(&self, break_anywhere: bool) -> bool {
764767
if break_anywhere {
765768
self.any.is_some()
766769
} else {
767-
self.has_word_boundary()
770+
self.word_boundary().is_some()
768771
}
769772
}
770773

771774
fn get(&self, break_anywhere: bool) -> Option<usize> {
772775
if break_anywhere {
773776
self.any
774777
} else {
775-
self.space
776-
.or(self.kana)
777-
.or(self.logogram)
778+
self.word_boundary()
778779
.or(self.dash)
779780
.or(self.punctuation)
780781
.or(self.any)
@@ -796,10 +797,15 @@ fn is_kana(c: char) -> bool {
796797
}
797798

798799
#[inline]
799-
fn is_gyoto_kinsoku(c: char) -> bool {
800-
// Gyōtō (meaning "beginning of line") kinsoku characters in Japanese typesetting are characters that may not appear at the start of a line, according to kinsoku shori rules.
801-
// The list of gyōtō kinsoku characters can be found at https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line.
802-
")]}〕〉》」』】〙〗〟'\"⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.".contains(c)
800+
fn is_cjk(c: char) -> bool {
801+
// TODO: Add support for Korean Hangul.
802+
is_cjk_ideograph(c) || is_kana(c)
803+
}
804+
805+
#[inline]
806+
fn is_cjk_break_allowed(c: char) -> bool {
807+
// See: https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line.
808+
!")]}〕〉》」』】〙〗〟'\"⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.".contains(c)
803809
}
804810

805811
// ----------------------------------------------------------------------------
@@ -812,3 +818,41 @@ fn test_zero_max_width() {
812818
let galley = super::layout(&mut fonts, layout_job.into());
813819
assert_eq!(galley.rows.len(), 1);
814820
}
821+
822+
#[test]
823+
fn test_cjk() {
824+
let mut fonts = FontsImpl::new(1.0, 1024, super::FontDefinitions::default());
825+
let mut layout_job = LayoutJob::single_section(
826+
"日本語とEnglishの混在した文章".into(),
827+
super::TextFormat::default(),
828+
);
829+
layout_job.wrap.max_width = 90.0;
830+
let galley = super::layout(&mut fonts, layout_job.into());
831+
assert_eq!(
832+
galley
833+
.rows
834+
.iter()
835+
.map(|row| row.glyphs.iter().map(|g| g.chr).collect::<String>())
836+
.collect::<Vec<_>>(),
837+
vec!["日本語と", "Englishの混在", "した文章"]
838+
);
839+
}
840+
841+
#[test]
842+
fn test_pre_cjk() {
843+
let mut fonts = FontsImpl::new(1.0, 1024, super::FontDefinitions::default());
844+
let mut layout_job = LayoutJob::single_section(
845+
"日本語とEnglishの混在した文章".into(),
846+
super::TextFormat::default(),
847+
);
848+
layout_job.wrap.max_width = 100.0;
849+
let galley = super::layout(&mut fonts, layout_job.into());
850+
assert_eq!(
851+
galley
852+
.rows
853+
.iter()
854+
.map(|row| row.glyphs.iter().map(|g| g.chr).collect::<String>())
855+
.collect::<Vec<_>>(),
856+
vec!["日本語とEnglish", "の混在した文章"]
857+
);
858+
}

0 commit comments

Comments
 (0)