@@ -719,11 +719,11 @@ struct RowBreakCandidates {
719
719
/// is always the primary candidate.
720
720
space : Option < usize > ,
721
721
722
- /// Logograms (single character representing a whole word) are good candidates for line break.
723
- logogram : Option < usize > ,
722
+ /// Logograms (single character representing a whole word) or kana (Japanese hiragana and katakana) are good candidates for a line break.
723
+ cjk : Option < usize > ,
724
724
725
- /// Kana (Japanese hiragana and katakana) may be line broken unless before a gyōtō kinsoku character.
726
- kana : Option < usize > ,
725
+ /// Breaking anywhere before a CJK character is acceptable too.
726
+ pre_cjk : Option < usize > ,
727
727
728
728
/// Breaking at a dash is a super-
729
729
/// good idea.
@@ -744,37 +744,38 @@ impl RowBreakCandidates {
744
744
const NON_BREAKING_SPACE : char = '\u{A0}' ;
745
745
if chr. is_whitespace ( ) && chr != NON_BREAKING_SPACE {
746
746
self . space = Some ( index) ;
747
- } else if is_cjk_ideograph ( chr) {
748
- self . logogram = Some ( index) ;
747
+ } else if is_cjk ( chr) && ( glyphs . len ( ) == 1 || is_cjk_break_allowed ( glyphs [ 1 ] . chr ) ) {
748
+ self . cjk = Some ( index) ;
749
749
} else if chr == '-' {
750
750
self . dash = Some ( index) ;
751
751
} else if chr. is_ascii_punctuation ( ) {
752
752
self . punctuation = Some ( index) ;
753
- } else if is_kana ( chr ) && ( glyphs. len ( ) == 1 || ! is_gyoto_kinsoku ( glyphs[ 1 ] . chr ) ) {
754
- self . kana = Some ( index) ;
753
+ } else if glyphs. len ( ) > 1 && is_cjk ( glyphs[ 1 ] . chr ) {
754
+ self . pre_cjk = Some ( index) ;
755
755
}
756
756
self . any = Some ( index) ;
757
757
}
758
758
759
- fn has_word_boundary ( & self ) -> bool {
760
- self . space . is_some ( ) || self . logogram . is_some ( )
759
+ fn word_boundary ( & self ) -> Option < usize > {
760
+ [ self . space , self . cjk , self . pre_cjk ]
761
+ . into_iter ( )
762
+ . max ( )
763
+ . flatten ( )
761
764
}
762
765
763
766
fn has_good_candidate ( & self , break_anywhere : bool ) -> bool {
764
767
if break_anywhere {
765
768
self . any . is_some ( )
766
769
} else {
767
- self . has_word_boundary ( )
770
+ self . word_boundary ( ) . is_some ( )
768
771
}
769
772
}
770
773
771
774
fn get ( & self , break_anywhere : bool ) -> Option < usize > {
772
775
if break_anywhere {
773
776
self . any
774
777
} else {
775
- self . space
776
- . or ( self . kana )
777
- . or ( self . logogram )
778
+ self . word_boundary ( )
778
779
. or ( self . dash )
779
780
. or ( self . punctuation )
780
781
. or ( self . any )
@@ -796,10 +797,15 @@ fn is_kana(c: char) -> bool {
796
797
}
797
798
798
799
#[ inline]
799
- fn is_gyoto_kinsoku ( c : char ) -> bool {
800
- // Gyōtō (meaning "beginning of line") kinsoku characters in Japanese typesetting are characters that may not appear at the start of a line, according to kinsoku shori rules.
801
- // The list of gyōtō kinsoku characters can be found at https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line.
802
- ")]}〕〉》」』】〙〗〟'\" ⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。." . contains ( c)
800
+ fn is_cjk ( c : char ) -> bool {
801
+ // TODO: Add support for Korean Hangul.
802
+ is_cjk_ideograph ( c) || is_kana ( c)
803
+ }
804
+
/// Returns `true` unless `c` is one of the characters that may not appear at
/// the start of a line under East Asian line breaking rules.
#[inline]
fn is_cjk_break_allowed(c: char) -> bool {
    // Characters not permitted at the start of a line.
    // See: https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages#Characters_not_permitted_on_the_start_of_a_line.
    const NOT_ALLOWED_AT_LINE_START: &str = ")]}〕〉》」』】〙〗〟'\" ⦆»ヽヾーァィゥェォッャュョヮヵヶぁぃぅぇぉっゃゅょゎゕゖㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ々〻‐゠–〜?!‼⁇⁈⁉・、:;,。.";

    NOT_ALLOWED_AT_LINE_START
        .chars()
        .all(|forbidden| forbidden != c)
}
804
810
805
811
// ----------------------------------------------------------------------------
@@ -812,3 +818,41 @@ fn test_zero_max_width() {
812
818
let galley = super :: layout ( & mut fonts, layout_job. into ( ) ) ;
813
819
assert_eq ! ( galley. rows. len( ) , 1 ) ;
814
820
}
821
+
/// Lays out mixed Japanese/English text with a maximum width of 90 and checks
/// the text of each resulting row.
#[test]
fn test_cjk() {
    let mut fonts = FontsImpl::new(1.0, 1024, super::FontDefinitions::default());

    let text = "日本語とEnglishの混在した文章";
    let mut job = LayoutJob::single_section(text.into(), super::TextFormat::default());
    job.wrap.max_width = 90.0;

    let galley = super::layout(&mut fonts, job.into());

    // Rebuild each row's text from its glyphs so the rows can be compared as strings.
    let row_texts: Vec<String> = galley
        .rows
        .iter()
        .map(|row| row.glyphs.iter().map(|glyph| glyph.chr).collect())
        .collect();

    assert_eq!(row_texts, vec!["日本語と", "Englishの混在", "した文章"]);
}
840
+
/// Lays out mixed Japanese/English text with a maximum width of 100 and checks
/// that the row break lands between the Latin word and the kana that follows it.
#[test]
fn test_pre_cjk() {
    let mut fonts = FontsImpl::new(1.0, 1024, super::FontDefinitions::default());

    let text = "日本語とEnglishの混在した文章";
    let mut job = LayoutJob::single_section(text.into(), super::TextFormat::default());
    job.wrap.max_width = 100.0;

    let galley = super::layout(&mut fonts, job.into());

    // Rebuild each row's text from its glyphs so the rows can be compared as strings.
    let row_texts: Vec<String> = galley
        .rows
        .iter()
        .map(|row| row.glyphs.iter().map(|glyph| glyph.chr).collect())
        .collect();

    assert_eq!(row_texts, vec!["日本語とEnglish", "の混在した文章"]);
}
0 commit comments