Skip to content

Commit b1f5d15

Browse files
committed
1 parent 243d348 commit b1f5d15

File tree

2 files changed

+57
-9
lines changed

2 files changed

+57
-9
lines changed

lib/isodoc/i18n.rb

+36-8
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@ def set(key, val)
5454
@labels[key] = val
5555
end
5656

57-
def initialize(lang, script, i18nyaml: nil, i18nhash: nil)
57+
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
5858
@lang = lang
5959
@script = script
60+
@locale = locale
6061
y = load_yaml(lang, script, i18nyaml, i18nhash)
6162
@labels = y
6263
@labels["language"] = @lang
@@ -66,17 +67,16 @@ def initialize(lang, script, i18nyaml: nil, i18nhash: nil)
6667
end
6768
end
6869

69-
def self.l10n(text, lang = @lang, script = @script)
70-
l10n(text, lang, script)
70+
# Class-level variant of l10n; forwards text, lang, script and locale.
# NOTE(review): inside a singleton method the unqualified `l10n(...)` call
# dispatches to this same class method, not to the instance method, so as
# written this looks like it recurses without terminating. The @lang, @script
# and @locale defaults are also read from the class object rather than from
# an instance. Confirm the intended receiver before relying on this method.
def self.l10n(text, lang = @lang, script = @script, locale = @locale)
71+
l10n(text, lang, script, locale)
7172
end
7273

73-
# TODO: move to localization file
7474
# function localising spaces and punctuation.
7575
# Not clear if period needs to be localised for zh
76-
def l10n(text, lang = @lang, script = @script)
77-
if lang == "zh" && script == "Hans" then l10n_zh(text)
78-
else bidiwrap(text, lang, script)
79-
end
76+
# Localise the spacing and punctuation of text.
# lang, script and locale default to this instance's @lang, @script, @locale.
# Returns the value produced by bidiwrap for the (possibly rewritten) text.
def l10n(text, lang = @lang, script = @script, locale = @locale)
77+
# Chinese in Hans script: replace text with the result of l10n_zh.
lang == "zh" && script == "Hans" and text = l10n_zh(text)
78+
# French: replace text with the result of l10n_fr; a nil locale falls back
# to "FR".
lang == "fr" && text = l10n_fr(text, locale || "FR")
79+
# Every language, including the two handled above, goes through bidiwrap.
bidiwrap(text, lang, script)
8080
end
8181

8282
def bidiwrap(text, lang, script)
@@ -107,23 +107,51 @@ def l10n_zh(text)
107107
xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
108108
end
109109

110+
# Apply French punctuation spacing to the text nodes of a markup string.
# text is parsed as an HTML document fragment; locale is passed through to
# l10n_fr1. Returns the fragment serialised with to_xml.
def l10n_fr(text, locale)
111+
xml = Nokogiri::HTML::DocumentFragment.parse(text)
112+
xml.traverse do |n|
113+
# Only text nodes are rewritten; all other node types are skipped.
next unless n.text?
114+
115+
# Replace the node with its localised text, post-processed by
# cleanup_entities (defined elsewhere; presumably normalises character
# entities, TODO confirm).
n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116+
end
117+
xml.to_xml
118+
end
119+
110120
# Regexp source (a String alternation, not a Regexp) matching a character in
# the Han script, the CJK Symbols And Punctuation block, or the Halfwidth And
# Fullwidth Forms block. It is interpolated into the patterns of the
# l10n_zh_* helpers.
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
111121
"\\p{In Halfwidth And Fullwidth Forms}".freeze
112122

113123
# note: we can't differentiate comma from enumeration comma 、
114124
# Localise one string for Chinese: punctuation is converted first by
# l10n_zh_punct, then spaces are stripped by l10n_zh_remove_space.
# NOTE(review): the name is spelled l10_zh1, without the "n" used by the
# sibling l10n_* methods; callers presumably use this spelling, so confirm
# before renaming.
def l10_zh1(text)
125+
l10n_zh_remove_space(l10n_zh_punct(text))
126+
end
127+
128+
# Replace ASCII punctuation with its Chinese full-width counterpart.
# Each two-character string pairs the ASCII character (m[0]) with its
# replacement (m[1]). Returns the rewritten text.
def l10n_zh_punct(text)
# Closing and sentence punctuation. The colon pair appears twice in this list.
["::", ",,", ".。", "))", "]】", "::", ";;", "??", "!!"].each do |m|
# Replace when the character immediately follows a ZH_CHAR character...
text = text.gsub(/(?<=#{ZH_CHAR})#{Regexp.quote m[0]}/, m[1])
131+
# ...and also when it is the first character of a line.
text = text.gsub(/^#{Regexp.quote m[0]}/, m[1])
117132
end
118133
# Opening brackets are replaced when immediately followed by a ZH_CHAR
# character.
["((", "[【"].each do |m|
119134
text = text.gsub(/#{Regexp.quote m[0]}(?=#{ZH_CHAR})/, m[1])
120135
end
136+
text
137+
end
138+
139+
# Remove single spaces that sit next to Chinese characters. A space is
# dropped when it lies:
#   - between two ZH_CHAR characters;
#   - between a digit and a ZH_CHAR character, in either order;
#   - between a ZH_CHAR character and a single ASCII letter that is itself
#     followed by a ZH_CHAR character or the end of a line.
# The /o flag interpolates ZH_CHAR into each pattern only once.
def l10n_zh_remove_space(text)
121140
text.gsub(/(?<=#{ZH_CHAR}) (?=#{ZH_CHAR})/o, "")
122141
.gsub(/(?<=\d) (?=#{ZH_CHAR})/o, "")
123142
.gsub(/(?<=#{ZH_CHAR}) (?=\d)/o, "")
124143
.gsub(/(?<=#{ZH_CHAR}) (?=[A-Za-z](#{ZH_CHAR}|$))/o, "")
125144
end
126145

146+
# Insert the no-break spaces that French typography puts around punctuation.
# text is a plain string; locale selects the space used before a colon.
# Returns the rewritten string.
def l10n_fr1(text, locale)
147+
# U+202F (narrow no-break space) goes before » › ; ? ! when the mark
# immediately follows an alphanumeric character...
text = text.gsub(/(?<=\p{Alnum})([»›;?!])/, "\u202f\\1")
148+
# ...or when the mark is the first character of a line.
text = text.gsub(/^([»›;?!])/, "\u202f\\1")
149+
# U+202F goes after every opening guillemet « or ‹.
text = text.gsub(/([«‹])/, "\\1\u202f")
150+
# The colon takes U+202F for locale "CH" and U+00A0 (no-break space) for
# any other locale.
colonsp = locale == "CH" ? "\u202f" : "\u00a0"
151+
text = text.gsub(/(?<=\p{Alnum})(:)/, "#{colonsp}\\1")
152+
text.gsub(/^(:)/, "#{colonsp}\\1")
153+
end
154+
127155
def boolean_conj(list, conn)
128156
case list.size
129157
when 0 then ""

spec/isodoc/base_spec.rb

+21-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
end
3939

4040
it "loads language hash overrides" do
41-
c = IsoDoc::I18n.new("en", "Latn", i18nhash: YAML.load_file("spec/assets/new.yaml"))
41+
c = IsoDoc::I18n.new("en", "Latn",
42+
i18nhash: YAML.load_file("spec/assets/new.yaml"))
4243
expect(c.text).to eq "text2"
4344
expect(c.at).to eq "at"
4445
expect(c.hash.to_s).to be_equivalent_to '{"key1"=>"val1", "key2"=>"val2"}'
@@ -85,6 +86,25 @@
8586
.to be_equivalent_to "&#x61c;Code (hello, world.)&#x61c;"
8687
end
8788

89+
# Checks l10n for French with no locale, locale "FR" and locale "CH".
it "does French localisation" do
90+
# Results are re-encoded as hexadecimal entities so the inserted no-break
# spaces are visible in the expected strings.
e = HTMLEntities.new
91+
# No locale given: the expected output is the same as for locale "FR" below.
c = IsoDoc::I18n.new("fr", "Latn")
92+
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
93+
.to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
94+
"and&#xa0;: code&#x202f;!"
95+
# Guillemets supplied as numeric character references give the same output
# as the literal characters above.
expect(e.encode(c.l10n("Code; &#xab;code&#xbb; and: code!"), :hexadecimal))
96+
.to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
97+
"and&#xa0;: code&#x202f;!"
98+
# Explicit locale "FR": U+00A0 is expected before the colon.
c = IsoDoc::I18n.new("fr", "Latn", locale: "FR")
99+
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
100+
.to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
101+
"and&#xa0;: code&#x202f;!"
102+
# Locale "CH": U+202F is expected before the colon instead.
c = IsoDoc::I18n.new("fr", "Latn", locale: "CH")
103+
expect(e.encode(c.l10n("Code; «code» and: code!"), :hexadecimal))
104+
.to be_equivalent_to "Code&#x202f;; &#xab;&#x202f;code&#x202f;&#xbb; "\
105+
"and&#x202f;: code&#x202f;!"
106+
end
107+
88108
it "does boolean conjunctions" do
89109
c = IsoDoc::I18n.new("en", "Latn")
90110
expect(c.boolean_conj([], "and")).to eq ""

0 commit comments

Comments
 (0)