Skip to content

Commit 4f107ef

Browse files
committed
debug l10n of XML fragments: metanorma/bipm-si-brochure#192
1 parent e643699 commit 4f107ef

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

lib/isodoc/i18n.rb

+7-6
Original file line numberDiff line numberDiff line change
@@ -98,23 +98,24 @@ def bidiwrap_vars(lang, script)
9898
end
9999

100100
def l10n_zh(text)
101-
xml = Nokogiri::HTML::DocumentFragment.parse(text)
101+
xml = Nokogiri::XML::DocumentFragment.parse(text)
102102
xml.traverse do |n|
103103
next unless n.text?
104104

105105
n.replace(cleanup_entities(l10_zh1(n.text), is_xml: false))
106106
end
107-
xml.to_xml.gsub(/<b>/, "").gsub("</b>", "").gsub(/<\?[^>]+>/, "")
107+
xml.to_xml(encoding: "UTF-8").gsub(/<b>/, "").gsub("</b>", "")
108+
.gsub(/<\?[^>]+>/, "")
108109
end
109110

110111
def l10n_fr(text, locale)
111-
xml = Nokogiri::HTML::DocumentFragment.parse(text)
112+
xml = Nokogiri::XML::DocumentFragment.parse(text)
112113
xml.traverse do |n|
113114
next unless n.text?
114115

115116
n.replace(cleanup_entities(l10n_fr1(n.text, locale), is_xml: false))
116117
end
117-
xml.to_xml
118+
xml.to_xml(encoding: "UTF-8")
118119
end
119120

120121
ZH_CHAR = "\\p{Han}|\\p{In CJK Symbols And Punctuation}|"\
@@ -171,11 +172,11 @@ def cleanup_entities(text, is_xml: true)
171172
c = HTMLEntities.new
172173
if is_xml
173174
text.split(/([<>])/).each_slice(4).map do |a|
174-
a[0] = c.encode(c.decode(a[0]), :hexadecimal)
175+
a[0] = c.decode(a[0])
175176
a
176177
end.join
177178
else
178-
c.encode(c.decode(text), :hexadecimal)
179+
c.decode(text)
179180
end
180181
end
181182

lib/isodoc/version.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module IsoDoc
22
class I18n
3-
VERSION = "1.1.1".freeze
3+
VERSION = "1.1.2".freeze
44
end
55
end

0 commit comments

Comments
 (0)