Class: IsoDoc::I18n

Inherits:
Object
  • Object
show all
Defined in:
lib/isodoc/date.rb,
lib/isodoc/i18n.rb,
lib/isodoc/l10n.rb,
lib/isodoc/l10n_cjk.rb,
lib/isodoc/i18n-yaml.rb,
lib/isodoc/i18n/version.rb,
lib/isodoc/liquid/liquid.rb

Defined Under Namespace

Modules: Liquid

Constant Summary collapse

# Script codes that #enum_comma checks @script against before emitting the
# locale-specific enumeration comma.
CJK_SCRIPTS =
%w(Hans Hant Jpan Kore).freeze
# Fallback value per grammatical category, used by #inflect and #ordinal_key
# when the caller does not supply a value for that category.
INFLECTIONS =
{
  number: "sg",
  case: "nom",
  gender: "m",
  person: "3rd",
  voice: "act",
  mood: "ind",
  tense: "pres",
}.freeze
# Order in which #inflect walks the grammatical categories.
INFLECTION_ORDER =
%i(voice mood tense number case gender person).freeze
ZH_CHAR =

Use comprehensive CJK definition from metanorma-utils This includes Han, Katakana, Hiragana, Hangul, Bopomofo and all CJK extensions

"(#{Metanorma::Utils::CJK})".freeze
LATIN_PUNCT =
/[:,.()\[\];?!-]/.freeze
ZH_NON_PUNCT =

CJK character which is not punctuation

"(#{
[
  Metanorma::Utils.singleton_class::HAN,
  Metanorma::Utils.singleton_class::HAN_IDC,
  Metanorma::Utils.singleton_class::KANBUN,
  Metanorma::Utils.singleton_class::CJK_COMPAT_IDEOGRAPHS,
  Metanorma::Utils.singleton_class::HAN_COMPAT_IDEOGRAPHS,
  Metanorma::Utils.singleton_class::HANGUL,
  Metanorma::Utils.singleton_class::HIRAGANA,
  Metanorma::Utils.singleton_class::KATAKANA,
  Metanorma::Utils.singleton_class::BOPOMOFO,
].join("|")})".freeze
ZH1_PUNCT =

Condition for converting punctuation to double width, in case of options

  1. (Strict condition) CJK before, CJK after, modulo ignorable characters:

1a. CJK character, or start of string. Latin spaces optional.

/(#{ZH_CHAR}|^)(\s*)$/xo.freeze
ZH2_PUNCT =

1b. Latin spaces optional, Latin punct which will also convert to CJK, CJK character, or end of string.

/^\s*#{LATIN_PUNCT}*(#{ZH_CHAR}|$)/xo.freeze
ZH1_NO_SPACE =
  2. CJK before, space after:

2a. CJK char, followed by optional Latin punct which will also convert to CJK

/#{ZH_CHAR}#{LATIN_PUNCT}*$/xo.freeze
OPT_PUNCT_SPACE =

2b. optional Latin punct which will also convert to CJK, then space

/^($|#{LATIN_PUNCT}*\s)/xo.freeze
ZH_NUMERALS =

Chinese numerals (common + formal/financial forms). Explicit characters are needed because Chinese numeral ideographs are not tagged with the Unicode Number property. Uses alternation instead of a character class so that \p{N} is properly included.

"(?:[零一二三四五六七八九十百千万亿壹贰叁肆伍陆柒捌玖拾佰仟萬億兆]|\\p{N})".freeze
ZH1_DASH =

Contexts for converting en-dashes to full-width Before: CJK or start of string, no digits

/(#{ZH_CHAR}|^)(?<!=#{ZH_NUMERALS})$/xo.freeze
ZH2_DASH =

After: no optional digits, CJK or end of string

/^(?!#{ZH_NUMERALS})(#{ZH_CHAR}|$)/xo.freeze
ZH1_NUM_DASH =

Before: CJK or start of string, optional digits

/#{ZH_NUMERALS}$/xo.freeze
ZH2_NUM_DASH =

After: optional digits, CJK or end of string

/^#{ZH_NUMERALS}/xo.freeze
ZH_PUNCT_CONTEXTS =
[[ZH1_PUNCT, ZH2_PUNCT], [ZH1_NO_SPACE, OPT_PUNCT_SPACE],
[/(\s|^)$/, /^#{ZH_CHAR}/o]].freeze
ZH_PUNCT_AUTOTEXT =

map of YAML punct keys to auto-text Latin equivalents

{
  colon: ":",
  comma: ",",
  # "enum-comma": ",", # enum-comma is ambiguous with comma
  semicolon: ";",
  period: ".",
  "close-paren": ")",
  "open-paren": "(",
  "close-bracket": "]",
  "open-bracket": "[",
  "question-mark": "?",
  "exclamation-mark": "!",
  "em-dash": "",
  "open-quote": "",
  "close-quote": "",
  "open-nested-quote": "",
  "close-nested-quote": "",
  ellipse: "",
}.freeze
VERSION =
"1.4.4".freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil) ⇒ I18n

Returns a new instance of I18n.



16
17
18
19
20
21
22
23
24
25
26
# File 'lib/isodoc/i18n.rb', line 16

# Set up the localisation context for one language/script pair.
#
# @param lang language code, stored in @lang
# @param script script code, stored in @script
# @param locale optional locale, stored in @locale
# @param i18nyaml passed through to #init_labels
# @param i18nhash passed through to #init_labels
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
  @lang = lang
  @script = script
  @locale = locale
  # calendar_data goes through tw_cldr_lang, which reads @lang and @script,
  # so it has to run after those are assigned
  @cal = calendar_data
  # English calendar: month_i18n/day_i18n use its names as lookup patterns
  @cal_en = TwitterCldr::Shared::Calendar.new(:en)
  # entity decoder used by cleanup_entities and cjk_extend
  @c = HTMLEntities.new
  init_labels(i18nyaml, i18nhash)
  liquid_init
  self
end

Instance Attribute Details

#labelsObject

Returns the value of attribute labels.



13
14
15
# File 'lib/isodoc/i18n.rb', line 13

# Returns the label hash held in @labels (populated by #init_labels).
def labels
  @labels
end

#langObject (readonly)

Returns the value of attribute lang.



14
15
16
# File 'lib/isodoc/i18n.rb', line 14

# Returns the language code given to the constructor.
def lang
  @lang
end

#localeObject (readonly)

Returns the value of attribute locale.



14
15
16
# File 'lib/isodoc/i18n.rb', line 14

# Returns the locale given to the constructor (nil when none was supplied).
def locale
  @locale
end

#scriptObject (readonly)

Returns the value of attribute script.



14
15
16
# File 'lib/isodoc/i18n.rb', line 14

# Returns the script code given to the constructor.
def script
  @script
end

Class Method Details

.cjk_extend(text) ⇒ Object



141
142
143
# File 'lib/isodoc/l10n_cjk.rb', line 141

# Class-level convenience wrapper around the instance method #cjk_extend.
#
# The previous body called +cjk_extend+ with the class as receiver, which
# resolved back to this very method and recursed until SystemStackError.
# It now delegates to a throwaway instance. "zh"/"Hans" is the same default
# pairing that #l10n_zh and #load_yaml1 fall back to; the instance method
# itself only relies on the entity decoder set up by the constructor.
#
# @param text [String] title to space out
# @return [String] result of the instance-level #cjk_extend
def self.cjk_extend(text)
  new("zh", "Hans").cjk_extend(text)
end

.l10n(text, lang = @lang, script = @script, options = {}) ⇒ Object



6
7
8
# File 'lib/isodoc/l10n.rb', line 6

# Class-level convenience wrapper around the instance method #l10n.
#
# The previous body called +l10n+ with the class as receiver, i.e. itself,
# so every call ended in SystemStackError. It now builds an instance for the
# requested language/script and delegates to it.
#
# @param text [String] text to localise
# @param lang language code (defaults to the class-level @lang)
# @param script script code (defaults to the class-level @script)
# @param options [Hash] forwarded unchanged to the instance method
# @return [String, nil] result of the instance-level #l10n
def self.l10n(text, lang = @lang, script = @script, options = {})
  new(lang, script).l10n(text, lang, script, options)
end

Instance Method Details

#am_pm_i18n(val) ⇒ Object



19
20
21
22
23
24
# File 'lib/isodoc/date.rb', line 19

# Replace the AM/PM placeholders produced by #convert_date_format + strftime
# with the day-period names of the target calendar (@cal).
#
# Placeholders look like "%<ZWNJ>P<am>" (lower case wanted) or
# "%<ZWNJ>p<AM>" (upper case wanted). #convert_date_format also lets the "^"
# flag through ("%<ZWNJ>^P<pm>", "%<ZWNJ>^p<PM>"); those were previously left
# in the output verbatim and are now rendered upper-cased, matching how
# #month_i18n and #day_i18n treat the "^" flag.
#
# @param val [String] strftime output containing placeholders
# @return [String] new string with the placeholders replaced
def am_pm_i18n(val)
  am = @cal.periods[:am]
  pm = @cal.periods[:pm]
  val.gsub(/%\u200c\^[Pp]<(?:am|AM)>/, am.upcase)
    .gsub(/%\u200c\^[Pp]<(?:pm|PM)>/, pm.upcase)
    .gsub(/%\u200cP<am>/, am.downcase)
    .gsub(/%\u200cP<pm>/, pm.downcase)
    .gsub(/%\u200cp<AM>/, am.upcase)
    .gsub(/%\u200cp<PM>/, pm.upcase)
end

#bidiwrap(text, lang, script) ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/isodoc/l10n.rb', line 24

# Wrap +text+ in directional marks when its script runs in the opposite
# direction to the script of this I18n instance.
#
# RTL text inside an LTR context is wrapped in ALM (Arab/Aran scripts) or RLM;
# LTR text inside an RTL context is wrapped in LRM; otherwise +text+ is
# returned as is.
def bidiwrap(text, lang, script)
  inner_script, inner_rtl, outer_rtl = bidiwrap_vars(lang, script)
  same_direction = (inner_rtl && outer_rtl) || (!inner_rtl && !outer_rtl)
  return text if same_direction
  return "&#x200e;#{text}&#x200e;" unless inner_rtl

  mark = %w(Arab Aran).include?(inner_script) ? "&#x61c;" : "&#x200f;"
  "#{mark}#{text}#{mark}"
end

#bidiwrap_vars(lang, script) ⇒ Object



34
35
36
37
38
39
40
# File 'lib/isodoc/l10n.rb', line 34

# Work out the directionality inputs for #bidiwrap.
#
# @return [Array] the effective script of the text, whether that script is
#   RTL, and whether the script of this instance (@script, or the default
#   script of @lang) is RTL
def bidiwrap_vars(lang, script)
  utils = Metanorma::Utils
  inner_script = script || utils.default_script(lang)
  inner_rtl = utils.rtl_script?(inner_script)
  outer_script = @script || utils.default_script(@lang)
  [inner_script, inner_rtl, utils.rtl_script?(outer_script)]
end

#boolean_conj(list, conn) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/isodoc/i18n.rb', line 58

# Join +list+ with the localised conjunction named by +conn+.
#
# Uses the "binary_<conn>" label for two items and "multiple_<conn>" for more;
# in both templates "%1" and "%2" are the slots. With more than two items,
# all but the last are joined with #enum_comma and localised before going
# into "%1".
#
# Slots are filled through the block form of String#sub so that backslash
# sequences in an item (e.g. "\\1") are inserted literally instead of being
# interpreted as regexp back-references.
#
# @param list [Array<String>] items to join
# @param conn [String] conjunction key
# @return [String]
def boolean_conj(list, conn)
  case list.size
  when 0 then ""
  when 1 then list.first
  when 2
    @labels["binary_#{conn}"].sub(/%1/) { list[0] }
      .sub(/%2/) { list[1] }
  else
    head = l10n(list[0..-2].join(enum_comma), @lang, @script)
    @labels["multiple_#{conn}"].sub(/%1/) { head }
      .sub(/%2/) { list[-1] }
  end
end

#build_esc_indices(xml, text_nodes) ⇒ Object

Build set of indices for text nodes within <esc> tags Handles both namespaced and non-namespaced <esc> elements



56
57
58
59
60
61
62
63
64
65
# File 'lib/isodoc/l10n.rb', line 56

# Collect the positions (within +text_nodes+) of every text node that sits
# inside an <esc> element, whether or not that element is namespaced.
#
# @param xml node answering #xpath
# @param text_nodes [Array] text nodes in document order
# @return [Set<Integer>] indices into +text_nodes+
def build_esc_indices(xml, text_nodes)
  plain = xml.xpath(".//esc//text()")
  any_namespace = xml.xpath(".//*[local-name()='esc']//text()")
  escaped = Set.new(plain + any_namespace)
  hits = text_nodes.each_with_index.select do |node, _pos|
    escaped.include?(node)
  end
  Set.new(hits.map(&:last))
end

#build_text_cache(text_nodes, prev_context = nil, foll_context = nil) ⇒ Object

Cache text content once per method call to avoid repeated .text calls. Build the text cache with optional prepended/appended context. Also reduce multiple spaces to a single space, to avoid misrecognition of space.



70
71
72
73
74
75
# File 'lib/isodoc/l10n.rb', line 70

# Snapshot the text of each node once, collapsing every whitespace run to a
# single space, and optionally bracket the list with outside context.
#
# The context strings are added verbatim (no whitespace collapsing).
#
# @return [Array<String>]
def build_text_cache(text_nodes, prev_context = nil, foll_context = nil)
  collapsed = text_nodes.map { |node| node.text.gsub(/\s+/, " ") }
  leading = prev_context ? [prev_context] : []
  trailing = foll_context ? [foll_context] : []
  leading + collapsed + trailing
end

#calendar_dataObject



34
35
36
37
38
# File 'lib/isodoc/i18n.rb', line 34

# Build the TwitterCldr calendar for the current language, falling back to
# the English calendar if anything goes wrong (StandardError).
def calendar_data
  locale = tw_cldr_lang
  TwitterCldr::Shared::Calendar.new(locale)
rescue StandardError
  TwitterCldr::Shared::Calendar.new(:en)
end

#cjk_extend(title) ⇒ Object



145
146
147
148
149
150
151
152
# File 'lib/isodoc/l10n_cjk.rb', line 145

# Space out a CJK title by inserting an ideographic space (U+3000) between
# adjacent characters wherever #interleave_space_cjk? allows it.
#
# The pair handed to #interleave_space_cjk? is taken from the *decoded*
# characters. Previously it was read from the raw +title+ by index, so as
# soon as +title+ contained an entity the indices no longer lined up with the
# characters being emitted and the wrong pairs were tested.
#
# @param title [String] text, possibly containing HTML entities
# @return [String] decoded text with U+3000 inserted
def cjk_extend(title)
  chars = @c.decode(title).chars
  chars.each_with_index.map do |char, i|
    if i.zero? || !interleave_space_cjk?(chars[i - 1] + char)
      char
    else "\u3000#{char}"
    end
  end.join
end

#cleanup_entities(text, is_xml: true) ⇒ Object



78
79
80
81
82
83
84
85
86
87
# File 'lib/isodoc/i18n.rb', line 78

# Decode HTML entities in +text+.
#
# With +is_xml+ the string is split on "<" and ">" and only every fourth
# piece (the stretches between tags) is decoded, so markup and attribute
# values keep their entities. Without it the whole string is decoded.
def cleanup_entities(text, is_xml: true)
  return @c.decode(text) unless is_xml

  text.split(/([<>])/).each_with_index.map do |piece, pos|
    (pos % 4).zero? ? @c.decode(piece) : piece
  end.join
end

#convert_date_format(fmt) ⇒ Object



10
11
12
13
# File 'lib/isodoc/date.rb', line 10

# Rewrite a strftime format so that the name-bearing directives
# (B b h P p A a, optionally with the "^" flag) survive strftime as
# placeholders of the form "%<ZWNJ>[^]X<%X>", which the *_i18n methods
# localise afterwards. "%_" is turned into a plain space.
def convert_date_format(fmt)
  spaced = fmt.gsub("%_", " ")
  spaced.gsub(/%(\^?)([BbhPpAa])/) do
    flag = Regexp.last_match(1)
    code = Regexp.last_match(2)
    "%\u200c#{flag}#{code}<%#{code}>"
  end
end

#date(value, format) ⇒ Object



5
6
7
8
# File 'lib/isodoc/date.rb', line 5

# Format an ISO 8601 date/time string with a strftime-style +format+,
# localising month, weekday and AM/PM names.
def date(value, format)
  parsed = DateTime.iso8601(value)
  pattern = convert_date_format(format)
  date_i18n(parsed.strftime(pattern))
end

#date_i18n(val) ⇒ Object



15
16
17
# File 'lib/isodoc/date.rb', line 15

# Run the three placeholder passes in order: AM/PM, then months, then days.
def date_i18n(val)
  with_periods = am_pm_i18n(val)
  with_months = month_i18n(with_periods)
  day_i18n(with_months)
end

#day_i18n(val) ⇒ Object



37
38
39
40
41
42
43
44
45
46
# File 'lib/isodoc/date.rb', line 37

# Replace weekday placeholders ("%<ZWNJ>A<Monday>", "%<ZWNJ>a<Mon>", and the
# "^" upper-case variants) with the weekday names of the target calendar.
#
# The English calendar (@cal_en) supplies the names to look for; the entry
# under the same key in @cal supplies the replacement.
#
# Works on a copy: the previous implementation used gsub! and therefore
# modified the caller's string in place (raising FrozenError for frozen
# input). English names are escaped before being placed in the pattern.
#
# @param val [String] text containing placeholders
# @return [String] text with weekday placeholders localised
def day_i18n(val)
  result = val
  { A: :wide, a: :abbreviated }.each do |f, t|
    @cal_en.calendar_data[:days][:format][t].each do |k, v|
      m = @cal.calendar_data[:days][:format][t][k]
      name = Regexp.escape(v.to_s)
      result = result.gsub(/%\u200c#{f}<#{name}>/, m)
        .gsub(/%\u200c\^#{f}<#{name}>/, m.upcase)
    end
  end
  result
end

#enum_commaObject



71
72
73
74
75
76
# File 'lib/isodoc/i18n.rb', line 71

# Separator used between enumerated items, wrapped in <enum-comma> markup.
# CJK scripts with a configured "enum-comma" label get that label and no
# trailing space; everything else gets an ASCII comma followed by a space.
def enum_comma
  configured = @labels.dig("punct", "enum-comma")
  if configured && CJK_SCRIPTS.include?(@script)
    "<enum-comma>#{configured}</enum-comma>"
  else
    "<enum-comma>,</enum-comma> "
  end
end

#getObject



129
130
131
# File 'lib/isodoc/i18n-yaml.rb', line 129

# Returns the full label hash (@labels). The accessor methods generated in
# #init_labels read their values through this method.
def get
  @labels
end

#inflect(word, options) ⇒ Object

can skip category if not present



132
133
134
135
136
137
138
139
140
141
# File 'lib/isodoc/i18n.rb', line 132

# Look up the inflected form of +word+ in @labels["inflection"].
#
# The entry is either a String (returned directly) or a nested hash that is
# walked category by category in INFLECTION_ORDER, using the value from
# +options+ or else the INFLECTIONS default. A category with no matching key
# is skipped. Returns +word+ unchanged if no String is reached.
def inflect(word, options)
  forms = @labels.dig("inflection", word)
  return word unless forms
  return forms if forms.is_a?(String)

  INFLECTION_ORDER.each do |category|
    choice = options[category] || INFLECTIONS[category]
    forms = forms[choice] if forms[choice]
    return forms if forms.is_a?(String)
  end
  word
end

#inflect_ordinal(num, term, ord_class) ⇒ Object

ord class is either SpelloutRules or OrdinalRules



90
91
92
93
94
95
96
97
98
# File 'lib/isodoc/i18n.rb', line 90

# Render +num+ through the RBNF rule configured under @labels[ord_class].
#
# When @labels["ordinal_keys"] is non-empty, the rule is looked up one level
# deeper under the key produced by #ordinal_key for +term+. If anything
# raises StandardError, the number is localised directly with @lang instead.
def inflect_ordinal(num, term, ord_class)
  keys = @labels["ordinal_keys"]
  rule = if keys.nil? || keys.empty? then @labels[ord_class]
         else @labels[ord_class][ordinal_key(term)]
         end
  tw_cldr_localize(num).to_rbnf_s(ord_class, rule)
rescue StandardError
  num.localize(@lang.to_sym).to_rbnf_s(ord_class, rule)
end

#init_labels(i18nyaml, i18nhash) ⇒ Object



40
41
42
43
44
45
46
47
48
49
# File 'lib/isodoc/i18n.rb', line 40

# Load the label hash and expose each top-level key as a reader method.
#
# "language" and "script" are added to the hash from @lang/@script. For every
# key whose lower-cased name is not already a method of this object or of its
# class, an instance method of that name is defined on the class, returning
# the current value of that label via #get.
def init_labels(i18nyaml, i18nhash)
  @labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
  @labels["language"] = @lang
  @labels["script"] = @script
  @labels.each_key do |label|
    reader = label.downcase
    taken = methods.include?(reader.to_sym) ||
      self.class.methods.include?(reader.to_sym)
    next if taken

    self.class.send(:define_method, reader) { get[label] }
  end
end

#init_zh_punct_mapObject

Pre-defined punctuation mappings for efficiency



78
79
80
81
82
83
# File 'lib/isodoc/l10n_cjk.rb', line 78

# Build the list of [latin, cjk, contexts] triples used by #l10n_zh_punct:
# one per ZH_PUNCT_AUTOTEXT entry for which the labels define a "punct"
# replacement.
#
# Entries whose Latin equivalent is empty are skipped. An empty delimiter can
# never equal a token in #l10n_gsub, so such a mapping never replaces
# anything, yet it still makes #l10n_gsub_context split every text node into
# single characters.
#
# @return [Array<Array>]
def init_zh_punct_map
  ZH_PUNCT_AUTOTEXT.each_with_object([]) do |(k, v), m|
    v.to_s.empty? and next
    cjk = @labels.dig("punct", k.to_s) or next
    m << [v, cjk, ZH_PUNCT_CONTEXTS]
  end
end

#interleave_space_cjk?(text) ⇒ Boolean

Returns:

  • (Boolean)


154
155
156
157
158
159
160
161
162
163
# File 'lib/isodoc/l10n_cjk.rb', line 154

# Decide whether an ideographic space may be inserted between the two
# characters of +text+.
#
# Returns nil unless +text+ is exactly two characters. Returns false for
# doubled dashes/ellipses, digit or Latin pairs, any whitespace, an opening
# bracket/quote in first position, a closing bracket/quote in last position,
# or any of the listed punctuation marks; true otherwise.
def interleave_space_cjk?(text)
  return unless text.size == 2

  doubled = ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"]
  return false if doubled.include?(text)
  return false if /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text)
  return false if /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text)
  return false if /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text)
  return false if /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text)

  true
end

#l10_context_valid?(context, idx, delim, regex) ⇒ Boolean

Returns:

  • (Boolean)


132
133
134
135
136
137
138
139
# File 'lib/isodoc/l10n.rb', line 132

# Check whether the token at +idx+ is the delimiter and, if context rules are
# given, whether some rule pair accepts the text before and after it.
#
# @return false when the token is not the delimiter, true when +regex+ is
#   nil, otherwise the first matching [before, after] pair (or nil)
def l10_context_valid?(context, idx, delim, regex)
  return false unless l10n_context_found_delimiter?(context[idx], delim)
  return true if regex.nil?

  regex.find do |before_rule, after_rule|
    before_rule.match?(context[0...idx].join) &&
      after_rule.match?(context[(idx + 1)..-1].join)
  end
end

#l10_zh1(text, prev, foll, _script, options) ⇒ Object

Note: we can’t differentiate a comma from an enumeration comma (、). (Earlier signature: l10_zh1(text, _script).)



101
102
103
104
105
# File 'lib/isodoc/l10n_cjk.rb', line 101

# Apply the three CJK passes to one text node, in order: punctuation
# conversion, space removal, dash conversion. +prev+/+foll+ are the
# surrounding context strings; +_script+ is unused.
def l10_zh1(text, prev, foll, _script, options)
  l10n_zh_dash(
    l10n_zh_remove_space(
      l10n_zh_punct(text, prev, foll, options), prev, foll
    ), prev, foll
  )
end

#l10n(text, lang = @lang, script = @script, options = {}) ⇒ Object

Function localising spaces and punctuation. options[:prev] and options[:foll] are optional context strings; options[:proportional_mixed_cjk] allows contextual full-width vs half-width punctuation.



14
15
16
17
18
19
20
21
22
# File 'lib/isodoc/l10n.rb', line 14

# Localise spacing and punctuation of +text+ for +lang+/+script+, strip
# <esc> markers, and wrap the result in bidi marks where needed.
#
# zh/ja/ko go through #l10n_zh; fr goes through #l10n_fr with
# options[:locale], else @locale, else "FR". Other languages only have the
# <esc> tags stripped.
#
# The <esc> stripping now builds a new string. It used gsub!, which modified
# the caller's own string whenever no language-specific pass had replaced it
# first, and raised FrozenError for frozen input containing <esc>.
#
# @param text [String, nil] (XML) text to localise
# @param options [Hash] :locale, plus whatever the language pass reads
# @return [String, nil]
def l10n(text, lang = @lang, script = @script, options = {})
  locale = options[:locale] || @locale
  %w(zh ja ko).include?(lang) and
    text = l10n_zh(text, script, options)
  lang == "fr" and
    text = l10n_fr(text, locale || "FR", options)
  text = text&.gsub(/<esc>|<\/esc>/, "") # Strip esc tags
  bidiwrap(text, lang, script)
end

#l10n_context(nodes, idx) ⇒ Object

Fallback method for backward compatibility



88
89
90
91
92
# File 'lib/isodoc/l10n.rb', line 88

# Uncached variant of #l10n_context_cached: concatenates the text of all
# nodes before +idx+ and of all nodes after it.
#
# @return [Array(String, String)] preceding and following context
def l10n_context(nodes, idx)
  before = nodes[0...idx]
  after = nodes[(idx + 1)..-1]
  [before.map(&:text).join, after.map(&:text).join]
end

#l10n_context_cached(text_cache, idx) ⇒ Object

previous, following context of current text node: do not use just the immediately adjoining text tokens for context deal with spaces and empty text by just concatenating entire context Optimized to avoid O(n²) complexity by using pre-cached text content



81
82
83
84
85
# File 'lib/isodoc/l10n.rb', line 81

# Context of the entry at +idx+: everything before it and everything after
# it in +text_cache+, each concatenated into one string.
#
# @return [Array(String, String)] preceding and following context
def l10n_context_cached(text_cache, idx)
  before = text_cache[0...idx]
  after = text_cache[(idx + 1)..-1]
  [before.join, after.join]
end

#l10n_context_found_delimiter?(token, delim) ⇒ Boolean

Returns:

  • (Boolean)


141
142
143
144
145
146
147
# File 'lib/isodoc/l10n.rb', line 141

# True when +token+ is the delimiter described by delim[0]: a regexp match
# when delim[0] is a Regexp, plain equality otherwise.
def l10n_context_found_delimiter?(token, delim)
  wanted = delim[0]
  return wanted.match?(token) if wanted.is_a?(Regexp) # punct to convert

  token == wanted
end

#l10n_fr(text, locale, options) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/isodoc/l10n.rb', line 94

# Apply French punctuation spacing to every text node of +text+ that is not
# inside <esc>, and serialise the result.
#
# When a preceding context string was supplied it occupies slot 0 of the
# text cache, so node positions are shifted by one when looking up context.
def l10n_fr(text, locale, options)
  nodes, cache, doc, prev, _foll, escaped = l10n_prep(text, options)
  shift = prev ? 1 : 0
  nodes.each_with_index do |node, pos|
    next if escaped.include?(pos) # Skip escaped nodes

    before, after = l10n_context_cached(cache, pos + shift)
    plain = cleanup_entities(node.text, is_xml: false)
    node.content = l10n_fr1(plain, before, after, locale)
  end
  to_xml(doc)
end

#l10n_fr1(text, prev, foll, locale) ⇒ Object



149
150
151
152
153
154
155
156
157
# File 'lib/isodoc/l10n.rb', line 149

# French spacing rules for one text node:
# * narrow no-break space before » › ; ? ! when they follow an alphanumeric
#   and are followed by whitespace or the end of the text
# * narrow no-break space after « ‹ when followed by a non-space character
# * before ":" in the same position as the first rule: narrow no-break space
#   for locale "CH", no-break space otherwise
def l10n_fr1(text, prev, foll, locale)
  closing = [/[»›;?!]/, "\u202f\\0"]
  opening = [/[«‹]/, "\\0\u202f"]
  colon_space = locale == "CH" ? "\u202f" : "\u00a0"

  text = l10n_gsub(text, prev, foll, closing, [[/\p{Alnum}$/, /^(\s|$)/]])
  text = l10n_gsub(text, prev, foll, opening, [[/$/, /^(?!\p{Zs})./]])
  l10n_gsub(text, prev, foll, [":", "#{colon_space}\\0"],
            [[/\p{Alnum}$/, /^(\s|$)/]])
end

#l10n_gsub(text, prev, foll, delim, regexes) ⇒ Object

text: string we are scanning for instances of delim[0] to replace. prev: string preceding text, as an additional token of context. foll: string following text, as an additional token of context. delim: delim[0] is the symbol we want to replace, delim[1] its replacement ("\0" in delim[1] stands for the matched symbol). regexes: a list of regex pairs: the context before the found token, and the context after the found token, under which replacing it with delim[1] is permitted. If regexes is nil, always allow the replacement.



113
114
115
116
117
118
119
120
121
# File 'lib/isodoc/l10n.rb', line 113

# Context-sensitive replacement of delim[0] by delim[1] within +text+.
#
# +text+ is tokenised together with +prev+ and +foll+; each inner token that
# passes #l10_context_valid? is replaced by delim[1], in which "\0" stands
# for the token itself. Tokens are updated in place, so later checks see the
# replacements already made. Returns +text+ unchanged when delim[1] is nil or
# the delimiter does not occur.
def l10n_gsub(text, prev, foll, delim, regexes)
  replacement = delim[1]
  return text unless replacement

  tokens = l10n_gsub_context(text, prev, foll, delim)
  return text unless tokens

  inner = 1...(tokens.size - 1)
  inner.each do |pos|
    next unless l10_context_valid?(tokens, pos, delim, regexes)

    tokens[pos] = replacement.gsub("\\0", tokens[pos]) # Full-width equivalent
  end
  tokens[inner].join
end

#l10n_gsub_context(text, prev, foll, delim) ⇒ Object

split string being scanned, and its contextual tokens before and after, into array of tokens determining whether to replace instances of delim



125
126
127
128
129
130
# File 'lib/isodoc/l10n.rb', line 125

# Tokenise +text+ around occurrences of delim[0] (kept as tokens of their
# own), drop empty tokens, and bracket the list with +prev+ and +foll+.
#
# @return [Array, nil] token list, or nil when splitting yields exactly one
#   token (delimiter not present)
def l10n_gsub_context(text, prev, foll, delim)
  needle = delim[0]
  needle = Regexp.quote(needle) unless needle.is_a?(Regexp)
  pieces = text.split(/(#{needle})/) # delim to replace
  return if pieces.size == 1

  [prev, pieces.reject(&:empty?), foll].flatten
end

#l10n_prep(text, options) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/isodoc/l10n.rb', line 42

# Parse +text+ as an XML fragment and gather everything the per-language
# passes need.
#
# @return [Array] non-empty text nodes, their text cache (with
#   options[:prev]/[:foll] attached), the parsed fragment, options[:prev],
#   options[:foll], and the set of node indices that sit inside <esc>
def l10n_prep(text, options)
  fragment = Nokogiri::XML::DocumentFragment.parse(text)
  nodes = fragment.xpath(".//text()").reject { |n| n.text.empty? }
  cache = build_text_cache(nodes, options[:prev], options[:foll])
  # positions of text nodes that sit inside <esc>, gathered in one pass
  escaped = build_esc_indices(fragment, nodes)

  [nodes, cache, fragment, options[:prev], options[:foll], escaped]
end

#l10n_zh(text, script, options) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/isodoc/l10n_cjk.rb', line 85

# Apply the CJK passes (#l10_zh1) to every text node of +text+ that is not
# inside <esc>, and serialise the result. +script+ defaults to "Hans".
#
# When a preceding context string was supplied it occupies slot 0 of the
# text cache, so node positions are shifted by one when looking up context.
def l10n_zh(text, script, options)
  script ||= "Hans"
  nodes, cache, doc, prev, _foll, escaped = l10n_prep(text, options)
  shift = prev ? 1 : 0
  nodes.each_with_index do |node, pos|
    next if escaped.include?(pos) # Skip escaped nodes

    before, after = l10n_context_cached(cache, pos + shift)
    plain = cleanup_entities(node.text, is_xml: false)
    node.content = l10_zh1(plain, before, after, script, options)
  end
  to_xml(doc)
end

#l10n_zh_dash(text, prev, foll) ⇒ Object



119
120
121
122
123
124
# File 'lib/isodoc/l10n_cjk.rb', line 119

# Convert en dashes (U+2013) to their CJK forms.
#
# First pass: the "en-dash" label, where the dash sits in the ZH1_DASH /
# ZH2_DASH context. Second pass: the "number-en-dash" label, where it sits in
# the ZH1_NUM_DASH / ZH2_NUM_DASH context. A missing label leaves the text
# unchanged for that pass (#l10n_gsub returns early on a nil replacement).
#
# The delimiter used to be the empty string, which can never equal a token,
# so neither pass ever replaced anything; it is now the en dash that the
# label names refer to, written as an escape so it survives re-encoding.
#
# @param text [String] text of one node
# @param prev [String] preceding context
# @param foll [String] following context
# @return [String]
def l10n_zh_dash(text, prev, foll)
  en_dash = "\u2013"
  text = l10n_gsub(text, prev, foll,
                   [en_dash, @labels.dig("punct", "en-dash")],
                   [[ZH1_DASH, ZH2_DASH]])
  l10n_gsub(text, prev, foll,
            [en_dash, @labels.dig("punct", "number-en-dash")],
            [[ZH1_NUM_DASH, ZH2_NUM_DASH]])
end

#l10n_zh_punct(text, prev, foll, options) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
# File 'lib/isodoc/l10n_cjk.rb', line 107

# Convert Latin punctuation in +text+ to the configured CJK punctuation.
#
# The mapping table is built once per instance and memoised. The context
# rules attached to each mapping are only enforced when
# options[:proportional_mixed_cjk] is set; otherwise every occurrence is
# converted.
def l10n_zh_punct(text, prev, foll, options)
  # Use pre-defined mapping for better performance
  @zh_punct_map ||= init_zh_punct_map
  @zh_punct_map.reduce(text) do |converted, (latin, cjk, contexts)|
    rules = options[:proportional_mixed_cjk] ? contexts : nil
    l10n_gsub(converted, prev, foll, [latin, cjk], rules)
  end
end

#l10n_zh_remove_space(text, prev, foll) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/isodoc/l10n_cjk.rb', line 126

# Normalise whitespace around CJK text.
#
# Whitespace between two CJK characters is always removed. Then:
# * with a "cjk-latin-separator" label, whitespace between CJK and
#   Latin/number text (in either order, allowing intervening punctuation) is
#   replaced by that separator;
# * without it, whitespace after a CJK character is removed when followed by
#   a digit, or by a single ASCII letter that is itself followed by CJK or
#   the end of the text.
def l10n_zh_remove_space(text, prev, foll)
  text = l10n_gsub(text, prev, foll, [/\s+/, ""],
                   [[/(#{ZH_CHAR})$/o, /^#{ZH_CHAR}/o]])
  sep = @labels.dig("punct", "cjk-latin-separator")
  unless sep
    return l10n_gsub(text, prev, foll, [/\s+/, ""],
                     [[/#{ZH_CHAR}$/o, /^(\d|[A-Za-z](#{ZH_CHAR}|$))/o]])
  end

  # Skip over punctuation to find Latin letters/numbers
  text = l10n_gsub(text, prev, foll, [/\s+/, sep],
                   [[/#{ZH_CHAR}$/o, /^\p{P}*[\p{Latin}\p{N}]/o]])
  l10n_gsub(text, prev, foll, [/\s+/, sep],
            [[/[\p{Latin}\p{N}]\p{P}*$/o, /^#{ZH_NON_PUNCT}/o]])
end

#liquid_initObject



30
31
32
# File 'lib/isodoc/i18n.rb', line 30

# Register the ::IsoDoc::I18n::Liquid module as a filter on the default
# Liquid environment. Called from the constructor.
def liquid_init
  ::Liquid::Environment.default.register_filter(::IsoDoc::I18n::Liquid)
end

#load_yaml(lang, script, i18nyaml = nil, i18nhash = nil) ⇒ Object



8
9
10
11
12
13
# File 'lib/isodoc/i18n-yaml.rb', line 8

# Load the built-in labels for +lang+/+script+ and overlay caller-supplied
# ones: YAML files if +i18nyaml+ is given, otherwise the +i18nhash+ hash if
# given (files take precedence; the hash is ignored when both are present).
# The result is always run through #postprocess.
def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
  base = load_yaml1(lang, script)
  merged =
    if i18nyaml then merge_yaml_files(base, i18nyaml)
    elsif i18nhash then base.deep_merge(i18nhash)
    else base
    end
  postprocess(merged)
end

#load_yaml1(lang, script) ⇒ Object



109
110
111
112
113
114
115
116
117
118
# File 'lib/isodoc/i18n-yaml.rb', line 109

# Pick the label-file key for +lang+: Chinese is keyed by script
# ("zh-<script>", defaulting to "zh-Hans"); every other language by its code.
def load_yaml1(lang, script)
  return load_yaml2(lang) unless lang == "zh"

  load_yaml2(script ? "zh-#{script}" : "zh-Hans")
end

#load_yaml2(lang) ⇒ Object

locally defined in calling class



121
122
123
124
125
126
127
# File 'lib/isodoc/i18n-yaml.rb', line 121

# Read "../isodoc-yaml/i18n-<lang>.yaml" relative to this source file,
# falling back to the English file if that raises StandardError (for
# example because no file exists for +lang+).
def load_yaml2(lang)
  here = File.dirname(__FILE__)
  YAML.load_file(File.join(here, "../isodoc-yaml/i18n-#{lang}.yaml"))
rescue StandardError
  YAML.load_file(File.join(File.dirname(__FILE__),
                           "../isodoc-yaml/i18n-en.yaml"))
end

#merge(new_labels) ⇒ Object



137
138
139
# File 'lib/isodoc/i18n-yaml.rb', line 137

# Deep-merge +new_labels+ into the current labels and store the result in
# @labels. Returns the merged hash.
def merge(new_labels)
  @labels = @labels.deep_merge(new_labels)
end

#merge_yaml_files(ret, i18nyaml) ⇒ Object

i18nyaml entries are nominally paths, but callers sometimes pass YAML values (label keys, multi-line prose). Skip anything that plainly isn’t a real file path before touching the filesystem — `File.exist?` raises on strings containing \0 (Windows + Ruby 3.2).



19
20
21
22
23
24
25
26
27
28
# File 'lib/isodoc/i18n-yaml.rb', line 19

# Deep-merge each YAML file named in +i18nyaml+ (a single entry or a list)
# into +ret+, in order.
#
# Entries that are nil, empty, contain a NUL or a newline, or do not name an
# existing regular file are ignored; the NUL/newline checks run before any
# filesystem call.
def merge_yaml_files(ret, i18nyaml)
  Array(i18nyaml).compact.map(&:to_s).reduce(ret) do |merged, path|
    plausible = !path.empty? && !path.include?("\0") && !path.include?("\n")
    if plausible && File.file?(path)
      merged.deep_merge(YAML.load_file(path))
    else
      merged
    end
  end
end

#month_i18n(val) ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/isodoc/date.rb', line 26

# Replace month placeholders ("%<ZWNJ>B<January>", "%<ZWNJ>b<Jan>",
# "%<ZWNJ>h<Jan>", and the "^" upper-case variants) with the month names of
# the target calendar.
#
# The English calendar (@cal_en) supplies the names to look for; the entry
# under the same key in @cal supplies the replacement.
#
# Works on a copy: the previous implementation used gsub! and therefore
# modified the caller's string in place (raising FrozenError for frozen
# input). English names are escaped before being placed in the pattern.
#
# @param val [String] text containing placeholders
# @return [String] text with month placeholders localised
def month_i18n(val)
  result = val
  { B: :wide, b: :abbreviated, h: :abbreviated }.each do |f, t|
    @cal_en.calendar_data[:months][:format][t].each do |k, v|
      m = @cal.calendar_data[:months][:format][t][k]
      name = Regexp.escape(v.to_s)
      result = result.gsub(/%\u200c#{f}<#{name}>/, m)
        .gsub(/%\u200c\^#{f}<#{name}>/, m.upcase)
    end
  end
  result
end

#normalise_hash(ret) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/isodoc/i18n-yaml.rb', line 96

# Recursively normalise every string in a label structure: NFC-normalise it
# and decode its entities via #cleanup_entities.
#
# Hashes are updated in place and returned; arrays are rebuilt; any other
# value is returned untouched.
def normalise_hash(ret)
  if ret.is_a?(Hash)
    ret.each_key { |key| ret[key] = normalise_hash(ret[key]) }
    ret
  elsif ret.is_a?(Array)
    ret.map { |item| normalise_hash(item) }
  elsif ret.is_a?(String)
    cleanup_entities(ret.unicode_normalize(:nfc))
  else
    ret
  end
end

#ordinal_key(term) ⇒ Object



118
119
120
121
122
# File 'lib/isodoc/i18n.rb', line 118

# Build the dotted lookup key for an ordinal rule: for each category listed
# in @labels["ordinal_keys"], take the value from +term+ or else the
# INFLECTIONS default, and join the values with ".".
def ordinal_key(term)
  parts = @labels["ordinal_keys"].map do |category|
    term[category.to_s] || INFLECTIONS[category.to_sym]
  end
  parts.join(".")
end

#parse_path(path_expr) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/isodoc/i18n-yaml.rb', line 83

# Split a self-reference path such as `.a.b[0]` or `["a"]["b"]` into its
# segments. Dotted names are taken as they are; bracketed parts are stripped
# of surrounding whitespace and of one leading/trailing quote character.
#
# @return [Array<String>]
def parse_path(path_expr)
  # Split by dots and brackets while preserving the content
  tokens = path_expr.sub(/^\./, "").scan(/\.?([\w-]+)|\[([^\]]+)\]/)
  segments = tokens.map do |dotted, bracketed|
    next dotted if dotted

    bracketed.strip.gsub(/^["']|["']$/, "") if bracketed
  end
  segments.compact
end

#populate(keys, vars = {}) ⇒ Object

populate with variables, Liquid, inflections, ordinals/spellout



52
53
54
55
56
# File 'lib/isodoc/i18n.rb', line 52

# Render the label found at +keys+ (a single key or a path of keys) as a
# Liquid template. This instance is first handed to the Liquid filter
# module; the template sees +vars+ plus the whole label hash as "labels".
def populate(keys, vars = {})
  ::IsoDoc::I18n::Liquid.set(self)
  source = @labels.dig(*Array(keys))
  template = ::Liquid::Template.parse(source)
  template.render(vars.merge("labels" => @labels))
end

#postprocess(labels) ⇒ Object



30
31
32
# File 'lib/isodoc/i18n-yaml.rb', line 30

# Normalise all strings in +labels+, then resolve self-references in them.
def postprocess(labels)
  normalised = normalise_hash(labels)
  self_reference_resolve(normalised)
end

#resolve_path(path_expr, labels, original_expr) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/isodoc/i18n-yaml.rb', line 60

# Follow +path_expr+ through +labels+ and return the value found, as a
# String.
#
# Hash levels are entered by key, array levels by a non-negative in-range
# decimal index. Raises RuntimeError (quoting +original_expr+) if a key is
# missing, an index is invalid, or the path continues past a scalar.
def resolve_path(path_expr, labels, original_expr)
  found = parse_path(path_expr).reduce(labels) do |node, segment|
    if node.is_a?(Hash)
      unless node.key?(segment)
        raise "Self-reference error: Path '#{original_expr}' not found - key '#{segment}' does not exist"
      end

      node[segment]
    elsif node.is_a?(Array)
      index = segment.to_i
      in_range = segment =~ /^\d+$/ && index >= 0 && index < node.length
      unless in_range
        raise "Self-reference error: Path '#{original_expr}' not found - invalid array index '#{segment}'"
      end

      node[index]
    else
      raise "Self-reference error: Path '#{original_expr}' not found - cannot navigate through non-collection type"
    end
  end

  found.to_s
end

#resolve_references(obj, labels) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/isodoc/i18n-yaml.rb', line 38

# Walk +obj+ recursively and resolve self-references in every string it
# contains, looking paths up in +labels+. Hashes and arrays are rebuilt;
# other values are returned as they are.
def resolve_references(obj, labels)
  if obj.is_a?(Hash)
    return obj.transform_values { |value| resolve_references(value, labels) }
  end
  return obj.map { |element| resolve_references(element, labels) } if obj.is_a?(Array)
  return resolve_string_references(obj, labels) if obj.is_a?(String)

  obj
end

#resolve_string_references(str, labels) ⇒ Object



51
52
53
54
55
56
57
58
# File 'lib/isodoc/i18n-yaml.rb', line 51

# Replace each `#{self...}` expression in +str+ with the value its path
# resolves to in +labels+.
def resolve_string_references(str, labels)
  # Matches #{self["key"]["subkey"]} as well as #{self.key.subkey};
  # whitespace around the self expression is tolerated
  reference = /\#\{\s*self([^}]+?)\s*\}/
  str.gsub(reference) do |whole|
    resolve_path(Regexp.last_match(1), labels, whole)
  end
end

#self_reference_resolve(labels) ⇒ Object



34
35
36
# File 'lib/isodoc/i18n-yaml.rb', line 34

# Resolve self-references inside +labels+, using +labels+ itself as the
# lookup table.
def self_reference_resolve(labels)
  resolve_references(labels, labels)
end

#set(key, val) ⇒ Object



133
134
135
# File 'lib/isodoc/i18n-yaml.rb', line 133

# Store +val+ under +key+ in the label hash. Returns +val+.
def set(key, val)
  @labels[key] = val
end

#to_xml(node) ⇒ Object



159
160
161
162
# File 'lib/isodoc/l10n.rb', line 159

# Serialise +node+ as UTF-8 XML without indentation; nil stays nil.
def to_xml(node)
  return nil if node.nil?

  save_options = Nokogiri::XML::Node::SaveOptions::AS_XML
  node.to_xml(encoding: "UTF-8", indent: 0, save_with: save_options)
end

#tw_cldr_langObject



124
125
126
127
128
129
# File 'lib/isodoc/i18n.rb', line 124

# Locale symbol to hand to TwitterCldr: Chinese maps to :"zh-cn" (Hans) or
# :"zh-tw" (Hant); anything else is @lang as a symbol.
def tw_cldr_lang
  return @lang.to_sym unless @lang == "zh"

  case @script
  when "Hans" then :"zh-cn"
  when "Hant" then :"zh-tw"
  else @lang.to_sym
  end
end

#tw_cldr_localize(num) ⇒ Object



100
101
102
103
104
# File 'lib/isodoc/i18n.rb', line 100

# Localise +num+ for the current language, falling back to English if that
# raises StandardError.
def tw_cldr_localize(num)
  target = tw_cldr_lang
  num.localize(target)
rescue StandardError
  num.localize(:en)
end