Class: Yosina::Transliterators::JapaneseIterationMarks::Transliterator

Inherits:
BaseTransliterator
  • Object
show all
Includes:
CharType
Defined in:
lib/yosina/transliterators/japanese_iteration_marks.rb

Overview

Transliterator for Japanese iteration marks

Instance Method Summary collapse

Methods included from CharType

#hatsuon?, #hiragana?, #iteration_mark?, #kanji?, #katakana?, #semi_voiced?, #sokuon?, #voiced?

Constructor Details

#initialize(_options = {}) ⇒ Transliterator

Initialize the transliterator with options

Parameters:

  • _options (Hash)

    Configuration options (currently unused but kept for consistency)



120
121
122
# File 'lib/yosina/transliterators/japanese_iteration_marks.rb', line 120

# Initialize the transliterator.
#
# @param _options [Hash] configuration options; not read by this constructor
def initialize(_options = {})
  # The explicit empty parentheses invoke the parent initializer with no
  # arguments, so _options is not forwarded.
  super()
end

Instance Method Details

#call(input_chars) ⇒ Enumerable<Char>

Replace iteration marks with appropriate repeated characters

Parameters:

  • input_chars (Enumerable<Char>)

    The characters to transliterate

Returns:

  • (Enumerable<Char>)

    The transliterated characters



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/yosina/transliterators/japanese_iteration_marks.rb', line 128

# Replace iteration marks with the character they repeat.
#
# Each iteration mark is resolved against the most recent repeatable
# character (as classified by get_char_type):
#
# * a plain hiragana/katakana mark repeats the previous kana, unvoicing it
#   when the previous kana is voiced;
# * a voiced hiragana/katakana mark repeats the previous kana in voiced form
#   (a kana with no entry in the voicing table leaves the mark untouched);
# * the kanji mark repeats the previous kanji.
#
# A mark is emitted unchanged when it directly follows another iteration
# mark, when there is no repeatable previous character, or when the previous
# character's type does not match the mark's script.
#
# @param input_chars [Enumerable<Char>] the characters to transliterate
# @return [Enumerable<Char>] the transliterated characters, with offsets
#   recomputed from the emitted text
def call(input_chars)
  Chars.enum do |y|
    # The scan state is created inside the block so that every run of the
    # block starts from a clean slate. Keeping it outside would let the
    # offset and the "previous character" of one pass leak into the next
    # pass whenever the block is executed again.
    offset = 0
    prev_char_info = nil
    prev_was_iteration_mark = false

    input_chars.each do |char|
      current_char = char.c

      # Empty (sentinel) entries are forwarded as-is and do not touch the
      # scan state.
      if current_char.empty?
        y << char
        next
      end

      unless iteration_mark?(current_char)
        y << char.with_offset(offset)
        offset += current_char.length

        # Remember this character only if it is something a mark may repeat;
        # anything else clears the memory so a following mark stays as-is.
        char_type = get_char_type(current_char, current_char.ord)
        prev_char_info = ({ char: current_char, type: char_type } if char_type && char_type != :other)
        prev_was_iteration_mark = false
        next
      end

      # From here on current_char is an iteration mark. A mark that directly
      # follows another mark is never replaced.
      replacement = nil
      if prev_char_info && !prev_was_iteration_mark
        prev_char = prev_char_info[:char]
        prev_type = prev_char_info[:type]

        replacement =
          case current_char
          when HIRAGANA_ITERATION_MARK, VERTICAL_HIRAGANA_ITERATION_MARK
            case prev_type
            when :hiragana then prev_char
            # Voiced character with unvoiced mark: unvoice it
            when :hiragana_voiced then HIRAGANA_UNVOICING[prev_char]
            end
          when HIRAGANA_VOICED_ITERATION_MARK, VERTICAL_HIRAGANA_VOICED_ITERATION_MARK
            case prev_type
            when :hiragana then HIRAGANA_VOICING[prev_char]
            # Voiced character with voiced mark: keep it voiced
            when :hiragana_voiced then prev_char
            end
          when KATAKANA_ITERATION_MARK, VERTICAL_KATAKANA_ITERATION_MARK
            case prev_type
            when :katakana then prev_char
            # Voiced character with unvoiced mark: unvoice it
            when :katakana_voiced then KATAKANA_UNVOICING[prev_char]
            end
          when KATAKANA_VOICED_ITERATION_MARK, VERTICAL_KATAKANA_VOICED_ITERATION_MARK
            case prev_type
            when :katakana then KATAKANA_VOICING[prev_char]
            # Voiced character with voiced mark: keep it voiced
            when :katakana_voiced then prev_char
            end
          when KANJI_ITERATION_MARK
            prev_char if prev_type == :kanji
          end
      end

      if replacement
        # Emit the repeated character, keeping the mark as its source.
        y << Char.new(c: replacement, offset: offset, source: char)
        offset += replacement.length
      else
        # Couldn't replace the iteration mark
        y << char.with_offset(offset)
        offset += current_char.length
      end

      # prev_char_info is deliberately left untouched by marks; only
      # non-mark characters update it.
      prev_was_iteration_mark = true
    end
  end
end