Module: Yosina::Transliterators::Jisx0201AndAlike

Includes:
HiraKataTable
Defined in:
lib/yosina/transliterators/jisx0201_and_alike.rb

Overview

JIS X 0201 and alike transliterator for fullwidth/halfwidth conversion

Defined Under Namespace

Classes: Transliterator

Constant Summary collapse

JISX0201_GL_TABLE =

GL area mapping table (fullwidth to halfwidth)

[
  # Each entry is a [fullwidth, halfwidth] pair.
  ["\u3000", "\u0020"], # Ideographic space to space
  ["\uff01", "\u0021"], # ！ to !
  ["\uff02", "\u0022"], # ＂ to "
  ["\uff03", "\u0023"], # ＃ to #
  ["\uff04", "\u0024"], # ＄ to $
  ["\uff05", "\u0025"], # ％ to %
  ["\uff06", "\u0026"], # ＆ to &
  ["\uff07", "\u0027"], # ＇ to '
  ["\uff08", "\u0028"], # （ to (
  ["\uff09", "\u0029"], # ） to )
  ["\uff0a", "\u002a"], # ＊ to *
  ["\uff0b", "\u002b"], # ＋ to +
  ["\uff0c", "\u002c"], # ， to ,
  ["\uff0d", "\u002d"], # － to -
  ["\uff0e", "\u002e"], # ． to .
  ["\uff0f", "\u002f"], # ／ to /
  ["\uff10", "\u0030"], # ０ to 0
  ["\uff11", "\u0031"], # １ to 1
  ["\uff12", "\u0032"], # ２ to 2
  ["\uff13", "\u0033"], # ３ to 3
  ["\uff14", "\u0034"], # ４ to 4
  ["\uff15", "\u0035"], # ５ to 5
  ["\uff16", "\u0036"], # ６ to 6
  ["\uff17", "\u0037"], # ７ to 7
  ["\uff18", "\u0038"], # ８ to 8
  ["\uff19", "\u0039"], # ９ to 9
  ["\uff1a", "\u003a"], # ： to :
  ["\uff1b", "\u003b"], # ； to ;
  ["\uff1c", "\u003c"], # ＜ to <
  ["\uff1d", "\u003d"], # ＝ to =
  ["\uff1e", "\u003e"], # ＞ to >
  ["\uff1f", "\u003f"], # ？ to ?
  ["\uff20", "\u0040"], # ＠ to @
  ["\uff21", "\u0041"], # Ａ to A
  ["\uff22", "\u0042"], # Ｂ to B
  ["\uff23", "\u0043"], # Ｃ to C
  ["\uff24", "\u0044"], # Ｄ to D
  ["\uff25", "\u0045"], # Ｅ to E
  ["\uff26", "\u0046"], # Ｆ to F
  ["\uff27", "\u0047"], # Ｇ to G
  ["\uff28", "\u0048"], # Ｈ to H
  ["\uff29", "\u0049"], # Ｉ to I
  ["\uff2a", "\u004a"], # Ｊ to J
  ["\uff2b", "\u004b"], # Ｋ to K
  ["\uff2c", "\u004c"], # Ｌ to L
  ["\uff2d", "\u004d"], # Ｍ to M
  ["\uff2e", "\u004e"], # Ｎ to N
  ["\uff2f", "\u004f"], # Ｏ to O
  ["\uff30", "\u0050"], # Ｐ to P
  ["\uff31", "\u0051"], # Ｑ to Q
  ["\uff32", "\u0052"], # Ｒ to R
  ["\uff33", "\u0053"], # Ｓ to S
  ["\uff34", "\u0054"], # Ｔ to T
  ["\uff35", "\u0055"], # Ｕ to U
  ["\uff36", "\u0056"], # Ｖ to V
  ["\uff37", "\u0057"], # Ｗ to W
  ["\uff38", "\u0058"], # Ｘ to X
  ["\uff39", "\u0059"], # Ｙ to Y
  ["\uff3a", "\u005a"], # Ｚ to Z
  ["\uff3b", "\u005b"], # ［ to [
  # U+FF3C (fullwidth reverse solidus) has no entry here; it appears only in
  # JISX0201_GL_OVERRIDES (u005c_as_backslash).
  ["\uff3d", "\u005d"], # ］ to ]
  ["\uff3e", "\u005e"], # ＾ to ^
  ["\uff3f", "\u005f"], # ＿ to _
  ["\uff40", "\u0060"], # ｀ to `
  ["\uff41", "\u0061"], # ａ to a
  ["\uff42", "\u0062"], # ｂ to b
  ["\uff43", "\u0063"], # ｃ to c
  ["\uff44", "\u0064"], # ｄ to d
  ["\uff45", "\u0065"], # ｅ to e
  ["\uff46", "\u0066"], # ｆ to f
  ["\uff47", "\u0067"], # ｇ to g
  ["\uff48", "\u0068"], # ｈ to h
  ["\uff49", "\u0069"], # ｉ to i
  ["\uff4a", "\u006a"], # ｊ to j
  ["\uff4b", "\u006b"], # ｋ to k
  ["\uff4c", "\u006c"], # ｌ to l
  ["\uff4d", "\u006d"], # ｍ to m
  ["\uff4e", "\u006e"], # ｎ to n
  ["\uff4f", "\u006f"], # ｏ to o
  ["\uff50", "\u0070"], # ｐ to p
  ["\uff51", "\u0071"], # ｑ to q
  ["\uff52", "\u0072"], # ｒ to r
  ["\uff53", "\u0073"], # ｓ to s
  ["\uff54", "\u0074"], # ｔ to t
  ["\uff55", "\u0075"], # ｕ to u
  ["\uff56", "\u0076"], # ｖ to v
  ["\uff57", "\u0077"], # ｗ to w
  ["\uff58", "\u0078"], # ｘ to x
  ["\uff59", "\u0079"], # ｙ to y
  ["\uff5a", "\u007a"], # ｚ to z
  ["\uff5b", "\u007b"], # ｛ to {
  ["\uff5c", "\u007c"], # ｜ to |
  ["\uff5d", "\u007d"] # ｝ to }
  # U+FF5E (fullwidth tilde) likewise has no entry here; it appears only in
  # JISX0201_GL_OVERRIDES (u007e_as_fullwidth_tilde).
].freeze
JISX0201_GL_OVERRIDES =

Special GL overrides

{
  # Each key names one override; each value is a list of [source, halfwidth]
  # pairs in the same shape as JISX0201_GL_TABLE.
  # NOTE(review): presumably each key is enabled by a same-named transliterator
  # option — confirm against Transliterator.
  # u005c_as_yen_sign and u00a5_as_yen_sign share the source U+FFE5 but map it
  # to different targets (U+005C vs U+00A5).
  u005c_as_yen_sign: [["\uffe5", "\u005c"]], # ￥ to \
  u005c_as_backslash: [["\uff3c", "\u005c"]], # ＼ to \
  u007e_as_fullwidth_tilde: [["\uff5e", "\u007e"]], # ～ to ~
  u007e_as_wave_dash: [["\u301c", "\u007e"]], # 〜 to ~
  u007e_as_overline: [["\u203e", "\u007e"]], # ‾ to ~
  u007e_as_fullwidth_macron: [["\uffe3", "\u007e"]], # ￣ to ~
  u00a5_as_yen_sign: [["\uffe5", "\u00a5"]] # ￥ to ¥
}.freeze
JISX0201_GR_TABLE =

GR area mapping table (fullwidth to halfwidth)

generate_gr_table.freeze
SPECIAL_PUNCTUATIONS_TABLE =

Special punctuations

[["\u30a0", "\u003d"]].freeze # ゠ (U+30A0, katakana-hiragana double hyphen) to =
VOICED_LETTERS_TABLE =

Voiced letters table

generate_voiced_letters_table.freeze
HIRAGANA_MAPPINGS =

Hiragana mappings

generate_hiragana_mappings.freeze

Constants included from HiraKataTable

HiraKataTable::HIRAGANA_KATAKANA_SMALL_TABLE, HiraKataTable::HIRAGANA_KATAKANA_TABLE, HiraKataTable::SEMI_VOICED_CHARACTERS, HiraKataTable::VOICED_CHARACTERS

Class Method Summary collapse

Methods included from HiraKataTable

generate_semi_voiced_characters, generate_voiced_characters

Class Method Details

.call(options = {}) ⇒ Transliterator

Factory method to create a JIS X 0201 and alike transliterator

Parameters:

  • options (Hash) (defaults to: {})

    Configuration options

Returns:

  • (Transliterator)

    A new JIS X 0201 and alike transliterator instance



446
447
448
# File 'lib/yosina/transliterators/jisx0201_and_alike.rb', line 446

# Factory method to create a JIS X 0201 and alike transliterator.
#
# @param options [Hash] configuration options, handed unchanged to
#   Transliterator.new
# @return [Transliterator] a new transliterator instance
def self.call(options = {})
  Transliterator.new(options)
end

.generate_gr_table ⇒ Object

Generate GR table from shared table



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/yosina/transliterators/jisx0201_and_alike.rb', line 121

# Builds the GR-area list of [fullwidth, halfwidth] pairs.
#
# The result starts with eight fixed punctuation / sound-mark pairs, followed
# by one pair per HIRAGANA_KATAKANA_TABLE row that has a halfwidth form (the
# row's first katakana entry is used as the source), followed by one pair per
# HIRAGANA_KATAKANA_SMALL_TABLE row that has a halfwidth form.
#
# @return [Array<Array>] a newly built, unfrozen array on every call
def self.generate_gr_table
  fixed_pairs = [
    ["\u3002", "\uff61"], # 。 to ｡
    ["\u300c", "\uff62"], # 「 to ｢
    ["\u300d", "\uff63"], # 」 to ｣
    ["\u3001", "\uff64"], # 、 to ､
    ["\u30fb", "\uff65"], # ・ to ･
    ["\u30fc", "\uff70"], # ー to ｰ
    ["\u309b", "\uff9e"], # ゛ to ﾞ
    ["\u309c", "\uff9f"] # ゜ to ﾟ
  ]

  # Rows without a halfwidth form are dropped before any pair is built.
  katakana_pairs = HIRAGANA_KATAKANA_TABLE
                   .select { |_, _, halfwidth| halfwidth }
                   .map { |_, katakana, halfwidth| [katakana[0], halfwidth] }

  # Small-kana rows hold the katakana directly rather than in a nested list.
  small_kana_pairs = HIRAGANA_KATAKANA_SMALL_TABLE
                     .select { |_, _, halfwidth| halfwidth }
                     .map { |_, katakana, halfwidth| [katakana, halfwidth] }

  fixed_pairs + katakana_pairs + small_kana_pairs
end

.generate_hiragana_mappings ⇒ Object

Generate hiragana mappings from shared table



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/yosina/transliterators/jisx0201_and_alike.rb', line 165

# Builds the list of [hiragana, halfwidth] pairs.
#
# For every HIRAGANA_KATAKANA_TABLE row that has both a base hiragana (index 0)
# and a halfwidth form, emits the base pair, then — when present — the voiced
# form (index 1) mapped to halfwidth + U+FF9E and the semi-voiced form
# (index 2) mapped to halfwidth + U+FF9F. Small-kana rows with a halfwidth form
# are appended afterwards.
#
# @return [Array<Array>] a newly built, unfrozen array on every call
def self.generate_hiragana_mappings
  main_pairs = HIRAGANA_KATAKANA_TABLE.flat_map do |hiragana, _, halfwidth|
    base = hiragana[0]
    # Rows lacking either side contribute nothing.
    next [] unless base && halfwidth

    row_pairs = [[base, halfwidth]]
    row_pairs << [hiragana[1], "#{halfwidth}\uff9e"] if hiragana[1] # voiced form
    row_pairs << [hiragana[2], "#{halfwidth}\uff9f"] if hiragana[2] # semi-voiced form
    row_pairs
  end

  small_pairs = HIRAGANA_KATAKANA_SMALL_TABLE
                .select { |_, _, halfwidth| halfwidth }
                .map { |hiragana, _, halfwidth| [hiragana, halfwidth] }

  main_pairs + small_pairs
end

.generate_voiced_letters_table ⇒ Object

Generate voiced letters table from shared table



150
151
152
153
154
155
156
157
158
159
# File 'lib/yosina/transliterators/jisx0201_and_alike.rb', line 150

# Builds the list of [voiced-or-semi-voiced katakana, halfwidth sequence] pairs.
#
# For every HIRAGANA_KATAKANA_TABLE row that has a halfwidth form, the voiced
# katakana (index 1), when present, maps to halfwidth + U+FF9E, and the
# semi-voiced katakana (index 2), when present, maps to halfwidth + U+FF9F.
#
# @return [Array<Array>] a newly built, unfrozen array on every call
def self.generate_voiced_letters_table
  HIRAGANA_KATAKANA_TABLE.each_with_object([]) do |(_, katakana, halfwidth), pairs|
    # No halfwidth base means there is nothing to attach a sound mark to.
    next unless halfwidth

    voiced = katakana[1]
    semi_voiced = katakana[2]
    pairs << [voiced, "#{halfwidth}\uff9e"] if voiced
    pairs << [semi_voiced, "#{halfwidth}\uff9f"] if semi_voiced
  end
end