Class: Unisec::Planes

Inherits:
Object
  • Object
show all
Defined in:
lib/unisec/planes.rb

Overview

Operations about Unicode planes

Constant Summary collapse

# Data about the 17 Unicode planes, indexed by plane number (index 0 is plane 0).
#
# Each entry holds the code point `range` covered by the plane and its `name`.
# Names are plain ASCII: the soft hyphens (U+00AD) that used to be embedded in
# "Supplementary" for planes 14 to 16 were invisible and made look-ups with a
# normally typed name fail.
# The last two entries keep "supplementary" and "planes" in lower case, so their
# only uppercase letters are P, U and A.
# Only the outer array is frozen, the plane hashes themselves are not.
PLANES = [
  { range: 0x0..0xffff, name: 'Basic Multilingual Plane' },
  { range: 0x10000..0x1ffff, name: 'Supplementary Multilingual Plane' },
  { range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' },
  { range: 0x30000..0x3ffff, name: 'Tertiary Ideographic Plane' },
  { range: 0x40000..0x4ffff, name: 'unassigned' },
  { range: 0x50000..0x5ffff, name: 'unassigned' },
  { range: 0x60000..0x6ffff, name: 'unassigned' },
  { range: 0x70000..0x7ffff, name: 'unassigned' },
  { range: 0x80000..0x8ffff, name: 'unassigned' },
  { range: 0x90000..0x9ffff, name: 'unassigned' },
  { range: 0xa0000..0xaffff, name: 'unassigned' },
  { range: 0xb0000..0xbffff, name: 'unassigned' },
  { range: 0xc0000..0xcffff, name: 'unassigned' },
  { range: 0xd0000..0xdffff, name: 'unassigned' },
  { range: 0xe0000..0xeffff, name: 'Supplementary Special-purpose Plane' },
  { range: 0xf0000..0xfffff, name: 'supplementary Private Use Area planes' },
  { range: 0x100000..0x10ffff, name: 'supplementary Private Use Area planes' }
].freeze

Class Method Summary collapse

Class Method Details

.abbr(name) ⇒ String

Abbreviate a plane name (based on uppercase letters)

Examples:

Unisec::Planes.abbr('Basic Multilingual Plane') # => "BMP"
Unisec::Planes.abbr('supplement­ary Private Use Area planes') # => "PUA"

Parameters:

Returns:

  • (String)

    plane abbreviation



145
146
147
# File 'lib/unisec/planes.rb', line 145

# Abbreviate a plane name by keeping only its uppercase letters.
#
# @param name [String] plane name
# @return [String] the uppercase letters of +name+ in their original order
#   (empty string when there is none)
# @example
#   Unisec::Planes.abbr('Basic Multilingual Plane') # => "BMP"
def self.abbr(name)
  capitals = name.each_char.grep(/\p{Upper}/)
  capitals.join
end

.block(block_arg) ⇒ String

Returns the name of the Unicode plane containing the given block.

Examples:

Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"

Parameters:

  • block_arg (String)

    Block name (case insensitive).

Returns:

  • (String)

    Plane name or empty string if not found.



264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/unisec/planes.rb', line 264

# Name of the Unicode plane containing the given block.
#
# Only look-ups by block name are supported: an Integer, a one-character String
# or a String starting with "U+" yields an empty string without querying Blocks.
#
# @param block_arg [String] block name, handed over to Blocks.block
# @return [String] name of the first plane whose range covers the whole block
#   range, or an empty string when the block or the plane is not found
def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
  case block_arg
  when Integer
    return ''
  when String
    return '' if block_arg.size == 1 || block_arg.start_with?('U+')
  end

  blk = Blocks.block(block_arg, with_count: false)
  return '' unless blk # block name not found

  owner = PLANES.find { |plane| plane[:range].cover?(blk[:range]) }
  owner ? owner[:name] : ''
end

.block_display(block_arg) ⇒ Object

Display a CLI-friendly output showing the plane name for a given block.

Parameters:

  • block_arg (String)

    Block name (case insensitive).



280
281
282
283
284
285
286
287
288
# File 'lib/unisec/planes.rb', line 280

# Print the name of the plane containing the given block, or a
# "no plane found" notice when the look-up yields an empty name.
#
# @param block_arg [String] block name, handed over to {block}
# @return [nil]
def self.block_display(block_arg)
  found = block(block_arg)
  message = found.empty? ? "no plane found for block #{block_arg.inspect}" : found
  puts message
  nil
end

.list(with_count: false) ⇒ Array<Hash>

List all Unicode planes with their range, name and blocks

Examples:

Unisec::Planes.list # =>
# [{range: 0..65535,
#   name: "Basic Multilingual Plane",
#   blocks:
#    [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil},
#     {range: 128..255, name: "Latin-1 Supplement", range_size: nil, char_count: nil},
# […]

Parameters:

  • with_count (TrueClass|FalseClass) (defaults to: false)

    calculate block's range size & char count? (warning: very slow, very unoptimized, see Blocks.list)

Returns:

  • (Array<Hash>)

    one hash per plane with its range, name and the list of its blocks (each block has a range and a name, plus its range size and character count when with_count is true)



42
43
44
45
46
# File 'lib/unisec/planes.rb', line 42

# List every plane of PLANES together with the blocks it contains.
#
# @param with_count [TrueClass|FalseClass] forwarded to {plane2blocks}
# @return [Array<Hash>] one new hash per plane: the PLANES entry merged with a
#   +:blocks+ key (the PLANES entries themselves are left untouched)
def self.list(with_count: false)
  blocks_by_plane = plane2blocks(PLANES, with_count: with_count)
  PLANES.each_with_index.map do |plane_info, idx|
    plane_info.merge(blocks: blocks_by_plane[idx])
  end
end

.list_display(with_blocks: false, with_count: false) ⇒ nil

Display a CLI-friendly output listing all planes

Examples:

Unisec::Planes.list_display(with_blocks: true, with_count: false)
# Range: U+0000 - U+FFFF      Name: Basic Multilingual Plane
#   Blocks:
#     Range: U+0000 - U+007F      Name: Basic Latin
#     Range: U+0080 - U+00FF      Name: Latin-1 Supplement
#     Range: U+0100 - U+017F      Name: Latin Extended-A
# […]

Parameters:

  • with_blocks (TrueClass|FalseClass) (defaults to: false)

    display the blocks associated with each plane

  • with_count (TrueClass|FalseClass) (defaults to: false)

    calculate block's range size & char count? (see Blocks.list)

Returns:

  • (nil)


161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/unisec/planes.rb', line 161

# Print every plane (range and name) and, on request, the blocks of each plane.
#
# Plane fields are painted red, block fields magenta, both in bold. Each field
# is printed as the painted label followed by the value left-justified to a
# fixed width.
#
# @param with_blocks [TrueClass|FalseClass] also print the blocks of each plane
# @param with_count [TrueClass|FalseClass] forwarded to {list}; when blocks are
#   printed, also print each block's range size and char count
# @return [nil]
def self.list_display(with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  emit = lambda do |color, label, value, width|
    print Paint[label, color, :bold] + " #{value}".ljust(width)
  end
  list(with_count: with_count).each do |pla|
    emit.call(:red, 'Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
    emit.call(:red, 'Name:', pla[:name], 50)
    if with_blocks
      puts
      emit.call(:red, '  Blocks:', "\n", 0) # the value only carries the line break
      pla[:blocks].each do |blk|
        emit.call(:magenta, '    Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
        emit.call(:magenta, 'Name:', blk[:name], 50)
        if with_count
          emit.call(:magenta, 'Range size:', blk[:range_size], 8)
          emit.call(:magenta, 'Char count:', blk[:char_count], 0)
        end
        puts
      end
    end
    puts
  end
  nil
end

.plane(plane_arg, with_count: false) ⇒ Hash|Array<Hash>|nil

List details about target plane including the list of associated blocks

Examples:

Unisec::Planes.plane(3) # =>
# {range: 196608..262143,
#  name: "Tertiary Ideographic Plane",
#  blocks:
#   [{range: 196608..201551, name: "CJK Unified Ideographs Extension G", range_size: nil, char_count: nil},
#    {range: 201552..205743, name: "CJK Unified Ideographs Extension H", range_size: nil, char_count: nil},
#    {range: 205744..210047, name: "CJK Unified Ideographs Extension J", range_size: nil, char_count: nil}]}
Unisec::Planes.plane('Supplementary Ideographic Plane') # =>
# {range: 131072..196607,
#  name: "Supplementary Ideographic Plane",
#  blocks:
#   [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
#    {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
#    {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
#    {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
#    {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
#    {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
#    {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]}
Unisec::Planes.plane('unassigned') # =>
# [{range: 262144..327679, name: "unassigned", blocks: []},
#  {range: 327680..393215, name: "unassigned", blocks: []},
#  {range: 393216..458751, name: "unassigned", blocks: []},
#  {range: 458752..524287, name: "unassigned", blocks: []},
#  {range: 524288..589823, name: "unassigned", blocks: []},
#  {range: 589824..655359, name: "unassigned", blocks: []},
#  {range: 655360..720895, name: "unassigned", blocks: []},
#  {range: 720896..786431, name: "unassigned", blocks: []},
#  {range: 786432..851967, name: "unassigned", blocks: []},
#  {range: 851968..917503, name: "unassigned", blocks: []}]

Parameters:

  • plane_arg (String|Integer)

    name or number of the plane

  • with_count (TrueClass|FalseClass) (defaults to: false)

    calculate block's range size & char count? (see Blocks.list)

Returns:

  • (Hash|Array<Hash>|nil)

    nil if no match, Hash of the plane if one match, Array of planes' Hash if several matches



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/unisec/planes.rb', line 83

# List details about the target plane, including its associated blocks.
#
# The returned hashes are new objects: the entries of PLANES are never modified
# (assigning +:blocks+ directly on a match would alter the shared constant).
#
# @param plane_arg [String|Integer] plane name (case insensitive, exact match)
#   or plane number, used as an index into PLANES (so, as with Array#[], a
#   negative number counts from the end)
# @param with_count [TrueClass|FalseClass] forwarded to {plane2blocks}
# @return [Hash|Array<Hash>|nil] nil if no match, Hash of the plane if one
#   match, Array of planes' Hash if several matches
# @raise [ArgumentError] if +plane_arg+ is neither an Integer nor a String
def self.plane(plane_arg, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
  matches =
    case plane_arg
    when Integer # search by plane number
      PLANES[plane_arg]
    when String # search by plane name
      wanted = plane_arg.downcase
      found = PLANES.select { |pl| pl[:name].downcase == wanted }
      found.size == 1 ? found.first : found # Hash if one, Array of Hash otherwise
    else
      raise ArgumentError, "plane_arg must be an Integer or a String, got #{plane_arg.class}"
    end

  # Enrich plane data with blocks; an unknown number (nil) falls through to nil
  case matches
  when Hash # one plane: merge into a copy instead of writing into PLANES
    matches.merge(blocks: plane2blocks(matches, with_count: with_count))
  when Array # several planes, or none when the name is unknown
    return nil if matches.empty?

    matches.zip(plane2blocks(matches, with_count: with_count)).map do |base, extra|
      base.merge(blocks: extra)
    end
  end
end

.plane2blocks(plane, with_count: false) ⇒ Array<Hash>

Find the blocks included in a given plane

Examples:

Unisec::Planes.plane2blocks({ range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' }) # =>
# [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
#  {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
#  {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
#  {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
#  {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
#  {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
#  {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]

Parameters:

  • plane (Hash|Array<Hash>)

    plane hash or array of plane hash

  • with_count (TrueClass|FalseClass) (defaults to: false)

    calculate block's range size & char count? (see Blocks.list)

Returns:

  • (Array<Hash>)

    blocks included in the plane (for an array of planes: one array of blocks per plane, in the same order)



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/unisec/planes.rb', line 122

# Find the blocks included in a given plane, or in each plane of a list.
#
# The block list is read from Unisec::Blocks.list lazily and at most once per
# call, then reused for every plane of an Array argument, instead of being
# recomputed for each plane.
#
# @param plane [Hash|Array<Hash>] plane hash (its +:range+ is used) or array of
#   plane hashes
# @param with_count [TrueClass|FalseClass] forwarded to Unisec::Blocks.list
# @return [Array<Hash>|Array<Array<Hash>>] for a Hash: the blocks whose range is
#   included in the plane range; for an Array: one such list per element, in
#   the same order
# @raise [ArgumentError] if +plane+ (or an element of the array) is neither a
#   Hash nor an Array
def self.plane2blocks(plane, with_count: false)
  all_blocks = nil # result of Unisec::Blocks.list, fetched on first use only
  collect = lambda do |target|
    case target
    when Hash
      all_blocks ||= Unisec::Blocks.list(with_count: with_count)
      # NOTE(review): include_range? is not a core Range method, presumably a
      # project/dependency extension — confirm
      all_blocks.select { |block| target[:range].include_range?(block[:range]) }
    when Array
      target.map { |pl| collect.call(pl) }
    else
      raise ArgumentError, "expected a plane Hash or an Array of plane Hash, got #{target.class}"
    end
  end
  collect.call(plane)
end

.plane_display(plane_arg, with_blocks: false, with_count: false) ⇒ nil

Display a CLI-friendly output searching for a plane

Examples:

Unisec::Planes.plane_display(3, with_blocks: true)
# Range: U+30000 - U+3FFFF    Name: Tertiary Ideographic Plane
#   Blocks:
#     Range: U+30000 - U+3134F    Name: CJK Unified Ideographs Extension G
#     Range: U+31350 - U+323AF    Name: CJK Unified Ideographs Extension H
#     Range: U+323B0 - U+3347F    Name: CJK Unified Ideographs Extension J

Parameters:

  • plane_arg (String|Integer)

    name or number of the plane

  • with_blocks (TrueClass|FalseClass) (defaults to: false)

    display the blocks associated with each plane

  • with_count (TrueClass|FalseClass) (defaults to: false)

    calculate block's range size & char count? (see Blocks.list)

Returns:

  • (nil)


198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/unisec/planes.rb', line 198

# Print the plane(s) matching the search term (range and name) and, on request,
# the blocks of each plane.
#
# When {plane} finds nothing (nil), a "no plane found" notice is printed
# instead of iterating over nil.
# Plane fields are painted red, block fields magenta, both in bold.
#
# @param plane_arg [String|Integer] name or number of the plane, handed over to {plane}
# @param with_blocks [TrueClass|FalseClass] also print the blocks of each plane
# @param with_count [TrueClass|FalseClass] forwarded to {plane}; when blocks are
#   printed, also print each block's range size and char count
# @return [nil]
def self.plane_display(plane_arg, with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  planes = plane(plane_arg, with_count: with_count)
  if planes.nil? # no match: nothing to iterate over
    puts "no plane found for #{plane_arg.inspect}"
    return nil
  end
  planes = [planes] if planes.is_a?(Hash) # single match: handle it like a list of one
  display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
  display_blk = ->(key, value, just) { print Paint[key, :magenta, :bold] + " #{value}".ljust(just) }
  planes.each do |pla|
    display.call('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
    display.call('Name:', pla[:name], 50)
    if with_blocks
      puts
      display.call('  Blocks:', "\n", 0) # the value only carries the line break
      pla[:blocks].each do |block|
        display_blk.call('    Range:', Utils::Range.range2codepoint_range(block[:range]), 22)
        display_blk.call('Name:', block[:name], 50)
        if with_count
          display_blk.call('Range size:', block[:range_size], 8)
          display_blk.call('Char count:', block[:char_count], 0)
        end
        puts
      end
    end
    puts
  end
  nil
end

.reverse(char) ⇒ String

Returns the name of the Unicode plane containing the given character.

Examples:

Unisec::Planes.reverse('A') # => "Basic Multilingual Plane"
Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)

Parameters:

  • char (String)

    Single character (only one code point is used, so be careful with emojis, composed or joined characters made of several code points: only the first code point will be kept).

Returns:

  • (String)

    Plane name or empty string if not found.



234
235
236
237
238
239
240
241
242
# File 'lib/unisec/planes.rb', line 234

# Name of the Unicode plane containing the given character.
#
# Only the first character of the string (+char[0]+) is looked up.
#
# @param char [String] character to look up
# @return [String] name of the first plane whose range includes the code point,
#   or an empty string when +char+ is not a String, is empty, or no plane matches
def self.reverse(char)
  return '' unless char.is_a?(String)
  return '' if char.empty? # char[0] would be nil: there is nothing to convert

  cp = Utils::String.convert_to_integer(char[0])
  owner = PLANES.find { |plane| plane[:range].include?(cp) }
  owner ? owner[:name] : ''
end

.reverse_display(char) ⇒ Object

Display a CLI-friendly output showing the plane name for a given character.

Parameters:

  • char (String)

    Single character (only one code point is used, so be careful with emojis, composed or joined characters made of several code points: only the first code point will be kept).



248
249
250
251
252
253
254
255
256
# File 'lib/unisec/planes.rb', line 248

# Print the name of the plane containing the given character, or a
# "no plane found" notice when the look-up yields an empty name.
#
# @param char [String] character, handed over to {reverse}
# @return [nil]
def self.reverse_display(char)
  found = reverse(char)
  message = found.empty? ? "no plane found for #{char.inspect}" : found
  puts message
  nil
end