Class: Unisec::Properties

Inherits:
Object
  • Object
show all
Defined in:
lib/unisec/properties.rb

Overview

Manipulate Unicode properties

Class Method Summary collapse

Class Method Details

.char(chr) ⇒ Hash

Returns all properties of a given unicode character (code point)

Examples:

Unisec::Properties.char('é')
# =>
# {:age=>"1.1",
# … }

Parameters:

  • chr (String)

    Unicode code point (as character / string)

Returns:

  • (Hash)

    All properties of the given code point



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/unisec/properties.rb', line 67

# Returns all properties of a given Unicode character (code point).
#
# Only the first code point of `chr` is looked up in the TwitterCldr database;
# the case and normalization entries are computed from `chr` itself.
# @param chr [String] Unicode code point (as character / string)
# @return [Hash] All properties of the given code point
def self.char(chr)
  first_cp = TwitterCldr::Utils::CodePoints.from_string(chr).first
  code_point = TwitterCldr::Shared::CodePoint.get(first_cp)
  properties = code_point.properties

  # Properties exposed under their own key below are dropped from the copy,
  # what is left is returned as :other_properties.
  remaining = properties.properties_hash.dup
  %w[Age Block General_Category Script].each { |promoted| remaining.delete(promoted) }

  # Long aliases of the general category values, in the order the library yields them.
  subcategory_name, category_name = properties.general_category.map do |short_alias|
    TwitterCldr::Shared::PropertyValueAliases.long_alias_for('gc', short_alias)
  end

  # Result key => LocalizedString method used to compute it
  twitter_case_ops = { lowercase: :downcase, uppercase: :upcase, titlecase: :titlecase, casefold: :casefold }
  # Ruby wants lowercase form names, TwitterCldr the uppercase ones
  normal_forms = %i[nfkd nfkc nfd nfc]

  {
    age: properties.age.join,
    plane: Unisec::Planes.reverse(chr),
    block: properties.block.join,
    category: category_name,
    subcategory: "#{subcategory_name} (#{code_point.category})",
    codepoint: Utils::String.char2codepoint(chr),
    name: code_point.name,
    script: properties.script.join,
    case: {
      ruby: { lowercase: chr.downcase, uppercase: chr.upcase },
      twitter: twitter_case_ops.map { |label, op| [label, chr.localize.public_send(op).to_s] }.to_h
    },
    normalization: {
      ruby: normal_forms.map { |form| [form, chr.unicode_normalize(form)] }.to_h,
      twitter: normal_forms.map { |form| [form, chr.localize.normalize(using: form.upcase).to_s] }.to_h
    },
    other_properties: remaining
  }
end

.char_display(chr, extended: false) ⇒ Object

Display a CLI-friendly output listing all properties corresponding to a character (code point)

Parameters:

  • chr (String)

    Unicode code point (as character / string)

  • extended (Boolean) (defaults to: false)

    By default, only common properties are shown; when extended is set to true, all of them are shown.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/unisec/properties.rb', line 119

# Display a CLI-friendly output listing all properties corresponding to a character (code point).
#
# Every row is printed as a bold red label padded to 30 columns followed by the value.
# @param chr [String] Unicode code point (as character / string)
# @param extended [Boolean] By default, only common properties are shown; when set to
#   `true` the remaining properties (`:other_properties`) are listed as well.
# @return [nil]
def self.char_display(chr, extended: false)
  data = Properties.char(chr)
  display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
  display.call('Name:', data[:name])
  display.call('Code Point:', "#{data[:codepoint]} (#{Utils::String.convert(chr, :integer)})")
  puts
  display.call('Plane:', data[:plane]) # label used to lack the colon every other row has
  display.call('Block:', data[:block])
  display.call('Category:', data[:category])
  display.call('Sub-Category:', data[:subcategory])
  display.call('Script:', data[:script])
  display.call('Since (age):', "Version #{data[:age]}")
  puts
  # A case mapping may expand to several characters (e.g. "ß" upcases to "SS"), so the
  # multi-character helper is used here, exactly as for the normalization rows below.
  # NOTE(review): assumes chars2codepoints yields the same text as char2codepoint for a
  # single character; confirm against Utils::String.
  [
    ['Uppercase:', :uppercase],
    ['Lowercase:', :lowercase],
    ['Titlecase:', :titlecase],
    ['Casefold:', :casefold]
  ].each do |label, key|
    mapped = data.dig(:case, :twitter, key)
    display.call(label, "#{mapped} (#{Utils::String.chars2codepoints(mapped)})")
  end
  puts
  [
    ['Normalization NFKD:', :nfkd],
    ['Normalization NFKC:', :nfkc],
    ['Normalization NFD:', :nfd],
    ['Normalization NFC:', :nfc]
  ].each do |label, form|
    normalized = data.dig(:normalization, :twitter, form)
    display.call(label, "#{normalized} (#{Utils::String.chars2codepoints(normalized)})")
  end
  if extended
    puts
    # A property may have no value (nil), in which case only its name is printed
    data[:other_properties].each { |name, values| display.call(name, values&.join) }
  end
  nil
end

.codepoints(prop) ⇒ Array<Hash>

List all code points for a given property

Examples:

Unisec::Properties.codepoints('Quotation_Mark')
# =>
# [{:char=>"\"", :codepoint=>34, :name=>"QUOTATION MARK"},
#  {:char=>"'", :codepoint=>39, :name=>"APOSTROPHE"},
#  … ]

Parameters:

  • prop (String)

    the property name

Returns:

  • (Array<Hash>)

    Array of code points ({char: String, codepoint: Integer, name: String})



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/unisec/properties.rb', line 27

# List all code points for a given property.
# @param prop [String] the property name
# @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
def self.codepoints(prop)
  database = TwitterCldr::Shared::CodePoint
  matching_ranges = database.properties.code_points_for_property(prop).ranges
  # Expand every range into one entry per code point, keeping the range order
  matching_ranges.flat_map do |range|
    range.map do |number|
      entry = database.get(number)
      {
        char: TwitterCldr::Utils::CodePoints.to_string([entry.code_point]),
        codepoint: entry.code_point,
        name: entry.name
      }
    end
  end
end

.codepoints_display(prop) ⇒ Object

Display a CLI-friendly output listing all code points corresponding to a property.

Examples:

Unisec::Properties.codepoints_display('Quotation_Mark')
# =>
# U+0022      "    QUOTATION MARK
# U+0027      '    APOSTROPHE
# …


51
52
53
54
55
56
57
# File 'lib/unisec/properties.rb', line 51

# Display a CLI-friendly output listing all code points corresponding to a property.
#
# One line is printed per code point: the standard hexadecimal code point padded to
# 7 columns, the character padded to 4 columns, then the name.
# @param prop [String] the property name
# @return [nil]
def self.codepoints_display(prop)
  Properties.codepoints(prop).each do |entry|
    hex_column = Utils::Integer.deccp2stdhexcp(entry[:codepoint]).ljust(7)
    char_column = entry[:char].ljust(4)
    puts "#{hex_column} #{char_column} #{entry[:name]}"
  end
  nil
end

.list ⇒ Array<String>

List Unicode property names

Examples:

Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]

Returns:

  • (Array<String>)

    property names



14
15
16
# File 'lib/unisec/properties.rb', line 14

# List Unicode property names.
# @return [Array<String>] property names
# @example
#   Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]
def self.list
  property_index = TwitterCldr::Shared::CodePoint.properties
  property_index.property_names
end