Class: DWH::Column

Inherits:
Object
  • Object
show all
Defined in:
lib/dwh/column.rb

Overview

Captures column metadata for a target table.

Constant Summary collapse

# Default rewrite rules used by #namify. Each key is a pattern and each value
# its replacement; rules are applied in insertion order to the titleized name.
#
# The "desc" rule is anchored with a trailing word boundary so that it only
# expands the abbreviation itself ("Desc", "Productdesc") and leaves words
# that merely start with those letters ("Description", "Descending") intact.
DEFAULT_RULES =
{ /[_+]+/ => ' ', /\s+id$/i => ' ID', /desc\b/i => 'Description' }.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name:, data_type:, precision: 0, scale: 0, schema_type: nil, max_char_length: nil) ⇒ Column

Returns a new instance of Column.



6
7
8
9
10
11
12
13
# File 'lib/dwh/column.rb', line 6

# Builds the column metadata record.
#
# @param name [#downcase] column name; stored downcased
# @param data_type [#downcase, nil] raw type name; stored downcased, nil is kept
# @param precision [Object] stored as given, except Strings which are converted with to_i
# @param scale [Object] stored as given, except Strings which are converted with to_i
# @param schema_type [#downcase, nil] stored downcased, nil is kept
# @param max_char_length [Object] stored as given
def initialize(name:, data_type:, precision: 0, scale: 0, schema_type: nil, max_char_length: nil)
  # Only String inputs are coerced; any other value (including nil) passes through.
  numeric = ->(value) { value.is_a?(String) ? value.to_i : value }

  @name = name.downcase
  @precision = numeric.call(precision)
  @scale = numeric.call(scale)
  @data_type = data_type&.downcase
  @schema_type = schema_type&.downcase
  @max_char_length = max_char_length
end

Instance Attribute Details

#data_type ⇒ Object (readonly)

Returns the value of attribute data_type.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column's data type.
#
# @return [Object, nil] the value held in @data_type; the constructor stores
#   the downcased data_type argument, or nil when nil was given
def data_type
  @data_type
end

#max_char_length ⇒ Object (readonly)

Returns the value of attribute max_char_length.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column's maximum character length.
#
# @return [Object, nil] the value held in @max_char_length; the constructor
#   stores the max_char_length argument as given (nil by default)
def max_char_length
  @max_char_length
end

#name ⇒ Object (readonly)

Returns the value of attribute name.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column name.
#
# @return [Object] the value held in @name; the constructor stores the
#   downcased name argument
def name
  @name
end

#precision ⇒ Object (readonly)

Returns the value of attribute precision.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column's numeric precision.
#
# @return [Object, nil] the value held in @precision; the constructor converts
#   String arguments with to_i and stores any other value as given (0 by default)
def precision
  @precision
end

#scale ⇒ Object (readonly)

Returns the value of attribute scale.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column's numeric scale.
#
# @return [Object, nil] the value held in @scale; the constructor converts
#   String arguments with to_i and stores any other value as given (0 by default)
def scale
  @scale
end

#schema_type ⇒ Object (readonly)

Returns the value of attribute schema_type.



4
5
6
# File 'lib/dwh/column.rb', line 4

# Reader for the column's schema type. The predicates #dim? and #measure?
# compare this value against 'dimension' and 'measure'.
#
# @return [Object, nil] the value held in @schema_type; the constructor stores
#   the downcased schema_type argument, or nil when nil was given
def schema_type
  @schema_type
end

Instance Method Details

#dim? ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/dwh/column.rb', line 15

# Tells whether the column is classified as a dimension.
#
# @return [Boolean] true only when schema_type is the string 'dimension'
def dim?
  schema_type.eql?('dimension')
end

#measure? ⇒ Boolean

Returns:

  • (Boolean)


19
20
21
# File 'lib/dwh/column.rb', line 19

# Tells whether the column is classified as a measure.
#
# @return [Boolean] true only when schema_type is the string 'measure'
def measure?
  schema_type.eql?('measure')
end

#namify(rules = DEFAULT_RULES) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/dwh/column.rb', line 24

# Produces a display name for the column: the name is titleized and then every
# rule is applied in iteration order, each one operating on the output of the
# previous rule.
#
# @param rules [Hash, Enumerable] pattern => replacement pairs; each pattern
#   is passed through Regexp.new before substitution
# @return [String] the rewritten display name
def namify(rules = DEFAULT_RULES)
  rules.reduce(titleize(name)) do |label, (pattern, replacement)|
    label.gsub(Regexp.new(pattern), replacement)
  end
end

#normalized_data_type ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/dwh/column.rb', line 33

# Maps the column's raw data type onto one of the normalized type names:
# 'binary', 'string', 'date', 'date_time', 'integer', 'bigint', 'decimal'
# or 'boolean'. Any type that matches no rule is reported as 'string'.
#
# The generic 'number' type is resolved from precision and scale:
# precision >= 38 with zero scale is 'bigint', a positive scale is 'decimal',
# anything else is 'integer'.
#
# @return [String] the normalized type name
def normalized_data_type
  # Strip ClickHouse type wrappers (Nullable(T), LowCardinality(T), Array(T))
  # so the inner type is matched by the rules below.
  inner = unwrap_type(data_type)

  case inner
  when /binary/, 'image'
    'binary'
  when /varchar/, 'string', /text/, /char/, /fixedstring/
    'string'
  when 'date', 'date32'
    # Exact matches only, so 'datetime...' falls through to the next branch.
    'date'
  when /date_time/, /datetime/, 'time', /timestamp/
    'date_time'
  when 'int', 'integer', 'smallint', 'tinyint', /^int8$/, /^int16$/, /^int32$/,
       /^uint8$/, /^uint16$/, /^uint32$/
    'integer'
  when 'bigint', 'bit_int', 'big_integer', /^int64$/, /^int128$/, /^int256$/,
       /^uint64$/, /^uint128$/, /^uint256$/
    'bigint'
  when 'decimal', 'double', 'float', 'real', 'dec', 'numeric', 'money',
       /^float32$/, /^float64$/, /^decimal/
    'decimal'
  when 'boolean', 'bit', 'bool'
    'boolean'
  when 'uuid'
    'string'
  when 'number'
    # The constructor only coerces String values, so precision and scale can
    # still be nil here; treat a missing value as 0 instead of raising.
    number_precision = precision || 0
    number_scale = scale || 0

    if number_precision >= 38 && number_scale.zero?
      'bigint'
    elsif number_scale.positive?
      'decimal'
    else
      'integer'
    end
  else
    'string'
  end
end

#titleize(name) ⇒ Object



98
99
100
101
102
103
104
105
106
107
# File 'lib/dwh/column.rb', line 98

# Turns an identifier into space-separated, capitalized words.
#
# Underscores and dashes are treated as word separators, runs of whitespace
# collapse to one space, and each word has its first letter upcased and the
# remaining letters downcased.
#
# @param name [String] identifier such as "customer_id" or "first-name"
# @return [String] e.g. "Customer Id"
def titleize(name)
  spaced = name.tr('_-', ' ')                       # separators become spaces
  words = spaced.gsub(/\s+/, ' ').strip.split(' ')  # collapse, trim, split into words
  words.map { |word| word.capitalize }.join(' ')
end

#to_h ⇒ Object



73
74
75
76
77
78
79
80
81
82
# File 'lib/dwh/column.rb', line 73

# Exports the column's attributes as a Hash keyed by attribute name.
#
# @return [Hash{Symbol => Object}] name, data_type, precision, scale,
#   schema_type and max_char_length, in that order
def to_h
  attributes = %i[name data_type precision scale schema_type max_char_length]
  attributes.each_with_object({}) do |attribute, hash|
    hash[attribute] = send(attribute)
  end
end

#to_s ⇒ Object



84
85
86
# File 'lib/dwh/column.rb', line 84

def to_s
  "<Column:#{name}:#{data_type}>"
end

#unwrap_type(type) ⇒ Object

Strips ClickHouse parameterized wrappers like Nullable(T), LowCardinality(T), Array(T) so the inner type can be normalised by the standard rules in #normalized_data_type. Safe to call on any value: the input is first converted to a lowercase string, and that string is returned as-is if no wrapper matches.



91
92
93
94
95
96
# File 'lib/dwh/column.rb', line 91

# Strips ClickHouse parameterized wrappers (Nullable(T), LowCardinality(T),
# Array(T)) from a type name and returns the innermost type.
#
# Wrappers are removed repeatedly, in whatever order they are nested, so that
# combinations such as LowCardinality(Nullable(String)) or
# Array(Nullable(Int32)) are fully unwrapped. Parameterized types that are not
# wrappers (e.g. "decimal(10,2)") are left untouched.
#
# @param type [#to_s] raw type name; nil is treated as an empty string
# @return [String] the downcased type with all recognised wrappers removed
def unwrap_type(type)
  wrapper = /\A(?:nullable|lowcardinality|array)\((.+)\)\z/
  inner = type.to_s.downcase

  # Each pass removes one outer wrapper, so the string shrinks and the loop
  # ends as soon as the outermost construct is no longer a wrapper.
  while (match = wrapper.match(inner))
    inner = match[1]
  end

  inner
end