Module: PgColumnBytePacker
- Defined in:
- lib/pg_column_byte_packer.rb,
lib/pg_column_byte_packer/pg_dump.rb,
lib/pg_column_byte_packer/version.rb,
lib/pg_column_byte_packer/schema_statements.rb
Defined Under Namespace
Modules: PgDump, SchemaCreation
Constant Summary collapse
- VERSION =
"1.2.1"
Class Method Summary collapse
- .ordering_key_for_column(connection:, name:, sql_type:, type_schema: nil, primary_key:, nullable:, has_default:) ⇒ Object
- .sql_type_alignment_cache ⇒ Object
Class Method Details
.ordering_key_for_column(connection:, name:, sql_type:, type_schema: nil, primary_key:, nullable:, has_default:) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/pg_column_byte_packer.rb', line 9 def self.ordering_key_for_column(connection:, name:, sql_type:, type_schema: nil, primary_key:, nullable:, has_default:) alignment = PgColumnBytePacker.sql_type_alignment_cache[sql_type] ||= ( if type_schema.nil? fake_query = "CREATE TABLE t(c #{sql_type});" parsed = begin PgQuery.parse(fake_query) rescue PgQuery::ParseError nil end if parsed && (column_def = parsed.tree[0]["RawStmt"]["stmt"]["CreateStmt"]["tableElts"][0]["ColumnDef"]) type_identifiers = column_def["typeName"]["TypeName"]["names"].map { |s| s["String"]["str"] } case type_identifiers.size when 1 # Do nothing; we already have a bare type. when 2 type_schema, = type_identifiers else raise ArgumentError, "Unexpected number of identifiers in type declaration for column: `#{name}`, identifiers: #{type_identifiers.inspect}" end end end if type_schema == "pg_catalog" sql_type = case sql_type when "char", "\"char\"" # The SQL standard defines bare 'character' as a single character # type. The quoted variant is also single character, but raises # an error on inputs longer than a single character (unlike the # which requires it to be silently trucated). # # We don't yet use the length for the character type, but it seems # worth retaining it here for completeness (rather than solely # handling the quoted "char" case, which is what we have to have # for things to work minimally). # # See: https://www.postgresql.org/docs/current/datatype-character.html # and format_type.c "character(1)" when "bit" # The SQL standard defines bare 'bit' as a single bit type. # # See: https://www.postgresql.org/docs/current/datatype-bit.html "bit(1)" when "\"bit\"" # The quoted variant, instead of being parallel to the quoted # variant of char, allows any number of bits, but is technically # a different entry in pg_type than varchar. "bit" else sql_type end end = if type_schema if sql_type.start_with?("#{type_schema}.") sql_type.sub("#{type_schema}.", "") elsif sql_type.start_with?("\"#{type_schema}\".") sql_type.sub("\"#{type_schema}\".", "") else sql_type end else sql_type end # Ignore array designations. This seems like something we could # so with the parsing above, and we could, and, in fact, that # would also almost certainly allow us to rip out most of the # ActiveRecord generated alias type name matching below, but # it would also mean a more thorough refactor below for types # with size designations (e.g., when ActiveRecord generates # "float(23)"). So we use this simple regex cleanup for now. = .sub(/(\[\])+\Z/, "") # Sort out the alignment. Most of the type name matching is to # support the naming variants that ActiveRecord generates (often # they're aliases, like "integer", for which PostgreSQL internally # has a different canonical name: "int4"). There are also # a few cases we have to handle where the output from pgdump # doesn't match the canonical name in pg_type; e.g., "float8" is # canonical, but pgdump outputs "double precision". case when "bigint", "double precision", /\Atimestamp(tz| with time zone| without time zone)?/, /\Abigserial( primary key)?/ 8 # Actual alignment for these types. when "integer", "date", /\Atime(tz| with time zone| without time zone)?/, /\Adecimal(\([^\)]+\))?/, "real", /\Aserial( primary key)?/ # Note: unlike the others which always take a fixed amount of space, # the numeric/decimal type is stored in a variable amount of space (see: # https://www.postgresql.org/docs/10/datatype-numeric.html) but pg_type # shows that its alignment is the same as integer. Postgres canonicalizes # this type to numeric, but we have to still support the decimal # designation for ActiveRecord inputs. 4 # Actual alignment for these types. when "bytea" # These types generally have an alignment of 4, but values of at most 127 bytes # long they are optimized into 2 byte alignment. # Since we'd expect any binary fields to be relatively long, we'll assume they # won't fit into the optimized case. 4 when "text", "citext", "character varying", "bit varying", "bit" # These types generally have an alignment of 4 (as designated by pg_type # having a typalign value of 'i', but they're special in that small values # have an optimized storage layout. Beyond the optimized storage layout, though, # these small values also are not required to respect the alignment the type # would otherwise have. Specifically, values with a size of at most 127 bytes # aren't aligned. That 127 byte cap, however, includes an overhead byte to store # the length, and so in reality the max is 126 bytes. Interestingly TOASTable # values are also treated that way, but we don't have a good way of knowing which # values those will be. # # See: `fill_val()` in src/backend/access/common/heaptuple.c (in the conditional # `else if (att->attlen == -1)` branch. # # When no limit modifier has been applied we don't have a good heuristic for # determining which columns are likely to be long or short, so we currently # just slot them all after the columns we believe will always be long. 3 when /\Acharacter varying\(\d+\)/ # However, when a limit modifier has been applied we can make stronger assumptions. if (limit = /\Acharacter varying\((\d+)\)/.match(sql_type)[1]) if limit.to_i <= 126 # If we know the limit guarantees we'll fit into the unaligned storage # optimization, then we can go ahead and treat it as unaligned. 1 else # If the limit would allow strings that require alignment, then we assume it's # more likely to exeed the optimization cap and slot them after the columns # we know for certain will require integer alignment. 3 end end when /\Abit varying\(\d+\)/ # This type is functionally the same as varchar above, but the calculation we need # to do has been scaled since the limit is expressed in bits rather than bytes. if (limit = /\Abit varying\((\d+)\)/.match(sql_type)[1]) if limit.to_i <= (126 * 8) 1 else 3 end end when /\Afloat(\(\d+\))?/ precision_match = /\Afloat\((\d+)\)?/.match(sql_type) if precision_match # Precision here is a number of binary digits; # see https://www.postgresql.org/docs/10/datatype-numeric.html # for more information. if precision_match[1].to_i >= 25 8 # Double precision else 4 # Real end else 8 # Default is double precision end else type_without_modifier, modifier = .match(/\A([^\(]+)(\([^\)]+\))?/)[1..-1] pg_type_typname = case type_without_modifier when "boolean" "bool" when "smallint" "int2" when "character" "char" else # There are other cases you might expect to see here (for other system # types like varchar and varbit where the external name generated by # format_type_extended() in Postgres's format_type.c doesn't match the # pg_type typaname), but we've already handled them separately above # (since we have additional rules to apply to them). type_without_modifier end typtype, typalign = connection.select_rows(<<~SQL, "Type Lookup").first SELECT typ.typtype, typ.typalign FROM pg_type typ JOIN pg_namespace nsp ON nsp.oid = typ.typnamespace WHERE typname = '#{connection.quote_string(pg_type_typname)}' #{type_schema ? "AND nsp.nspname = '#{connection.quote_string(type_schema)}'" : ""} SQL if typtype.nil? raise ArgumentError, "Got sql_type: `#{sql_type}` and type_schema: `#{type_schema}` but was unable to find entry in pg_type." end if typtype == "e" # Enum 4 else case typalign when "c" # Character types, for example, occupy a variable amount of space # (though fixed in the sense that it's specified up front for each # column definition) but require no alignment. 1 when "s" # Short alignment 2 when "i" # Int alingment 4 when "d" # Double alignment 8 else 1 end end end ) # Ordering components in order of most importance to least importance. [ -alignment, # Sort alignment descending. primary_key ? 0 : 1, # Sort PRIMARY KEY first. nullable ? 1 : 0, # Sort NOT NULL first. has_default && nullable ? 0 : 1, # Sort DEFAULT first (but only when also nullable). name, # Sort name ascending. ] end |
.sql_type_alignment_cache ⇒ Object
5 6 7 |
# File 'lib/pg_column_byte_packer.rb', line 5 def self.sql_type_alignment_cache @@sql_type_alignment_cache ||= {} end |