Class: ActiveRecord::Summarize::Summarize

Inherits:
Object
  • Object
show all
Defined in:
lib/activerecord/summarize.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(relation, pure: nil, noop: false) ⇒ Summarize

noop: true

causes `summarize` simply to yield the original relation and a trivial,
synchronous `with` proc. It is meant as a convenient way to test/prove
the correctness of `summarize` and to compare performance of the single
combined query vs the original individual queries.
N.b., if `relation` already has a grouping applied, there is no direct
ActiveRecord translation for what `summarize` does, so noop: true is
impossible and raises an exception.

pure: true

lets `summarize` know that you're not mutating state within the block,
so it doesn't need to go spelunking in the block binding for
ChainableResults. See the `if !pure?` section of `#process` below.
N.b., if `relation` already has a grouping applied, pure: true is
implied and pure: false raises an exception (`Unsummarizable`), as the
impure behavior would be non-obvious and of doubtful value.

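Raises:

  • (Unsummarizable) — if `pure: false` is given for a relation that already has a grouping applied
  • (ArgumentError) — if `noop: true` is given for a relation that already has a grouping applied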

29
30
31
32
33
34
35
36
37
# File 'lib/activerecord/summarize.rb', line 29

# Sets up a Summarize for `relation`.
#
# relation - the relation to summarize; must respond to `group_values`.
# pure:    - truthy when the block does not mutate outside state. Forced to
#            true when `relation` is already grouped.
# noop:    - truthy to bypass summarizing (see `process`).
#
# Raises Unsummarizable when `pure: false` is passed for a grouped relation.
# Raises ArgumentError when `noop: true` is passed for a grouped relation.
def initialize(relation, pure: nil, noop: false)
  @relation = relation
  @noop = noop
  if relation.group_values.any?
    # A grouped relation only supports the pure, non-noop mode. The purity
    # check comes first, so it wins when both options are invalid.
    raise Unsummarizable, "`summarize` must be pure when called on a grouped relation" if pure == false
    raise ArgumentError, "`summarize(noop: true)` is impossible on a grouped relation" if noop
    @pure = true
  else
    # Normalize nil/false/anything-else to a strict boolean.
    @pure = !!pure
  end
  @calculations = []
end

Instance Attribute Details

#current_result_row ⇒ Object (readonly)

Returns the value of attribute current_result_row.



10
11
12
# File 'lib/activerecord/summarize.rb', line 10

# Returns the result row currently being resolved. `process` assigns each row
# produced by `resolve` here before evaluating the block's result and resets
# it to nil afterwards; the resolver blocks built by `add_calculation` index
# into this row.
def current_result_row
  @current_result_row
end

#from_where ⇒ Object (readonly)

Returns the value of attribute from_where.



10
11
12
# File 'lib/activerecord/summarize.rb', line 10

# Returns @from_where, the object `resolve` calls `group` and `pluck` on to
# run the combined query.
# NOTE(review): presumably built up by `merge_from_where!` (called from
# `add_calculation`, defined outside this view) — confirm.
def from_where
  @from_where
end

#noop ⇒ Object (readonly) Also known as: noop?

Returns the value of attribute noop.



10
11
12
# File 'lib/activerecord/summarize.rb', line 10

# Returns the `noop:` option exactly as it was passed to the constructor.
# When truthy, `process` yields the original relation instead of an
# intercepting one.
def noop
  @noop
end

#pure ⇒ Object (readonly) Also known as: pure?

Returns the value of attribute pure.



10
11
12
# File 'lib/activerecord/summarize.rb', line 10

# Returns the purity flag computed by the constructor: true when the relation
# was already grouped or a truthy `pure:` was given, false otherwise. When
# false, `process` scans the block's binding for unresolved ChainableResults.
def pure
  @pure
end

Instance Method Details

#add_calculation(relation, operation, column_name) ⇒ Object



108
109
110
111
112
113
114
# File 'lib/activerecord/summarize.rb', line 108

# Registers one calculation to be answered by the combined query.
#
# relation    - the relation the calculation was requested on.
# operation   - the calculation being requested; passed through to
#               CalculationResult.
# column_name - the column it applies to; passed through to
#               CalculationResult.
#
# Returns the result of ChainableResult.wrap for the new CalculationResult.
# The block given to `wrap` reads this calculation's slot out of
# `current_result_row`.
def add_calculation(relation, operation, column_name)
  merge_from_where!(relation)
  @calculations.push(CalculationResult.new(relation, operation, column_name))
  # The calculation's position in @calculations is also its position in each
  # result row.
  slot = @calculations.size - 1
  ChainableResult.wrap(@calculations.last) { current_result_row[slot] }
end

#process(&block) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/activerecord/summarize.rb', line 39

# Runs the block against a calculation-intercepting copy of the relation, then
# answers everything the block asked for with a single combined query.
#
# Yields the relation and a `with` callable to the block.
#
# Returns the block's result with every ChainableResult resolved: the bare
# value when the base relation is ungrouped, a Hash keyed by the group value
# when it is grouped by one column, and a Hash keyed by arrays of group values
# otherwise. In noop mode, returns whatever the block returns.
def process(&block)
  # noop: hand the block the untouched relation plus a transparent `with`
  # that immediately passes its arguments (as one array) to its block.
  if noop?
    immediate_with = ->(*values, &combine) { [*values].then(&combine) }
    return yield(@relation, immediate_with)
  end

  # Inside the block, this relation and its future clones intercept calls to
  # `count` and `sum`, registering each one through `add_calculation` and
  # returning a ChainableResult in its place.
  intercepting = @relation.unscope(:group)
  intercepting.instance_variable_set(:@summarize, self)
  class << intercepting
    include InstanceMethods
  end
  pending = ChainableResult.wrap(yield(intercepting, ChainableResult::WITH))

  ChainableResult.with_cache(!pure?) do
    # `resolve` builds and runs the one query that answers all collected
    # calculations and aggregates the rows by `@relation.group_values`. In
    # the common ungrouped case that is `{[]=>[*final_value_for_each_calculation]}`.
    #
    # Each row (usually just one) is then used to resolve whatever the block
    # returned, which may map one-to-one onto calculations or may combine
    # them via `with`, chained method calls, etc.
    resolved = resolve.transform_values! do |row|
      @current_result_row = row
      pending.value
    end

    # Unwrap the common shapes: `{[]=>v}` becomes `v`, and a single-column
    # grouping `{[k1]=>v1,[k2]=>v2,...}` becomes `{k1=>v1,k2=>v2,...}`.
    # Multi-column groupings keep their array keys.
    summary =
      case @relation.group_values.size
      when 0 then resolved.values.first
      when 1 then resolved.transform_keys! { |key| key.first }
      else resolved
      end

    unless pure?
      # Replace any ChainableResult held by the block scope's local variables
      # or by instance variables of the block's self with its resolved value.
      #
      # Hashes held by those variables are inspected too, since assigning
      # counts into a hash is not rare and giant hashes are. Arrays are
      # deliberately skipped: pushing counts onto arrays is rare, while large
      # arrays (e.g., eagerly-fetched ActiveRecord objects) are common in
      # controllers.
      #
      # This relies on:
      # - @current_result_row still being set to the single result row
      # - running inside a ChainableResult.with_cache(true) block
      scope = block.binding
      owner = scope.receiver
      scope.local_variables.each do |name|
        held = scope.local_variable_get(name)
        if held.is_a?(ChainableResult)
          scope.local_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end
      owner.instance_variables.each do |name|
        held = owner.instance_variable_get(name)
        if held.is_a?(ChainableResult)
          owner.instance_variable_set(name, held.value)
        elsif held.is_a?(Hash)
          lightly_touch_impure_hash(held)
        end
      end
    end

    @current_result_row = nil
    summary
  end
end

#resolve ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/activerecord/summarize.rb', line 116

# Builds and runs the single query that answers every registered calculation,
# then folds the returned rows into one value per calculation.
#
# Returns a Hash keyed by the array of base-group values (`[]` when there are
# no base groups). Each value is an Array with one entry per calculation, in
# registration order: a number for an ungrouped calculation, or a Hash for a
# grouped one, keyed by the group value (one group column) or by an array of
# group values (several). Zero-valued entries are left out of those hashes.
def resolve
  # Build & execute query
  groups = all_groups
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
  # where the numbers are 1-indexed references to SELECT values. It makes these
  # generated queries much shorter and more readable, and it avoids the
  # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
  # from underlying tables) even where those are supported. But in case we find
  # a database that doesn't support numeric references, the fully-explicit
  # grouping code is commented out below.
  #
  # grouped_query = groups.any? ? from_where.group(*groups) : from_where
  grouped_query = groups.any? ? from_where.group(*(1..groups.size)) : from_where
  selects = [*groups, *value_selects]
  data = grouped_query.pluck(*selects)
  # `pluck` with exactly one column returns a flat array of values rather than
  # an array of rows. Everything below indexes into rows, so wrap each value
  # in a one-element row when only one thing was selected.
  data = data.map { |value| [value] } if selects.size == 1

  # Aggregate & assign results
  group_idx = groups.each_with_index.to_h
  starting_values, reducers = @calculations.each_with_index.map do |f, i|
    value_column = groups.size + i
    group_columns = f.relation.group_values.map { |k| group_idx[k] }
    case group_columns.size
    when 0 then [
      0,
      ->(memo, row) { memo + row[value_column] }
    ]
    when 1 then [
      Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
      ->(memo, row) {
        memo[row[group_columns[0]]] += row[value_column] unless row[value_column].zero?
        memo
      }
    ]
    else [
      Hash.new(0),
      ->(memo, row) {
        memo[group_columns.map { |col| row[col] }] += row[value_column] unless row[value_column].zero?
        memo
      }
    ]
    end
  end.transpose # For an array of pairs, `transpose` is the reverse of `zip`
  cols = (0...reducers.size)
  base_group_columns = (0...base_groups.size)
  data
    .group_by { |row| row[base_group_columns] }
    # With no base groups and no rows, still produce one (empty) bucket so
    # every calculation gets its starting value.
    .tap { |h| h[[]] = [] if h.empty? && base_groups.size.zero? }
    .transform_values! do |rows|
      values = starting_values.map(&:dup) # map(&:dup) since some are hashes and we don't want to mutate starting_values
      rows.each do |row|
        cols.each do |i|
          values[i] = reducers[i].call(values[i], row)
        end
      end
      # Set any hash's default back to nil, since callers will expect a normal hash
      values.each { |v| v.default = nil if v.is_a? Hash }
    end
end