Class: ActiveRecord::Summarize::Summarize

Inherits:

Object

Object
ActiveRecord::Summarize::Summarize

show all

Defined in: lib/activerecord/summarize.rb

Instance Attribute Summary collapse

#base_association ⇒ Object readonly

Returns the value of attribute base_association.
#base_groups ⇒ Object readonly

Returns the value of attribute base_groups.
#current_result_row ⇒ Object readonly

Returns the value of attribute current_result_row.
#from_where ⇒ Object readonly

Returns the value of attribute from_where.
#noop ⇒ Object (also: #noop?) readonly

Returns the value of attribute noop.
#pure ⇒ Object (also: #pure?) readonly

Returns the value of attribute pure.

Instance Method Summary collapse

#add_calculation(relation, operation, column_name) ⇒ Object
#initialize(relation, pure: nil, noop: false) ⇒ Summarize constructor

noop: true causes `summarize` simply to yield the original relation and a trivial, synchronous `with` proc.
#process(&block) ⇒ Object
#resolve ⇒ Object

Constructor Details

#initialize(relation, pure: nil, noop: false) ⇒ `Summarize`

noop: true

causes `summarize` simply to yield the original relation and a trivial,
synchronous `with` proc. It is meant as a convenient way to test/prove
the correctness of `summarize` and to compare performance of the single
combined query vs the original individual queries.
N.b., if `relation` already has a grouping applied, there is no direct
ActiveRecord translation for what `summarize` does, so noop: true is
impossible and raises an exception.

pure: true

lets `summarize` know that you're not mutating state within the block,
so it doesn't need to go spelunking in the block binding for
ChainableResults. See `if !pure?` section below.
N.b., if `relation` already has a grouping applied, pure: true is
implied and pure: false throws an exception, as the impure behavior
would be non-obvious and of doubtful value.

Raises:

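(Unsummarizable) — when pure: false is given for a relation that already has a grouping
(ArgumentError) — when noop: true is given for a relation that already has a grouping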

# File 'lib/activerecord/summarize.rb', line 29

# Captures the base relation and works out how (if at all) it is already grouped.
#
# @param relation the relation `summarize` was called on; its `group_values`
#   become the base groups of every result
# @param pure [Boolean, nil] nil means "not specified"; an explicit false on a
#   grouped relation is rejected, and a grouped relation is always treated as pure
# @param noop [Boolean] when true, `process` just yields the original relation;
#   rejected for a grouped relation
# @raise [Unsummarizable] if `pure: false` is combined with a grouped relation
# @raise [ArgumentError] if `noop: true` is combined with a grouped relation
def initialize(relation, pure: nil, noop: false)
  @relation = relation
  @noop = noop

  group_fields = relation.group_values.dup

  # Borrowed from ActiveRecord::Calculations.execute_grouped_calculation: a
  # relation grouped by exactly one belongs_to association is really grouped by
  # that association's foreign key. As with group(:association).count, other
  # association types are left alone.
  grouping_association = nil
  if group_fields.size == 1 && group_fields.first.respond_to?(:to_sym)
    reflection = relation.klass._reflect_on_association(group_fields.first)
    grouping_association = reflection if reflection&.belongs_to?
  end

  if grouping_association
    @base_groups = Array(grouping_association.foreign_key)
    @base_association = grouping_association
  else
    @base_groups = group_fields
    @base_association = nil
  end

  grouped = base_groups.any?
  if grouped
    # `pure == false` is deliberate: nil (unspecified) must not trip this check.
    raise Unsummarizable, "`summarize` must be pure when called on a grouped relation" if pure == false
    raise ArgumentError, "`summarize(noop: true)` is impossible on a grouped relation" if noop
  end

  @pure = grouped || !!pure
  @calculations = []
end

Instance Attribute Details

#base_association ⇒ `Object` (readonly)

Returns the value of attribute base_association.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# The association the base relation is grouped by, as detected in #initialize:
# set only when the relation is grouped by exactly one field that reflects to a
# belongs_to association; nil otherwise.
def base_association
  @base_association
end

#base_groups ⇒ `Object` (readonly)

Returns the value of attribute base_groups.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# The base relation's grouping as captured in #initialize: a copy of
# `relation.group_values`, or the foreign key (wrapped in an Array) when the
# relation is grouped by a single belongs_to association. Empty when ungrouped.
def base_groups
  @base_groups
end

#current_result_row ⇒ `Object` (readonly)

Returns the value of attribute current_result_row.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# The row of per-calculation values currently being resolved. #process assigns
# it for each row produced by #resolve and resets it to nil before returning;
# the blocks built in #add_calculation index into it.
def current_result_row
  @current_result_row
end

#from_where ⇒ `Object` (readonly)

Returns the value of attribute from_where.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# The relation that #resolve groups and plucks from.
# NOTE(review): presumably built up by `merge_from_where!` as calculations are
# added (that method is not shown here) — confirm.
def from_where
  @from_where
end

#noop ⇒ `Object` (readonly) Also known as: noop?

Returns the value of attribute noop.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# The `noop:` value given to #initialize. When truthy, #process yields the
# original relation instead of collecting calculations.
def noop
  @noop
end

#pure ⇒ `Object` (readonly) Also known as: pure?

Returns the value of attribute pure.



10
11
12

# File 'lib/activerecord/summarize.rb', line 10

# Whether the block is treated as pure: true when the base relation is grouped
# or when a truthy `pure:` was given to #initialize, false otherwise. When
# false, #process also resolves ChainableResults found in the block's binding.
def pure
  @pure
end

Instance Method Details

#add_calculation(relation, operation, column_name) ⇒ `Object`

# File 'lib/activerecord/summarize.rb', line 133

# Registers one calculation to be answered by the combined query and hands back
# a deferred result for it.
#
# @param relation the relation the calculation was called on; passed to
#   `merge_from_where!` and stored with the calculation
# @param operation the calculation being requested
# @param column_name the column the calculation applies to
# @return whatever `ChainableResult.wrap` returns for the new calculation; its
#   block reads this calculation's slot of `current_result_row` when called
def add_calculation(relation, operation, column_name)
  merge_from_where!(relation)
  pending = CalculationResult.new(relation, operation, column_name)
  # The slot is the calculation's position in @calculations, i.e. the size of
  # the list just before it was appended.
  slot = @calculations.push(pending).size - 1
  ChainableResult.wrap(pending) { current_result_row[slot] }
end

#process(&block) ⇒ `Object`

# File 'lib/activerecord/summarize.rb', line 50

# Runs the caller's block against an instrumented copy of the relation, then
# resolves everything the block asked for.
#
# The block is yielded two arguments: the relation (ungrouped, with this
# Summarize attached and InstanceMethods mixed into its singleton class) and
# ChainableResult::WITH_RESOLVED. In noop mode it instead receives the original
# relation and ChainableResult::SYNC_WITH_RESOLVED, and its return value is
# returned directly.
#
# @return the block's return value with ChainableResults resolved. For an
#   ungrouped base relation this is the bare value; for a grouped one it is a
#   Hash keyed like Relation.group(...).count (see the key fix-up below).
def process(&block)
  # For noop, just yield the original relation and a transparent `with_resolved` proc.
  return yield(@relation, ChainableResult::SYNC_WITH_RESOLVED) if noop?
  # Within the block, the relation and its future clones intercept calls to
  # `count` and `sum`, registering them and returning a ChainableResult via
  # summarize.add_calculation.
  future_block_result = ChainableResult.wrap(yield(
    @relation.unscope(:group).tap do |r|
      r.instance_variable_set(:@summarize, self)
      class << r
        include InstanceMethods
      end
    end,
    ChainableResult::WITH_RESOLVED
  ))
  # Everything below runs inside with_cache; the impure branch relies on that
  # (see its preconditions).
  ChainableResult.with_cache(!pure?) do
    # `resolve` builds the single query that answers all collected calculations,
    # executes it, and aggregates the results by the values of `base_groups`.
    # In the common case of no `base_groups`, the resolve returns:
    # `{[]=>[*final_value_for_each_calculation]}`
    result = resolve.transform_values! do |row|
      # Each row (in the common case, only one) is used to resolve any
      # ChainableResults returned by the block. These may be a one-to-one mapping,
      # or the block return may have combined some results via `with`, chained
      # additional methods on results, etc..
      @current_result_row = row
      future_block_result.value
    end.then do |result|
      # Note: this block parameter `result` is the transformed hash from above;
      # the outer local `result` receives whatever this block returns.
      # Now unpack/fix-up the result keys to match shape of Relation.count or Relation.group(*cols).count return values
      if base_groups.empty?
        # Change ungrouped result from `{[]=>v}` to `v`, like Relation.count
        result.values.first
      elsif base_association
        # Change grouped-by-one-belongs_to-association result from `{[id1]=>v1,[id2]=>v2,...}` to
        # `{<AssociatedModel id:id1>=>v1,<AssociatedModel id:id2>=>v2,...}` like Relation.group(:association).count

        # Loosely based on a bit from ActiveRecord::Calculations.execute_grouped_calculation,
        # retrieve the records for the group association and replace the keys of our final result.
        key_class = base_association.klass.base_class
        key_records = key_class
          .where(key_class.primary_key => result.keys.flatten)
          .index_by(&:id)
        result.transform_keys! { |k| key_records[k[0]] }
      elsif base_groups.size == 1
        # Change grouped-by-one-column result from `{[k1]=>v1,[k2]=>v2,...}` to `{k1=>v1,k2=>v2,...}`, like Relation.group(:column).count
        result.transform_keys! { |k| k[0] }
      else
        # Multiple-column base grouping (though perhaps relatively rare) requires no change.
        result
      end
    end
    if !pure?
      # Check block scope's local vars and block's self's instance vars for
      # any ChainableResult, and replace it with its resolved value.
      #
      # Also check the values of any of those vars that are Hashes, since IME
      # it's not rare to assign counts to hashes, and it is rare to have giant
      # hashes that would be particularly wasteful to traverse. Do not do the
      # same for Arrays, since IME pushing counts to arrays is rare, and large
      # arrays, e.g., of many eagerly-fetched ActiveRecord objects, are not
      # rare in controllers.
      #
      # Preconditions:
      # - @current_result_row is still set to the single result row
      # - we are within a ChainableResult.with_cache(true) block
      block_binding = block.binding
      block_self = block_binding.receiver
      block_binding.local_variables.each do |k|
        v = block_binding.local_variable_get(k)
        next block_binding.local_variable_set(k, v.value) if v.is_a?(ChainableResult)
        lightly_touch_impure_hash(v) if v.is_a?(Hash)
      end
      block_self.instance_variables.each do |k|
        v = block_self.instance_variable_get(k)
        next block_self.instance_variable_set(k, v.value) if v.is_a?(ChainableResult)
        lightly_touch_impure_hash(v) if v.is_a?(Hash)
      end
    end
    # Clear the row only after the impure pass above, which still needs it.
    @current_result_row = nil
    result
  end
end

#resolve ⇒ `Object`

# File 'lib/activerecord/summarize.rb', line 141

# Builds and executes the single query that answers every registered
# calculation, then folds the returned rows into one result per calculation.
#
# Rows are bucketed by the values in their leading `base_groups` columns. Within
# each bucket every calculation is reduced to a number (when the calculation's
# own relation has no grouping) or to a Hash keyed by that relation's group
# value (one group) or Array of group values (several groups).
#
# @return [Hash{Array => Array}] base-group values => results, one per
#   calculation in registration order. With no base groups there is exactly one
#   entry, keyed by `[]`.
def resolve
  # Build & execute query
  groups = all_groups
  # Nothing to group by and nothing to calculate: the aggregation below would
  # produce a single empty row whatever the query returned, so return that row
  # without touching the database (or `from_where`, which no calculation has
  # had a chance to contribute to).
  return { [] => [] } if @calculations.empty? && groups.empty? && base_groups.empty?
  # MariaDB, SQLite, and Postgres all support `GROUP BY 1, 2, 3`-style syntax,
  # where the numbers are 1-indexed references to SELECT values. It makes these
  # generated queries much shorter and more readable, and it avoids the
  # ambiguity of using aliases (for GROUP BY, they can get clobbered by columns
  # from underlying tables) even where those are supported. But in case we find
  # a database that doesn't support numeric references, the fully-explicit
  # grouping code is commented out below.
  #
  # grouped_query = groups.any? ? from_where.group(*groups) : from_where
  grouped_query = groups.any? ? from_where.group(*1..groups.size) : from_where
  data = grouped_query.pluck(*groups, *value_selects)
  # .pluck(:one_column) returns an array of values instead of an array of arrays,
  # which breaks the aggregation and assignment below in case anyone ever asks
  # `summarize` for only one thing.
  data = data.map { |d| [d] } if (groups.size + value_selects.size) == 1

  # Aggregate & assign results
  group_idx = groups.each_with_index.to_h
  reductions = @calculations.each_with_index.map do |calculation, calculation_index|
    # Value columns follow the group columns in each plucked row.
    value_column = groups.size + calculation_index
    group_columns = calculation.relation.group_values.map { |k| group_idx[k] }
    # `row[value_column] || 0` pattern in reducers because SQL SUM(NULL)
    # returns NULL, but like ActiveRecord we always want .sum to return a
    # number, and our "starting_values and reducers" implementation means
    # we sometimes will have to add NULL to our numbers.
    case group_columns.size
    when 0
      [
        0,
        ->(memo, row) { memo + (row[value_column] || 0) }
      ]
    when 1
      key_column = group_columns.first
      [
        Hash.new(0), # Default 0 makes the reducer much cleaner, but we have to clean it up later
        ->(memo, row) {
          memo[row[key_column]] += row[value_column] unless (row[value_column] || 0).zero?
          memo
        }
      ]
    else
      [
        Hash.new(0),
        ->(memo, row) {
          memo[group_columns.map { |column| row[column] }] += row[value_column] unless (row[value_column] || 0).zero?
          memo
        }
      ]
    end
  end
  # Split the pairs with `map` rather than `transpose`: transposing an empty
  # list yields `[]`, which would destructure into two nils.
  starting_values = reductions.map(&:first)
  reducers = reductions.map(&:last)
  base_group_columns = (0...base_groups.size)
  data
    .group_by { |row| row[base_group_columns] }
    .tap { |h| h[[]] = [] if h.empty? && base_groups.empty? }
    .transform_values! do |rows|
      values = starting_values.map(&:dup) # map(&:dup) since some are hashes and we don't want to mutate starting_values
      rows.each do |row|
        reducers.each_with_index do |reducer, i|
          values[i] = reducer.call(values[i], row)
        end
      end
      # Set any hash's default back to nil, since callers will expect a normal hash
      values.each { |v| v.default = nil if v.is_a? Hash }
    end
end

Class: ActiveRecord::Summarize::Summarize

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(relation, pure: nil, noop: false) ⇒ Summarize

Instance Attribute Details

#base_association ⇒ Object (readonly)

#base_groups ⇒ Object (readonly)

#current_result_row ⇒ Object (readonly)

#from_where ⇒ Object (readonly)

#noop ⇒ Object (readonly) Also known as: noop?

#pure ⇒ Object (readonly) Also known as: pure?