Class: Ignis::LinAlg::MatmulPlan

Inherits:

Object

Object
Ignis::LinAlg::MatmulPlan

show all

Defined in: lib/nvruby/linalg/matmul_plan.rb

Overview

Stateful matrix multiplication plan with autotuning. Reusable for repeated operations with the same dimensions.

Instance Attribute Summary collapse

#autotuned ⇒ Boolean readonly

Whether plan has been autotuned.
#dtype ⇒ Symbol readonly

Data type.
#options ⇒ Hash readonly

Plan options.
#shape_a ⇒ Array<Integer> readonly

Shape of matrix A.
#shape_b ⇒ Array<Integer> readonly

Shape of matrix B.

Instance Method Summary collapse

#autotune!(iterations: nil, warmup: 3) ⇒ self

Autotune the operation to find the best algorithm.
#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ NvArray

Execute the planned matrix multiplication.
#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ MatmulPlan constructor

A new instance of MatmulPlan.
#output_shape ⇒ Array<Integer>

Output shape of the matmul operation.
#plan!(workspace_size: nil) ⇒ self

Plan the operation (find algorithms).
#stats ⇒ Hash

Get statistics about the plan.
#to_s ⇒ String

Constructor Details

#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ `MatmulPlan`

Returns a new instance of MatmulPlan.

Parameters:

shape_a (Array<Integer>) —

Shape of matrix A [m, k]
shape_b (Array<Integer>) —

Shape of matrix B [k, n]
dtype (Symbol) (defaults to: :float32) —

Data type
transpose_a (Boolean) (defaults to: false) —

Transpose A
transpose_b (Boolean) (defaults to: false) —

Transpose B
epilog (Symbol, nil) (defaults to: nil) —

Epilog operation
device (Integer, nil) (defaults to: nil) —

Target device

# File 'lib/nvruby/linalg/matmul_plan.rb', line 30

# Builds a matmul plan for a fixed pair of operand shapes.
#
# The shape arrays are copied on the way in, so mutating the caller's arrays
# afterwards cannot desynchronise the stored shapes from the dimensions that
# are computed once here and cached in @m, @k and @n.
#
# @param shape_a [Array<Integer>] shape of matrix A
# @param shape_b [Array<Integer>] shape of matrix B
# @param dtype [Symbol] data type; the value stored is whatever
#   DType.validate! returns for it
# @param transpose_a [Boolean] transpose A
# @param transpose_b [Boolean] transpose B
# @param epilog [Symbol, nil] epilog operation
# @param device [Integer, nil] target device; when not given,
#   Ignis.configuration.default_device is used
def initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false,
               epilog: nil, device: nil)
  # Kernel#Array hands back an Array argument unchanged, so take a private copy.
  @shape_a = Array(shape_a).dup
  @shape_b = Array(shape_b).dup
  @dtype = DType.validate!(dtype)
  @transpose_a = transpose_a
  @transpose_b = transpose_b
  @epilog = epilog
  @device_index = device || Ignis.configuration.default_device

  # Runs before any planning state exists.
  # NOTE(review): presumably raises on incompatible shapes - confirm in validate_shapes!.
  validate_shapes!

  # Planning / tuning state, filled in later by #plan! and #autotune!.
  @options = {}
  @autotuned = false
  @best_algorithm = nil
  @workspace = nil

  @m, @k, @n = compute_dimensions
  @execution_count = 0
end

Instance Attribute Details

#autotuned ⇒ `Boolean` (readonly)

Returns whether the plan has been autotuned.

Returns:

(Boolean) —

Whether plan has been autotuned



21
22
23

# File 'lib/nvruby/linalg/matmul_plan.rb', line 21

# @return [Boolean] whether the plan has been autotuned; false after
#   construction and set to true by #autotune!
def autotuned
  @autotuned
end

#dtype ⇒ `Symbol` (readonly)

Returns the data type.

Returns:

(Symbol) —

Data type



15
16
17

# File 'lib/nvruby/linalg/matmul_plan.rb', line 15

# @return [Symbol] data type of the plan: the value DType.validate! returned
#   for the constructor's dtype argument
def dtype
  @dtype
end

#options ⇒ `Hash` (readonly)

Returns the plan options.

Returns:

(Hash) —

Plan options



18
19
20

# File 'lib/nvruby/linalg/matmul_plan.rb', line 18

# @return [Hash] plan options. #plan! stores :workspace_size and :planned
#   here, and #autotune! stores :avg_time_ms. The Hash itself is returned,
#   not a copy.
def options
  @options
end

#shape_a ⇒ `Array<Integer>` (readonly)

Returns the shape of matrix A.

Returns:

(Array<Integer>) —

Shape of matrix A



9
10
11

# File 'lib/nvruby/linalg/matmul_plan.rb', line 9

# @return [Array<Integer>] shape of matrix A, as coerced with Kernel#Array
#   in the constructor
def shape_a
  @shape_a
end

#shape_b ⇒ `Array<Integer>` (readonly)

Returns the shape of matrix B.

Returns:

(Array<Integer>) —

Shape of matrix B



12
13
14

# File 'lib/nvruby/linalg/matmul_plan.rb', line 12

# @return [Array<Integer>] shape of matrix B, as coerced with Kernel#Array
#   in the constructor
def shape_b
  @shape_b
end

Instance Method Details

#autotune!(iterations: nil, warmup: 3) ⇒ `self`

Autotune the operation to find the best algorithm

Parameters:

iterations (Integer, nil) (defaults to: nil) —

Number of benchmark iterations (uses Ignis.configuration.autotuning_iterations when nil)
warmup (Integer) (defaults to: 3) —

Number of warmup iterations

Returns:

(self)

# File 'lib/nvruby/linalg/matmul_plan.rb', line 81

# Benchmarks the planned multiplication and records its average run time.
#
# Runs #plan! first if the plan has not been planned yet. It then executes
# +warmup+ untimed runs followed by +iterations+ timed runs on zero-filled
# scratch operands, measuring the timed runs with a pair of CUDA events.
# The average is stored in @options[:avg_time_ms] and @autotuned is set.
# The scratch arrays and events are released even when a run raises.
#
# @param iterations [Integer, nil] number of timed runs; when nil,
#   Ignis.configuration.autotuning_iterations is used
# @param warmup [Integer] number of untimed warmup runs
# @return [self]
# @raise [ArgumentError] if iterations is not a positive Integer
def autotune!(iterations: nil, warmup: 3)
  iterations ||= Ignis.configuration.autotuning_iterations
  # Zero or negative counts would make the average meaningless (division by
  # zero, or nothing measured), so reject them up front.
  unless iterations.is_a?(Integer) && iterations.positive?
    raise ArgumentError, "iterations must be a positive Integer, got #{iterations.inspect}"
  end

  plan! unless @options[:planned]

  Ignis.logger.info { "Autotuning MatmulPlan with #{iterations} iterations" }

  # Declared up front so the ensure clause can see whatever was created.
  a = b = c = start_event = end_event = nil
  begin
    # Create test arrays
    a = NvArray.zeros(@shape_a, dtype: @dtype, device: @device_index)
    b = NvArray.zeros(@shape_b, dtype: @dtype, device: @device_index)
    c = NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)

    # Warmup
    warmup.times { execute_internal(a, b, c) }
    # NOTE(review): this synchronizes the *current* device; confirm it is
    # always the plan's @device_index.
    CUDA::Device.current.synchronize

    # Benchmark
    start_event = CUDA::Event.new
    end_event = CUDA::Event.new

    start_event.record
    iterations.times { execute_internal(a, b, c) }
    end_event.record
    end_event.synchronize

    elapsed_ms = CUDA::Event.elapsed_time(start_event, end_event)
    avg_time = elapsed_ms / iterations
  ensure
    # Cleanup runs on success and on failure; anything not yet created is nil.
    [start_event, end_event].each { |event| event&.destroy! }
    [a, b, c].each { |array| array&.free! }
  end

  @options[:avg_time_ms] = avg_time
  @autotuned = true

  Ignis.logger.info { "MatmulPlan autotuned: avg_time=#{avg_time.round(3)}ms" }

  self
end

#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ `NvArray`

Execute the planned matrix multiplication

Parameters:

a (NvArray) —

Left matrix
b (NvArray) —

Right matrix
c (NvArray, nil) (defaults to: nil) —

Output matrix (created if nil)
alpha (Float) (defaults to: 1.0) —

Scaling factor for A @ B
beta (Float) (defaults to: 0.0) —

Scaling factor for C
stream (CUDA::Stream, nil) (defaults to: nil) —

CUDA stream

Returns:

(NvArray) —

Result matrix

# File 'lib/nvruby/linalg/matmul_plan.rb', line 132

# Runs the planned multiplication on the given operands.
#
# Operands that report they are not on a device are moved to the plan's
# device first. When no output is supplied, a zero-filled one of
# #output_shape is allocated on the plan's device.
#
# @param a [NvArray] left matrix
# @param b [NvArray] right matrix
# @param c [NvArray, nil] output matrix (created if nil)
# @param alpha [Float] scaling factor for A @ B
# @param beta [Float] scaling factor for C
# @param stream [CUDA::Stream, nil] CUDA stream
# @return [NvArray] the array the result was written to; this is the
#   device-side copy when the supplied +c+ had to be moved
def execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil)
  validate_execution_inputs!(a, b)

  # Inputs: reuse as-is when already device-resident, otherwise transfer.
  lhs = a.on_device? ? a : a.to_device(device: @device_index)
  rhs = b.on_device? ? b : b.to_device(device: @device_index)

  # Output: validate and (if needed) transfer a caller-supplied array,
  # or allocate a fresh one.
  result =
    if c
      validate_output!(c)
      c.on_device? ? c : c.to_device(device: @device_index)
    else
      NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)
    end

  execute_internal(lhs, rhs, result, alpha, beta, stream)

  result
end

#output_shape ⇒ `Array<Integer>`

Output shape of the matmul operation

Returns:

(Array<Integer>)



53
54
55

# File 'lib/nvruby/linalg/matmul_plan.rb', line 53

# @return [Array<Integer>] two-element array [@m, @n], taken from the
#   dimensions the constructor cached from compute_dimensions
def output_shape
  [@m, @n]
end

#plan!(workspace_size: nil) ⇒ `self`

Plan the operation (find algorithms)

Parameters:

workspace_size (Integer, nil) (defaults to: nil) —

Maximum workspace size in bytes (uses Ignis.configuration.default_workspace_size when nil)

Returns:

(self)

# File 'lib/nvruby/linalg/matmul_plan.rb', line 60

# Marks the operation as planned and records the workspace budget.
#
# Ensures the cuBLAS bindings are loaded, then stores :workspace_size and
# :planned in @options. No algorithm search happens here.
#
# @param workspace_size [Integer, nil] maximum workspace size in bytes; when
#   not given, Ignis.configuration.default_workspace_size is used
# @return [self]
def plan!(workspace_size: nil)
  workspace_size = Ignis.configuration.default_workspace_size unless workspace_size

  CuBLASBindings.ensure_loaded!

  Ignis.logger.debug { "Planning MatmulPlan for #{@shape_a} @ #{@shape_b} -> #{output_shape}" }

  # Plain cuBLAS GEMM needs no real planning step; algorithm selection would
  # only come into play with cuBLASLt.
  @options.update(workspace_size: workspace_size, planned: true)

  Ignis.logger.info { "MatmulPlan planned: workspace=#{workspace_size} bytes" }

  self
end

#stats ⇒ `Hash`

Get statistics about the plan

Returns:

(Hash)

# File 'lib/nvruby/linalg/matmul_plan.rb', line 154

# Snapshot of the plan's configuration and runtime counters.
#
# @return [Hash] with keys, in this order: :shape_a, :shape_b, :output_shape,
#   :dtype, :transpose_a, :transpose_b, :autotuned, :avg_time_ms (nil until
#   #autotune! has stored a value) and :execution_count
def stats
  geometry = { shape_a: @shape_a, shape_b: @shape_b, output_shape: output_shape }
  layout = { dtype: @dtype, transpose_a: @transpose_a, transpose_b: @transpose_b }
  tuning = {
    autotuned: @autotuned,
    avg_time_ms: @options[:avg_time_ms],
    execution_count: @execution_count
  }

  geometry.merge(layout).merge(tuning)
end

#to_s ⇒ `String`

Returns:

(String)

# File 'lib/nvruby/linalg/matmul_plan.rb', line 169

def to_s
  tuned = @autotuned ? "autotuned" : "not tuned"
  "MatmulPlan(#{@shape_a} @ #{@shape_b} -> #{output_shape}, #{@dtype}, #{tuned})"
end

Class: Ignis::LinAlg::MatmulPlan

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ MatmulPlan

Instance Attribute Details

#autotuned ⇒ Boolean (readonly)

#dtype ⇒ Symbol (readonly)

#options ⇒ Hash (readonly)

#shape_a ⇒ Array<Integer> (readonly)

#shape_b ⇒ Array<Integer> (readonly)

Instance Method Details

#autotune!(iterations: nil, warmup: 3) ⇒ self

#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ NvArray

#output_shape ⇒ Array<Integer>

#plan!(workspace_size: nil) ⇒ self

#stats ⇒ Hash

#to_s ⇒ String

#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ `MatmulPlan`

#autotuned ⇒ `Boolean` (readonly)

#dtype ⇒ `Symbol` (readonly)

#options ⇒ `Hash` (readonly)

#shape_a ⇒ `Array<Integer>` (readonly)

#shape_b ⇒ `Array<Integer>` (readonly)

#autotune!(iterations: nil, warmup: 3) ⇒ `self`

#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ `NvArray`

#output_shape ⇒ `Array<Integer>`

#plan!(workspace_size: nil) ⇒ `self`

#stats ⇒ `Hash`

#to_s ⇒ `String`