Class: Ignis::LinAlg::MatmulPlan

Inherits:
Object
  • Object
show all
Defined in:
lib/nvruby/linalg/matmul_plan.rb

Overview

Stateful matrix multiplication plan with autotuning. Reusable for repeated operations with the same dimensions.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ MatmulPlan

Returns a new instance of MatmulPlan.

Parameters:

  • shape_a (Array<Integer>)

    Shape of matrix A [m, k]

  • shape_b (Array<Integer>)

    Shape of matrix B [k, n]

  • dtype (Symbol) (defaults to: :float32)

    Data type

  • transpose_a (Boolean) (defaults to: false)

    Transpose A

  • transpose_b (Boolean) (defaults to: false)

    Transpose B

  • epilog (Symbol, nil) (defaults to: nil)

    Epilog operation

  • device (Integer, nil) (defaults to: nil)

    Target device



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/nvruby/linalg/matmul_plan.rb', line 30

# Build a matrix multiplication plan for a fixed pair of operand shapes.
#
# @param shape_a [Array<Integer>] shape of matrix A, [m, k]
# @param shape_b [Array<Integer>] shape of matrix B, [k, n]
# @param dtype [Symbol] data type; passed through DType.validate!
# @param transpose_a [Boolean] transpose A
# @param transpose_b [Boolean] transpose B
# @param epilog [Symbol, nil] epilog operation (stored as given)
# @param device [Integer, nil] target device; falls back to
#   Ignis.configuration.default_device when nil
def initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false,
               epilog: nil, device: nil)
  # Kernel#Array hands back the caller's own Array when it is given one.
  # Copy it so that later mutation by the caller cannot change the stored
  # shapes after the dimensions below have been derived from them.
  @shape_a = Array(shape_a).dup
  @shape_b = Array(shape_b).dup
  @dtype = DType.validate!(dtype)
  @transpose_a = transpose_a
  @transpose_b = transpose_b
  @epilog = epilog
  @device_index = device || Ignis.configuration.default_device

  validate_shapes!

  # Planning / tuning state starts out empty.
  @options = {}
  @autotuned = false
  @best_algorithm = nil
  @workspace = nil

  @m, @k, @n = compute_dimensions
  @execution_count = 0
end

Instance Attribute Details

#autotuned ⇒ Boolean (readonly)

Returns whether the plan has been autotuned.

Returns:

  • (Boolean)

    Whether plan has been autotuned



21
22
23
# File 'lib/nvruby/linalg/matmul_plan.rb', line 21

# Whether this plan has been autotuned.
#
# @return [Boolean] false after construction; set to true by #autotune!
def autotuned
  @autotuned
end

#dtype ⇒ Symbol (readonly)

Returns Data type.

Returns:

  • (Symbol)

    Data type



15
16
17
# File 'lib/nvruby/linalg/matmul_plan.rb', line 15

# Data type of the plan.
#
# @return [Symbol] the value DType.validate! returned for the dtype given
#   to the constructor
def dtype
  @dtype
end

#options ⇒ Hash (readonly)

Returns Plan options.

Returns:

  • (Hash)

    Plan options



18
19
20
# File 'lib/nvruby/linalg/matmul_plan.rb', line 18

# Plan options.
#
# Returns the plan's own Hash, not a copy. #plan! writes :workspace_size and
# :planned into it; #autotune! writes :avg_time_ms.
#
# @return [Hash] plan options
def options
  @options
end

#shape_a ⇒ Array<Integer> (readonly)

Returns Shape of matrix A.

Returns:

  • (Array<Integer>)

    Shape of matrix A



9
10
11
# File 'lib/nvruby/linalg/matmul_plan.rb', line 9

# Shape of matrix A as given to the constructor (coerced with Array()).
#
# @return [Array<Integer>] shape of matrix A
def shape_a
  @shape_a
end

#shape_b ⇒ Array<Integer> (readonly)

Returns Shape of matrix B.

Returns:

  • (Array<Integer>)

    Shape of matrix B



12
13
14
# File 'lib/nvruby/linalg/matmul_plan.rb', line 12

# Shape of matrix B as given to the constructor (coerced with Array()).
#
# @return [Array<Integer>] shape of matrix B
def shape_b
  @shape_b
end

Instance Method Details

#autotune!(iterations: nil, warmup: 3) ⇒ self

Autotune the operation to find the best algorithm

Parameters:

  • iterations (Integer, nil) (defaults to: nil)

    Number of benchmark iterations (Ignis.configuration.autotuning_iterations is used when nil)

  • warmup (Integer) (defaults to: 3)

    Number of warmup iterations

Returns:

  • (self)


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/nvruby/linalg/matmul_plan.rb', line 81

# Benchmark the planned multiplication and record its mean execution time.
#
# Runs #plan! first if the plan has not been planned yet, then times
# `iterations` calls of execute_internal on zero-filled scratch arrays with
# CUDA events. The mean is stored in options[:avg_time_ms] and the plan is
# marked as autotuned. As written, this times a single execution path; it
# does not choose between algorithms.
#
# @param iterations [Integer, nil] number of timed iterations; falls back to
#   Ignis.configuration.autotuning_iterations when nil
# @param warmup [Integer] number of untimed warmup iterations
# @return [self]
# @raise [ArgumentError] if iterations is not a positive Integer
def autotune!(iterations: nil, warmup: 3)
  iterations ||= Ignis.configuration.autotuning_iterations
  # Zero or negative counts would yield a NaN/infinite/negative average and
  # still mark the plan as tuned, so reject them up front.
  unless iterations.is_a?(Integer) && iterations.positive?
    raise ArgumentError, "iterations must be a positive Integer, got #{iterations.inspect}"
  end

  plan! unless @options[:planned]

  Ignis.logger.info { "Autotuning MatmulPlan with #{iterations} iterations" }

  a = b = c = nil
  start_event = end_event = nil
  avg_time = nil

  begin
    # Scratch operands; their contents are irrelevant for timing.
    a = NvArray.zeros(@shape_a, dtype: @dtype, device: @device_index)
    b = NvArray.zeros(@shape_b, dtype: @dtype, device: @device_index)
    c = NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)

    # Warmup runs are excluded from the measurement.
    warmup.times { execute_internal(a, b, c) }
    CUDA::Device.current.synchronize

    start_event = CUDA::Event.new
    end_event = CUDA::Event.new

    start_event.record
    iterations.times { execute_internal(a, b, c) }
    end_event.record
    end_event.synchronize

    elapsed_ms = CUDA::Event.elapsed_time(start_event, end_event)
    avg_time = elapsed_ms / iterations

    @options[:avg_time_ms] = avg_time
    @autotuned = true
  ensure
    # Release events and scratch arrays even when a step above raised;
    # anything that was never created is still nil and is skipped.
    start_event&.destroy!
    end_event&.destroy!
    a&.free!
    b&.free!
    c&.free!
  end

  Ignis.logger.info { "MatmulPlan autotuned: avg_time=#{avg_time.round(3)}ms" }

  self
end

#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ NvArray

Execute the planned matrix multiplication

Parameters:

  • a (NvArray)

    Left matrix

  • b (NvArray)

    Right matrix

  • c (NvArray, nil) (defaults to: nil)

    Output matrix (created if nil)

  • alpha (Float) (defaults to: 1.0)

    Scaling factor for A @ B

  • beta (Float) (defaults to: 0.0)

    Scaling factor for C

  • stream (CUDA::Stream, nil) (defaults to: nil)

    CUDA stream

Returns:

  • (NvArray)

    Result matrix



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/nvruby/linalg/matmul_plan.rb', line 132

# Run the planned multiplication on the given operands.
#
# Operands (and a supplied output) that report not being on a device are
# moved to the plan's device first. When no output is supplied, a
# zero-filled array of #output_shape is created.
#
# @param a [NvArray] left matrix
# @param b [NvArray] right matrix
# @param c [NvArray, nil] output matrix (created if nil)
# @param alpha [Float] scaling factor for A @ B
# @param beta [Float] scaling factor for C
# @param stream [CUDA::Stream, nil] CUDA stream
# @return [NvArray] the array the result was written to
def execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil)
  validate_execution_inputs!(a, b)

  # Hands back the array untouched when it already lives on a device.
  resident = lambda do |array|
    array.on_device? ? array : array.to_device(device: @device_index)
  end

  lhs = resident.call(a)
  rhs = resident.call(b)

  out =
    if c
      validate_output!(c)
      resident.call(c)
    else
      NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)
    end

  execute_internal(lhs, rhs, out, alpha, beta, stream)
  out
end

#output_shape ⇒ Array<Integer>

Output shape of the matmul operation

Returns:

  • (Array<Integer>)


53
54
55
# File 'lib/nvruby/linalg/matmul_plan.rb', line 53

# Shape of the multiplication result.
#
# @return [Array<Integer>] a new two-element array built from @m and @n
def output_shape
  rows = @m
  cols = @n
  [rows, cols]
end

#plan!(workspace_size: nil) ⇒ self

Plan the operation (find algorithms)

Parameters:

  • workspace_size (Integer, nil) (defaults to: nil)

    Maximum workspace size in bytes (Ignis.configuration.default_workspace_size is used when nil)

Returns:

  • (self)


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/nvruby/linalg/matmul_plan.rb', line 60

# Prepare the plan for execution.
#
# Makes sure the cuBLAS bindings are loaded, then records the workspace
# limit and a :planned flag in the plan options.
#
# @param workspace_size [Integer, nil] maximum workspace size in bytes;
#   falls back to Ignis.configuration.default_workspace_size when nil
# @return [self]
def plan!(workspace_size: nil)
  bytes = workspace_size || Ignis.configuration.default_workspace_size

  CuBLASBindings.ensure_loaded!

  Ignis.logger.debug do
    "Planning MatmulPlan for #{@shape_a} @ #{@shape_b} -> #{output_shape}"
  end

  # Plain cuBLAS GEMM needs no algorithm search; picking algorithms would
  # only come into play with cuBLASLt.
  @options.update(workspace_size: bytes, planned: true)

  Ignis.logger.info { "MatmulPlan planned: workspace=#{bytes} bytes" }

  self
end

#stats ⇒ Hash

Get statistics about the plan

Returns:

  • (Hash)


154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/nvruby/linalg/matmul_plan.rb', line 154

# Snapshot of the plan's configuration and tuning state.
#
# @return [Hash] a new Hash with the keys :shape_a, :shape_b, :output_shape,
#   :dtype, :transpose_a, :transpose_b, :autotuned, :avg_time_ms and
#   :execution_count, in that order
def stats
  layout = {
    shape_a: @shape_a,
    shape_b: @shape_b,
    output_shape: output_shape,
    dtype: @dtype,
    transpose_a: @transpose_a,
    transpose_b: @transpose_b
  }
  tuning = {
    autotuned: @autotuned,
    avg_time_ms: @options[:avg_time_ms],
    execution_count: @execution_count
  }

  layout.merge(tuning)
end

#to_s ⇒ String

Returns:

  • (String)


169
170
171
172
# File 'lib/nvruby/linalg/matmul_plan.rb', line 169

# Human-readable one-line summary of the plan.
#
# @return [String] operand shapes, output shape, dtype and tuning status
def to_s
  status = @autotuned ? "autotuned" : "not tuned"
  format("MatmulPlan(%s @ %s -> %s, %s, %s)", @shape_a, @shape_b, output_shape, @dtype, status)
end