Class: Ignis::LinAlg::MatmulPlan
- Inherits:
-
Object
- Object
- Ignis::LinAlg::MatmulPlan
- Defined in:
- lib/nvruby/linalg/matmul_plan.rb
Overview
Stateful matrix multiplication plan with autotuning. Reusable for repeated operations with the same dimensions.
Instance Attribute Summary collapse
-
#autotuned ⇒ Boolean
readonly
Whether plan has been autotuned.
-
#dtype ⇒ Symbol
readonly
Data type.
-
#options ⇒ Hash
readonly
Plan options.
-
#shape_a ⇒ Array<Integer>
readonly
Shape of matrix A.
-
#shape_b ⇒ Array<Integer>
readonly
Shape of matrix B.
Instance Method Summary collapse
-
#autotune!(iterations: nil, warmup: 3) ⇒ self
Autotune the operation to find the best algorithm.
-
#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ NvArray
Execute the planned matrix multiplication.
-
#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ MatmulPlan
constructor
A new instance of MatmulPlan.
-
#output_shape ⇒ Array<Integer>
Output shape of the matmul operation.
-
#plan!(workspace_size: nil) ⇒ self
Plan the operation (find algorithms).
-
#stats ⇒ Hash
Get statistics about the plan.
- #to_s ⇒ String
Constructor Details
#initialize(shape_a:, shape_b:, dtype: :float32, transpose_a: false, transpose_b: false, epilog: nil, device: nil) ⇒ MatmulPlan
Returns a new instance of MatmulPlan.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 30

# Build a matmul plan for two operands of fixed shape.
#
# @param shape_a [Array<Integer>] shape of matrix A
# @param shape_b [Array<Integer>] shape of matrix B
# @param dtype [Symbol] element type; passed through DType.validate!
# @param transpose_a [Boolean] stored on the plan and reported by #stats
# @param transpose_b [Boolean] stored on the plan and reported by #stats
# @param epilog [Object, nil] stored on the plan; not consumed by any
#   method visible in this listing
# @param device [Object, nil] device index (presumably an Integer — confirm);
#   falls back to Ignis.configuration.default_device when nil
# @return [MatmulPlan]
def initialize(shape_a:, shape_b:, dtype: :float32,
               transpose_a: false, transpose_b: false,
               epilog: nil, device: nil)
  # Kernel#Array returns the very same object when it is handed an Array.
  # Copy it so a caller that later mutates its own shape array cannot
  # desynchronise @shape_a/@shape_b from the cached @m/@k/@n below.
  @shape_a = Array(shape_a).dup
  @shape_b = Array(shape_b).dup
  @dtype = DType.validate!(dtype)
  @transpose_a = transpose_a
  @transpose_b = transpose_b
  @epilog = epilog
  @device_index = device || Ignis.configuration.default_device

  validate_shapes!

  @options = {}
  @autotuned = false
  @best_algorithm = nil
  @workspace = nil

  # Dimensions are computed once, after the shapes have been validated.
  @m, @k, @n = compute_dimensions
  @execution_count = 0
end
Instance Attribute Details
#autotuned ⇒ Boolean (readonly)
Returns whether the plan has been autotuned.
21 22 23 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 21

# @return [Boolean] the plan's autotuned flag
def autotuned
  @autotuned
end
#dtype ⇒ Symbol (readonly)
Returns the data type.
15 16 17 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 15

# @return [Symbol] the plan's data type
def dtype
  @dtype
end
#options ⇒ Hash (readonly)
Returns the plan options.
18 19 20 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 18

# @return [Hash] the plan's options hash (the live object, not a copy)
def options
  @options
end
#shape_a ⇒ Array<Integer> (readonly)
Returns the shape of matrix A.
9 10 11 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 9

# @return [Array<Integer>] the stored shape of matrix A
def shape_a
  @shape_a
end
#shape_b ⇒ Array<Integer> (readonly)
Returns the shape of matrix B.
12 13 14 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 12

# @return [Array<Integer>] the stored shape of matrix B
def shape_b
  @shape_b
end
Instance Method Details
#autotune!(iterations: nil, warmup: 3) ⇒ self
Autotune the operation to find the best algorithm
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 81

# Benchmark the planned operation and record its average run time.
#
# Plans first if #plan! has not run yet, then times `iterations` calls of
# execute_internal on zero-filled scratch operands and stores the mean in
# options[:avg_time_ms]. In the code shown here only one execution path is
# timed; no alternative algorithms are compared.
#
# @param iterations [Integer, nil] number of timed runs; defaults to
#   Ignis.configuration.autotuning_iterations. Expected to be positive —
#   the average is computed as elapsed / iterations.
# @param warmup [Integer] number of untimed runs before measuring
# @return [self]
def autotune!(iterations: nil, warmup: 3)
  iterations ||= Ignis.configuration.autotuning_iterations

  plan! unless @options[:planned]

  Ignis.logger.info { "Autotuning MatmulPlan with #{iterations} iterations" }

  # Declared up front so the ensure clause can release whatever was
  # actually created, even if a later allocation or a kernel launch raises.
  a = b = c = nil
  start_event = end_event = nil

  begin
    # Create test arrays
    a = NvArray.zeros(@shape_a, dtype: @dtype, device: @device_index)
    b = NvArray.zeros(@shape_b, dtype: @dtype, device: @device_index)
    c = NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)

    # Warmup
    warmup.times { execute_internal(a, b, c) }
    CUDA::Device.current.synchronize

    # Benchmark
    start_event = CUDA::Event.new
    end_event = CUDA::Event.new

    start_event.record
    iterations.times { execute_internal(a, b, c) }
    end_event.record
    end_event.synchronize

    elapsed_ms = CUDA::Event.elapsed_time(start_event, end_event)
    avg_time = elapsed_ms / iterations

    @options[:avg_time_ms] = avg_time
    @autotuned = true
  ensure
    # Cleanup runs on success and on failure; previously an exception in
    # warmup or benchmarking leaked the events and the scratch arrays.
    [start_event, end_event].compact.each(&:destroy!)
    [a, b, c].compact.each(&:free!)
  end

  Ignis.logger.info { "MatmulPlan autotuned: avg_time=#{avg_time.round(3)}ms" }

  self
end
#execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil) ⇒ NvArray
Execute the planned matrix multiplication
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 132

# Run the planned multiplication on the plan's device.
#
# Inputs are checked with validate_execution_inputs!; any operand that is
# not already on a device is moved with to_device. When `c` is given it is
# checked with validate_output!, otherwise a zero-filled output of
# #output_shape is allocated.
#
# @param a [NvArray] left operand
# @param b [NvArray] right operand
# @param c [NvArray, nil] optional output array
# @param alpha [Numeric] forwarded to execute_internal
# @param beta [Numeric] forwarded to execute_internal
# @param stream [Object, nil] forwarded to execute_internal
# @return [NvArray] the array handed to execute_internal as output. If a
#   supplied `c` had to be moved, this is the result of to_device rather
#   than the object passed in.
def execute(a, b, c: nil, alpha: 1.0, beta: 0.0, stream: nil)
  validate_execution_inputs!(a, b)

  # Ensure on device
  lhs = a.on_device? ? a : a.to_device(device: @device_index)
  rhs = b.on_device? ? b : b.to_device(device: @device_index)

  # Prepare output
  out =
    if c
      validate_output!(c)
      c.on_device? ? c : c.to_device(device: @device_index)
    else
      NvArray.zeros(output_shape, dtype: @dtype, device: @device_index)
    end

  execute_internal(lhs, rhs, out, alpha, beta, stream)

  out
end
#output_shape ⇒ Array<Integer>
Output shape of the matmul operation
53 54 55 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 53

# @return [Array<Integer>] two-element shape built from the cached
#   @m and @n dimensions
def output_shape
  m = @m
  n = @n
  [m, n]
end
#plan!(workspace_size: nil) ⇒ self
Plan the operation (find algorithms)
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 60

# Mark the plan as planned and record its workspace size.
#
# Ensures the cuBLAS bindings are loaded, then stores :workspace_size and
# :planned in the options hash. No algorithm search happens in this method.
#
# @param workspace_size [Integer, nil] workspace size in bytes; defaults to
#   Ignis.configuration.default_workspace_size
# @return [self]
def plan!(workspace_size: nil)
  workspace_size = Ignis.configuration.default_workspace_size unless workspace_size

  CuBLASBindings.ensure_loaded!

  Ignis.logger.debug { "Planning MatmulPlan for #{@shape_a} @ #{@shape_b} -> #{output_shape}" }

  # Plain cuBLAS GEMM needs no real planning step; algorithm selection
  # would only come into play with cuBLASLt.
  @options.merge!(workspace_size: workspace_size, planned: true)

  Ignis.logger.info { "MatmulPlan planned: workspace=#{workspace_size} bytes" }

  self
end
#stats ⇒ Hash
Get statistics about the plan
154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 154

# Snapshot of the plan's configuration and counters.
#
# @return [Hash] shapes, dtype, transpose flags, autotune state, the
#   recorded average time (nil until options[:avg_time_ms] is set) and
#   the execution counter
def stats
  summary = {}

  # Geometry and element type
  summary[:shape_a] = @shape_a
  summary[:shape_b] = @shape_b
  summary[:output_shape] = output_shape
  summary[:dtype] = @dtype

  # Layout flags
  summary[:transpose_a] = @transpose_a
  summary[:transpose_b] = @transpose_b

  # Tuning and usage
  summary[:autotuned] = @autotuned
  summary[:avg_time_ms] = @options[:avg_time_ms]
  summary[:execution_count] = @execution_count

  summary
end
#to_s ⇒ String
169 170 171 172 |
# File 'lib/nvruby/linalg/matmul_plan.rb', line 169

# @return [String] one-line description: operand shapes, output shape,
#   dtype and whether the plan has been autotuned
def to_s
  tuned =
    if @autotuned
      "autotuned"
    else
      "not tuned"
    end

  "MatmulPlan(#{@shape_a} @ #{@shape_b} -> #{output_shape}, #{@dtype}, #{tuned})"
end