Class: Ignis::CUDA::Graph

Inherits:
Object
  • Object
show all
Defined in:
lib/nvruby/cuda/graph.rb

Overview

CUDA Graph for capturing and replaying GPU operations. Provides reduced kernel launch overhead for repetitive workloads.

Uses GraphBindings (FFI-based) since graph operations are NOT hot-path. Stream handles are Fiddle::Pointer — we convert for FFI interop.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(flags: 0) ⇒ Graph

Create a new empty CUDA graph.

Parameters:

  • flags (Integer) (defaults to: 0)

    Graph creation flags (default: 0)



25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/nvruby/cuda/graph.rb', line 25

# Create a new, empty CUDA graph through GraphBindings.cudaGraphCreate.
#
# @param flags [Integer] graph creation flags handed to cudaGraphCreate
# @raise whatever GraphBindings.check_status! raises for a failing status
def initialize(flags: 0)
  GraphBindings.ensure_loaded!

  # Out-parameter slot that cudaGraphCreate fills with the new graph handle.
  out = FFI::MemoryPointer.new(:pointer)
  GraphBindings.check_status!(GraphBindings.cudaGraphCreate(out, flags), 'cudaGraphCreate')

  @handle = out.read_pointer
  @captured = false # built empty, not via stream capture
  @destroyed = false
  @device_id = nil
end

Instance Attribute Details

#captured ⇒ Boolean (readonly)

Returns whether the graph was created via stream capture.

Returns:

  • (Boolean)

    Whether the graph was created via stream capture



18
19
20
# File 'lib/nvruby/cuda/graph.rb', line 18

# Reader for the @captured flag.
#
# @return [Boolean] whether the graph was created via stream capture
def captured
  @captured
end

#device_id ⇒ Integer?

Returns device_id for recovery coordinator invalidation.

Returns:

  • (Integer, nil)

    device_id for recovery coordinator invalidation



21
22
23
# File 'lib/nvruby/cuda/graph.rb', line 21

# Reader for @device_id.
#
# @return [Integer, nil] device_id for recovery coordinator invalidation;
#   nil until something assigns it (new and cloned-from-new graphs start at nil)
def device_id
  @device_id
end

#handle ⇒ FFI::Pointer (readonly)

Returns the native CUDA graph handle.

Returns:

  • (FFI::Pointer)

    Native CUDA graph handle



15
16
17
# File 'lib/nvruby/cuda/graph.rb', line 15

# Reader for the native CUDA graph handle.
#
# @return [FFI::Pointer, nil] the handle read back from the CUDA call that
#   produced this graph; nil once destroy! has cleared it
def handle
  @handle
end

Class Method Details

.capture(stream: nil, mode: :global) {|Stream| ... } ⇒ Graph

Capture GPU operations from a stream into a new graph.

Parameters:

  • stream (Stream, nil) (defaults to: nil)

    Stream to capture (creates temporary if nil)

  • mode (Symbol) (defaults to: :global)

    Capture mode (:global, :thread_local, :relaxed)

Yields:

  • (Stream)

    Block containing GPU operations to capture

Returns:

  • (Graph)



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/nvruby/cuda/graph.rb', line 43

# Capture GPU operations issued on a stream into a new graph.
#
# The stream is put into capture mode, the block is run with that stream,
# and capture is always ended again, even when the block raises.
#
# @param stream [Stream, nil] stream to capture; a temporary stream is
#   created (and destroyed before returning or raising) when nil
# @param mode [Symbol] :global, :thread_local or :relaxed; any other value
#   falls back to the global capture mode
# @yield [Stream] block containing the GPU operations to capture
# @return [Graph] graph wrapping the captured handle, with @captured set
# @raise [ArgumentError] if no block is given
# @raise whatever GraphBindings.check_status! raises when beginning or
#   ending the capture fails; an exception raised by the block itself is
#   propagated unchanged
def self.capture(stream: nil, mode: :global, &block)
  # Fail fast with a clear error instead of a NoMethodError on nil once the
  # stream is already in capture mode.
  raise ArgumentError, 'Graph.capture requires a block' unless block

  GraphBindings.ensure_loaded!

  own_stream = stream.nil?
  stream ||= Stream.new

  begin
    capture_mode = case mode
                   when :global then GraphBindings::CUDA_STREAM_CAPTURE_MODE_GLOBAL
                   when :thread_local then GraphBindings::CUDA_STREAM_CAPTURE_MODE_THREAD_LOCAL
                   when :relaxed then GraphBindings::CUDA_STREAM_CAPTURE_MODE_RELAXED
                   else GraphBindings::CUDA_STREAM_CAPTURE_MODE_GLOBAL
                   end

    # Convert Fiddle::Pointer to FFI::Pointer for GraphBindings
    stream_ffi = to_ffi_ptr(stream.to_ptr)

    status = GraphBindings.cudaStreamBeginCapture(stream_ffi, capture_mode)
    GraphBindings.check_status!(status, 'cudaStreamBeginCapture')

    graph_ptr = FFI::MemoryPointer.new(:pointer)
    completed = false
    begin
      block.call(stream)
      completed = true
    ensure
      # Always leave capture mode, but do not raise from here: raising inside
      # ensure would replace the block's own exception with a secondary one.
      end_status = GraphBindings.cudaStreamEndCapture(stream_ffi, graph_ptr)

      unless completed
        # The block aborted, so nobody will ever own a graph produced by this
        # capture. Release it best-effort (status deliberately ignored).
        partial = graph_ptr.read_pointer
        GraphBindings.cudaGraphDestroy(partial) unless partial.null?
      end
    end

    GraphBindings.check_status!(end_status, 'cudaStreamEndCapture')
  ensure
    # Runs on every exit path, so the temporary stream cannot leak when
    # beginning or ending the capture fails.
    stream.destroy! if own_stream
  end

  # Build the wrapper without running #initialize, which would create a
  # second, empty graph via cudaGraphCreate.
  graph = allocate
  graph.instance_variable_set(:@handle, graph_ptr.read_pointer)
  graph.instance_variable_set(:@captured, true)
  graph.instance_variable_set(:@destroyed, false)
  graph.instance_variable_set(:@device_id, nil)

  graph
end

.to_ffi_ptr(fiddle_ptr) ⇒ FFI::Pointer

Convert a Fiddle::Pointer to an FFI::Pointer for interop.

Parameters:

  • fiddle_ptr (Fiddle::Pointer)

Returns:

  • (FFI::Pointer)


151
152
153
154
155
# File 'lib/nvruby/cuda/graph.rb', line 151

# Re-wrap a Fiddle pointer's address as an FFI pointer.
#
# @param fiddle_ptr [Fiddle::Pointer, nil] pointer whose address is reused
# @return [FFI::Pointer] FFI::Pointer::NULL for nil or a zero address,
#   otherwise a new :pointer-typed FFI::Pointer at the same address
def self.to_ffi_ptr(fiddle_ptr)
  address = fiddle_ptr.to_i # nil.to_i is 0, so nil takes the NULL branch too
  address.zero? ? FFI::Pointer::NULL : FFI::Pointer.new(:pointer, address)
end

Instance Method Details

#clone ⇒ Graph

Clone this graph.

Returns:

  • (Graph)

Raises:

  • (InvalidOperationError)



92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/nvruby/cuda/graph.rb', line 92

# Duplicate this graph through cudaGraphClone.
#
# This definition replaces the inherited Object#clone for Graph instances:
# it takes no arguments and returns a wrapper around a new native handle.
#
# @return [Graph] new wrapper carrying over @captured and @device_id
# @raise [InvalidOperationError] if this graph has already been destroyed
# @raise whatever GraphBindings.check_status! raises for a failing status
def clone
  raise InvalidOperationError, 'Graph already destroyed' if @destroyed

  # Out-parameter slot that cudaGraphClone fills with the copy's handle.
  out = FFI::MemoryPointer.new(:pointer)
  GraphBindings.check_status!(GraphBindings.cudaGraphClone(out, @handle), 'cudaGraphClone')

  state = {
    :@handle => out.read_pointer,
    :@captured => @captured,
    :@destroyed => false,
    :@device_id => @device_id
  }

  # allocate skips #initialize, which would create another native graph.
  Graph.allocate.tap do |copy|
    state.each { |ivar, value| copy.instance_variable_set(ivar, value) }
  end
end

#destroy! ⇒ void

This method returns an undefined value.

Destroy the graph and release resources.



138
139
140
141
142
143
144
145
146
# File 'lib/nvruby/cuda/graph.rb', line 138

# Release the native graph via cudaGraphDestroy.
#
# Calling this again after a successful destroy is a no-op. If the status
# check raises, the graph is left marked as not destroyed and keeps its handle.
#
# @return [void]
# @raise whatever GraphBindings.check_status! raises for a failing status
def destroy!
  unless @destroyed
    GraphBindings.check_status!(GraphBindings.cudaGraphDestroy(@handle), 'cudaGraphDestroy')

    @destroyed = true
    @handle = nil # drop the stale native pointer
  end
end

#destroyed? ⇒ Boolean

Returns:

  • (Boolean)


132
133
134
# File 'lib/nvruby/cuda/graph.rb', line 132

# Whether destroy! has already released this graph.
#
# @return [Boolean] the @destroyed flag (false on new, captured and cloned
#   graphs; set to true by destroy!)
def destroyed?
  @destroyed
end

#instantiate(flags: 0) ⇒ GraphExecutable

Instantiate the graph to create an executable graph.

Parameters:

  • flags (Integer) (defaults to: 0)

    Instantiation flags

Returns:

  • (GraphExecutable)

Raises:

  • (InvalidOperationError)



84
85
86
87
88
# File 'lib/nvruby/cuda/graph.rb', line 84

# Build an executable graph from this graph.
#
# @param flags [Integer] instantiation flags forwarded to GraphExecutable.new
# @return [GraphExecutable] executable created from this graph
# @raise [InvalidOperationError] if this graph has already been destroyed
def instantiate(flags: 0)
  if @destroyed
    raise InvalidOperationError, 'Graph already destroyed'
  end

  GraphExecutable.new(self, flags: flags)
end

#node_count ⇒ Integer

Get number of nodes in the graph.

Returns:

  • (Integer)

Raises:

  • (InvalidOperationError)



109
110
111
112
113
114
115
116
117
# File 'lib/nvruby/cuda/graph.rb', line 109

# Number of nodes in the graph, as reported by cudaGraphGetNodes.
#
# @return [Integer] value read back from the size_t out-parameter
# @raise [InvalidOperationError] if this graph has already been destroyed
# @raise whatever GraphBindings.check_status! raises for a failing status
def node_count
  raise InvalidOperationError, 'Graph already destroyed' if @destroyed

  total = FFI::MemoryPointer.new(:size_t)
  # The node array argument is NULL: only the count written to `total` is used
  # (presumably the count-only query form of cudaGraphGetNodes; see CUDA docs).
  GraphBindings.check_status!(
    GraphBindings.cudaGraphGetNodes(@handle, FFI::Pointer::NULL, total),
    'cudaGraphGetNodes'
  )

  total.read(:size_t)
end

#root_node_count ⇒ Integer

Get number of root nodes (nodes with no dependencies).

Returns:

  • (Integer)

Raises:

  • (InvalidOperationError)



121
122
123
124
125
126
127
128
129
# File 'lib/nvruby/cuda/graph.rb', line 121

# Number of root nodes (nodes with no dependencies), as reported by
# cudaGraphGetRootNodes.
#
# @return [Integer] value read back from the size_t out-parameter
# @raise [InvalidOperationError] if this graph has already been destroyed
# @raise whatever GraphBindings.check_status! raises for a failing status
def root_node_count
  raise InvalidOperationError, 'Graph already destroyed' if @destroyed

  roots = FFI::MemoryPointer.new(:size_t)
  # The node array argument is NULL: only the count written to `roots` is used
  # (presumably the count-only query form of cudaGraphGetRootNodes; see CUDA docs).
  GraphBindings.check_status!(
    GraphBindings.cudaGraphGetRootNodes(@handle, FFI::Pointer::NULL, roots),
    'cudaGraphGetRootNodes'
  )

  roots.read(:size_t)
end