Module: Ignis::Collective::VMMBindings

Extended by:
FFI::Library
Defined in:
lib/nvruby/collective/vmm_bindings.rb

Overview

cuMem VMM (Virtual Memory Management) API bindings for modern IPC. Required for sharing cudaMallocAsync allocations on Windows. Uses cuMemExportToShareableHandle with CU_MEM_HANDLE_TYPE_WIN32.

Defined Under Namespace

Classes: CUmemAccessDesc, CUmemAllocationProp, CUmemLocation

Constant Summary collapse

CU_MEM_HANDLE_TYPE_NONE =

Handle types for shareable handles

0
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR =
1
CU_MEM_HANDLE_TYPE_WIN32 =
2
CU_MEM_HANDLE_TYPE_WIN32_KMT =
4
CU_MEM_ALLOCATION_TYPE_INVALID =

Memory allocation types

0
CU_MEM_ALLOCATION_TYPE_PINNED =

Pinned memory, can be shared

1
CU_MEM_ALLOCATION_TYPE_MAX =
0xFFFFFFFF
CU_MEM_LOCATION_TYPE_INVALID =

Allocation location types

0
CU_MEM_LOCATION_TYPE_DEVICE =
1
CU_MEM_LOCATION_TYPE_HOST =
2
CU_MEM_LOCATION_TYPE_HOST_NUMA =
3
CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT =
4
CU_MEM_LOCATION_TYPE_MAX =
0xFFFFFFFF
CU_MEM_ACCESS_FLAGS_PROT_NONE =

Memory access flags

0
CU_MEM_ACCESS_FLAGS_PROT_READ =
1
CU_MEM_ACCESS_FLAGS_PROT_READWRITE =
3

Class Method Summary collapse

Class Method Details

.attach_vmm_functions! ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/nvruby/collective/vmm_bindings.rb', line 80

def self.attach_vmm_functions!
  # Table of cuMem VMM driver entry points, one row per function:
  #   [function symbol, [argument types...]]
  # Every function in the table is declared as returning :int.
  vmm_signatures = [
    # Memory allocation via VMM
    [:cuMemCreate, [
      :pointer,   # CUmemGenericAllocationHandle* handle
      :size_t,    # size
      :pointer,   # const CUmemAllocationProp* prop
      :uint64     # flags (must be 0)
    ]],

    # Get allocation size granularity
    [:cuMemGetAllocationGranularity, [
      :pointer,   # size_t* granularity
      :pointer,   # const CUmemAllocationProp* prop
      :int        # CUmemAllocationGranularity_flags
    ]],

    # Release allocation handle
    [:cuMemRelease, [
      :uint64     # CUmemGenericAllocationHandle handle
    ]],

    # Export to shareable handle (Windows HANDLE or POSIX fd)
    [:cuMemExportToShareableHandle, [
      :pointer,   # void* shareableHandle (HANDLE* or int*)
      :uint64,    # CUmemGenericAllocationHandle handle
      :int,       # CUmemAllocationHandleType handleType
      :uint64     # flags (must be 0)
    ]],

    # Import from shareable handle
    [:cuMemImportFromShareableHandle, [
      :pointer,   # CUmemGenericAllocationHandle* handle
      :pointer,   # void* osHandle (HANDLE or int)
      :int        # CUmemAllocationHandleType handleType
    ]],

    # Reserve virtual address range
    [:cuMemAddressReserve, [
      :pointer,   # CUdeviceptr* ptr
      :size_t,    # size
      :size_t,    # alignment (0 = default)
      :uint64,    # addr (0 = any)
      :uint64     # flags (must be 0)
    ]],

    # Free reserved address range
    [:cuMemAddressFree, [
      :uint64,    # CUdeviceptr ptr
      :size_t     # size
    ]],

    # Map allocation to address range
    [:cuMemMap, [
      :uint64,    # CUdeviceptr ptr
      :size_t,    # size
      :size_t,    # offset
      :uint64,    # CUmemGenericAllocationHandle handle
      :uint64     # flags (must be 0)
    ]],

    # Unmap allocation
    [:cuMemUnmap, [
      :uint64,    # CUdeviceptr ptr
      :size_t     # size
    ]],

    # Set memory access for specific devices
    [:cuMemSetAccess, [
      :uint64,    # CUdeviceptr ptr
      :size_t,    # size
      :pointer,   # const CUmemAccessDesc* desc
      :size_t     # count (number of descriptors)
    ]],

    # Get allocation properties
    [:cuMemGetAllocationPropertiesFromHandle, [
      :pointer,   # CUmemAllocationProp* prop
      :uint64     # CUmemGenericAllocationHandle handle
    ]]
  ]

  # Bind in table order; the value of the final attach_function call is the
  # method's return value.
  vmm_signatures.map do |function_name, argument_types|
    attach_function function_name, argument_types, :int
  end.last
end

.check_status!(status, context = "VMM operation") ⇒ Object

Check status and raise on error

Parameters:

  • status (Integer)

    CUDA driver error code

  • context (String) (defaults to: "VMM operation")

    Operation description

Raises:

  • (CudaRuntimeError)


164
165
166
167
168
169
170
171
# File 'lib/nvruby/collective/vmm_bindings.rb', line 164

# Turn a non-success driver status into an exception.
#
# @param status [Integer] status code returned by a driver call; zero means success
# @param context [String] label prefixed to the error message
# @return [nil] when status is zero
# @raise [CudaRuntimeError] for any non-zero status, carrying it as cuda_code
def self.check_status!(status, context = "VMM operation")
  # Zero is CUDA_SUCCESS: nothing to report.
  return if status.zero?

  # The numeric code is reported as-is. Looking up a symbolic name through
  # cuGetErrorName is not possible here because this module never attaches
  # that function; calling it would raise NoMethodError and hide the actual
  # driver failure.
  failure_message = "#{context}: CUDA driver error #{status}"
  raise CudaRuntimeError.new(failure_message, cuda_code: status)
end

.create_access_desc(device_id:, read_write: true) ⇒ CUmemAccessDesc

Helper: create access descriptor for device

Parameters:

  • device_id (Integer)

    Target device

  • read_write (Boolean) (defaults to: true)

    Read-write access?

Returns:

  • (CUmemAccessDesc)


203
204
205
206
207
208
209
# File 'lib/nvruby/collective/vmm_bindings.rb', line 203

# Build an access descriptor that targets a single device.
#
# @param device_id [Integer] id stored in the descriptor's location
# @param read_write [Boolean] true selects the read-write protection flag,
#   false selects the read-only flag
# @return [CUmemAccessDesc] the populated descriptor
def self.create_access_desc(device_id:, read_write: true)
  protection =
    if read_write
      CU_MEM_ACCESS_FLAGS_PROT_READWRITE
    else
      CU_MEM_ACCESS_FLAGS_PROT_READ
    end

  CUmemAccessDesc.new.tap do |access|
    # The location always names a device; only its id varies per call.
    target = access[:location]
    target[:type] = CU_MEM_LOCATION_TYPE_DEVICE
    target[:id] = device_id
    access[:flags] = protection
  end
end

.create_allocation_prop(device_id:, shareable: true) ⇒ CUmemAllocationProp

Helper: create allocation properties for device memory

Parameters:

  • device_id (Integer)

    Target GPU device

  • shareable (Boolean) (defaults to: true)

    Whether to allow IPC sharing

Returns:

  • (CUmemAllocationProp)


177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/nvruby/collective/vmm_bindings.rb', line 177

# Build allocation properties for pinned memory located on one device.
#
# @param device_id [Integer] id stored in the properties' location
# @param shareable [Boolean] when true, request an OS-specific shareable
#   handle type (Win32 on Windows, POSIX file descriptor elsewhere); when
#   false, request no handle type
# @return [CUmemAllocationProp] the populated properties struct
def self.create_allocation_prop(device_id:, shareable: true)
  prop = CUmemAllocationProp.new

  # Platform detection only runs when a shareable handle is requested.
  # Ignis::Platform is used when it is loaded; otherwise fall back to
  # inspecting RUBY_PLATFORM.
  requested_handle = CU_MEM_HANDLE_TYPE_NONE
  if shareable
    on_windows =
      if defined?(Ignis::Platform)
        Ignis::Platform.windows?
      else
        /mswin|mingw|cygwin/.match?(RUBY_PLATFORM)
      end
    requested_handle = on_windows ? CU_MEM_HANDLE_TYPE_WIN32 : CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
  end

  target = prop[:location]
  target[:type] = CU_MEM_LOCATION_TYPE_DEVICE
  target[:id] = device_id

  prop[:type] = CU_MEM_ALLOCATION_TYPE_PINNED
  prop[:requestedHandleTypes] = requested_handle
  prop[:win32HandleMetaData] = FFI::Pointer::NULL
  prop[:allocFlags] = 0
  prop
end

.ensure_loaded! ⇒ Object

Load CUDA driver library (platform-aware)



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/nvruby/collective/vmm_bindings.rb', line 63

# Load the CUDA driver library and attach the VMM functions, once.
#
# Later calls return immediately after a successful load. The @loaded flag
# is only set after both ffi_lib and attach_vmm_functions! have completed.
#
# @return [true, nil] true after loading, nil when already loaded
# @raise [LoadError] when an FFI::NotFoundError is raised while loading
def self.ensure_loaded!
  return if @loaded

  # Preferred library name: ask Ignis::Platform when it is loaded (falling
  # back to its platform check if no library is found), otherwise decide
  # from RUBY_PLATFORM.
  preferred_lib =
    if defined?(Ignis::Platform)
      located = Ignis::Platform.find_cuda_lib(:cuda_driver)
      located || (Ignis::Platform.windows? ? 'nvcuda' : 'libcuda')
    elsif RUBY_PLATFORM =~ /mswin|mingw|cygwin/
      'nvcuda'
    else
      'libcuda'
    end

  # A single array argument is passed so the names act as a list of
  # candidates for one library.
  ffi_lib [preferred_lib, "nvcuda", "libcuda"]
  attach_vmm_functions!
  @loaded = true
rescue FFI::NotFoundError => error
  raise LoadError, "Could not load CUDA driver library: #{error.message}"
end