Class: ActiveHarness::Providers::GPUStack

Inherits:
Base
  • Object
show all
Defined in:
lib/active_harness/providers/gpustack.rb

Overview

GPUStack is a self-hosted GPU inference server that exposes an OpenAI-compatible API. See docs.gpustack.ai/latest/user-guide/inference-openai-compatible-apis/

GPUSTACK_API_BASE is required (e.g. "my-gpustack-server:80"). GPUSTACK_API_KEY is optional (needed only if the server has auth enabled).

Example:

model do
  use provider: :gpustack, model: "Qwen/Qwen2.5-7B-Instruct-GGUF"
end

Constant Summary

Constants inherited from Base

Base::HTTP, Base::STREAMING_HTTP

Instance Method Summary collapse

Instance Method Details

#call(model:, messages:, temperature: 0.7, stream: nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/active_harness/providers/gpustack.rb', line 16

# Sends a chat-completion request to the GPUStack server's
# OpenAI-compatible `/v1/chat/completions` endpoint.
#
# The Authorization header is attached only when a non-blank API key is
# available, so servers running without auth receive no credentials at all.
#
# @param model [String] model identifier placed in the request body
# @param messages [Array] chat messages, forwarded unchanged in the request
#   body (presumably OpenAI-style role/content hashes; confirm with callers)
# @param temperature [Numeric] sampling temperature placed in the request body
# @param stream [Object, nil] when truthy, the request is handed to
#   `call_streaming` and that method's return value is returned as-is
# @return [Hash] for non-streaming calls, a hash with the keys
#   `:content` (stripped text of the first choice, "" when absent),
#   `:provider` (always `:gpustack`), `:model` (the model name reported in
#   the response, falling back to the requested one) and `:usage`
def call(model:, messages:, temperature: 0.7, stream: nil)
  url     = "#{api_base}/v1/chat/completions"
  headers = { "Content-Type" => "application/json" }
  key     = api_key
  # An empty string is truthy in Ruby, so a plain `if key` would send a
  # malformed "Bearer " header for a blank key (for instance an empty
  # GPUSTACK_API_KEY, if that is where api_key reads from). Treat nil and
  # blank keys alike: no Authorization header.
  headers["Authorization"] = "Bearer #{key}" unless key.to_s.strip.empty?
  body = { model: model, messages: messages, temperature: temperature }

  if stream
    return call_streaming(
      url: url, headers: headers, body: body,
      stream: stream, provider: :gpustack, model: model
    )
  end

  raw  = post_json(URI(url), headers: headers, body: body)
  data = parse!(raw)
  # NOTE(review): presumably raises when the payload describes an API error;
  # its return value is not used here.
  handle_error!(data)

  {
    content: data.dig("choices", 0, "message", "content").to_s.strip,
    provider: :gpustack,
    # Prefer the model name reported in the response over the requested one.
    model: data["model"] || model,
    usage: extract_usage_openai(data)
  }
end