Class: EmbeddingUtil::Providers::SelfHosted
Constant Summary
collapse
- EMBEDDING_BATCH_SIZE = 32
Instance Attribute Summary
#config
Instance Method Summary
collapse
#initialize, provider_name, #provider_name, supported?
Instance Method Details
#embed(texts, profile: config.resolved_profile) ⇒ Object
26
27
28
29
30
31
32
|
# File 'lib/embedding_util/providers/self_hosted.rb', line 26
# Embeds +texts+ through the self-hosted embedding server.
#
# Builds a ServerManager from the provider config, asks it for the
# :embedding endpoint, and runs embed_batches inside the manager's
# track_activity block.
#
# @param texts [Object] forwarded unchanged to embed_batches
# @param profile [Object] defaults to config.resolved_profile; forwarded to
#   ensure_server, track_activity and embed_batches
# @return [Object] whatever ServerManager#track_activity returns
#   (presumably the block's value — confirm against ServerManager)
def embed(texts, profile: config.resolved_profile)
  servers = ServerManager.new(config: config)
  target = servers.ensure_server(:embedding, profile: profile)
  servers.track_activity(:embedding, profile: profile) { embed_batches(target, texts, profile) }
end
|
#rerank(query, documents, profile: config.resolved_profile) ⇒ Object
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/embedding_util/providers/self_hosted.rb', line 34
# Reranks +documents+ against +query+ using the self-hosted reranker server.
#
# Ensures the :reranker server, then delegates to rerank_with_activity.
# If an EndpointError is raised and both retryable_reranker_error? and
# can_escalate_reranker_ubatch? hold, the configured reranker ubatch size is
# raised to config.reranker_max_ubatch_size, the server is restarted, and the
# request is attempted once more. An error raised by that second attempt is
# not rescued here.
#
# @param query [Object] forwarded to rerank_with_activity
# @param documents [Object] forwarded to rerank_with_activity
# @param profile [Object] defaults to config.resolved_profile
# @return [Object] the value returned by rerank_with_activity
# @raise [EndpointError] when the failure is not retryable or the ubatch size
#   cannot be escalated
def rerank(query, documents, profile: config.resolved_profile)
  servers = ServerManager.new(config: config)
  target = servers.ensure_server(:reranker, profile: profile)
  rerank_with_activity(servers, target, query, documents, profile)
rescue EndpointError => failure
  # Short-circuit keeps the escalation check from running for non-retryable errors.
  escalate = retryable_reranker_error?(failure) && can_escalate_reranker_ubatch?
  raise unless escalate

  # NOTE: this mutates the shared config, so the larger ubatch size persists
  # for later calls made with the same config object.
  config.reranker_ubatch_size = config.reranker_max_ubatch_size
  retry_target = servers.restart_server(:reranker, profile: profile)
  rerank_with_activity(servers, retry_target, query, documents, profile)
end
|
#support ⇒ Object
16
17
18
19
20
21
22
23
24
|
# File 'lib/embedding_util/providers/self_hosted.rb', line 16
# Summarises this provider's support status as a Hash.
#
# @return [Hash] with keys, in order: :provider (provider_name),
#   :supported (supported?), :runtime (RuntimeCommand.resolve applied to
#   config.runtime), :shutdown_idle and :state_dir (both read from config)
def support
  report = { provider: provider_name, supported: supported? }
  report[:runtime] = RuntimeCommand.resolve(config.runtime)
  report[:shutdown_idle] = config.shutdown_idle
  report[:state_dir] = config.state_dir
  report
end
|
#supported? ⇒ Boolean
12
13
14
|
# File 'lib/embedding_util/providers/self_hosted.rb', line 12
# Reports whether this provider can be used with the current config.
#
# @return [Boolean] the answer given by ServerManager.supported? for config
def supported?
  manager_class = ServerManager
  manager_class.supported?(config)
end
|