Module: Legion::LLM::Discovery::MemoryGate
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/discovery/memory_gate.rb
Constant Summary collapse
- LOCAL_PROVIDERS = %i[ollama mlx].freeze
Class Method Summary collapse
- .allow?(provider:, instance: nil, model: nil) ⇒ Boolean
- .available_memory_mb ⇒ Object
- .estimated_model_mb(model, provider: nil, instance: nil) ⇒ Object
- .memory_floor_mb ⇒ Object
Class Method Details
.allow?(provider:, instance: nil, model: nil) ⇒ Boolean
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/legion/llm/discovery/memory_gate.rb', line 15
#
# Decides whether a model may be used on the given provider, based on memory.
#
# A nil provider, or one not listed in LOCAL_PROVIDERS, is always allowed.
# Otherwise the estimated model cost plus the memory floor must not exceed
# the available memory; a rejection is logged at info level.
#
# @param provider [#to_sym, nil] provider identifier
# @param instance [Object, nil] forwarded to the size estimate and shown in the log line
# @param model [Object, nil] forwarded to the size estimate and shown in the log line
# @return [Boolean] true when the request is allowed
def allow?(provider:, instance: nil, model: nil)
  return true if provider.nil? || !LOCAL_PROVIDERS.include?(provider.to_sym)

  # Probe order is kept: available memory, then model cost, then floor.
  free_mb = available_memory_mb
  needed_mb = estimated_model_mb(model, provider: provider, instance: instance)
  reserve_mb = memory_floor_mb
  return true if needed_mb + reserve_mb <= free_mb

  log.info("[llm][memory_gate] rejected model=#{model} provider=#{provider}/#{instance} " \
           "cost_mb=#{needed_mb} available_mb=#{free_mb} floor_mb=#{reserve_mb}")
  false
end
.available_memory_mb ⇒ Object
31 32 33 34 35 36 |
# File 'lib/legion/llm/discovery/memory_gate.rb', line 31
#
# Returns whatever Discovery::System.available_memory_mb reports.
#
# If that call raises a StandardError, the error is handed to
# handle_exception (debug level, marked as handled) and 4096 is returned
# instead.
#
# @return [Object] the probe result (presumably megabytes, going by the
#   method name), or 4096 when the probe raised
def available_memory_mb
  begin
    Discovery::System.available_memory_mb
  rescue StandardError => error
    handle_exception(error, level: :debug, handled: true, operation: 'memory_gate.available')
    4096
  end
end
.estimated_model_mb(model, provider: nil, instance: nil) ⇒ Object
38 39 40 41 42 43 44 45 |
# File 'lib/legion/llm/discovery/memory_gate.rb', line 38
#
# Estimates the memory cost of a model from the size Discovery reports.
#
# The model is looked up by its string form, with provider and instance
# converted to symbols when present. A reported size above 1_000_000 is
# integer-divided by 1024 * 1024 (presumably it is a byte count rather than
# megabytes). When no positive size results, 4096 is returned. Otherwise the
# size is multiplied by the :discovery / :memory_overhead_factor setting
# (1.4 when unset) and rounded up.
#
# @param model [#to_s] model identifier
# @param provider [#to_sym, nil] provider identifier
# @param instance [#to_sym, nil] provider instance identifier
# @return [Integer] estimated cost, or 4096 when the size is unknown
def estimated_model_mb(model, provider: nil, instance: nil)
  reported = Discovery.model_size(model.to_s, provider: provider&.to_sym, instance: instance&.to_sym)

  size_mb =
    if reported && reported > 1_000_000
      bytes_per_mb = 1024 * 1024
      reported.to_i / bytes_per_mb
    else
      reported
    end
  return 4096 if size_mb.nil? || !size_mb.positive?

  # The setting is read only once a usable size is known.
  factor = Legion::LLM::Settings.value(:discovery, :memory_overhead_factor)
  factor = 1.4 unless factor
  (size_mb * factor).ceil
end