Class: SmartPrompt::MultimodalAdapter
- Inherits:
-
LLMAdapter
- Object
- LLMAdapter
- SmartPrompt::MultimodalAdapter
- Defined in:
- lib/smart_prompt/multimodal_adapter.rb
Constant Summary collapse
- SUPPORTED_IMAGE_FORMATS =
%w[jpg jpeg png gif bmp webp]
- SUPPORTED_VIDEO_FORMATS =
%w[mp4 mov avi mkv webm]
Instance Attribute Summary
Attributes inherited from LLMAdapter
Instance Method Summary collapse
-
#analyze_image(image_input, prompt, model = nil, detail: "auto", max_tokens: nil) ⇒ Object
Analyze image with text prompt.
-
#analyze_multiple_images(images, prompt, model = nil, detail: "auto") ⇒ Object
Multi-image analysis.
-
#analyze_video(video_input, prompt, model = nil, max_frames: 10, fps: 1, detail: "auto") ⇒ Object
Analyze video with text prompt.
-
#initialize(config) ⇒ MultimodalAdapter
constructor
A new instance of MultimodalAdapter.
- #send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil) ⇒ Object
Constructor Details
#initialize(config) ⇒ MultimodalAdapter
Returns a new instance of MultimodalAdapter.
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/smart_prompt/multimodal_adapter.rb', line 9
#
# Builds the OpenAI-compatible client used for multimodal requests.
#
# Reads "api_key" and "url" from @config (presumably populated by the
# superclass constructor invoked via `super` -- confirm against LLMAdapter).
# An api_key written as the literal text ENV[...] is evaluated so the key
# can be taken from the process environment.
#
# @param config [Hash] adapter configuration, forwarded to the superclass
# @raise [LLMAPIError] when the client rejects the configuration, reports an
#   OpenAI error, or a SocketError occurs
# @raise [Error] for any other failure while building the client
def initialize(config)
  super
  api_key = @config["api_key"]
  # NOTE(review): eval executes arbitrary Ruby taken from configuration.
  # If only the ENV["NAME"] form is intended, replace this with an explicit
  # ENV lookup; left as-is here to avoid changing accepted config values.
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    api_key = eval(api_key)
  end

  begin
    @client = OpenAI::Client.new(
      access_token: api_key,
      uri_base: @config["url"],
      request_timeout: 240,
    )
  rescue OpenAI::ConfigurationError => e
    SmartPrompt.logger.error "Failed to initialize Multimodal client: #{e.message}"
    raise LLMAPIError, "Invalid Multimodal configuration: #{e.message}"
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Failed to initialize Multimodal client: #{e.message}"
    raise LLMAPIError, "Multimodal authentication failed: #{e.message}"
  rescue SocketError => e
    SmartPrompt.logger.error "Failed to initialize Multimodal client: #{e.message}"
    raise LLMAPIError, "Network error: Unable to connect to Multimodal API"
  rescue => e
    SmartPrompt.logger.error "Failed to initialize Multimodal client: #{e.message}"
    raise Error, "Unexpected error initializing Multimodal client: #{e.message}"
  else
    # Logged only when no exception was raised; an `ensure` clause would also
    # report success after a failed construction.
    SmartPrompt.logger.info "Successfully created a Multimodal client."
  end
end
Instance Method Details
#analyze_image(image_input, prompt, model = nil, detail: "auto", max_tokens: nil) ⇒ Object
Analyze image with text prompt
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/smart_prompt/multimodal_adapter.rb', line 92
#
# Analyze image with text prompt.
#
# Sends a single user message containing the text prompt and one image part
# to the chat endpoint, stores the raw response in @last_response and returns
# the content of the first choice.
#
# @param image_input [Object] forwarded to prepare_image_input (helper not
#   shown in this listing)
# @param prompt [String] text part of the user message
# @param model [String, nil] model name; falls back to @config["model"]
# @param detail [String] forwarded to prepare_image_input
# @param max_tokens [Integer, nil] sent as :max_tokens only when given
# @return [Object, nil] value at choices[0].message.content of the response
def analyze_image(image_input, prompt, model = nil, detail: "auto", max_tokens: nil)
  SmartPrompt.logger.info "MultimodalAdapter: Analyzing image"

  messages = [
    {
      role: "user",
      content: [
        { type: "text", text: prompt },
        { type: "image_url", image_url: prepare_image_input(image_input, detail) },
      ],
    },
  ]

  parameters = {
    model: model || @config["model"],
    messages: messages,
    temperature: @config["temperature"] || 0.7,
  }
  parameters[:max_tokens] = max_tokens if max_tokens

  response = @client.chat(parameters: parameters)
  @last_response = response
  response.dig("choices", 0, "message", "content")
end
#analyze_multiple_images(images, prompt, model = nil, detail: "auto") ⇒ Object
Multi-image analysis
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/smart_prompt/multimodal_adapter.rb', line 145
#
# Multi-image analysis.
#
# Builds one user message whose content is the text prompt followed by one
# image part per entry of +images+ (in order), sends it to the chat endpoint,
# stores the raw response in @last_response and returns the content of the
# first choice.
#
# @param images [Enumerable] image inputs; each is forwarded to
#   prepare_image_input (helper not shown in this listing)
# @param prompt [String] text part of the user message
# @param model [String, nil] model name; falls back to @config["model"]
# @param detail [String] forwarded to prepare_image_input for every image
# @return [Object, nil] value at choices[0].message.content of the response
def analyze_multiple_images(images, prompt, model = nil, detail: "auto")
  SmartPrompt.logger.info "MultimodalAdapter: Analyzing multiple images"

  image_parts = images.map do |image_input|
    { type: "image_url", image_url: prepare_image_input(image_input, detail) }
  end
  content = [{ type: "text", text: prompt }] + image_parts
  messages = [{ role: "user", content: content }]

  response = @client.chat(
    parameters: {
      model: model || @config["model"],
      messages: messages,
      temperature: @config["temperature"] || 0.7,
    },
  )
  @last_response = response
  response.dig("choices", 0, "message", "content")
end
#analyze_video(video_input, prompt, model = nil, max_frames: 10, fps: 1, detail: "auto") ⇒ Object
Analyze video with text prompt
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/smart_prompt/multimodal_adapter.rb', line 120
#
# Analyze video with text prompt.
#
# Sends a single user message containing the text prompt and one video part
# to the chat endpoint, stores the raw response in @last_response and returns
# the content of the first choice.
#
# @param video_input [Object] forwarded to prepare_video_input (helper not
#   shown in this listing)
# @param prompt [String] text part of the user message
# @param model [String, nil] model name; falls back to @config["model"]
# @param max_frames [Integer] forwarded to prepare_video_input
# @param fps [Numeric] forwarded to prepare_video_input
# @param detail [String] forwarded to prepare_video_input
# @return [Object, nil] value at choices[0].message.content of the response
def analyze_video(video_input, prompt, model = nil, max_frames: 10, fps: 1, detail: "auto")
  SmartPrompt.logger.info "MultimodalAdapter: Analyzing video"

  video_part = {
    type: "video_url",
    video_url: prepare_video_input(video_input, max_frames, fps, detail),
  }
  messages = [
    {
      role: "user",
      content: [{ type: "text", text: prompt }, video_part],
    },
  ]

  response = @client.chat(
    parameters: {
      model: model || @config["model"],
      messages: messages,
      temperature: @config["temperature"] || 0.7,
    },
  )
  @last_response = response
  response.dig("choices", 0, "message", "content")
end
#send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/smart_prompt/multimodal_adapter.rb', line 38
#
# Sends a chat request built from +messages+ and returns the reply text.
#
# The messages are first passed through a preprocessing helper that handles
# multimodal content. When +proc+ is given it is sent as the :stream
# parameter and this method returns nil without touching @last_response;
# otherwise the raw response is stored in @last_response and the content of
# the first choice is returned.
#
# @param messages [Array] chat messages to send
# @param model [String, nil] model name; falls back to @config["model"]
# @param temperature [Numeric, nil] used only when @config["temperature"] is
#   not set; nil is treated as 0.7
# @param tools [Object, nil] sent as :tools when given
# @param proc [Proc, nil] sent as :stream when given
# @return [Object, nil] value at choices[0].message.content, or nil when
#   streaming
# @raise [LLMAPIError] on OpenAI errors, HTTP middleware errors or JSON
#   parse failures
# @raise [Error] on any other failure during the request
def send_request(messages, model = nil, temperature = 0.7, tools = nil, proc = nil)
  SmartPrompt.logger.info "MultimodalAdapter: Sending multimodal request"

  # Process messages to handle multimodal content
  # NOTE(review): the helper name was lost when this listing was extracted;
  # `process_multimodal_messages` is a reconstruction -- confirm against the
  # real source file.
  processed_messages = process_multimodal_messages(messages)

  temperature = 0.7 if temperature.nil?
  model_name = model || @config["model"]
  SmartPrompt.logger.info "MultimodalAdapter: Using model #{model_name}"

  begin
    parameters = {
      model: model_name,
      messages: processed_messages,
      # A temperature set in the configuration takes precedence over the argument.
      temperature: @config["temperature"] || temperature,
    }
    parameters[:stream] = proc if proc
    parameters[:tools] = tools if tools
    SmartPrompt.logger.info "Send parameters is: #{parameters}"
    response = @client.chat(parameters: parameters)
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Multimodal API error: #{e.message}"
    raise LLMAPIError, "Multimodal API error: #{e.message}"
  rescue OpenAI::MiddlewareErrors => e
    SmartPrompt.logger.error "Multimodal HTTP Error: #{e.message}"
    raise LLMAPIError, "Multimodal HTTP Error"
  rescue JSON::ParserError => e
    SmartPrompt.logger.error "Failed to parse Multimodal API response"
    raise LLMAPIError, "Failed to parse Multimodal API response"
  rescue => e
    SmartPrompt.logger.error "Unexpected error during Multimodal request: #{e.message}"
    raise Error, "Unexpected error during Multimodal request: #{e.message}"
  else
    # Logged only when the request did not raise; an `ensure` clause would
    # also report success after a failed request.
    SmartPrompt.logger.info "Successfully sent multimodal message"
  end

  SmartPrompt.logger.info "MultimodalAdapter: Received response from Multimodal API"
  # Streaming callers consume the reply through the proc, so nothing is returned.
  return unless proc.nil?

  @last_response = response
  response.dig("choices", 0, "message", "content")
end