Class: SmartPrompt::STTAdapter

Inherits:

LLMAdapter

Object
LLMAdapter
SmartPrompt::STTAdapter

show all

Defined in: lib/smart_prompt/stt_adapter.rb

Constant Summary collapse

SUPPORTED_AUDIO_FORMATS = %w[mp3 mp4 mpeg mpga m4a wav webm]

Supported audio formats.

SUPPORTED_LANGUAGES = %w[zh en ja ko]

Supported languages for speech recognition.

MAX_FILE_SIZE = 25 * 1024 * 1024

Maximum file size (25MB).

Instance Attribute Summary

Attributes inherited from LLMAdapter

#last_response

Instance Method Summary collapse

#detect_language(text) ⇒ Object

Language detection (basic implementation).
#get_audio_info(audio_file) ⇒ Object

Get audio file information.
#initialize(config) ⇒ STTAdapter constructor

A new instance of STTAdapter.
#transcribe_audio(audio_file, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json") ⇒ Object

Speech-to-text transcription.
#transcribe_audio_url(audio_url, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json") ⇒ Object

Transcribe audio from URL.
#transcribe_batch(audio_files, model: nil, language: nil, prompt: nil, temperature: 0.0) ⇒ Object

Batch transcription.

Constructor Details

#initialize(config) ⇒ `STTAdapter`

Returns a new instance of STTAdapter.

# File 'lib/smart_prompt/stt_adapter.rb', line 17

# Creates the OpenAI-compatible client used for speech-to-text requests.
#
# Reads "api_key" and "url" from @config after calling `super`
# (NOTE(review): presumably LLMAdapter#initialize stores `config` in @config — confirm).
# An "api_key" written exactly as ENV["NAME"] or ENV['NAME'] is replaced by the
# value of that environment variable (nil when unset). Any other string is
# passed to the client unchanged.
#
# @param config [Hash] adapter configuration
# @raise [LLMAPIError] on OpenAI::ConfigurationError, OpenAI::Error or SocketError
# @raise [Error] on any other StandardError raised while building the client
def initialize(config)
  super
  api_key = @config["api_key"]
  # Resolve the ENV reference with a strict pattern and a plain lookup rather
  # than eval, so a configuration value can never execute arbitrary Ruby.
  if api_key.is_a?(String) && (env_ref = api_key.match(/\AENV\[(["'])([^"'\[\]]+)\1\]\z/))
    api_key = ENV[env_ref[2]]
  end
  begin
    @client = OpenAI::Client.new(
      access_token: api_key,
      uri_base: @config["url"],
      request_timeout: 120,
    )
  rescue OpenAI::ConfigurationError => e
    SmartPrompt.logger.error "Failed to initialize STT client: #{e.message}"
    raise LLMAPIError, "Invalid STT configuration: #{e.message}"
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Failed to initialize STT client: #{e.message}"
    raise LLMAPIError, "STT authentication failed: #{e.message}"
  rescue SocketError => e
    SmartPrompt.logger.error "Failed to initialize STT client: #{e.message}"
    raise LLMAPIError, "Network error: Unable to connect to STT API"
  rescue => e
    SmartPrompt.logger.error "Failed to initialize STT client: #{e.message}"
    raise Error, "Unexpected error initializing STT client: #{e.message}"
  end
  # Reached only when the client was built; an `ensure` clause would also log
  # success after a failure.
  SmartPrompt.logger.info "Successfully created an STT client."
end

Instance Method Details

#detect_language(text) ⇒ `Object`

Language detection (basic implementation)

# File 'lib/smart_prompt/stt_adapter.rb', line 231

# Guesses the language of +text+ from the Unicode ranges of its characters.
#
# Kana and Hangul are checked before Han ideographs: Japanese text normally
# mixes kanji with kana, and Korean may mix hanja with Hangul, so testing the
# Han range first would label both as Chinese. Text made up only of Han
# characters is still reported as "zh".
#
# @param text [String, nil] text to inspect; nil is treated as empty
# @return [String] "ja", "ko", "zh", or "en" when no CJK script is found
def detect_language(text)
  SmartPrompt.logger.info "STTAdapter: Detecting language from text"

  sample = text.to_s

  # Simple language detection based on character ranges
  if /[\u3040-\u309f\u30a0-\u30ff]/.match?(sample)
    "ja"
  elsif /[\uac00-\ud7af]/.match?(sample)
    "ko"
  elsif /[\u4e00-\u9fff]/.match?(sample)
    "zh"
  else
    "en"
  end
end

#get_audio_info(audio_file) ⇒ `Object`

Get audio file information

# File 'lib/smart_prompt/stt_adapter.rb', line 193

# Describes an audio file on disk after checking that it can be transcribed.
#
# The file must exist, have an extension listed in SUPPORTED_AUDIO_FORMATS,
# and be no larger than MAX_FILE_SIZE. Duration comes from
# estimate_audio_duration (defined elsewhere) and is only an estimate.
#
# @param audio_file [String] path to the audio file
# @return [Hash] :file_path, :file_name, :file_size, :format,
#   :estimated_duration and :supported (always true)
# @raise [Error] for any failure, including the validation failures above;
#   the message is always prefixed with "Error getting audio info: "
def get_audio_info(audio_file)
  SmartPrompt.logger.info "STTAdapter: Getting audio file information"

  begin
    raise Error, "Audio file not found: #{audio_file}" unless File.exist?(audio_file)

    extension = File.extname(audio_file).downcase.delete(".")
    raise Error, "Unsupported audio format: #{extension}" unless SUPPORTED_AUDIO_FORMATS.include?(extension)

    byte_count = File.size(audio_file)
    if byte_count > MAX_FILE_SIZE
      limit_mb = MAX_FILE_SIZE / (1024 * 1024)
      raise Error, "Audio file too large (max #{limit_mb}MB)"
    end

    # Rough estimate only; the real duration may differ.
    approx_seconds = estimate_audio_duration(byte_count, extension)

    {
      file_path: audio_file,
      file_name: File.basename(audio_file),
      file_size: byte_count,
      format: extension,
      estimated_duration: approx_seconds,
      supported: true
    }
  rescue => failure
    # Validation errors raised above also land here and are re-wrapped.
    SmartPrompt.logger.error "Error getting audio info: #{failure.message}"
    raise Error, "Error getting audio info: #{failure.message}"
  end
end

#transcribe_audio(audio_file, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json") ⇒ `Object`

Speech-to-text transcription

# File 'lib/smart_prompt/stt_adapter.rb', line 47

# Transcribes an audio file to text.
#
# The input is validated with validate_stt_parameters, prepared with
# prepare_audio_file and sent with submit_stt_request (all defined elsewhere
# in this class). The raw response is stored in @last_response.
#
# @param audio_file [Object] audio input, forwarded to validate_stt_parameters
#   and prepare_audio_file
# @param model [String, nil] model name; falls back to @config["model"]
# @param language [String, nil] language hint, sent only when given
# @param prompt [String, nil] prompt text, sent only when given
# @param temperature [Float] sampling temperature
# @param response_format [String] response format requested from the API
# @return [Hash] :text from the response, :language (the argument echoed
#   back, not a detected value), and :duration, :file_size, :format as
#   reported by prepare_audio_file
# @raise [LLMAPIError] when the API raises OpenAI::Error or the response has
#   no "text" entry
# @raise [Error] on any other StandardError during the request
def transcribe_audio(audio_file, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json")
  SmartPrompt.logger.info "STTAdapter: Transcribing audio to text"

  model_name = model || @config["model"]

  # Validation runs outside the begin block, so its errors are not re-wrapped.
  validate_stt_parameters(audio_file, language, response_format)

  begin
    audio_data = prepare_audio_file(audio_file)

    parameters = {
      model: model_name,
      file: audio_data[:file],
      temperature: temperature,
      response_format: response_format
    }

    # Optional parameters are sent only when the caller supplied them.
    parameters[:language] = language if language
    parameters[:prompt] = prompt if prompt

    # The file payload is left out of the log line.
    SmartPrompt.logger.info "STT parameters: #{parameters.reject { |key, _| key == :file }}"

    response = submit_stt_request(parameters)

    @last_response = response

    unless response["text"]
      SmartPrompt.logger.error "No text in STT response"
      raise LLMAPIError, "No text in STT response"
    end

    SmartPrompt.logger.info "STT transcription successful, transcribed #{response['text'].length} characters"

    {
      text: response["text"],
      language: language,
      duration: audio_data[:duration],
      file_size: audio_data[:file_size],
      format: audio_data[:format]
    }
  rescue LLMAPIError
    # Already typed and logged where it was raised. Without this clause the
    # generic rescue below would re-wrap it as an "Unexpected error" Error.
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error keep working — confirm.
    raise
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "STT API error: #{e.message}"
    raise LLMAPIError, "STT API error: #{e.message}"
  rescue => e
    SmartPrompt.logger.error "Unexpected error during STT transcription: #{e.message}"
    raise Error, "Unexpected error during STT transcription: #{e.message}"
  end
end

#transcribe_audio_url(audio_url, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json") ⇒ `Object`

Transcribe audio from URL

# File 'lib/smart_prompt/stt_adapter.rb', line 104

# Transcribes audio referenced by a URL.
#
# The request is sent with submit_stt_url_request (defined elsewhere in this
# class) and the raw response is stored in @last_response.
#
# @param audio_url [String] URL of the audio, sent as :audio_url
# @param model [String, nil] model name; falls back to @config["model"]
# @param language [String, nil] language hint, sent only when given
# @param prompt [String, nil] prompt text, sent only when given
# @param temperature [Float] sampling temperature
# @param response_format [String] response format requested from the API
# @return [Hash] :text from the response, :language (the argument echoed
#   back, not a detected value) and :audio_url
# @raise [LLMAPIError] when the response has no "text" entry
# @raise [Error] on any other StandardError during the request
def transcribe_audio_url(audio_url, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json")
  SmartPrompt.logger.info "STTAdapter: Transcribing audio from URL"

  model_name = model || @config["model"]

  begin
    parameters = {
      model: model_name,
      audio_url: audio_url,
      temperature: temperature,
      response_format: response_format
    }

    # Optional parameters are sent only when the caller supplied them.
    parameters[:language] = language if language
    parameters[:prompt] = prompt if prompt

    SmartPrompt.logger.info "STT URL parameters: #{parameters}"

    response = submit_stt_url_request(parameters)

    @last_response = response

    unless response["text"]
      SmartPrompt.logger.error "No text in STT URL response"
      raise LLMAPIError, "No text in STT URL response"
    end

    SmartPrompt.logger.info "STT URL transcription successful, transcribed #{response['text'].length} characters"

    {
      text: response["text"],
      language: language,
      audio_url: audio_url
    }
  rescue LLMAPIError
    # Already typed and logged where it was raised. Without this clause the
    # generic rescue below would re-wrap it as a plain Error.
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error keep working — confirm.
    raise
  rescue => e
    SmartPrompt.logger.error "Error in URL transcription: #{e.message}"
    raise Error, "Error in URL transcription: #{e.message}"
  end
end

#transcribe_batch(audio_files, model: nil, language: nil, prompt: nil, temperature: 0.0) ⇒ `Object`

Batch transcription

# File 'lib/smart_prompt/stt_adapter.rb', line 149

# Transcribes several audio files one after another with transcribe_audio.
#
# A failure on one file is logged and recorded in its result entry; it does
# not stop the remaining files from being processed.
#
# @param audio_files [Array] audio inputs, each forwarded to transcribe_audio
# @param model [String, nil] forwarded to transcribe_audio
# @param language [String, nil] forwarded to transcribe_audio
# @param prompt [String, nil] forwarded to transcribe_audio
# @param temperature [Float] forwarded to transcribe_audio
# @return [Hash] :total_files, :successful, :failed and :results — one entry
#   per input, in input order, with :file, :index, :success and either
#   :transcription (on success) or :error (the exception message)
def transcribe_batch(audio_files, model: nil, language: nil, prompt: nil, temperature: 0.0)
  total = audio_files.size
  SmartPrompt.logger.info "STTAdapter: Batch transcribing #{total} audio files"

  outcomes = audio_files.each_with_index.map do |path, position|
    begin
      SmartPrompt.logger.info "Transcribing file #{position + 1}/#{total}: #{File.basename(path)}"

      transcript = transcribe_audio(path, model: model, language: language, prompt: prompt, temperature: temperature)

      { file: path, index: position, transcription: transcript, success: true }
    rescue => failure
      SmartPrompt.logger.error "Failed to transcribe #{path}: #{failure.message}"
      { file: path, index: position, error: failure.message, success: false }
    end
  end

  succeeded, errored = outcomes.partition { |outcome| outcome[:success] }

  {
    total_files: total,
    successful: succeeded.size,
    failed: errored.size,
    results: outcomes
  }
end

Class: SmartPrompt::STTAdapter

Constant Summary collapse

Instance Attribute Summary

Attributes inherited from LLMAdapter

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ STTAdapter

Instance Method Details

#detect_language(text) ⇒ Object

#get_audio_info(audio_file) ⇒ Object

#transcribe_audio(audio_file, model: nil, language: nil, prompt: nil, temperature: 0.0, response_format: "json") ⇒ Object