Class: SmartPrompt::TTSAdapter

Inherits:

LLMAdapter

Object
LLMAdapter
SmartPrompt::TTSAdapter

show all

Defined in: lib/smart_prompt/tts_adapter.rb

Constant Summary collapse

PREDEFINED_VOICES = Predefined voice options

{
  "alloy" => "沉稳男声alex",
  "echo" => "温柔女声claire",
  "fable" => "活泼女声fable",
  "onyx" => "磁性男声onyx",
  "nova" => "甜美女声nova",
  "shimmer" => "优雅女声shimmer"
}

SUPPORTED_LANGUAGES = Supported languages

%w[zh en ja ko]

SUPPORTED_FORMATS = Supported output formats

%w[mp3 opus wav pcm]

Instance Attribute Summary

Attributes inherited from LLMAdapter

#last_response

Instance Method Summary collapse

#available_voices ⇒ Object

Get available voices.
#create_custom_voice(name, reference_audio_file, description: nil) ⇒ Object

Create custom voice from reference audio.
#delete_custom_voice(voice_id) ⇒ Object

Delete custom voice.
#initialize(config) ⇒ TTSAdapter constructor

A new instance of TTSAdapter.
#list_custom_voices ⇒ Object

List custom voices.
#synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ Object

Text-to-speech synthesis.
#synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ Object

Synthesize speech and save to file.

Constructor Details

#initialize(config) ⇒ `TTSAdapter`

Returns a new instance of TTSAdapter.

# File 'lib/smart_prompt/tts_adapter.rb', line 24

# Builds the OpenAI-compatible client used for TTS requests.
#
# Reads "api_key" and "url" from @config (presumably populated by the
# LLMAdapter constructor via +super+ — confirm against LLMAdapter).
# An api_key written as ENV["NAME"] / ENV['NAME'] is resolved from the
# process environment.
#
# @param config [Hash] adapter configuration, forwarded to the superclass
# @raise [LLMAPIError] on OpenAI::ConfigurationError, OpenAI::Error or SocketError
# @raise [Error] on any other failure while constructing the client
def initialize(config)
  super
  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    # The common form, ENV["NAME"], is a plain environment lookup and does not
    # need eval at all.
    env_ref = api_key.match(/\AENV\[\s*(["'])([A-Za-z_][A-Za-z0-9_]*)\1\s*\]\z/)
    api_key =
      if env_ref
        ENV[env_ref[2]]
      else
        # NOTE(review): SECURITY — anything else shaped like ENV[...] is still
        # evaluated as Ruby code to stay compatible with existing configs.
        # Config values must be trusted; consider rejecting this form.
        eval(api_key)
      end
  end
  begin
    @client = OpenAI::Client.new(
      access_token: api_key,
      uri_base: @config["url"],
      request_timeout: 120,
    )
  rescue OpenAI::ConfigurationError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Invalid TTS configuration: #{e.message}"
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "TTS authentication failed: #{e.message}"
  rescue SocketError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Network error: Unable to connect to TTS API"
  rescue => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise Error, "Unexpected error initializing TTS client: #{e.message}"
  else
    # Logged only when no exception occurred; an `ensure` clause would also
    # run on the failure paths above and report success incorrectly.
    SmartPrompt.logger.info "Successfully created a TTS client."
  end
end

Instance Method Details

#available_voices ⇒ `Object`

Get available voices



143
144
145

# File 'lib/smart_prompt/tts_adapter.rb', line 143

# Returns the predefined voice table.
#
# @return [Hash] a shallow, unfrozen copy of PREDEFINED_VOICES, so callers
#   can modify the result without touching the constant
def available_voices
  PREDEFINED_VOICES.clone(freeze: false)
end

#create_custom_voice(name, reference_audio_file, description: nil) ⇒ `Object`

Create custom voice from reference audio

# File 'lib/smart_prompt/tts_adapter.rb', line 148

# Creates a custom voice from a reference audio file.
#
# The file is read in binary mode, Base64-encoded and handed to
# create_custom_voice_request together with the name (and description, when
# given). The raw response is stored in @last_response.
#
# @param name [String] name for the new voice
# @param reference_audio_file [String] path to the reference audio file
# @param description [String, nil] optional description, sent only when present
# @return [Hash] :voice_id, :name, :status and :created_at taken from the response
# @raise [LLMAPIError] when the response carries no "voice_id"
# @raise [Error] when the file is missing or larger than 5MB, or when any
#   other failure occurs (message prefixed with "Error creating custom voice: ")
def create_custom_voice(name, reference_audio_file, description: nil)
  SmartPrompt.logger.info "TTSAdapter: Creating custom voice"

  begin
    unless File.exist?(reference_audio_file)
      raise Error, "Reference audio file not found: #{reference_audio_file}"
    end

    # Only the byte size is limited here (5MB); the audio duration is not inspected.
    if File.size(reference_audio_file) > 5 * 1024 * 1024
      raise Error, "Reference audio file too large (max 5MB)"
    end

    parameters = {
      name: name,
      audio: Base64.strict_encode64(File.binread(reference_audio_file))
    }
    parameters[:description] = description if description

    SmartPrompt.logger.info "Creating custom voice: #{name}"

    response = create_custom_voice_request(parameters)
    @last_response = response

    unless response["voice_id"]
      SmartPrompt.logger.error "Failed to create custom voice"
      raise LLMAPIError, "Failed to create custom voice"
    end

    voice_data = {
      voice_id: response["voice_id"],
      name: response["name"],
      status: response["status"],
      created_at: response["created_at"]
    }

    SmartPrompt.logger.info "Custom voice created successfully: #{voice_data[:voice_id]}"
    voice_data
  rescue Error => e
    # Errors that are already classified keep their class and message instead
    # of being re-wrapped by the generic handler below.
    SmartPrompt.logger.error "Error creating custom voice: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error creating custom voice: #{e.message}"
    raise Error, "Error creating custom voice: #{e.message}"
  end
end

#delete_custom_voice(voice_id) ⇒ `Object`

Delete custom voice

# File 'lib/smart_prompt/tts_adapter.rb', line 235

# Deletes a custom voice.
#
# Delegates to delete_custom_voice_request and stores the raw response in
# @last_response.
#
# @param voice_id [Object] identifier passed through to the request helper
# @return [Hash] { deleted: true, voice_id: voice_id } when the response's
#   "deleted" entry is truthy
# @raise [LLMAPIError] when the response does not report the voice as deleted
# @raise [Error] on any other failure (message prefixed with
#   "Error deleting custom voice: ")
def delete_custom_voice(voice_id)
  SmartPrompt.logger.info "TTSAdapter: Deleting custom voice"

  begin
    response = delete_custom_voice_request(voice_id)
    @last_response = response

    unless response["deleted"]
      SmartPrompt.logger.error "Failed to delete custom voice"
      raise LLMAPIError, "Failed to delete custom voice"
    end

    SmartPrompt.logger.info "Custom voice deleted successfully: #{voice_id}"
    { deleted: true, voice_id: voice_id }
  rescue Error => e
    # Already-classified errors keep their class and message rather than
    # being re-wrapped by the generic handler below.
    SmartPrompt.logger.error "Error deleting custom voice: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error deleting custom voice: #{e.message}"
    raise Error, "Error deleting custom voice: #{e.message}"
  end
end

#list_custom_voices ⇒ `Object`

List custom voices

# File 'lib/smart_prompt/tts_adapter.rb', line 202

# Lists the custom voices reported by list_custom_voices_request.
#
# The raw response is stored in @last_response. Each entry of the response's
# "voices" collection is converted to a symbol-keyed hash.
#
# @return [Array<Hash>] one hash per voice with :voice_id, :name,
#   :description, :status and :created_at; an empty array when the response
#   has no "voices" entry
# @raise [Error] on any failure (message prefixed with
#   "Error listing custom voices: ")
def list_custom_voices
  SmartPrompt.logger.info "TTSAdapter: Listing custom voices"

  begin
    response = list_custom_voices_request
    @last_response = response

    entries = response["voices"]
    unless entries
      SmartPrompt.logger.error "No custom voices found"
      return []
    end

    voices = entries.map do |entry|
      {
        voice_id: entry["id"],
        name: entry["name"],
        description: entry["description"],
        status: entry["status"],
        created_at: entry["created_at"]
      }
    end

    SmartPrompt.logger.info "Found #{voices.size} custom voices"
    voices
  rescue => e
    SmartPrompt.logger.error "Error listing custom voices: #{e.message}"
    raise Error, "Error listing custom voices: #{e.message}"
  end
end

#synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ `Object`

Text-to-speech synthesis

# File 'lib/smart_prompt/tts_adapter.rb', line 54

# Synthesizes speech for the given text.
#
# Parameters are checked by validate_tts_parameters before the request is
# built. A voice that is a key of PREDEFINED_VOICES is replaced by its mapped
# value; any other voice is sent unchanged. The request goes through
# submit_tts_request and the raw response is stored in @last_response.
#
# @param text [String] text to synthesize
# @param voice [String] predefined voice key or a provider voice name
# @param model [String, nil] model name; falls back to @config["model"]
# @param speed [Numeric] speed value forwarded to the API
# @param response_format [String] audio format forwarded to the API
# @param language [String, nil] sent as :language only when given
# @return [Hash] :audio_data (the response string), :format, :text_length, :voice
# @raise [LLMAPIError] on OpenAI::Error, or when the response is not a String
#   starting with "data:audio/"
# @raise [Error] on any other failure (message prefixed with
#   "Unexpected error during TTS synthesis: ")
def synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech from text"

  model_name = model || @config["model"]

  # Validation runs outside the begin block, so whatever it raises reaches
  # the caller untouched.
  validate_tts_parameters(text, voice, speed, response_format, language)

  begin
    voice_name = PREDEFINED_VOICES[voice] || voice

    parameters = {
      model: model_name,
      input: text,
      voice: voice_name,
      speed: speed,
      response_format: response_format
    }
    parameters[:language] = language if language

    # The input text is left out of the log line.
    SmartPrompt.logger.info "TTS parameters: #{parameters.except(:input)}"

    # The request is issued through this class's own helper rather than the
    # OpenAI client directly.
    response = submit_tts_request(parameters)
    @last_response = response

    unless response.is_a?(String) && response.start_with?("data:audio/")
      SmartPrompt.logger.error "Invalid TTS response format"
      raise LLMAPIError, "Invalid TTS response format"
    end

    SmartPrompt.logger.info "TTS synthesis successful, generated #{text.length} characters"
    {
      audio_data: response,
      format: response_format,
      text_length: text.length,
      voice: voice_name
    }
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "TTS API error: #{e.message}"
    raise LLMAPIError, "TTS API error: #{e.message}"
  rescue Error
    # Already-classified errors (such as the invalid-format LLMAPIError above)
    # must not be relabelled as "unexpected" by the generic handler below.
    raise
  rescue => e
    SmartPrompt.logger.error "Unexpected error during TTS synthesis: #{e.message}"
    raise Error, "Unexpected error during TTS synthesis: #{e.message}"
  end
end

#synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ `Object`

Synthesize speech and save to file

# File 'lib/smart_prompt/tts_adapter.rb', line 111

# Synthesizes speech and writes the audio to a file.
#
# Calls synthesize_speech with the given options, then hands the resulting
# :audio_data to save_audio_to_file together with the output path and format.
#
# @param text [String] text to synthesize
# @param output_path [String] destination passed to save_audio_to_file
# @param voice [String] forwarded to synthesize_speech
# @param model [String, nil] forwarded to synthesize_speech
# @param speed [Numeric] forwarded to synthesize_speech
# @param response_format [String] forwarded to synthesize_speech and to the save helper
# @param language [String, nil] forwarded to synthesize_speech
# @return [Hash] :file_path, :text_length, :voice and :format
# @raise [Error] errors that are already an Error (including subclasses raised
#   by synthesize_speech) propagate unchanged; anything else is wrapped with
#   the prefix "Error synthesizing to file: "
def synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech to file"

  begin
    audio_data = synthesize_speech(
      text,
      voice: voice,
      model: model,
      speed: speed,
      response_format: response_format,
      language: language
    )

    save_audio_to_file(audio_data[:audio_data], output_path, response_format)

    SmartPrompt.logger.info "TTS audio saved to: #{output_path}"
    {
      file_path: output_path,
      text_length: audio_data[:text_length],
      voice: audio_data[:voice],
      format: response_format
    }
  rescue Error => e
    # Keep the class and message chosen further down the call chain instead
    # of flattening everything into a generic Error with a stacked prefix.
    SmartPrompt.logger.error "Error synthesizing to file: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error synthesizing to file: #{e.message}"
    raise Error, "Error synthesizing to file: #{e.message}"
  end
end

Class: SmartPrompt::TTSAdapter

Constant Summary collapse

Instance Attribute Summary

Attributes inherited from LLMAdapter

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ TTSAdapter

Instance Method Details

#available_voices ⇒ Object

#create_custom_voice(name, reference_audio_file, description: nil) ⇒ Object

#delete_custom_voice(voice_id) ⇒ Object

#list_custom_voices ⇒ Object

#synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ Object