Class: SmartPrompt::TTSAdapter

Inherits:
LLMAdapter show all
Defined in:
lib/smart_prompt/tts_adapter.rb

Constant Summary collapse

PREDEFINED_VOICES =

Predefined voice options

{
  "alloy" => "沉稳男声alex",
  "echo" => "温柔女声claire",
  "fable" => "活泼女声fable",
  "onyx" => "磁性男声onyx",
  "nova" => "甜美女声nova",
  "shimmer" => "优雅女声shimmer"
}
SUPPORTED_LANGUAGES =

Supported languages

%w[zh en ja ko]
SUPPORTED_FORMATS =

Supported output formats

%w[mp3 opus wav pcm]

Instance Attribute Summary

Attributes inherited from LLMAdapter

#last_response

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ TTSAdapter

Returns a new instance of TTSAdapter.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/smart_prompt/tts_adapter.rb', line 24

# Builds the OpenAI-compatible client used for TTS requests.
#
# Reads "api_key" and "url" from @config after calling super (presumably the
# parent initializer stores +config+ there -- confirm against LLMAdapter).
# An api_key written as ENV["NAME"] or ENV['NAME'] is resolved from the
# process environment; any other value is passed to the client unchanged.
#
# @param config [Hash] adapter settings, forwarded to the parent initializer
# @raise [LLMAPIError] on OpenAI configuration/authentication errors or a SocketError
# @raise [Error] on an unsupported ENV[...] reference or any other failure
def initialize(config)
  super
  api_key = @config["api_key"]
  if api_key.is_a?(String) && api_key.start_with?("ENV[") && api_key.end_with?("]")
    # SECURITY: this used to be eval(api_key), which runs arbitrary Ruby taken
    # from configuration. Only the plain ENV["NAME"] / ENV['NAME'] form is
    # accepted now, and it is resolved with a direct environment lookup.
    env_ref = api_key.match(/\AENV\[\s*(["'])([^"']+)\1\s*\]\z/)
    raise Error, 'Unsupported api_key reference; expected the form ENV["NAME"]' unless env_ref

    api_key = ENV[env_ref[2]]
  end
  begin
    @client = OpenAI::Client.new(
      access_token: api_key,
      uri_base: @config["url"],
      request_timeout: 120,
    )
  rescue OpenAI::ConfigurationError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Invalid TTS configuration: #{e.message}"
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "TTS authentication failed: #{e.message}"
  rescue SocketError => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise LLMAPIError, "Network error: Unable to connect to TTS API"
  rescue => e
    SmartPrompt.logger.error "Failed to initialize TTS client: #{e.message}"
    raise Error, "Unexpected error initializing TTS client: #{e.message}"
  end
  # Every rescue branch above raises, so this line is reached only when the
  # client was really created. (An ensure clause would also log on failure.)
  SmartPrompt.logger.info "Successfully created a TTS client."
end

Instance Method Details

#available_voices ⇒ Object

Get available voices



143
144
145
# File 'lib/smart_prompt/tts_adapter.rb', line 143

# Lists the predefined voices.
#
# @return [Hash{String => String}] a fresh copy of PREDEFINED_VOICES that the
#   caller may modify freely
def available_voices
  # Hash#dup alone is shallow: the value strings would still be the very
  # objects held by the constant, so an in-place edit (<<, gsub!, ...) by a
  # caller would leak into PREDEFINED_VOICES. Duplicate the values as well.
  PREDEFINED_VOICES.transform_values(&:dup)
end

#create_custom_voice(name, reference_audio_file, description: nil) ⇒ Object

Create custom voice from reference audio



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/smart_prompt/tts_adapter.rb', line 148

# Creates a custom voice from a reference audio recording.
#
# The file is read, Base64-encoded and handed to create_custom_voice_request
# together with the name (and the description when one is given). The raw
# response is kept in @last_response.
#
# @param name [String] name for the new voice
# @param reference_audio_file [String] path to the reference audio file (max 5MB)
# @param description [String, nil] optional description sent with the request
# @return [Hash] :voice_id, :name, :status and :created_at copied from the response
# @raise [LLMAPIError] when the response carries no "voice_id"
# @raise [Error] when the file is missing or too large, or on any other failure
def create_custom_voice(name, reference_audio_file, description: nil)
  SmartPrompt.logger.info "TTSAdapter: Creating custom voice"

  begin
    # File.file? also rejects directories, which File.exist? would let through
    # to File.binread.
    unless File.file?(reference_audio_file)
      raise Error, "Reference audio file not found: #{reference_audio_file}"
    end

    # Only the byte size is limited (5MB); the clip duration is not inspected.
    if File.size(reference_audio_file) > 5 * 1024 * 1024
      raise Error, "Reference audio file too large (max 5MB)"
    end

    parameters = {
      name: name,
      audio: Base64.strict_encode64(File.binread(reference_audio_file))
    }
    parameters[:description] = description if description

    SmartPrompt.logger.info "Creating custom voice: #{name}"

    response = create_custom_voice_request(parameters)
    @last_response = response

    unless response["voice_id"]
      SmartPrompt.logger.error "Failed to create custom voice"
      raise LLMAPIError, "Failed to create custom voice"
    end

    voice_data = {
      voice_id: response["voice_id"],
      name: response["name"],
      status: response["status"],
      created_at: response["created_at"]
    }

    SmartPrompt.logger.info "Custom voice created successfully: #{voice_data[:voice_id]}"
    voice_data
  rescue LLMAPIError => e
    # An API failure is already classified; re-raise it untouched instead of
    # letting the catch-all below downgrade it to a generic Error.
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error still match -- confirm.
    SmartPrompt.logger.error "Error creating custom voice: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error creating custom voice: #{e.message}"
    raise Error, "Error creating custom voice: #{e.message}"
  end
end

#delete_custom_voice(voice_id) ⇒ Object

Delete custom voice



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/smart_prompt/tts_adapter.rb', line 235

# Deletes a custom voice.
#
# Delegates to delete_custom_voice_request and stores the raw response in
# @last_response.
#
# @param voice_id [String] identifier passed to delete_custom_voice_request
# @return [Hash] { deleted: true, voice_id: voice_id } on success
# @raise [LLMAPIError] when the response does not report "deleted"
# @raise [Error] on any other failure
def delete_custom_voice(voice_id)
  SmartPrompt.logger.info "TTSAdapter: Deleting custom voice"

  begin
    response = delete_custom_voice_request(voice_id)
    @last_response = response

    unless response["deleted"]
      SmartPrompt.logger.error "Failed to delete custom voice"
      raise LLMAPIError, "Failed to delete custom voice"
    end

    SmartPrompt.logger.info "Custom voice deleted successfully: #{voice_id}"
    { deleted: true, voice_id: voice_id }
  rescue LLMAPIError => e
    # An API failure is already classified; re-raise it untouched instead of
    # letting the catch-all below downgrade it to a generic Error.
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error still match -- confirm.
    SmartPrompt.logger.error "Error deleting custom voice: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error deleting custom voice: #{e.message}"
    raise Error, "Error deleting custom voice: #{e.message}"
  end
end

#list_custom_voices ⇒ Object

List custom voices



202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/smart_prompt/tts_adapter.rb', line 202

# Fetches the custom voices via list_custom_voices_request and summarises them.
#
# The raw response is stored in @last_response. Each entry under "voices" is
# reduced to :voice_id (taken from "id"), :name, :description, :status and
# :created_at.
#
# @return [Array<Hash>] one summary per voice; [] when the response has no "voices"
# @raise [Error] wrapping any failure raised while listing
def list_custom_voices
  SmartPrompt.logger.info "TTSAdapter: Listing custom voices"

  begin
    @last_response = payload = list_custom_voices_request
    raw_voices = payload["voices"]

    # Guard clause: no "voices" entry means there is nothing to map.
    unless raw_voices
      SmartPrompt.logger.error "No custom voices found"
      return []
    end

    summaries = raw_voices.map do |entry|
      # "id" is renamed to :voice_id; the remaining fields keep their names.
      %w[name description status created_at].each_with_object({ voice_id: entry["id"] }) do |field, summary|
        summary[field.to_sym] = entry[field]
      end
    end

    SmartPrompt.logger.info "Found #{summaries.size} custom voices"
    summaries
  rescue => failure
    SmartPrompt.logger.error "Error listing custom voices: #{failure.message}"
    raise Error, "Error listing custom voices: #{failure.message}"
  end
end

#synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ Object

Text-to-speech synthesis



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/smart_prompt/tts_adapter.rb', line 54

# Synthesizes speech for +text+ through submit_tts_request.
#
# Parameters are checked by validate_tts_parameters before anything is sent;
# whatever that helper raises propagates unchanged. A voice found in
# PREDEFINED_VOICES is mapped to its provider name, any other voice is sent
# as given. The raw response is stored in @last_response.
#
# @param text [String] text to synthesize
# @param voice [String] predefined voice key or a provider voice name
# @param model [String, nil] model name; falls back to @config["model"]
# @param speed [Numeric] speed value forwarded in the request
# @param response_format [String] audio format forwarded in the request
# @param language [String, nil] added to the request only when given
# @return [Hash] :audio_data (the response string), :format, :text_length, :voice
# @raise [LLMAPIError] on an OpenAI::Error or when the response is not a
#   "data:audio/..." string
# @raise [Error] on any other failure during the request
def synthesize_speech(text, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech from text"

  model_name = model || @config["model"]

  # Validate parameters
  validate_tts_parameters(text, voice, speed, response_format, language)

  begin
    # Map voice name if it's a predefined voice
    voice_name = PREDEFINED_VOICES[voice] || voice

    parameters = {
      model: model_name,
      input: text,
      voice: voice_name,
      speed: speed,
      response_format: response_format
    }

    # Add language parameter if specified
    parameters[:language] = language if language

    # The input text is left out of the log line.
    SmartPrompt.logger.info "TTS parameters: #{parameters.except(:input)}"

    response = submit_tts_request(parameters)
    @last_response = response

    # Only a data URI string is accepted as a successful response.
    unless response.is_a?(String) && response.start_with?("data:audio/")
      SmartPrompt.logger.error "Invalid TTS response format"
      raise LLMAPIError, "Invalid TTS response format"
    end

    audio_data = {
      audio_data: response,
      format: response_format,
      text_length: text.length,
      voice: voice_name
    }

    SmartPrompt.logger.info "TTS synthesis successful, generated #{text.length} characters"
    audio_data
  rescue LLMAPIError => e
    # An API failure is already classified; re-raise it untouched instead of
    # letting the catch-all below relabel it as an "Unexpected error".
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error still match -- confirm.
    SmartPrompt.logger.error "TTS API error: #{e.message}"
    raise
  rescue OpenAI::Error => e
    SmartPrompt.logger.error "TTS API error: #{e.message}"
    raise LLMAPIError, "TTS API error: #{e.message}"
  rescue => e
    SmartPrompt.logger.error "Unexpected error during TTS synthesis: #{e.message}"
    raise Error, "Unexpected error during TTS synthesis: #{e.message}"
  end
end

#synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil) ⇒ Object

Synthesize speech and save to file



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/smart_prompt/tts_adapter.rb', line 111

# Synthesizes speech and writes the audio to +output_path+.
#
# Calls synthesize_speech with the given options, then hands the resulting
# :audio_data to save_audio_to_file.
#
# @param text [String] text to synthesize
# @param output_path [String] destination passed to save_audio_to_file
# @param voice [String] forwarded to synthesize_speech
# @param model [String, nil] forwarded to synthesize_speech
# @param speed [Numeric] forwarded to synthesize_speech
# @param response_format [String] forwarded to synthesize_speech and save_audio_to_file
# @param language [String, nil] forwarded to synthesize_speech
# @return [Hash] :file_path, :text_length, :voice and :format
# @raise [LLMAPIError] when synthesis fails with an API error
# @raise [Error] on any other failure while synthesizing or saving
def synthesize_to_file(text, output_path, voice: "alloy", model: nil, speed: 1.0, response_format: "mp3", language: nil)
  SmartPrompt.logger.info "TTSAdapter: Synthesizing speech to file"

  begin
    audio_data = synthesize_speech(
      text,
      voice: voice,
      model: model,
      speed: speed,
      response_format: response_format,
      language: language
    )

    save_audio_to_file(audio_data[:audio_data], output_path, response_format)

    SmartPrompt.logger.info "TTS audio saved to: #{output_path}"
    {
      file_path: output_path,
      text_length: audio_data[:text_length],
      voice: audio_data[:voice],
      format: response_format
    }
  rescue LLMAPIError => e
    # An API failure is already classified; re-raise it untouched instead of
    # letting the catch-all below downgrade it to a generic Error.
    # NOTE(review): assumes LLMAPIError descends from Error, so callers that
    # rescue Error still match -- confirm.
    SmartPrompt.logger.error "Error synthesizing to file: #{e.message}"
    raise
  rescue => e
    SmartPrompt.logger.error "Error synthesizing to file: #{e.message}"
    raise Error, "Error synthesizing to file: #{e.message}"
  end
end