Class: RubyLLM::Agents::TranscriptionResult

Inherits:
Object
  • Object
show all
Includes:
Trackable
Defined in:
lib/ruby_llm/agents/results/transcription_result.rb

Overview

Result object for transcription operations

Wraps transcription output with metadata about the operation (audio duration, timing, cost) and provides utility methods for output formatting.

Examples:

Basic transcription

result = MeetingTranscriber.call(audio: "meeting.mp3")
result.text           # => "Hello everyone..."
result.audio_duration # => 60.5
result.total_cost     # => 0.006

With segments

result = SubtitleTranscriber.call(audio: "video.mp4")
result.segments       # => [{ start: 0.0, end: 2.5, text: "Hello" }, ...]
result.srt            # => "1\n00:00:00,000 --> 00:00:02,500\nHello\n\n..."
result.vtt            # => "WEBVTT\n\n00:00:00.000 --> 00:00:02.500\nHello\n\n..."

Speaker diarization

result = InterviewTranscriber.call(audio: "interview.mp3")
result.speakers          # => ["Interviewer", "Guest"]
result.speaker_segments  # => { "Interviewer" => [...], "Guest" => [...] }

Content collapse

Speaker Diarization collapse

Audio Metadata collapse

Language collapse

Model Info collapse

Timing collapse

Cost & Usage collapse

Quality collapse

Status collapse

Multi-tenancy collapse

Error collapse

Execution Record collapse

Instance Method Summary collapse

Methods included from Trackable

included

Constructor Details

#initialize(attributes = {}) ⇒ TranscriptionResult

Creates a new TranscriptionResult instance

Parameters:

  • attributes (Hash) (defaults to: {})

    Result attributes

Options Hash (attributes):

  • :text (String)

    The transcription text

  • :segments (Array<Hash>)

    Timed segments

  • :words (Array<Hash>)

    Timed words

  • :speakers (Array<String>)

    Speaker names

  • :speaker_segments (Hash)

    Segments by speaker

  • :audio_duration (Float)

    Audio duration in seconds

  • :audio_format (String)

    Audio format

  • :audio_channels (Integer)

    Number of channels

  • :audio_sample_rate (Integer)

    Sample rate in Hz

  • :language (String)

    Requested language

  • :detected_language (String)

    Detected language

  • :language_confidence (Float)

    Language confidence

  • :model_id (String)

    Model used

  • :duration_ms (Integer)

    Execution duration

  • :started_at (Time)

    Start time

  • :completed_at (Time)

    Completion time

  • :total_cost (Float)

    Cost in USD

  • :audio_minutes (Float)

    Billable minutes

  • :confidence (Float)

    Overall confidence

  • :status (Symbol)

    Status

  • :chunks (Array)

    Chunk results

  • :tenant_id (String)

    Tenant identifier

  • :error_class (String)

    Error class

  • :error_message (String)

    Error message



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 206

# Creates a new TranscriptionResult from a hash of attributes.
#
# Every key listed below is read with +attributes[key]+ and stored in the
# instance variable of the same name. Two defaults are then applied:
# +audio_minutes+ falls back to +audio_duration / 60.0+ (when a duration is
# available) and +status+ falls back to +:success+. Finally the instance
# registers itself through +register_with_tracker+.
#
# @param attributes [Hash] result attributes, keyed by symbol
def initialize(attributes = {})
  %i[
    text segments words
    speakers speaker_segments
    audio_duration audio_format audio_channels audio_sample_rate
    language detected_language language_confidence
    model_id
    duration_ms started_at completed_at
    total_cost audio_minutes
    confidence
    status chunks
    tenant_id
    error_class error_message
    execution_id
    agent_class_name
  ].each do |name|
    instance_variable_set(:"@#{name}", attributes[name])
  end

  # Billable minutes default to the audio duration (seconds) converted to minutes.
  @audio_minutes ||= (audio_duration ? audio_duration / 60.0 : nil)
  # A result counts as successful unless the caller supplies another status.
  @status ||= :success

  register_with_tracker
end

Instance Attribute Details

#audio_channels ⇒ Object (readonly)



71
72
73
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 71

# @return [Integer, nil] number of audio channels, as supplied via :audio_channels
def audio_channels
  @audio_channels
end

#audio_duration ⇒ Object (readonly)



63
64
65
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 63

# @return [Float, nil] audio duration in seconds, as supplied via :audio_duration
def audio_duration
  @audio_duration
end

#audio_format ⇒ Object (readonly)



67
68
69
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 67

# @return [String, nil] audio format, as supplied via :audio_format
def audio_format
  @audio_format
end

#audio_minutes ⇒ Object (readonly)



127
128
129
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 127

# @return [Float, nil] billable minutes; the constructor falls back to
#   audio_duration / 60.0 when :audio_minutes is not supplied
def audio_minutes
  @audio_minutes
end

#audio_sample_rate ⇒ Object (readonly)



75
76
77
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 75

# @return [Integer, nil] sample rate in Hz, as supplied via :audio_sample_rate
def audio_sample_rate
  @audio_sample_rate
end

#chunks ⇒ Object (readonly)



147
148
149
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 147

# @return [Array, nil] chunk results, as supplied via :chunks
def chunks
  @chunks
end

#completed_at ⇒ Object (readonly)



115
116
117
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 115

# @return [Time, nil] completion time, as supplied via :completed_at
def completed_at
  @completed_at
end

#confidence ⇒ Object (readonly)



135
136
137
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 135

# @return [Float, nil] overall confidence, as supplied via :confidence
def confidence
  @confidence
end

#detected_language ⇒ Object (readonly)



87
88
89
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 87

# @return [String, nil] detected language, as supplied via :detected_language
def detected_language
  @detected_language
end

#duration_ms ⇒ Object (readonly)



107
108
109
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 107

# @return [Integer, nil] execution duration as supplied via :duration_ms
#   (milliseconds, going by the attribute name)
def duration_ms
  @duration_ms
end

#error_class ⇒ Object (readonly)



163
164
165
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 163

# @return [String, nil] error class, as supplied via :error_class; a non-nil
#   value makes #success? return false
def error_class
  @error_class
end

#error_message ⇒ Object (readonly)



167
168
169
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 167

# @return [String, nil] error message, as supplied via :error_message
def error_message
  @error_message
end

#execution_id ⇒ Object (readonly)



175
176
177
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 175

# @return [Object, nil] identifier supplied via :execution_id; #execution uses
#   it to look up the associated Execution record
def execution_id
  @execution_id
end

#language ⇒ Object (readonly)



83
84
85
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 83

# @return [String, nil] requested language, as supplied via :language
def language
  @language
end

#language_confidence ⇒ Object (readonly)



91
92
93
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 91

# @return [Float, nil] language confidence, as supplied via :language_confidence
def language_confidence
  @language_confidence
end

#model_id ⇒ Object (readonly)



99
100
101
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 99

# @return [String, nil] model used, as supplied via :model_id
def model_id
  @model_id
end

#segments ⇒ Object (readonly)



39
40
41
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 39

# @return [Array<Hash>, nil] timed segments, as supplied via :segments; the
#   formatting helpers read each segment's :start, :end and :text keys
def segments
  @segments
end

#speaker_segments ⇒ Object (readonly)



55
56
57
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 55

# @return [Hash, nil] segments grouped by speaker, as supplied via :speaker_segments
def speaker_segments
  @speaker_segments
end

#speakers ⇒ Object (readonly)



51
52
53
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 51

# @return [Array<String>, nil] speaker names, as supplied via :speakers
def speakers
  @speakers
end

#started_at ⇒ Object (readonly)



111
112
113
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 111

# @return [Time, nil] start time, as supplied via :started_at
def started_at
  @started_at
end

#status ⇒ Object (readonly)



143
144
145
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 143

# @return [Symbol] result status; the constructor defaults it to :success
#   when :status is not supplied
def status
  @status
end

#tenant_id ⇒ Object (readonly)



155
156
157
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 155

# @return [String, nil] tenant identifier, as supplied via :tenant_id
def tenant_id
  @tenant_id
end

#text ⇒ Object (readonly)



35
36
37
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 35

# @return [String, nil] the transcription text, as supplied via :text
def text
  @text
end

#total_cost ⇒ Object (readonly)



123
124
125
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 123

# @return [Float, nil] cost in USD, as supplied via :total_cost
def total_cost
  @total_cost
end

#words ⇒ Object (readonly)



43
44
45
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 43

# @return [Array<Hash>, nil] timed words, as supplied via :words
def words
  @words
end

Instance Method Details

#diarized? ⇒ Boolean

Returns whether speaker diarization data is available

Returns:

  • (Boolean)

    true if speakers were identified



292
293
294
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 292

# Tells whether speaker diarization data is available.
#
# @return [Boolean] true when +speakers+ is present and holds at least one
#   truthy entry
def diarized?
  return false unless speakers.present?

  speakers.any?
end

#error? ⇒ Boolean

Returns whether the transcription failed

Returns:

  • (Boolean)

    true unless #success? is true (so a :partial status also counts as an error)



278
279
280
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 278

# Whether the transcription did not succeed.
#
# Simply negates #success?, so it is true whenever error_class is set or
# status is anything other than :success.
#
# @return [Boolean]
def error?
  !success?
end

#execution ⇒ RubyLLM::Agents::Execution?

Loads the associated Execution record from the database

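Returns:

  • (RubyLLM::Agents::Execution, nil)

    The execution record looked up by execution_id, or nil when no execution_id is set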



264
265
266
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 264

# Loads the Execution record associated with this result.
#
# The lookup result is memoized in @execution; a falsy lookup result is not
# cached, so the lookup is repeated on the next call.
#
# @return [RubyLLM::Agents::Execution, nil] the record found via
#   +find_by(id: execution_id)+, or nil when +execution_id+ is not set
def execution
  return nil unless execution_id

  @execution ||= RubyLLM::Agents::Execution.find_by(id: execution_id)
end

#partial? ⇒ Boolean

Returns whether partial results are available

Returns:

  • (Boolean)

    true if status is :partial



285
286
287
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 285

# Whether the result is flagged as partial.
#
# @return [Boolean] true when status equals :partial
def partial?
  status == :partial
end

#segment_at(timestamp) ⇒ Hash?

Returns the segment at a specific timestamp

Parameters:

  • timestamp (Float)

    Time in seconds

Returns:

  • (Hash, nil)

    The first segment whose start–end span (inclusive at both ends) contains the timestamp, or nil if none does



345
346
347
348
349
350
351
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 345

# Finds the segment that covers a given point in time.
#
# Both segment boundaries are inclusive, so a timestamp sitting exactly on
# the border between two segments resolves to the earlier one in the list.
#
# @param timestamp [Float] time in seconds
# @return [Hash, nil] the first matching segment; nil when there are no
#   segments or none contains the timestamp
def segment_at(timestamp)
  return nil unless segments.present?

  segments.find { |candidate| timestamp.between?(candidate[:start], candidate[:end]) }
end

#srt ⇒ String?

Returns the transcription as SRT subtitle format

Returns:

  • (String, nil)

    SRT formatted subtitles



299
300
301
302
303
304
305
306
307
308
309
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 299

# Renders the segments as SRT subtitles.
#
# Each cue consists of a 1-based counter line, a "start --> end" timing line
# (formatted by +format_srt_time+) and the stripped segment text; cues are
# separated by a blank line.
#
# @return [String, nil] SRT formatted subtitles, or nil when there are no segments
def srt
  return nil unless segments.present?

  cues = segments.map.with_index(1) do |cue, number|
    "#{number}\n#{format_srt_time(cue[:start])} --> #{format_srt_time(cue[:end])}\n#{cue[:text]&.strip}\n"
  end

  cues.join("\n")
end

#success? ⇒ Boolean

Returns whether the transcription succeeded

Returns:

  • (Boolean)

    true if no error class is set and status is :success



271
272
273
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 271

# Tells whether the transcription completed successfully.
#
# @return [Boolean] true only when no +error_class+ is recorded and +status+
#   is +:success+
def success?
  return false unless error_class.nil?

  status == :success
end

#text_between(start_time, end_time) ⇒ String?

Returns text between two timestamps

Parameters:

  • start_time (Float)

    Start time in seconds

  • end_time (Float)

    End time in seconds

Returns:

  • (String, nil)

    Space-joined text of the segments that lie entirely within the range (empty string if none do; nil if there are no segments)



358
359
360
361
362
363
364
365
366
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 358

# Returns the text spoken between two timestamps.
#
# Only segments that lie entirely inside the range are used, i.e. those with
# +start >= start_time+ and +end <= end_time+.
#
# @param start_time [Float] start time in seconds
# @param end_time [Float] end time in seconds
# @return [String, nil] the selected segment texts joined by single spaces
#   (an empty string when no segment falls in the range); nil when there are
#   no segments at all
def text_between(start_time, end_time)
  return nil unless segments.present?

  segments
    .select { |segment| segment[:start] >= start_time && segment[:end] <= end_time }
    # Strip each text, as #srt and #vtt do, and drop missing or blank ones so
    # padded or nil segment text cannot produce leading or doubled spaces.
    .map { |segment| segment[:text].to_s.strip }
    .reject(&:empty?)
    .join(" ")
end

#to_h ⇒ Hash

Converts the result to a hash

Returns:

  • (Hash)

    Result data as a hash (chunks and agent_class_name are not included)



371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 371

# Serializes the result into a plain Hash keyed by attribute name.
#
# Each value is obtained by calling the reader of the same name, in the
# order listed. Note that +chunks+ and +agent_class_name+ are not part of
# the hash.
#
# @return [Hash{Symbol => Object}] attribute values keyed by attribute name
def to_h
  %i[
    text segments words
    speakers speaker_segments
    audio_duration audio_format audio_channels audio_sample_rate
    language detected_language language_confidence
    model_id
    duration_ms started_at completed_at
    total_cost audio_minutes
    confidence
    status
    tenant_id
    error_class error_message
    execution_id
  ].each_with_object({}) do |attribute, hash|
    hash[attribute] = send(attribute)
  end
end

#vtt ⇒ String?

Returns the transcription as WebVTT subtitle format

Returns:

  • (String, nil)

    VTT formatted subtitles



314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 314

# Renders the segments as WebVTT subtitles.
#
# The output starts with the "WEBVTT" header and a blank line; every segment
# then contributes a "start --> end" timing line (formatted by
# +format_vtt_time+), its stripped text and a blank separator line.
#
# @return [String, nil] VTT formatted subtitles, or nil when there are no segments
def vtt
  return nil unless segments.present?

  cue_lines = segments.flat_map do |cue|
    [
      "#{format_vtt_time(cue[:start])} --> #{format_vtt_time(cue[:end])}",
      cue[:text]&.strip,
      ""
    ]
  end

  ["WEBVTT", "", *cue_lines].join("\n")
end

#words_per_minute ⇒ Float?

Returns calculated words per minute

Returns:

  • (Float, nil)

    Words per minute or nil if not calculable



334
335
336
337
338
339
# File 'lib/ruby_llm/agents/results/transcription_result.rb', line 334

# Computes the speaking rate from the transcript text and audio duration.
#
# @return [Float, nil] words per minute rounded to one decimal place; nil
#   when the text is blank or the audio duration is missing or not positive
def words_per_minute
  return nil unless text.present? && audio_duration.present? && audio_duration > 0

  # split(" ") is the awk-style split: it collapses whitespace runs and also
  # ignores leading whitespace. split(/\s+/) instead yields an empty first
  # element for text such as " hello", inflating the count by one.
  word_count = text.split(" ").size
  (word_count / (audio_duration / 60.0)).round(1)
end