Class: Legion::Data::Extract::Handlers::Vtt

Inherits:
Base
  • Object
show all
Defined in:
lib/legion/data/extract/handlers/vtt.rb

Constant Summary collapse

# Matches the start of a WebVTT cue timing line ("start --> end").
# The hours field is optional on either side and may have two or more digits,
# so both "00:00:01.000 --> 00:00:04.000" and "00:01.000 --> 00:04.000" match.
# Only the beginning of the line is anchored, so trailing cue settings
# (e.g. "align:start") do not prevent a match.
TIMESTAMP_PATTERN =
  /^(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/
# Matches a line that begins with a voice tag of the form "<v NAME>".
# Capture 1 is everything between "<v " and the first ">" (the speaker name);
# capture 2 is the remainder of the line after that ">", which still includes
# any closing "</v>" present in the input.
SPEAKER_TAG_PATTERN =
/^<v ([^>]+)>(.*)$/

Class Method Summary collapse

Methods inherited from Base

available?, for_type, inherited, register, supported_types

Methods included from Logging::Helper

#handle_exception

Class Method Details

.extensions ⇒ Object



12
# File 'lib/legion/data/extract/handlers/vtt.rb', line 12

# File extensions handled by this extractor.
#
# @return [Array<String>] a new array containing ".vtt"
def self.extensions
  ['.vtt']
end

.extract(source, preserve_speakers: true) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/legion/data/extract/handlers/vtt.rb', line 15

# Reads WebVTT content and returns its text together with basic metadata.
#
# The input is resolved in this order:
# 1. anything responding to +read+ is read;
# 2. a String containing a newline is used as the content itself;
# 3. anything else is converted with +to_s+ and read as a file path.
#
# @param source [#read, String, #to_s] reader, inline content, or file path
# @param preserve_speakers [Boolean] passed through unchanged to parse_vtt
# @return [Hash] on success, +{ text:, metadata: { bytes:, speakers:, line_count: } }+;
#   on any StandardError, +{ text: nil, error: <exception message> }+
def self.extract(source, preserve_speakers: true)
  raw =
    if source.respond_to?(:read) then source.read
    elsif source.is_a?(String) && source.include?("\n") then source
    else File.read(source.to_s)
    end

  cue_lines = parse_vtt(raw, preserve_speakers: preserve_speakers)
  transcript = cue_lines.join("\n")
  # Speakers are taken from the raw content, not from the parsed lines.
  voices = extract_speakers(raw)

  details = { bytes: raw.bytesize, speakers: voices, line_count: cue_lines.size }
  { text: transcript, metadata: details }
rescue StandardError => e
  # Failures are reported at warn level and surfaced to the caller as an
  # error hash rather than raised.
  handle_exception(e, level: :warn, handled: true, operation: :extract_vtt)
  { text: nil, error: e.message }
end

.gem_name ⇒ Object



13
# File 'lib/legion/data/extract/handlers/vtt.rb', line 13

# Gem name associated with this handler.
#
# @return [nil] always nil for this handler
def self.gem_name
  nil
end

.type ⇒ Object



11
# File 'lib/legion/data/extract/handlers/vtt.rb', line 11

# Type identifier for this handler.
#
# @return [Symbol] always :vtt
def self.type
  :vtt
end