Module: Legion::Data::Extract

Extended by:
Logging::Helper
Defined in:
lib/legion/data/extract.rb,
lib/legion/data/extract/handlers/csv.rb,
lib/legion/data/extract/handlers/pdf.rb,
lib/legion/data/extract/handlers/vtt.rb,
lib/legion/data/extract/handlers/base.rb,
lib/legion/data/extract/handlers/docx.rb,
lib/legion/data/extract/handlers/html.rb,
lib/legion/data/extract/handlers/json.rb,
lib/legion/data/extract/handlers/pptx.rb,
lib/legion/data/extract/handlers/text.rb,
lib/legion/data/extract/handlers/xlsx.rb,
lib/legion/data/extract/type_detector.rb,
lib/legion/data/extract/handlers/jsonl.rb,
lib/legion/data/extract/handlers/markdown.rb

Defined Under Namespace

Modules: Handlers, TypeDetector

Class Method Summary

Methods included from Logging::Helper

handle_exception

Class Method Details

.can_extract?(type) ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
48
# File 'lib/legion/data/extract.rb', line 44

# Reports whether a handler is registered for +type+ and is usable.
#
# Calls load_all_handlers before consulting the registry.
#
# @param type [#to_sym, nil] type name; nil is handed to the lookup as-is
# @return [Boolean, Object] the handler's +available?+ result when truthy,
#   otherwise false (also false when no handler is found)
def can_extract?(type)
  load_all_handlers

  type_key = type.nil? ? nil : type.to_sym
  candidate = Handlers::Base.for_type(type_key)
  # No handler registered for this type.
  return false if candidate.nil?

  # Coerce a nil/false availability answer to a plain false.
  candidate.available? || false
end

.extract(source, type: :auto) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/legion/data/extract.rb', line 13

# Extracts text from +source+ using the handler registered for its type.
#
# Never raises StandardError to the caller: any such error is reported via
# handle_exception and turned into a failure hash.
#
# @param source [Object] input handed to TypeDetector.detect (in auto mode)
#   and to the selected handler's +extract+
# @param type [Symbol, String, nil] explicit type name, or :auto / "auto" to
#   detect the type from +source+
# @return [Hash] always has :success and :text. On success also :metadata and
#   :type. On failure :error, plus :type once a type is known and :gem when
#   the handler reports itself unavailable. :error is a Symbol for the
#   guard-clause failures, the handler's own :error value when it returns no
#   text, and the exception message String when an error was rescued.
def extract(source, type: :auto)
  # Normalise before comparing against the :auto sentinel so that "auto" is
  # honoured the same way every other String type name is (via to_sym).
  requested_type = type&.to_sym
  detected_type = requested_type == :auto ? TypeDetector.detect(source) : requested_type
  return { success: false, text: nil, error: :unknown_type } unless detected_type

  handler = Handlers::Base.for_type(detected_type)
  return { success: false, text: nil, error: :no_handler, type: detected_type } unless handler

  unless handler.available?
    return { success: false, text: nil, error: :gem_not_installed,
             gem: handler.gem_name, type: detected_type }
  end

  log.info "Extract starting type=#{detected_type} handler=#{handler.name}"
  result = handler.extract(source)
  # A handler signals failure by returning no :text; an empty String still
  # counts as success because only nil/false are falsy.
  if result[:text]
    log.info "Extract succeeded type=#{detected_type}"
    { success: true, text: result[:text], metadata: result[:metadata], type: detected_type }
  else
    log.warn "Extract failed type=#{detected_type} error=#{result[:error]}"
    { success: false, text: nil, error: result[:error], type: detected_type }
  end
rescue StandardError => e
  # detected_type is nil here when the failure happened during detection.
  handle_exception(e, level: :error, handled: true, operation: :extract, type: detected_type)
  { success: false, text: nil, error: e.message, type: detected_type }
end

.register_handler(type, klass) ⇒ Object



50
51
52
53
# File 'lib/legion/data/extract.rb', line 50

# Registers +klass+ as the handler for +type+.
#
# The registry held in Handlers::Base's @registry is not mutated in place:
# a new frozen hash (current registry merged with the new entry) replaces it.
# An existing entry for the same type is overwritten.
#
# @param type [#to_sym] type name the handler is registered under
# @param klass [Object] handler to store for that type
# @return [Hash] the new frozen registry
def register_handler(type, klass)
  base = Handlers::Base
  updated_registry = base.registry.merge(type.to_sym => klass).freeze
  base.instance_variable_set(:@registry, updated_registry)
end

.supported_types ⇒ Object



39
40
41
42
# File 'lib/legion/data/extract.rb', line 39

# Returns the types reported by the handler registry.
#
# Calls load_all_handlers first (presumably so every handler has had the
# chance to register itself; confirm against its definition).
#
# @return [Object] whatever Handlers::Base.supported_types returns
def supported_types
  load_all_handlers

  handler_base = Handlers::Base
  handler_base.supported_types
end