Module: Legion::Data::Extract
- Extended by:
- Logging::Helper
- Defined in:
- lib/legion/data/extract.rb,
lib/legion/data/extract/handlers/csv.rb,
lib/legion/data/extract/handlers/pdf.rb,
lib/legion/data/extract/handlers/vtt.rb,
lib/legion/data/extract/handlers/base.rb,
lib/legion/data/extract/handlers/docx.rb,
lib/legion/data/extract/handlers/html.rb,
lib/legion/data/extract/handlers/json.rb,
lib/legion/data/extract/handlers/pptx.rb,
lib/legion/data/extract/handlers/text.rb,
lib/legion/data/extract/handlers/xlsx.rb,
lib/legion/data/extract/type_detector.rb,
lib/legion/data/extract/handlers/jsonl.rb,
lib/legion/data/extract/handlers/markdown.rb
Defined Under Namespace
Modules: Handlers, TypeDetector
Class Method Summary
- .can_extract?(type) ⇒ Boolean
- .extract(source, type: :auto) ⇒ Object
- .register_handler(type, klass) ⇒ Object
- .supported_types ⇒ Object
Methods included from Logging::Helper
Class Method Details
.can_extract?(type) ⇒ Boolean
44 45 46 47 48 |
# File 'lib/legion/data/extract.rb', line 44
#
# Reports whether text extraction is currently possible for +type+.
#
# Calls +load_all_handlers+ (defined elsewhere in this module) before the
# lookup, then asks Handlers::Base for the handler registered under the
# symbolized type.
#
# @param type [#to_sym, nil] type identifier; nil is passed through as nil
# @return [Boolean] false when no handler is found or the handler reports
#   itself unavailable; otherwise the handler's truthy +available?+ result
def can_extract?(type)
  load_all_handlers

  type_key = type&.to_sym
  candidate = Handlers::Base.for_type(type_key)
  return false if candidate.nil?

  # Coerce a nil/false availability answer to a strict false.
  candidate.available? || false
end
.extract(source, type: :auto) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/legion/data/extract.rb', line 13
#
# Extracts text from +source+ using the handler registered for its type.
#
# The result is always a Hash with +:success+ and +:text+ keys. Failures
# carry an +:error+ key instead of raising:
#
# * +:unknown_type+      - no type given and none detected (no +:type+ key)
# * +:no_handler+        - no handler registered for the type
# * +:gem_not_installed+ - handler exists but reports itself unavailable;
#                          +:gem+ holds the handler's +gem_name+
# * handler's own error  - the handler returned no +:text+
# * exception message    - a StandardError was raised along the way
#
# @param source [Object] passed unchanged to TypeDetector.detect and to the
#   handler's +extract+
# @param type [Symbol, #to_sym, nil] explicit type, or +:auto+ to detect it
# @return [Hash] success: +{ success: true, text:, metadata:, type: }+;
#   failure: +{ success: false, text: nil, error:, ... }+
def extract(source, type: :auto)
  detected_type = type == :auto ? TypeDetector.detect(source) : type&.to_sym
  return { success: false, text: nil, error: :unknown_type } unless detected_type

  handler = Handlers::Base.for_type(detected_type)
  return { success: false, text: nil, error: :no_handler, type: detected_type } unless handler

  unless handler.available?
    return { success: false, text: nil, error: :gem_not_installed, gem: handler.gem_name, type: detected_type }
  end

  log.info "Extract starting type=#{detected_type} handler=#{handler.name}"
  result = handler.extract(source)

  if result[:text]
    log.info "Extract succeeded type=#{detected_type}"
    { success: true, text: result[:text], metadata: result[:metadata], type: detected_type }
  else
    log.warn "Extract failed type=#{detected_type} error=#{result[:error]}"
    { success: false, text: nil, error: result[:error], type: detected_type }
  end
rescue StandardError => e
  # detected_type is still nil here if type detection itself raised.
  handle_exception(e, level: :error, handled: true, operation: :extract, type: detected_type)
  { success: false, text: nil, error: e.message, type: detected_type }
end
.register_handler(type, klass) ⇒ Object
50 51 52 53 |
# File 'lib/legion/data/extract.rb', line 50
#
# Registers +klass+ as the handler for +type+.
#
# Builds a new registry by merging the entry into Handlers::Base.registry,
# freezes it, and stores it in Handlers::Base's +@registry+ instance
# variable. The existing registry hash is not mutated. An entry already
# registered under the same type is replaced.
#
# @param type [#to_sym] type identifier to register under
# @param klass [Object] handler to associate with the type
# @return [Hash] the new frozen registry
def register_handler(type, klass)
  base = Handlers::Base
  merged = base.registry.merge(type.to_sym => klass)
  base.instance_variable_set(:@registry, merged.freeze)
end
.supported_types ⇒ Object
39 40 41 42 |
# File 'lib/legion/data/extract.rb', line 39
#
# Lists the types reported by Handlers::Base.supported_types.
#
# Calls +load_all_handlers+ (defined elsewhere in this module) first,
# presumably so every handler is registered before the query.
#
# @return [Object] whatever Handlers::Base.supported_types returns
def supported_types
  load_all_handlers

  handler_base = Handlers::Base
  handler_base.supported_types
end