Module: Legion::Data::Extract::TypeDetector

Defined in:
lib/legion/data/extract/type_detector.rb

Constant Summary collapse

# Lookup table from a lowercase file extension (leading dot included) to the
# symbol naming its document type. It is declared grouped by type, because
# several extensions can share one type, and then flattened into a frozen
# extension => type hash.
EXTENSION_MAP =
  {
    pdf:      %w[.pdf],
    docx:     %w[.docx],
    pptx:     %w[.pptx],
    xlsx:     %w[.xlsx .xls],
    markdown: %w[.md .markdown],
    text:     %w[.txt],
    csv:      %w[.csv],
    json:     %w[.json],
    jsonl:    %w[.jsonl],
    html:     %w[.html .htm],
    vtt:      %w[.vtt]
  }.flat_map { |type, extensions| extensions.map { |extension| [extension, type] } }.to_h.freeze

Class Method Summary collapse

Class Method Details

.detect(source) ⇒ Object



26
27
28
29
30
31
# File 'lib/legion/data/extract/type_detector.rb', line 26

# Determines the document type of +source+.
#
# A String is treated as a path only when it names an existing filesystem
# entry; any other String yields nil. An object that responds to +path+ is
# handed to +detect_from_io+. Everything else yields nil.
#
# @param source [String, #path, Object] a filesystem path or a path-bearing object
# @return [Object, nil] whatever +detect_from_path+ / +detect_from_io+ returns,
#   or nil when +source+ cannot be resolved to a path
def detect(source)
  # File.exist? raises ArgumentError for a String containing a NUL byte. Such a
  # String can never name a file, so it is treated like any other non-path String.
  if source.is_a?(String) && !source.include?("\0") && File.exist?(source)
    return detect_from_path(source)
  end
  return detect_from_io(source) if source.respond_to?(:path)

  nil
end

.detect_from_io(io) ⇒ Object



38
39
40
41
42
# File 'lib/legion/data/extract/type_detector.rb', line 38

# Resolves the document type of an IO-like object through its +path+.
#
# @param io [#path, Object] an object that may expose a +path+
# @return [Object, nil] the result of +detect_from_path+ for +io.path+, or nil
#   when +io+ does not respond to +path+ or its +path+ is nil/false
def detect_from_io(io)
  # An `if` without an `else` evaluates to nil, which covers the "no usable path" case.
  if io.respond_to?(:path) && io.path
    detect_from_path(io.path)
  end
end

.detect_from_path(path) ⇒ Object



33
34
35
36
# File 'lib/legion/data/extract/type_detector.rb', line 33

# Looks up the document type registered for the extension of +path+.
# The extension is lowercased before the lookup, so matching ignores case.
#
# @param path [String] a file path or bare file name
# @return [Object, nil] the EXTENSION_MAP entry for the extension, or nil when
#   the extension has no entry
def detect_from_path(path)
  EXTENSION_MAP[File.extname(path).downcase]
end