Class: Kreuzberg::Config::PDF

Inherits:
Object
  • Object
show all
Defined in:
lib/kreuzberg/config.rb

Overview

PDF-specific options

Examples:

pdf = PDF.new(extract_images: true, passwords: ["secret", "backup"])

With font configuration

font_config = FontConfig.new(enabled: true, custom_font_dirs: ["/usr/share/fonts"])
pdf = PDF.new(extract_images: true, font_config: font_config)

With hierarchy configuration

hierarchy = Hierarchy.new(enabled: true, k_clusters: 6)
pdf = PDF.new(extract_images: true, hierarchy: hierarchy)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(extract_images: false, passwords: nil, extract_metadata: true, font_config: nil, hierarchy: nil, extract_annotations: false, top_margin_fraction: nil, bottom_margin_fraction: nil, allow_single_column_tables: false) ⇒ PDF

Returns a new instance of PDF.



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'lib/kreuzberg/config.rb', line 398

# Builds PDF-specific options, coercing every argument into the form that
# is stored on the instance.
#
# @param extract_images [Object] any truthy value is stored as +true+,
#   +nil+/+false+ as +false+
# @param passwords [Array, Object, nil] an Array is mapped element-wise
#   through +to_s+; any other truthy value is wrapped into a one-element
#   Array holding its +to_s+; +nil+/+false+ is stored as +nil+
# @param extract_metadata [Object] truthy/falsy flag, stored as +true+/+false+
# @param font_config [Object, nil] passed through +normalize_font_config+
#   (defined elsewhere in the class)
# @param hierarchy [Object, nil] passed through +normalize_hierarchy+
#   (defined elsewhere in the class)
# @param extract_annotations [Object] truthy/falsy flag, stored as +true+/+false+
# @param top_margin_fraction [#to_f, nil] stored as a Float, or +nil+ when omitted
# @param bottom_margin_fraction [#to_f, nil] stored as a Float, or +nil+ when omitted
# @param allow_single_column_tables [Object] truthy/falsy flag, stored as
#   +true+/+false+
# @return [PDF] a new instance of PDF
def initialize(
  extract_images: false,
  passwords: nil,
  extract_metadata: true,
  font_config: nil,
  hierarchy: nil,
  extract_annotations: false,
  top_margin_fraction: nil,
  bottom_margin_fraction: nil,
  allow_single_column_tables: false
)
  @extract_images = extract_images ? true : false
  # Always store passwords as an Array of Strings (or nil when none given),
  # so a single scalar password and a list are handled uniformly later on.
  @passwords = if passwords.is_a?(Array)
                 passwords.map(&:to_s)
               else
                 (passwords ? [passwords.to_s] : nil)
               end
  @extract_metadata = extract_metadata ? true : false
  @font_config = normalize_font_config(font_config)
  @hierarchy = normalize_hierarchy(hierarchy)
  @extract_annotations = extract_annotations ? true : false
  # Safe navigation keeps nil as nil instead of turning it into 0.0.
  @top_margin_fraction = top_margin_fraction&.to_f
  @bottom_margin_fraction = bottom_margin_fraction&.to_f
  @allow_single_column_tables = allow_single_column_tables ? true : false
end

Instance Attribute Details

#allow_single_column_tables ⇒ Object (readonly)

Returns the value of attribute allow_single_column_tables.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the single-column-table flag.
#
# @return [Boolean] the value of @allow_single_column_tables, which
#   #initialize coerces to +true+ or +false+
def allow_single_column_tables
  @allow_single_column_tables
end

#bottom_margin_fraction ⇒ Object (readonly)

Returns the value of attribute bottom_margin_fraction.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the bottom margin fraction.
#
# @return [Float, nil] the value of @bottom_margin_fraction; #initialize
#   stores the argument converted with +to_f+, or +nil+ when none was given
def bottom_margin_fraction
  @bottom_margin_fraction
end

#extract_annotations ⇒ Object (readonly)

Returns the value of attribute extract_annotations.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the annotation-extraction flag.
#
# @return [Boolean] the value of @extract_annotations, which #initialize
#   coerces to +true+ or +false+
def extract_annotations
  @extract_annotations
end

#extract_images ⇒ Object (readonly)

Returns the value of attribute extract_images.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the image-extraction flag.
#
# @return [Boolean] the value of @extract_images, which #initialize
#   coerces to +true+ or +false+
def extract_images
  @extract_images
end

#extract_metadata ⇒ Object (readonly)

Returns the value of attribute extract_metadata.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the metadata-extraction flag.
#
# @return [Boolean] the value of @extract_metadata
def extract_metadata
  @extract_metadata
end

#font_config ⇒ Object

Returns the value of attribute font_config.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the font configuration.
#
# @return [Object, nil] the value of @font_config; #initialize sets it to
#   the result of +normalize_font_config+ (helper not shown in this listing)
def font_config
  @font_config
end

#hierarchy ⇒ Object

Returns the value of attribute hierarchy.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the hierarchy configuration.
#
# @return [Object, nil] the value of @hierarchy; #initialize sets it to
#   the result of +normalize_hierarchy+ (helper not shown in this listing)
def hierarchy
  @hierarchy
end

#passwords ⇒ Object (readonly)

Returns the value of attribute passwords.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the configured PDF passwords.
#
# @return [Array<String>, nil] the value of @passwords; #initialize stores
#   an Array of Strings, or +nil+ when no password was given
def passwords
  @passwords
end

#top_margin_fraction ⇒ Object (readonly)

Returns the value of attribute top_margin_fraction.



394
395
396
# File 'lib/kreuzberg/config.rb', line 394

# Reader for the top margin fraction.
#
# @return [Float, nil] the value of @top_margin_fraction; #initialize
#   stores the argument converted with +to_f+, or +nil+ when none was given
def top_margin_fraction
  @top_margin_fraction
end

Instance Method Details

#to_h ⇒ Object



424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/kreuzberg/config.rb', line 424

# Serializes the options into a Hash with Symbol keys.
#
# The nested font and hierarchy settings are converted with their own
# +to_h+ when present, and every entry whose value is +nil+ is left out of
# the result.
#
# @return [Hash{Symbol => Object}] the non-nil options
def to_h
  font_settings = @font_config&.to_h
  hierarchy_settings = @hierarchy&.to_h

  serialized = {}
  serialized[:extract_images] = @extract_images
  serialized[:passwords] = @passwords
  serialized[:extract_metadata] = @extract_metadata
  serialized[:font_config] = font_settings
  serialized[:hierarchy] = hierarchy_settings
  serialized[:extract_annotations] = @extract_annotations
  serialized[:top_margin_fraction] = @top_margin_fraction
  serialized[:bottom_margin_fraction] = @bottom_margin_fraction
  serialized[:allow_single_column_tables] = @allow_single_column_tables

  # Drop unset (nil) options; false values are kept.
  serialized.compact
end