Class: IiifPrint::SplitPdfs::PdfImageExtractionService

Inherits:
Object
  • Object
show all
Defined in:
lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb

Overview

Uses poppler 0.19+ pdfimages command to extract image

listing metadata from PDF files.

For dpi extraction, falls back to calculating using MiniMagick,

if necessary.

Constant Summary collapse

COL_WIDTH =

class constant column numbers

3
COL_HEIGHT =
4
COL_COLOR =
5
COL_CHANNELS =
6
COL_BITS =
7
COL_XPPI =

only poppler 0.25+ has this column in output:

12

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ PdfImageExtractionService

Returns a new instance of PdfImageExtractionService.



20
21
22
23
24
25
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 20

# Prepares (but does not run) the `pdfimages -list` command for one PDF.
#
# The path is shell-escaped before being interpolated into the command
# string, so paths containing spaces or shell metacharacters are passed to
# pdfimages as a single literal argument instead of being split or
# interpreted by the shell.
#
# @param path [String, #to_s] filesystem path of the PDF to inspect
def initialize(path)
  # Stdlib helper; skip the require when it is already loaded.
  require 'shellwords' unless defined?(Shellwords)

  @path = path
  @cmd = format('pdfimages -list %<path>s', path: Shellwords.escape(path.to_s))
  # Both caches are filled lazily on first use.
  @output = nil
  @entries = nil
end

Instance Method Details

#color ⇒ Object



62
63
64
65
66
67
68
69
70
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 62

# Summarises the colour information of all listed images.
#
# The description is 'gray' only when every entry's colour column is
# exactly 'gray'; any other value in any entry yields 'rgb'. Channel count
# and bit depth are the maxima across entries, so a caller can tell 1-bit
# gray (black/white) apart from deeper grayscale.
#
# @return [Array] [description, max channels, max bits]; the two maxima
#   are nil when there are no entries
def color
  rows = entries
  all_gray = rows.all? { |row| row[COL_COLOR] == 'gray' }
  description = all_gray ? 'gray' : 'rgb'
  max_channels = rows.map { |row| row[COL_CHANNELS].to_i }.max
  max_bits = rows.map { |row| row[COL_BITS].to_i }.max
  [description, max_channels, max_bits]
end

#entries ⇒ Object



37
38
39
40
41
42
43
44
45
46
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 37

# Parsed listing rows, one array of column strings per line returned by
# #process. The result is computed once and cached in @entries.
#
# Each line is split on runs of whitespace; leading and trailing whitespace
# produce no empty columns. A nil result from #process is treated as "no
# rows" rather than raising.
#
# @return [Array<Array<String>>] rows of whitespace-separated columns
def entries
  @entries ||= Array(process).map(&:split)
end

#height ⇒ Object



58
59
60
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 58

# Largest value found in the height column across all entries.
#
# @return [Integer, nil] maximum height, or nil when there are no entries
def height
  heights = selectcolumn(COL_HEIGHT) { |value| value.to_i }
  heights.max
end

#ppi ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 72

# Pixels-per-inch of the images in the PDF.
#
# When the listing rows include the x-ppi column (poppler 0.25+), the
# maximum of that column is returned. Otherwise the value is estimated from
# the width MiniMagick reports for the file and the maximum image pixel
# width.
#
# @return [Integer, nil] ppi, or nil when the listing contains no entries
def ppi
  first_row = entries.first
  # No images listed: there is nothing to measure, so avoid calling
  # methods on nil.
  return nil if first_row.nil?

  if first_row.size <= COL_XPPI
    # poppler < 0.25: the row is too short to contain the x-ppi column.
    pdf = MiniMagick::Image.open(@path)
    # NOTE(review): treats the width MiniMagick reports for the PDF as
    # points (72 per inch) — confirm against MiniMagick's PDF handling.
    width_points = pdf.width
    width_px = width
    return (72 * width_px / width_points).to_i
  end
  # with poppler 0.25+, pdfimages just gives us this:
  selectcolumn(COL_XPPI, &:to_i).max
end

#process ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 27

# Runs the prepared command (at most once) and returns its listing rows.
#
# Standard output is split into lines and cached in @output. The first two
# lines are skipped (presumably the column-title and separator rows of
# `pdfimages -list` — confirm against the poppler version in use).
#
# Open3.capture3 drains stdout and stderr together, so a command that
# writes a lot to stderr cannot block on a full pipe; stderr and the exit
# status are discarded.
#
# @return [Array<String>] lines after the first two; empty when the command
#   printed two lines or fewer
def process
  # call just once
  if @output.nil?
    stdout, _stderr, _status = Open3.capture3(@cmd)
    @output = stdout.split("\n")
  end
  # drop never returns nil, unlike slice with an out-of-range start.
  @output.drop(2)
end

#selectcolumn(i, &block) ⇒ Object



48
49
50
51
52
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 48

# Collects column +i+ from every entry, optionally transforming each value.
#
# @param i [Integer] zero-based column index within each entry
# @yield [value] optional conversion applied to each column value
# @return [Array] the column values (nil where an entry has no column +i+),
#   passed through the block when one is given
def selectcolumn(i, &block)
  column = entries.map { |row| row[i] }
  block ? column.map(&block) : column
end

#width ⇒ Object



54
55
56
# File 'lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb', line 54

# Largest value found in the width column across all entries.
#
# @return [Integer, nil] maximum width, or nil when there are no entries
def width
  widths = selectcolumn(COL_WIDTH) { |value| value.to_i }
  widths.max
end