Class: IiifPrint::SplitPdfs::PagesIntoImagesService

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/iiif_print/split_pdfs/pages_into_images_service.rb

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ PagesIntoImagesService

Returns a new instance of PagesIntoImagesService.



11
12
13
14
15
16
17
18
19
20
21
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 11

# Builds a service for the PDF located at +path+. No external work happens
# here; the constructor only records the path and seeds instance state.
#
# @param path [String] location of the PDF to be split
def initialize(path)
  # Random token used as the prefix of the per-page file names in #gsconvert.
  @baseid = SecureRandom.uuid
  @pdfpath = path
  # Placeholders for values that are filled in lazily by other methods.
  @info = @entries = @tmpdir = @size = @pagecount = @pdftext = nil
  # Default TIFF compression; #gsdevice switches it to 'g4' for 1-bit gray.
  @compression = 'lzw'
end

Instance Method Details

#colordevice(channels, bpc) ⇒ Object



40
41
42
43
44
45
46
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 40

# Names the Ghostscript color TIFF device for a given channel layout.
#
# Only total depths of 24 or 48 bits are kept as-is; every other depth
# (for example 4 channels at 8 bpc, i.e. 32 bits, as with CMYK) falls back
# to 24 bits, so the result is always an 8 bpc or 16 bpc color TIFF device.
#
# @param channels [Integer] number of color channels
# @param bpc [Integer] bits per channel
# @return [String] "tiff24nc" or "tiff48nc"
def colordevice(channels, bpc)
  total_bits = bpc * channels
  depth = [24, 48].include?(total_bits) ? total_bits : 24
  "tiff#{depth}nc"
end

#each ⇒ Object



123
124
125
126
127
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 123

# Iterates over the page entries (see #entries), yielding each one in order.
# This is the method Enumerable builds on.
#
# When called without a block an Enumerator is returned instead of raising
# LocalJumpError, which is the conventional contract for #each.
#
# @yieldparam entry [String] one element of #entries
# @return [Enumerator] when no block is given
# @return [Array<String>] the entries, when a block is given
def each
  return enum_for(:each) unless block_given?

  entries.each { |entry| yield(entry) }
end

#entries ⇒ Object

Returns the entries (the TIFF file paths produced by #gsconvert), one per page.



118
119
120
121
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 118

# Page entries, one per page. The Ghostscript conversion (#gsconvert) runs
# the first time this is called; later calls reuse the stored result.
#
# @return [Array<String>] whatever #gsconvert returned
def entries
  @entries ||= gsconvert
end

#gsconvert ⇒ Object

Uses Ghostscript to convert every page of the PDF to a TIFF file.



102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 102

# Runs Ghostscript to render every page of the PDF into its own TIFF file
# inside #tmpdir, and records the number of rendered pages in @size.
#
# The command is passed to Open3.capture3 as an argument vector rather than
# a shell string, so a PDF path containing spaces or shell metacharacters is
# handed to Ghostscript verbatim. capture3 also drains stderr, so a chatty
# Ghostscript run cannot block on a full pipe. The exit status is ignored.
#
# @return [Array<String>] the expected TIFF paths, in page order
def gsconvert
  output_base = File.join(tmpdir, "#{@baseid}-page%d.tiff")
  # #gsdevice must run before @compression is read: it may switch the
  # compression to 'g4' as a side effect.
  device = gsdevice
  stdout, _stderr, _status = Open3.capture3(
    'gs', '-dNOPAUSE', '-dBATCH', "-sDEVICE=#{device}",
    '-dTextAlphaBits=4', "-sCompression=#{@compression}",
    "-sOutputFile=#{output_base}", "-r#{ppi}", '-f', @pdfpath.to_s
  )
  # The page count is taken from the output lines that start with 'Page '.
  @size = stdout.each_line.count { |line| line.start_with?('Page ') }
  # Return an array of expected filenames
  (1..@size).map { |n| File.join(tmpdir, "#{@baseid}-page#{n}.tiff") }
end

#gsdevice ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 48

# Chooses the Ghostscript output device from the color information reported
# by #pdfinfo (a color name, a channel count and bits per channel).
#
# Side effect: for 1-bit gray sources @compression is changed to 'g4'.
#
# @return [String] 'tiffg4', 'tiffgray', or the result of #colordevice
def gsdevice
  color, channels, bpc = pdfinfo.color
  # Anything that is not gray is handled by the color device selection.
  return colordevice(channels, bpc) unless color == 'gray'

  if bpc == 1
    # CCITT Group 4 black and white.
    @compression = 'g4'
    return 'tiffg4'
  end
  # Grayscale with more than one bit per channel.
  return 'tiffgray' if bpc > 1

  colordevice(channels, bpc)
end

#invalid_pdf? ⇒ Boolean

TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.

Returns:

  • (Boolean)


30
31
32
33
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 30

# Reports whether the image information extracted from the PDF is unusable:
# a nil anywhere in the color triple, a missing width or height, or no
# image entries at all.
#
# @return [Boolean] true when any of those conditions holds
def invalid_pdf?
  info = pdfinfo
  info.color.include?(nil) || info.width.nil? || info.height.nil? || info.entries.length.zero?
end

#looks_scanned ⇒ Object



85
86
87
88
89
90
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 85

# Heuristic for scanned media: the number of image entries equals the page
# count (one image per page) and width * height as reported by #pdfinfo
# exceeds 10 * 1024 * 1024 pixels.
#
# Both values are computed up front, so #pagecount is always invoked.
#
# @return [Boolean]
def looks_scanned
  info = pdfinfo
  pixel_area = info.width * info.height
  one_image_per_page = info.entries.length == pagecount
  one_image_per_page && pixel_area > 10 * 1024 * 1024
end

#pagecount ⇒ Object

TODO: This method came from the newspaper gem but appears to be unused. Is it needed anywhere?

def gstext

cmd = "gs -q -dNOPAUSE -dBATCH -sDEVICE=txtwrite " \
      "-sOutputFile=- -f #{@pdfpath}"
Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
  @pdftext = stdout.read
end
@pdftext

end



73
74
75
76
77
78
79
80
81
82
83
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 73

# Asks the `pdfinfo` command-line tool how many pages the PDF has and stores
# the answer in @pagecount. The tool is run on every call.
#
# The command is passed as an argument vector (no shell), so a path with
# spaces or shell metacharacters reaches `pdfinfo` unchanged, and stderr is
# drained by capture3.
#
# @return [Integer] the number after "Pages:" in the tool's output, or 0
#   when the output has no line starting with "Pages:" (previously this
#   case raised NoMethodError on nil)
def pagecount
  stdout, _stderr, _status = Open3.capture3('pdfinfo', @pdfpath.to_s)
  pages_line = stdout.each_line.find { |line| line.start_with?('Pages:') }
  @pagecount = pages_line ? pages_line.split.last.to_i : 0
end

#pdfinfo ⇒ Object

Returns the memoized PdfImageExtractionService instance for this PDF.



24
25
26
27
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 24

# Image-extraction service for this PDF, created on first use from @pdfpath
# and reused afterwards.
#
# @return [IiifPrint::SplitPdfs::PdfImageExtractionService]
def pdfinfo
  @info ||= IiifPrint::SplitPdfs::PdfImageExtractionService.new(@pdfpath)
end

#ppi ⇒ Object



92
93
94
95
96
97
98
99
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 92

# Resolution used when rendering pages: the PPI detected by #pdfinfo when
# the PDF looks scanned (see #looks_scanned), otherwise a fixed 400.
#
# @return [Object] pdfinfo.ppi for scanned media, 400 for everything else
def ppi
  looks_scanned ? pdfinfo.ppi : 400
end

#tmpdir ⇒ Object



35
36
37
38
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 35

# Temporary directory for this instance, created with Dir.mktmpdir on first
# use and reused afterwards. Nothing in this method removes the directory.
#
# @return [String] path of the temporary directory
def tmpdir
  @tmpdir ||= Dir.mktmpdir
end