Class: IiifPrint::SplitPdfs::PagesIntoImagesService
- Inherits:
-
Object
- Object
- IiifPrint::SplitPdfs::PagesIntoImagesService
- Includes:
- Enumerable
- Defined in:
- lib/iiif_print/split_pdfs/pages_into_images_service.rb
Instance Method Summary collapse
- #colordevice(channels, bpc) ⇒ Object
- #each ⇒ Object
-
#entries ⇒ Object
entries for each page.
-
#gsconvert ⇒ Object
ghostscript convert all pages to TIFF.
- #gsdevice ⇒ Object
-
#initialize(path) ⇒ PagesIntoImagesService
constructor
A new instance of PagesIntoImagesService.
-
#invalid_pdf? ⇒ Boolean
TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
- #looks_scanned ⇒ Object
-
#pagecount ⇒ Object
TODO: this method came from newspaper gem but appears to be unused.
-
#pdfinfo ⇒ Object
Returns the memoized PdfImageExtractionService built for this PDF.
- #ppi ⇒ Object
- #tmpdir ⇒ Object
Constructor Details
#initialize(path) ⇒ PagesIntoImagesService
Returns a new instance of PagesIntoImagesService.
11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 11
#
# Sets up a service for splitting the PDF at +path+ into page images.
#
# A random UUID is generated as the base of the output file names, and the
# TIFF compression defaults to 'lzw'. All cached state starts out as nil.
#
# @param path path of the source PDF (presumably a String — confirm with callers)
def initialize(path)
  @pdfpath = path
  @baseid = SecureRandom.uuid
  @compression = 'lzw'
  # Cached / derived values, all unset until something computes them.
  @info = @entries = @tmpdir = @size = @pagecount = @pdftext = nil
end
Instance Method Details
#colordevice(channels, bpc) ⇒ Object
40 41 42 43 44 45 46 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 40
#
# Picks the Ghostscript color TIFF device name for a given pixel layout.
#
# The result is an 8 or 16 bit-per-channel color TIFF (24 or 48 bits per
# pixel). Any other total, such as a CMYK source, falls back to 8bpc RGB.
#
# @param channels number of color channels
# @param bpc bits per channel
# @return [String] "tiff24nc" or "tiff48nc"
def colordevice(channels, bpc)
  total = channels * bpc
  depth = [24, 48].include?(total) ? total : 24
  "tiff#{depth}nc"
end
#each ⇒ Object
123 124 125 126 127 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 123
#
# Yields each entry returned by #entries, one per page image.
#
# Without a block an Enumerator is returned, following the usual Enumerable
# convention, instead of failing on the missing block.
#
# @yield [entry] one element of #entries at a time
# @return [Enumerator] when no block is given
# @return [Object] the result of +entries.each+ when a block is given
def each(&block)
  return enum_for(:each) unless block_given?

  entries.each(&block)
end
#entries ⇒ Object
entries for each page
118 119 120 121 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 118
#
# Entries for each page. The first call runs #gsconvert and caches its
# result in @entries; later calls return the cached value.
#
# @return [Object] whatever #gsconvert returned
def entries
  @entries ||= gsconvert
end
#gsconvert ⇒ Object
ghostscript convert all pages to TIFF
102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 102
#
# Runs Ghostscript to convert all pages of the PDF to TIFF files in #tmpdir.
#
# The command is passed to Open3.capture3 as an argument list, so no shell is
# involved: PDF paths containing spaces or shell metacharacters are handed to
# Ghostscript verbatim. capture3 also drains stderr, so a chatty Ghostscript
# run cannot block on a full pipe.
#
# The page count is taken from the number of stdout lines that start with
# 'Page ' and is stored in @size. The exit status is not inspected.
#
# @return [Array<String>] the expected output file names, one per page
def gsconvert
  output_base = File.join(tmpdir, "#{@baseid}-page%d.tiff")
  # gsdevice may switch @compression to 'g4', so resolve the device before
  # @compression is read below.
  device = gsdevice
  cmd = [
    'gs', '-dNOPAUSE', '-dBATCH', "-sDEVICE=#{device}",
    '-dTextAlphaBits=4', "-sCompression=#{@compression}",
    "-sOutputFile=#{output_base}", "-r#{ppi}", '-f', @pdfpath.to_s
  ]
  stdout, _stderr, _status = Open3.capture3(*cmd)
  @size = stdout.each_line.count { |line| line.start_with?('Page ') }
  # Return an array of expected filenames
  (1..@size).map { |n| File.join(tmpdir, "#{@baseid}-page#{n}.tiff") }
end
#gsdevice ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 48
#
# Chooses the Ghostscript output device from the color description reported
# by #pdfinfo (color name, channel count, bits per channel).
#
# Side effect: for 1-bit gray input, @compression is switched to 'g4'.
#
# @return [String] 'tiffg4', 'tiffgray', or the result of #colordevice
def gsdevice
  color, channels, bpc = pdfinfo.color
  grayscale = color == 'gray'

  if grayscale && bpc == 1
    # CCITT Group 4 black and white
    @compression = 'g4'
    'tiffg4'
  elsif grayscale && bpc > 1
    # 8 bit grayscale
    'tiffgray'
  else
    # everything else is handled as color
    colordevice(channels, bpc)
  end
end
#invalid_pdf? ⇒ Boolean
TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
30 31 32 33 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 30
#
# TODO: put this test somewhere to prevent invalid pdfs from crashing the
# image service.
#
# A PDF counts as invalid when #pdfinfo reports a nil element in its color
# description, a nil width, a nil height, or no entries at all.
#
# @return [Boolean]
def invalid_pdf?
  info = pdfinfo
  info.color.include?(nil) ||
    info.width.nil? ||
    info.height.nil? ||
    info.entries.length.zero?
end
#looks_scanned ⇒ Object
85 86 87 88 89 90 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 85
#
# Heuristic for scanned media: true when the number of entries reported by
# #pdfinfo equals #pagecount and width * height exceeds 10 * 1024 * 1024.
#
# @return [Boolean]
def looks_scanned
  info = pdfinfo
  pixel_area = info.width * info.height
  one_image_per_page = info.entries.length == pagecount
  return false unless one_image_per_page

  # single 10mp+ image per page?
  pixel_area > 10 * 1024 * 1024
end
#pagecount ⇒ Object
TODO: this method came from newspaper gem but appears to be unused. Is it needed anywhere? def gstext
cmd = "gs -q -dNOPAUSE -dBATCH -sDEVICE=txtwrite " \
"-sOutputFile=- -f #{@pdfpath}"
Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
@pdftext = stdout.read
end
@pdftext
end
73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 73
#
# Asks the +pdfinfo+ command-line tool how many pages the PDF has.
#
# The command is run through Open3.capture3 with an argument list, so no
# shell is involved and paths containing spaces or shell metacharacters are
# passed through verbatim. The count is parsed from the last field of the
# 'Pages:' line and stored in @pagecount. If the output has no such line,
# for example because the tool could not read the file, the count is 0.
#
# The command is re-run on every call; the result is not memoized.
#
# @return [Integer] the page count, or 0 when it cannot be determined
def pagecount
  stdout, _stderr, _status = Open3.capture3('pdfinfo', @pdfpath.to_s)
  pages_line = stdout.each_line.find { |line| line.start_with?('Pages:') }
  @pagecount = pages_line ? pages_line.split.last.to_i : 0
end
#pdfinfo ⇒ Object
Returns the memoized PdfImageExtractionService built for this PDF.
24 25 26 27 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 24
#
# Returns the PdfImageExtractionService for this PDF, building it from
# @pdfpath on first use and caching it in @info.
#
# @return [IiifPrint::SplitPdfs::PdfImageExtractionService]
def pdfinfo
  @info ||= IiifPrint::SplitPdfs::PdfImageExtractionService.new(@pdfpath)
end
#ppi ⇒ Object
92 93 94 95 96 97 98 99 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 92
#
# Resolution to hand to Ghostscript. For scanned-looking media this defers
# to the image PPI detected by #pdfinfo; otherwise a fixed 400 is used.
#
# @return [Object] 400, or whatever +pdfinfo.ppi+ reports
def ppi
  looks_scanned ? pdfinfo.ppi : 400
end
#tmpdir ⇒ Object
35 36 37 38 |
# File 'lib/iiif_print/split_pdfs/pages_into_images_service.rb', line 35
#
# Returns the working directory for generated files, creating it with
# Dir.mktmpdir on first use and caching the path in @tmpdir.
#
# @return [String] path of the temporary directory
def tmpdir
  @tmpdir ||= Dir.mktmpdir
end