Module: IiifPrint

Extended by:
ActiveSupport::Autoload
Defined in:
lib/iiif_print.rb,
lib/iiif_print/data.rb,
lib/iiif_print/engine.rb,
lib/iiif_print/errors.rb,
lib/iiif_print/version.rb,
lib/iiif_print/metadata.rb,
lib/iiif_print/image_tool.rb,
lib/iiif_print/configuration.rb,
lib/iiif_print/data/work_file.rb,
lib/iiif_print/data/work_files.rb,
lib/iiif_print/lineage_service.rb,
lib/iiif_print/text_extraction.rb,
lib/iiif_print/data/path_helper.rb,
lib/iiif_print/jp2_image_metadata.rb,
lib/iiif_print/data/fileset_helper.rb,
lib/iiif_print/jobs/application_job.rb,
lib/iiif_print/data/work_derivatives.rb,
lib/iiif_print/catalog_search_builder.rb,
lib/iiif_print/jp2_derivative_service.rb,
lib/iiif_print/pdf_derivative_service.rb,
lib/iiif_print/base_derivative_service.rb,
lib/iiif_print/homepage_search_builder.rb,
lib/iiif_print/tiff_derivative_service.rb,
lib/iiif_print/split_pdfs/base_splitter.rb,
lib/iiif_print/text_extraction/page_ocr.rb,
app/models/iiif_print/application_record.rb,
lib/iiif_print/works_controller_behavior.rb,
app/helpers/iiif_print/application_helper.rb,
app/mailers/iiif_print/application_mailer.rb,
lib/iiif_print/jobs/request_split_pdf_job.rb,
app/models/iiif_print/ingest_file_relation.rb,
app/models/iiif_print/pending_relationship.rb,
lib/generators/iiif_print/assets_generator.rb,
lib/iiif_print/text_extraction/alto_reader.rb,
lib/iiif_print/text_extraction/hocr_reader.rb,
lib/iiif_print/text_extraction/render_alto.rb,
app/models/iiif_print/derivative_attachment.rb,
app/models/iiif_print/iiif_search_decorator.rb,
lib/generators/iiif_print/install_generator.rb,
app/helpers/iiif_print/iiif_helper_decorator.rb,
lib/iiif_print/jobs/child_works_from_pdf_job.rb,
lib/iiif_print/jobs/create_relationships_job.rb,
app/models/concerns/iiif_print/set_child_flag.rb,
lib/iiif_print/text_formats_from_alto_service.rb,
app/indexers/concerns/iiif_print/child_indexer.rb,
app/controllers/iiif_print/split_pdfs_controller.rb,
app/services/iiif_print/derivative_rodeo_service.rb,
lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb,
lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb,
app/indexers/concerns/iiif_print/file_set_indexer.rb,
lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb,
lib/iiif_print/text_extraction_derivative_service.rb,
lib/iiif_print/text_extraction/word_coords_builder.rb,
lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb,
app/models/iiif_print/iiif_search_response_decorator.rb,
app/actors/iiif_print/actors/file_set_actor_decorator.rb,
app/search_builders/concerns/iiif_print/exclude_models.rb,
lib/generators/iiif_print/catalog_controller_generator.rb,
lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb,
app/presenters/iiif_print/work_show_presenter_decorator.rb,
app/services/iiif_print/manifest_builder_service_behavior.rb,
lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb,
app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb,
lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb,
app/search_builders/concerns/iiif_print/allinson_flex_fields.rb,
app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb,
lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb,
app/search_builders/concerns/iiif_print/highlight_search_params.rb,
app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb

Overview

override Hyrax to remove splitting upon work delete

Defined Under Namespace

Modules: Actors, AllinsonFlexFields, ApplicationHelper, BlacklightIiifSearch, ChildIndexer, Data, ExcludeModels, FileSetIndexer, HighlightSearchParams, IiifHelperDecorator, IiifManifestPresenterBehavior, IiifManifestPresenterFactoryBehavior, IiifPrintHelperBehavior, IiifSearchDecorator, IiifSearchResponseDecorator, Jobs, LineageService, ManifestBuilderServiceBehavior, SetChildFlag, SplitPdfs, TextExtraction, WorkShowPresenterDecorator, WorksControllerBehaviorDecorator Classes: ApplicationMailer, ApplicationRecord, AssetsGenerator, BaseDerivativeService, CatalogControllerGenerator, CatalogSearchBuilder, CollectionFieldShim, Configuration, DataError, DerivativeAttachment, DerivativeRodeoService, Engine, Field, HomepageSearchBuilder, IiifPrintError, ImageTool, IngestFileRelation, InstallGenerator, JP2DerivativeService, JP2ImageMetadata, Metadata, MissingFileError, PDFDerivativeService, PendingRelationship, PluggableDerivativeService, SplitPdfsController, TIFFDerivativeService, TextExtractionDerivativeService, TextFormatsFromALTOService, UnexpectedMimeTypeError, WorkNotConfiguredToSplitFileSetError

Constant Summary collapse

# Fallback settings: .model_configuration copies each of these into the caller's options whenever
# the caller left that key unset (or falsy).
DEFAULT_MODEL_CONFIGURATION =
{
  # Split a PDF into individual page images and create a new child work for each image.
  pdf_splitter_job: IiifPrint::Jobs::ChildWorksFromPdfJob,
  # Default splitter service (by its name, one that turns PDF pages into JPGs).
  pdf_splitter_service: IiifPrint::SplitPdfs::PagesToJpgsSplitter,
  # Derivative service plugins enabled by default.
  derivative_service_plugins: [
    IiifPrint::TextExtractionDerivativeService
  ]
}.freeze
GEM_PATH =

module constants:

Gem::Specification.find_by_name("iiif_print").gem_dir
VERSION =
'1.1.0'.freeze

Class Method Summary collapse

Class Method Details

.allinson_flex_fields ⇒ Array<IiifPrint::Field>

Returns:



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/iiif_print.rb', line 224

# Loads the AllinsonFlex profile properties that have a 'display_label' text and memoizes them.
#
# When no loaded property is named 'collection', a CollectionFieldShim is appended.
#
# @return [Array<#name, #value, #indexing>] the memoized list; the same Array object is returned
#   on every later call.
def self.allinson_flex_fields
  return @allinson_flex_fields if defined?(@allinson_flex_fields)

  labelled_properties = AllinsonFlex::ProfileProperty
                        .joins(:texts)
                        .where(allinson_flex_profile_texts: { name: 'display_label' })
                        .distinct
                        .select(:name, :value, :indexing)

  @allinson_flex_fields = labelled_properties.to_a.tap do |collected|
    # Only add the shim when the profile itself does not define a 'collection' property.
    next if labelled_properties.exists?(name: 'collection')

    collected << CollectionFieldShim.new(name: :collection, value: 'Collection', indexing: [])
  end
end

.conditionally_submit_split_for(work:, file_set:, locations:, user:, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts) ⇒ Symbol

Returns :no_pdfs_for_splitting when none of the locations are to be split.

Parameters:

  • work (ActiveFedora::Base)
  • file_set (FileSet)
  • locations (Array<String>)
  • user (User)

Returns:

  • (Symbol)

    :no_pdfs_for_splitting when none of the locations are to be split.



261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/iiif_print.rb', line 261

# Enqueues the work's configured PDF splitter job for those locations that qualify for splitting.
#
# @param work [ActiveFedora::Base] queried (via #try) for its iiif_print_config
# @param file_set [FileSet] forwarded to the job
# @param locations [Array<String>] candidate paths; only those accepted by
#   .split_for_path_suffix? are forwarded
# @param user [User] forwarded to the job
# @param skip_these_endings [Array<#downcase>] endings that exclude a path from splitting
# @return [Symbol] :no_pdfs_for_splitting when no location qualifies
# @return [Object, nil] otherwise the result of the job's perform_later, or nil when the work
#   exposes no iiif_print_config or no pdf_splitter_job
def self.conditionally_submit_split_for(work:, file_set:, locations:, user:, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts)
  splittable = locations.select { |path| split_for_path_suffix?(path, skip_these_endings: skip_these_endings) }
  return :no_pdfs_for_splitting if splittable.empty?

  splitter_job = work.try(:iiif_print_config)&.pdf_splitter_job
  return if splitter_job.nil?

  # The trailing 0 is a no-longer-used parameter, kept to preserve the job's method signature.
  splitter_job.perform_later(file_set, splittable, user, work.admin_set_id, 0)
end

.config {|config| ... } ⇒ IiifPrint::Configuration

Exposes the IiifPrint configuration.

Yield Parameters:

Returns:

See Also:



40
41
42
43
44
# File 'lib/iiif_print.rb', line 40

# Exposes the memoized IiifPrint configuration, creating it on first use.
#
# @yieldparam config [IiifPrint::Configuration] the configuration, yielded only when a block is given
# @return [IiifPrint::Configuration]
def self.config(&block)
  @config = IiifPrint::Configuration.new unless @config
  block.call(@config) if block
  @config
end

.default_fields(fields: config.metadata_fields) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

TODO:

Figure out a way to use a custom label; right now it gets rendered from the title.



185
186
187
188
189
190
191
192
193
# File 'lib/iiif_print.rb', line 185

# Wraps each configured metadata field in a Field, with its label taken from Hyrax's
# AttributeRenderer.
#
# @api private
# @param fields [Enumerable] entries whose #first is the field name and whose #last is the
#   field's options (for example a Hash of name => options); defaults to config.metadata_fields
# @return [Array<Field>] one Field per entry, in iteration order
# @todo Figure out a way to use a custom label.
def self.default_fields(fields: config.metadata_fields)
  fields.map do |field|
    Field.new(
      name: field.first,
      label: Hyrax::Renderers::AttributeRenderer.new(field.first, nil).label,
      options: field.last
    )
  end
end

.fields_for_allinson_flex(fields: allinson_flex_fields, sort_order: IiifPrint.config.iiif_metadata_field_presentation_order) ⇒ Object

Parameters:



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/iiif_print.rb', line 197

# Converts AllinsonFlex properties into Field objects for presentation.
#
# Properties whose indexing includes 'admin_only' are dropped, and when several properties share
# a name only the first one (after sorting) is kept.
#
# @param fields [Array<#name, #value, #indexing>] defaults to .allinson_flex_fields
# @param sort_order [Array<Symbol>] the desired name order, handed to .sort_af_fields!;
#   defaults to IiifPrint.config.iiif_metadata_field_presentation_order
# @return [Array<Field>]
def self.fields_for_allinson_flex(fields: allinson_flex_fields, sort_order: IiifPrint.config.iiif_metadata_field_presentation_order)
  fields = sort_af_fields!(fields, sort_order: sort_order)
  fields.each_with_object({}) do |field, hash|
    # filters out admin_only fields
    next if field.indexing&.include?('admin_only')

    # WARNING: This is assuming A LOT
    # This takes the Allinson Flex fields that have the same name and only
    # uses the first one while discarding the rest.  There is currently no way to
    # control which one(s) are discarded, but this fits for the moment.
    next if hash.key?(field.name)

    # currently only supports the faceted option
    # Why the `render_as:`? This was originally derived from Hyku default attributes
    # @see https://github.com/samvera/hyku/blob/c702844de4c003eaa88eb5a7514c7a1eae1b289e/app/views/hyrax/base/_attribute_rows.html.erb#L3
    hash[field.name] = Field.new(
      name: field.name,
      label: field.value,
      options: field.indexing&.include?('facetable') ? { render_as: :faceted } : nil
    )
  end.values
end

.grandparent_for(file_set) ⇒ #work?, ...

Return the parent’s parent of the given :file_set.

Parameters:

  • file_set (FileSet)

Returns:

  • (#work?, Hydra::PCDM::Work)
  • (NilClass)

    when no grand parent is found.



68
69
70
71
72
73
74
75
76
77
78
# File 'lib/iiif_print.rb', line 68

# Returns the parent's parent of the given file set.
#
# HACK: This assumes a particular structure, namely that an image page split from a PDF belongs
# to a file set whose work is itself the child of exactly one work (a single grandparent).  That
# is reasonable for IIIF Print but not valid beyond it; e.g. GenericWork has no `parent` method
# but does have a `parents` method.
#
# @param file_set [FileSet]
# @return [#work?, Hydra::PCDM::Work] the first grandparent found
# @return [NilClass] when no grandparent is found
def self.grandparent_for(file_set)
  parent = parent_for(file_set)

  via_parent_works = parent.try(:parent_works).try(:first)
  return via_parent_works if via_parent_works

  via_parents = parent.try(:parents).try(:first)
  return via_parents if via_parents

  # Last resort: the first member_of entry that reports itself as a work.
  parent&.member_of&.find(&:work?)
end

.manifest_metadata_for(work:, version: config.default_iiif_manifest_version, fields: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields, current_ability:, base_url:) ⇒ Array<Hash>

Map the given work’s metadata to the given IIIF version spec’s metadata structure. This is intended to be a drop-in replacement for `Hyrax::IiifManifestPresenter#manifest_metadata`.

Parameters:

  • work (Object)
  • version (Integer) (defaults to: config.default_iiif_manifest_version)
  • fields (Array<IiifPrint::Metadata::Field>, Array<#name, #label>) (defaults to: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields)

Returns:

  • (Array<Hash>)

See Also:

  • for expected output
  • Hyrax::IiifManifestPresenter#manifest_metadata


161
162
163
164
165
166
167
168
169
170
171
# File 'lib/iiif_print.rb', line 161

# Maps the given work's metadata to the metadata structure of the given IIIF version by
# delegating to Metadata.
#
# @param work [Object]
# @param version [Integer] defaults to config.default_iiif_manifest_version
# @param fields [Array<#name, #label>] defaults to .fields_for_allinson_flex when the
#   AllinsonFlex constant is defined, otherwise to .default_fields
# @param current_ability [Object] forwarded unchanged
# @param base_url [String] forwarded unchanged
# @return [Array<Hash>]
def self.manifest_metadata_for(work:,
                               version: config.default_iiif_manifest_version,
                               fields: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields,
                               current_ability:,
                               base_url:)
  # NOTE(review): the callee name was lost in this listing and is restored here as
  # `build_metadata_for` — confirm against IiifPrint::Metadata.
  Metadata.build_metadata_for(work: work,
                              version: version,
                              fields: fields,
                              current_ability: current_ability,
                              base_url: base_url)
end

.manifest_metadata_from(work:, presenter:) ⇒ Object



173
174
175
176
177
# File 'lib/iiif_print.rb', line 173

# Builds manifest metadata for a work, taking the ability and base URL from a presenter.
#
# @param work [Object] forwarded to .manifest_metadata_for
# @param presenter [#ability, #current_ability, #base_url, #request] the ability is read from
#   #ability, falling back to #current_ability; the base URL from #base_url, falling back to
#   #request's base_url
# @return [Object] whatever IiifPrint.manifest_metadata_for returns
def self.manifest_metadata_from(work:, presenter:)
  current_ability = presenter.try(:ability) || presenter.try(:current_ability)
  base_url = presenter.try(:base_url) || presenter.try(:request)&.base_url
  IiifPrint.manifest_metadata_for(work: work, current_ability: current_ability, base_url: base_url)
end

.model_configuration(**kwargs) ⇒ Module

TODO:

Because not every job will split PDFs and write to a child model. May want to introduce an alternative splitting method to create new filesets on the existing work instead of new child works.

This method is responsible for configuring a model for additional derivative generation.

Examples:

class Book < ActiveFedora::Base
  include IiifPrint.model_configuration(
    pdf_split_child_model: Page,
    derivative_service_plugins: [
      IiifPrint::JP2DerivativeService,
      IiifPrint::PDFDerivativeService,
      IiifPrint::TextExtractionDerivativeService,
      IiifPrint::TIFFDerivativeService
    ]
  )
end

Parameters:

  • kwargs (Hash<Symbol,Object>)

    the configuration values that overrides the DEFAULT_MODEL_CONFIGURATION.

Options Hash (**kwargs):

Returns:

  • (Module)

See Also:



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/iiif_print.rb', line 130

# Builds an anonymous module that, once included in a model, exposes that model's IiifPrint
# configuration.
#
# @param kwargs [Hash<Symbol,Object>] per-model settings; every key of
#   DEFAULT_MODEL_CONFIGURATION that is absent (or falsy) is filled in with the default value
# @return [Module] defines #iiif_print_config? (always true) and #iiif_print_config (a memoized
#   ModelConfig built from the merged settings)
def self.model_configuration(**kwargs)
  # The gem cannot know everything a model may want configured, so it only guarantees that its
  # own defaults are present.
  DEFAULT_MODEL_CONFIGURATION.each_pair { |setting, fallback| kwargs[setting] ||= fallback }

  Module.new do
    define_method(:iiif_print_config?) { true }

    define_method(:iiif_print_config) do
      @iiif_print_config ||= ModelConfig.new(**kwargs)
    end
  end
end

.parent_for(file_set) ⇒ #work?, ...

Return the immediate parent of the given :file_set.

Parameters:

  • file_set (FileSet)

Returns:

  • (#work?, Hydra::PCDM::Work)
  • (NilClass)

    when no parent is found.



56
57
58
59
60
# File 'lib/iiif_print.rb', line 56

# Returns the immediate parent of the given file set.
#
# @param file_set [FileSet]
# @return [#work?, Hydra::PCDM::Work] the file set's #parent, or else the first #member_of entry
#   that reports itself as a work
# @return [NilClass] when no parent is found
def self.parent_for(file_set)
  direct_parent = file_set.parent
  return direct_parent if direct_parent

  # Fall back to the Fedora-stored relationships when the work's aggregation of the file set is
  # not indexed in Solr.
  file_set.member_of.find(&:work?)
end

.sort_af_fields!(fields, sort_order:) ⇒ Object

Parameters:



243
244
245
246
247
248
249
250
# File 'lib/iiif_print.rb', line 243

# Orders fields by the position of their name in sort_order; names missing from sort_order are
# placed after all listed ones.
#
# Despite the bang, the given collection is not modified: a sorted Array is returned.
#
# @param fields [Array<#name>]
# @param sort_order [Array<Symbol>] the desired name order
# @return [Array<#name>] the given fields untouched when sort_order is blank, else a sorted Array
def self.sort_af_fields!(fields, sort_order:)
  return fields if sort_order.blank?

  unlisted_rank = sort_order.length
  fields.sort_by { |field| sort_order.index(field.name.to_sym) || unlisted_rank }
end

.split_for_path_suffix?(path, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts) ⇒ TrueClass, FalseClass

Parameters:

  • path (String)

    the path, hopefully with an extension, to the file we’re considering splitting.

  • skip_these_endings (Array<#downcase>) (defaults to: skip_splitting_pdf_files_that_end_with_these_texts)

    the endings that we should skip for splitting purposes.

Returns:

  • (TrueClass)

    when the path is one we should split

  • (FalseClass)

    when the path is one we should not split

See Also:

  • skip_splitting_pdf_files_that_end_with_these_texts


285
286
287
288
289
# File 'lib/iiif_print.rb', line 285

# Decides whether the file at the given path should be split.
#
# The comparison is case-insensitive on both the path and the endings.
#
# @param path [String] the path, hopefully with an extension, of the file being considered
# @param skip_these_endings [Array<#downcase>] endings that exclude a PDF from splitting
# @return [TrueClass] when the path ends in '.pdf' and in none of the skipped endings
# @return [FalseClass] when the path is not a PDF or ends with a skipped ending
def self.split_for_path_suffix?(path, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts)
  lowered_path = path.downcase
  return false unless lowered_path.end_with?('.pdf')
  return true if skip_these_endings.empty?

  lowered_endings = skip_these_endings.map(&:downcase)
  !lowered_path.end_with?(*lowered_endings)
end