Class: Bulkrax::Importer
Overview
rubocop:disable Metrics/ClassLength
Constant Summary
collapse
- DEFAULT_OBJECT_TYPES =
%w[collection work file_set relationship].freeze
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status_at, #succeeded?
#file?, #increment_counters, #key_without_numbers, #keys_without_numbers, #last_imported_at, #next_import_at, #parser, #parser_class, #zip?, #zip_file?
Instance Attribute Details
#current_run(skip_counts: false) ⇒ Object
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
# File 'app/models/bulkrax/importer.rb', line 112
# Returns the run record for this import, creating and memoizing it on first use.
#
# A newly created run has its expected entry totals filled in from the parser,
# unless the import is a zipped file or the caller opts out via +skip_counts+.
#
# @param skip_counts [Boolean] when true, the totals are left untouched
# @return [Object] the memoized run record
def current_run(skip_counts: false)
  return @current_run if @current_run.present?

  @current_run = importer_runs.create!
  # No totals for zipped file imports, nor when the caller asked to skip them.
  return @current_run if (file? && zip?) || skip_counts

  totals = {
    total_work_entries: limit || parser.works_total,
    total_collection_entries: parser.collections_total,
    total_file_set_entries: parser.file_sets_total
  }
  @current_run.update!(totals)
  @current_run
end
|
#file ⇒ Object
Returns the value of attribute file.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader: returns whatever is currently stored in @file (nil when unset).
def file
@file
end
|
#file_style ⇒ Object
Returns the value of attribute file_style.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader: returns whatever is currently stored in @file_style (nil when unset).
def file_style
@file_style
end
|
#only_updates ⇒ Object
Returns the value of attribute only_updates.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader: returns whatever is currently stored in @only_updates (nil when unset).
def only_updates
@only_updates
end
|
Class Method Details
.frequency_enums ⇒ Object
92
93
94
95
96
97
|
# File 'app/models/bulkrax/importer.rb', line 92
# Label / ISO 8601 duration pairs offered as import frequencies.
#
# @return [Array<Array(String, String)>] +[label, duration]+ pairs
def self.frequency_enums
  labels = ['Daily', 'Monthly', 'Yearly', 'Once (on save)']
  durations = ['P1D', 'P1M', 'P1Y', 'PT0S']
  labels.zip(durations)
end
|
.safe_uri_filename(uri) ⇒ Object
32
33
34
35
36
37
38
39
|
# File 'app/models/bulkrax/importer.rb', line 32
def self.safe_uri_filename(uri)
r = Faraday.head(uri.to_s)
return CGI.parse(r.['content-disposition'])["filename"][0].delete("\"")
rescue
filename = File.basename(uri.to_s)
filename.delete!('/')
filename.presence || SecureRandom.uuid
end
|
Instance Method Details
#completed_statuses ⇒ Object
150
151
152
153
154
|
# File 'app/models/bulkrax/importer.rb', line 150
# Latest statuses of this importer's entries whose status message is 'Complete'.
# The query object is memoized for the lifetime of the instance.
#
# @return [Object] the result of the +where+ call on the latest-status scope
def completed_statuses
  @completed_statuses ||= begin
    entry_ids = entries.pluck(:id)
    Bulkrax::Status
      .latest_by_statusable
      .includes(:statusable)
      .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
  end
end
|
#default_field_mapping ⇒ Object
78
79
80
81
82
83
84
85
86
|
# File 'app/models/bulkrax/importer.rb', line 78
# Builds the default field mapping from the parser's import fields.
#
# When the parser reports no import fields at all (nil), the importer's own
# +field_mapping+ is returned unchanged. Otherwise every non-nil field is run
# through +Bulkrax.default_field_mapping+ and the results are merged into a
# single indifferent-access hash.
#
# @return [Object] +field_mapping+ or an ActiveSupport::HashWithIndifferentAccess
def default_field_mapping
  fields = parser.import_fields
  return field_mapping if fields.nil?

  per_field = fields.reject(&:nil?).map { |name| Bulkrax.default_field_mapping.call(name) }
  ActiveSupport::HashWithIndifferentAccess.new(per_field.reduce(:merge))
end
|
#errored_entries_csv_path ⇒ Object
287
288
289
|
# File 'app/models/bulkrax/importer.rb', line 287
# Location of the CSV that lists this import's errored entries: a file named
# "import_<path_string>_errored_entries.csv" under the parser's base path.
# Memoized after the first call.
#
# @return [String]
def errored_entries_csv_path
  @errored_entries_csv_path ||= begin
    base_dir = parser.base_path
    File.join(base_dir, "import_#{path_string}_errored_entries.csv")
  end
end
|
#existing_entries? ⇒ Boolean
212
213
214
|
# File 'app/models/bulkrax/importer.rb', line 212
# Whether the parser's 'file_style' field mentions "Existing Entries".
#
# Uses +match?+ so the predicate yields a strict boolean (the previous +match+
# returned a MatchData or nil) and a missing 'file_style' gives +false+.
#
# @return [Boolean]
def existing_entries?
  parser.parser_fields['file_style'].to_s.match?(/Existing Entries/)
end
|
#failed_entries? ⇒ Boolean
133
134
135
|
# File 'app/models/bulkrax/importer.rb', line 133
# Whether the +failed+ scope of this importer's entries has any members.
#
# @return [Boolean]
def failed_entries?
  failed_scope = entries.failed
  failed_scope.any?
end
|
#failed_messages ⇒ Object
143
144
145
146
147
148
|
# File 'app/models/bulkrax/importer.rb', line 143
# Groups the ids of the failed statuses by their error message.
#
# @return [Hash{Object => Array}] error message => ids of the statuses carrying it
def failed_messages
  failed_statuses
    .group_by(&:error_message)
    .transform_values { |statuses| statuses.map(&:id) }
end
|
#failed_statuses ⇒ Object
137
138
139
140
141
|
# File 'app/models/bulkrax/importer.rb', line 137
# Latest statuses of this importer's entries whose status message is 'Failed'.
# The query object is memoized for the lifetime of the instance.
#
# @return [Object] the result of the +where+ call on the latest-status scope
def failed_statuses
  @failed_statuses ||= begin
    entry_ids = entries.pluck(:id)
    Bulkrax::Status
      .latest_by_statusable
      .includes(:statusable)
      .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Failed')
  end
end
|
#frequency ⇒ Object
103
104
105
106
|
# File 'app/models/bulkrax/importer.rb', line 103
# The stored frequency as an ISO8601::Duration. A missing value is treated as
# "PT0S" (a zero-length duration).
#
# @return [ISO8601::Duration]
def frequency
  ISO8601::Duration.new(self[:frequency] || "PT0S")
end
|
#frequency=(frequency) ⇒ Object
99
100
101
|
# File 'app/models/bulkrax/importer.rb', line 99
# Stores the frequency attribute as the string form of an ISO8601::Duration
# built from the given value.
#
# @param frequency [Object] anything ISO8601::Duration.new accepts
def frequency=(frequency)
  duration = ISO8601::Duration.new(frequency)
  self[:frequency] = duration.to_s
end
|
#import_collections ⇒ Object
220
221
222
|
# File 'app/models/bulkrax/importer.rb', line 220
# Runs #import_objects restricted to the 'collection' object type.
#
# @return [Object] whatever #import_objects returns
def import_collections
  types = ['collection']
  import_objects(types)
end
|
#import_file_path ⇒ Object
160
161
162
|
# File 'app/models/bulkrax/importer.rb', line 160
# The 'import_file_path' entry of the parser fields (nil when absent).
#
# @return [Object, nil]
def import_file_path
  parser_fields['import_file_path']
end
|
#import_file_sets ⇒ Object
224
225
226
|
# File 'app/models/bulkrax/importer.rb', line 224
# Runs #import_objects restricted to the 'file_set' object type.
#
# @return [Object] whatever #import_objects returns
def import_file_sets
  types = ['file_set']
  import_objects(types)
end
|
#import_metadata_format ⇒ Object
The format for metadata for the incoming import; corresponds to an Entry class
259
260
261
|
# File 'app/models/bulkrax/importer.rb', line 259
# Label / entry-class-name pairs for the supported metadata formats.
#
# @return [Array<Array(String, String)>] +[label, entry class name]+ pairs
def import_metadata_format
  csv_option = ['CSV', 'Bulkrax::CsvEntry']
  rdf_option = ['RDF (N-Triples)', 'Bulkrax::RdfEntry']
  [csv_option, rdf_option]
end
|
#import_objects(types_array = nil) ⇒ Object
234
235
236
237
238
239
240
241
242
|
# File 'app/models/bulkrax/importer.rb', line 234
# Imports the given object types through the parser.
#
# Defaults +only_updates+ to false, saves the importer if it is still a new
# record, then either rebuilds existing entries or creates objects (depending
# on #existing_entries?) and finally marks every untouched entry as skipped.
# Any StandardError is recorded with #set_status_info instead of propagating.
#
# @param types_array [Array<String>, nil] object types to import; falls back to
#   DEFAULT_OBJECT_TYPES when nil
# @return [Object] result of #mark_unseen_as_skipped, or of #set_status_info on error
def import_objects(types_array = nil)
  self.only_updates ||= false
  # These two statements must stay separate: fused, `new_record?` receives an
  # argument and raises, which the rescue below would swallow.
  save if new_record?
  types = types_array || DEFAULT_OBJECT_TYPES
  existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types)
  mark_unseen_as_skipped
rescue StandardError => e
  set_status_info(e)
end
|
#import_relationships ⇒ Object
228
229
230
|
# File 'app/models/bulkrax/importer.rb', line 228
# Runs #import_objects restricted to the 'relationship' object type.
#
# @return [Object] whatever #import_objects returns
def import_relationships
  types = ['relationship']
  import_objects(types)
end
|
#import_works ⇒ Object
216
217
218
|
# File 'app/models/bulkrax/importer.rb', line 216
# Runs #import_objects restricted to the 'work' object type.
#
# @return [Object] whatever #import_objects returns
def import_works
  types = ['work']
  import_objects(types)
end
|
#importer_unzip_path(mkdir: false) ⇒ Object
The type of metadata for the incoming import, either one file for all works, or one file per work.
def import_metadata_type
[['Single Metadata File for all works', 'single'], ['Multiple Files, one per Work', 'multi']]
end
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
|
# File 'app/models/bulkrax/importer.rb', line 268
# Resolves the directory used for this import's unzipped files.
#
# @param mkdir [Boolean] when true, the zip's directory is created in the first
#   branch; in the second branch the computed path is returned even if it does
#   not exist yet (this method does not create it there)
# @return [String, nil] a directory path; nil when the final glob matches nothing
def importer_unzip_path(mkdir: false)
entry = parser_fields&.[]('import_file_path')
# An existing ".zip" import file whose file_style differs from the translated
# server-path label: use the directory the zip itself lives in.
if entry.is_a?(String) && entry.end_with?('.zip') && File.file?(entry) && parser_fields["file_style"] != I18n.t('bulkrax.importer.xml.file_style.server_path')
unzip_dir = File.dirname(entry)
FileUtils.mkdir_p(unzip_dir) if mkdir
return unzip_dir
end
# Otherwise default to "import_<path_string>" under the parser's base path (memoized).
@importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true
# The default directory is missing: drop the last "_"-separated segment and
# look for sibling paths sharing that prefix, keeping the one whose remainder
# (minus its first character) has the largest integer value.
# NOTE(review): presumably the dropped segment is the run id appended by
# path_string - confirm, since path_string's fallback form has no run id.
base_importer_unzip_path = @importer_unzip_path.split('_')[0...-1].join('_')
@importer_unzip_path = Dir.glob(base_importer_unzip_path + '*').sort_by { |path| path.split(base_importer_unzip_path).last[1..-1].to_i }.last
end
|
#last_run ⇒ Object
129
130
131
|
# File 'app/models/bulkrax/importer.rb', line 129
# The last of this importer's runs, memoized after the first lookup.
#
# @return [Object, nil]
def last_run
  @last_run ||= importer_runs.last
end
|
#mapping ⇒ Object
If field_mapping is empty, set up a default based on the export_properties
65
66
67
68
69
70
71
72
73
74
75
76
|
# File 'app/models/bulkrax/importer.rb', line 65
# The effective field mapping, memoized.
#
# * +field_mapping+ blank or equal to +[{}]+: the default mapping, but only
#   when the parser has import fields or +field_mapping+ is exactly +[{}]+
#   (otherwise nil).
# * anything else: the default mapping merged with +field_mapping+, the latter
#   taking precedence.
#
# @return [Object, nil]
def mapping
  @mapping ||= begin
    custom = field_mapping
    placeholder = custom == [{}]
    if custom.blank? || placeholder
      default_field_mapping if parser.import_fields.present? || placeholder
    else
      default_field_mapping.merge(custom)
    end
  end
end
|
#mark_unseen_as_skipped ⇒ Object
After an import, any entries we did not touch are skipped. They are not really pending, complete for the last run, or failed.
246
247
248
249
250
|
# File 'app/models/bulkrax/importer.rb', line 246
# Sets the status 'Skipped' on every entry whose identifier is not among the
# keys of #seen.
#
# @return [Object] whatever +find_each+ returns
def mark_unseen_as_skipped
  untouched = entries.where.not(identifier: seen.keys)
  untouched.find_each { |entry| entry.set_status_info('Skipped') }
end
|
#metadata_only? ⇒ Boolean
208
209
210
|
# File 'app/models/bulkrax/importer.rb', line 208
# True only when the parser's 'metadata_only' field equals +true+; any other
# value (including truthy strings) yields false.
#
# @return [Boolean]
def metadata_only?
  flag = parser.parser_fields['metadata_only']
  flag == true
end
|
#original_file ⇒ Object
168
169
170
|
# File 'app/models/bulkrax/importer.rb', line 168
# The import file path, but only when #original_file? holds; nil otherwise.
#
# @return [Object, nil]
def original_file
  return unless original_file?

  import_file_path
end
|
#original_file? ⇒ Boolean
164
165
166
|
# File 'app/models/bulkrax/importer.rb', line 164
# Whether an import file path is set and exists on disk. Returns the falsy
# path itself (e.g. nil) when no path is set.
#
# @return [Boolean, nil]
def original_file?
  location = import_file_path
  location && File.exist?(location)
end
|
#original_files ⇒ Array<Hash>
Returns all available original files (CSV and ZIP if present)
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
# File 'app/models/bulkrax/importer.rb', line 174
# Lists the original files that are present on disk: the import file (reported
# with type :csv) followed by the attachments zip (type :zip).
#
# @return [Array<Hash>] hashes with :path, :name and :type keys
def original_files
  candidates = {
    csv: import_file_path,
    zip: parser_fields['attachments_zip_path']
  }

  candidates.each_with_object([]) do |(kind, location), found|
    next unless location && File.exist?(location)

    found << { path: location, name: File.basename(location), type: kind }
  end
end
|
#parser_fields ⇒ Object
88
89
90
|
# File 'app/models/bulkrax/importer.rb', line 88
# The stored parser_fields attribute, or an empty hash when it is not set.
#
# @return [Object] the stored value, or +{}+
def parser_fields
  stored = self[:parser_fields]
  stored || {}
end
|
#path_string ⇒ Object
291
292
293
294
295
|
# File 'app/models/bulkrax/importer.rb', line 291
# Identifier used in this import's file and directory names:
# "<id>_<created_at as YYYYmmddHHMMSS>_<id of the last importer run>".
# When the last run's id cannot be read, the run suffix is left off.
#
# @return [String]
def path_string
  prefix = "#{id}_#{created_at.strftime('%Y%m%d%H%M%S')}"
  begin
    "#{prefix}_#{importer_runs.last.id}"
  rescue StandardError
    prefix
  end
end
|
#record_status ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'app/models/bulkrax/importer.rb', line 49
# Records the importer's final status based on the current run's counters.
#
# Does nothing while records are still enqueued. Otherwise:
# * failed records and invalid records present: an ImportFailed error carrying
#   the invalid records, one per line
# * failed records only: 'Complete (with failures)'
# * no failed records: 'Complete'
#
# @return [Object, nil] nil while records are enqueued, else the result of
#   +set_status_info+
def record_status
  importer_run = ImporterRun.find(current_run.id)
  # The run is not finished yet; leave the status alone.
  return if importer_run.enqueued_records.positive?

  if importer_run.failed_records.positive?
    if importer_run.invalid_records.present?
      e = Bulkrax::ImportFailed.new('Failed with Invalid Records', importer_run.invalid_records.split("\n"))
      importer_run.importer.set_status_info(e)
    else
      importer_run.importer.set_status_info('Complete (with failures)')
    end
  else
    importer_run.importer.set_status_info('Complete')
  end
end
|
#remove_and_rerun ⇒ Object
204
205
206
|
# File 'app/models/bulkrax/importer.rb', line 204
# The 'remove_and_rerun' entry of the parser fields (nil when absent).
#
# @return [Object, nil]
def remove_and_rerun
  parser_fields['remove_and_rerun']
end
|
#replace_files ⇒ Object
196
197
198
|
# File 'app/models/bulkrax/importer.rb', line 196
# The 'replace_files' entry of the parser fields (nil when absent).
#
# @return [Object, nil]
def replace_files
  parser_fields['replace_files']
end
|
#schedulable? ⇒ Boolean
108
109
110
|
# File 'app/models/bulkrax/importer.rb', line 108
# Whether the importer recurs, i.e. its frequency spans a non-zero number of seconds.
#
# @return [Boolean]
def schedulable?
  !frequency.to_seconds.zero?
end
|
#seen ⇒ Object
156
157
158
|
# File 'app/models/bulkrax/importer.rb', line 156
# Memoized hash that starts out empty. Elsewhere in this class only its keys
# are read (entries whose identifier is not among them get marked as skipped).
def seen
@seen ||= {}
end
|
#status ⇒ Object
41
42
43
44
45
46
47
|
# File 'app/models/bulkrax/importer.rb', line 41
# Reports 'Validated' for validate-only importers; otherwise defers to the
# inherited implementation.
#
# @return [Object]
def status
  return 'Validated' if validate_only

  super
end
|
#unique_collection_identifier(id) ⇒ Object
Prepend the base_url to ensure unique set identifiers
254
255
256
|
# File 'app/models/bulkrax/importer.rb', line 254
# Prefixes +id+ with the third '/'-separated segment of the 'base_url' parser
# field (the host part for a URL such as "http://host/path"), joined by "_".
#
# @param id [Object] the identifier to qualify
# @return [String]
def unique_collection_identifier(id)
  host = parser_fields['base_url'].split('/')[2]
  "#{host}_#{id}"
end
|
#update_files ⇒ Object
200
201
202
|
# File 'app/models/bulkrax/importer.rb', line 200
# The 'update_files' entry of the parser fields (nil when absent).
#
# @return [Object, nil]
def update_files
  parser_fields['update_files']
end
|