Class: Bulkrax::CsvEntry
- Inherits:
-
Entry
show all
- Defined in:
- app/models/bulkrax/csv_entry.rb
Overview
TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.
Defined Under Namespace
Modules: AttributeBuilderMethod
Classes: CsvPathError, CsvWrapper, MissingMetadata, RecordNotFound
Instance Attribute Summary
Attributes inherited from Entry
#all_attrs
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Entry
#build, child_field, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #work_identifier
#add_local
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status, #status_at, #succeeded?
#build_for_exporter, #file_extension, #filename, #hyrax_record
#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value
#add_metadata, #excluded?, #field_supported?, #field_to, #fields_that_are_always_multiple, #fields_that_are_always_singular, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #schema_form_definitions, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields
Class Method Details
.data_for_entry(data, _source_id, parser) ⇒ Object
85
86
87
88
89
90
91
92
93
94
95
96
|
# File 'app/models/bulkrax/csv_entry.rb', line 85
# Normalizes one CSV row into the raw hash stored for an entry.
#
# @param data [CSV::Table, #to_h] a CSV table (only its first row is used) or a single row-like object
# @param _source_id [Object] unused
# @param parser [#related_parents_raw_mapping, #related_children_raw_mapping] supplies the
#   column names configured for parent/child relationships
# @return [Hash] the row as a hash; :model is set when the row has one, and :parents / :children
#   are copied from their configured columns when those columns exist under a different name
def self.data_for_entry(data, _source_id, parser)
  row = data.is_a?(CSV::Table) ? data.first : data
  entry_data = row.to_h

  model = row[:model]
  entry_data[:model] = model if model.present?

  relationship_columns = {
    parents: parser.related_parents_raw_mapping,
    children: parser.related_children_raw_mapping
  }
  relationship_columns.each do |canonical_key, raw_mapping|
    next unless raw_mapping.present?
    # Nothing to copy when the column already carries the canonical name.
    next if raw_mapping == canonical_key.to_s

    column = raw_mapping.to_sym
    entry_data[canonical_key] = entry_data[column] if entry_data.key?(column)
  end

  entry_data
end
|
.fields_from_data(data) ⇒ Object
32
33
34
|
# File 'app/models/bulkrax/csv_entry.rb', line 32
# Lists the distinct column headers present in parsed CSV data.
#
# @param data [#headers] parsed CSV data exposing +headers+ (for example a CSV::Table)
# @return [Array] the headers, flattened, with nils and duplicates removed
def self.fields_from_data(data)
  data.headers.flatten.compact.uniq
end
|
.matcher_class ⇒ Object
384
385
386
|
# File 'app/models/bulkrax/csv_entry.rb', line 384
# Matcher class associated with this entry type.
#
# @return [Class] always Bulkrax::CsvMatcher
def self.matcher_class
Bulkrax::CsvMatcher
end
|
.read_data(path) ⇒ Object
There’s a risk that this reads the whole file into memory and could cause a memory leak. We strip any special characters out of the headers (looking at you, Excel).
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
# File 'app/models/bulkrax/csv_entry.rb', line 40
# Parses a CSV source and wraps the result in the configured wrapper class.
#
# Headers are converted to symbols after removing every character that is not a
# word character, digit, dot, space or hyphen. The defaults below can be
# overridden by +csv_read_data_options+.
#
# @param path [String, #read] a file path, or an IO-like object (rewound first when it supports it)
# @return [Object] an instance of +csv_wrapper_class+ built from the parsed CSV
# @raise [CsvPathError] when +path+ is blank
def self.read_data(path)
  raise CsvPathError, 'CSV path empty' if path.blank?

  sanitize_header = ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym }
  defaults = { headers: true, header_converters: sanitize_header, encoding: 'utf-8' }
  options = defaults.merge(csv_read_data_options)

  results =
    if path.respond_to?(:read)
      # IO-like input: start from the beginning, then parse its full contents.
      path.rewind if path.respond_to?(:rewind)
      CSV.parse(path.read, **options)
    else
      CSV.read(path, **options)
    end

  csv_wrapper_class.new(results)
end
|
Instance Method Details
#add_file ⇒ Object
165
166
167
168
169
170
171
172
173
174
175
176
177
|
# File 'app/models/bulkrax/csv_entry.rb', line 165
# Populates parsed_metadata['file'] with resolved file paths.
#
# A String in record['file'] is split on Bulkrax.multi_value_element_split_on; an Array
# is used as-is; anything else leaves the existing value (or an empty list).
# Blank names are dropped, spaces in names become underscores, and each name is
# resolved through #path_to_file.
#
# @return [Array] the resolved paths
def add_file
  parsed_metadata['file'] ||= []

  files = record['file']
  case files
  when String
    parsed_metadata['file'] = files.split(Bulkrax.multi_value_element_split_on)
  when Array
    parsed_metadata['file'] = files
  end

  named = parsed_metadata['file'].reject(&:blank?)
  parsed_metadata['file'] = named.map { |name| path_to_file(name.tr(' ', '_')) }.compact
end
|
#add_identifier ⇒ Object
132
133
134
|
# File 'app/models/bulkrax/csv_entry.rb', line 132
# Stores the record's source identifier value, wrapped in a one-element array,
# in the parsed metadata under the work identifier key.
def add_identifier
self.parsed_metadata[work_identifier] = [record[source_identifier]]
end
|
156
157
158
159
160
161
162
163
|
# File 'app/models/bulkrax/csv_entry.rb', line 156
# Feeds every column of the record into #add_metadata.
#
# The first run of digits in a column name is treated as a 1-based position and
# passed on as a 0-based index; columns without digits (or with 0) pass nil.
def add_ingested_metadata
  record.each do |key, value|
    position = key[/\d+/].to_i
    index = position.zero? ? nil : position - 1
    add_metadata(key_without_numbers(key), value, index)
  end
end
|
142
143
144
145
146
147
148
149
150
151
152
153
154
|
# File 'app/models/bulkrax/csv_entry.rb', line 142
# Adds the metadata that depends on which model this entry's factory builds.
#
# Branches on factory_class:
# - equal to Bulkrax.collection_model_class: calls add_collection_type_gid, only when ::Hyrax is defined
# - equal to Bulkrax.file_model_class: validates the filename, adds the file path, validates the parent
# - otherwise: calls add_file (skipped for metadata-only imports) and add_admin_set_id
def add_metadata_for_model
if factory_class.present? && factory_class == Bulkrax.collection_model_class
add_collection_type_gid if defined?(::Hyrax)
elsif factory_class == Bulkrax.file_model_class
validate_presence_of_filename!
add_path_to_file
validate_presence_of_parent!
else
add_file unless importerexporter.metadata_only?
add_admin_set_id
end
end
|
179
180
181
182
183
184
185
186
187
188
189
|
# File 'app/models/bulkrax/csv_entry.rb', line 179
# Rebuilds parsed_metadata from scratch for export and persists the entry.
#
# Runs the system, files, relationship and mapping builders in that order.
# The files step is skipped when no collection model class is configured or
# when hyrax_record is an instance of it.
#
# @return [Hash] the freshly built parsed_metadata
def build_export_metadata
self.parsed_metadata = {}
build_system_metadata
build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
build_relationship_metadata
build_mapping_metadata
# save! raises if the entry cannot be persisted
self.save!
self.parsed_metadata
end
|
203
204
205
206
207
208
209
210
211
212
213
214
|
# File 'app/models/bulkrax/csv_entry.rb', line 203
# Adds file-name columns to the export metadata.
#
# Works delegate to #build_thumbnail_files. For anything else the file sets are
# gathered (the record itself when it is a file set), mapped to filenames and
# written via #handle_join_on_export, joined only when the 'file' mapping
# declares a 'join'.
def build_files_metadata
  return build_thumbnail_files if hyrax_record.work?

  export_key = key_for_export('file')
  file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
  filenames = map_file_sets(file_sets)
  join_configured = mapping['file']&.[]('join')&.present?
  handle_join_on_export(export_key, filenames, join_configured)
end
|
272
273
274
275
276
277
278
279
280
|
# File 'app/models/bulkrax/csv_entry.rb', line 272
# Walks the field mapping and invokes the builder method chosen for each entry.
#
# AttributeBuilderMethod.for decides which method (if any) handles a given
# key/value pair; pairs without a builder are skipped.
def build_mapping_metadata
  fetch_field_mapping.each do |key, value|
    builder = AttributeBuilderMethod.for(key: key, value: value, entry: self)
    send(builder, key, value) if builder
  end
end
|
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
# File 'app/models/bulkrax/csv_entry.rb', line 98
# Builds parsed_metadata for an import from the raw record.
#
# Validates the record first (see #validate_record for the errors it raises),
# resets parsed_metadata, then runs each add_*/sanitize step in the order below.
#
# @return [Hash] the populated parsed_metadata
def build_metadata
validate_record
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
add_collections
add_visibility
add_metadata_for_model
add_rights_statement
sanitize_controlled_uri_values!
add_local
self.parsed_metadata
end
|
limited metadata is needed for delete jobs
117
118
119
120
121
122
|
# File 'app/models/bulkrax/csv_entry.rb', line 117
# Builds the reduced parsed_metadata used by delete jobs: only the model and the
# ingested columns are added; no validation or other add_* steps run.
#
# @return [Hash] the populated parsed_metadata
def build_metadata_for_delete
self.parsed_metadata = {}
establish_factory_class
add_ingested_metadata
self.parsed_metadata
end
|
#build_object(_key, value) ⇒ Object
282
283
284
285
286
287
288
289
290
|
# File 'app/models/bulkrax/csv_entry.rb', line 282
# Exports the nested object data named by value['object'].
#
# Does nothing when hyrax_record has no such method or when it returns an empty
# value; otherwise hands the data (as an array) to #object_metadata.
#
# @param _key [Object] unused
# @param value [Hash] mapping configuration; value['object'] names the method to read
def build_object(_key, value)
  object_name = value['object']
  return unless hyrax_record.respond_to?(object_name)

  objects = hyrax_record.send(object_name)
  return if objects.empty?

  objects = objects.to_a if objects.is_a?(ActiveTriples::Relation)
  object_metadata(Array.wrap(objects))
end
|
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
# File 'app/models/bulkrax/csv_entry.rb', line 216
# Adds parent and child relationship columns to the export metadata.
#
# For each configured relationship key, every listed method that hyrax_record
# responds to is called; results that expose an id contribute that id as a
# string, anything else contributes the value itself. The collected values are
# flattened, de-duplicated and written via #handle_join_on_export.
#
# Whether values are joined into one column is read from the 'join' setting of
# the relationship being exported (parents or children), and a missing mapping
# entry simply means "not joined".
def build_relationship_metadata
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids parent],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids member_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    values = []
    methods.each do |m|
      value = hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
      value_id = value.try(:id)&.to_s || value
      values << value_id if value_id.present?
    end
    values = values.flatten.uniq
    next if values.blank?

    # Use this relationship's own mapping entry, not always the parents' one.
    join = mapping.dig(relationship_key, 'join').present?
    handle_join_on_export(relationship_key, values, join)
  end
end
|
Metadata required by Bulkrax for round-tripping
192
193
194
195
196
197
198
199
200
201
|
# File 'app/models/bulkrax/csv_entry.rb', line 192
# Writes the columns Bulkrax needs to round-trip a record: its id, its source
# identifier (the first value when several are present) and its model name.
def build_system_metadata
  parsed_metadata['id'] = hyrax_record.id

  identifier = hyrax_record.send(work_identifier)
  identifier = identifier.to_a if identifier.is_a?(ActiveTriples::Relation)
  parsed_metadata[source_identifier] = Array.wrap(identifier).first

  exported_model = Bulkrax.object_factory.model_name(resource: hyrax_record)
  parsed_metadata[key_for_export('model')] = exported_model
end
|
#build_thumbnail_files ⇒ Object
359
360
361
362
363
364
365
366
367
|
# File 'app/models/bulkrax/csv_entry.rb', line 359
# Adds numbered 'thumbnail_file' columns for the record's thumbnail.
#
# Skipped when the exporter does not include thumbnails or when the object
# factory finds no thumbnail for the record.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnail = Bulkrax.object_factory.thumbnail_for(resource: hyrax_record)
  return unless thumbnail

  handle_join_on_export('thumbnail_file', map_file_sets(Array.wrap(thumbnail)), false)
end
|
#build_value(property_name, mapping_config) ⇒ Object
292
293
294
295
296
297
298
299
300
301
302
303
304
|
# File 'app/models/bulkrax/csv_entry.rb', line 292
# Exports one property of hyrax_record into parsed_metadata.
#
# Enumerable values without a 'join' setting become numbered columns
# ("<key>_1", "<key>_2", ...); everything else is written to a single column
# via #prepare_export_data_with_join. Properties the record does not respond to
# are ignored.
#
# @param property_name [#to_s] name of the property to read from hyrax_record
# @param mapping_config [Hash] mapping configuration; a truthy 'join' forces a single column
def build_value(property_name, mapping_config)
  property = property_name.to_s
  return unless hyrax_record.respond_to?(property)

  values = hyrax_record.send(property)
  numbered_columns = !mapping_config['join'] && values.is_a?(Enumerable)

  if numbered_columns
    values.each_with_index do |item, position|
      parsed_metadata["#{key_for_export(property_name)}_#{position + 1}"] = prepare_export_data(item)
    end
  else
    parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(values)
  end
end
|
#collection_identifiers ⇒ Object
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
|
# File 'app/models/bulkrax/csv_entry.rb', line 388
# Resolves the parent references in this record to collection entry identifiers.
#
# The parent column is split on Bulkrax.multi_value_element_split_on; each
# reference is matched against the source identifier of the importer's
# CsvCollectionEntry entries. References without a match are dropped. A
# non-empty result is memoized.
#
# @return [Array] identifiers of the matching collection entries (possibly empty)
# @raise [StandardError] when a reference matches more than one collection entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_field_mapping = self.class.parent_field(parser)
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?

  references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
  identifiers = references.map do |reference|
    matches = importerexporter.entries.select do |entry|
      (entry.raw_metadata&.[](source_identifier) == reference) &&
        entry.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if matches.count > 1

    matches.first&.identifier
  end

  @collection_identifiers = identifiers.compact.presence || []
end
|
#collections_created? ⇒ Boolean
407
408
409
410
|
# File 'app/models/bulkrax/csv_entry.rb', line 407
# Always reports true for this entry type. As a consequence the lookup loop in
# #find_collection_ids is never reached for it.
#
# @return [Boolean] true
def collections_created?
true
end
|
#establish_factory_class ⇒ Object
136
137
138
139
140
|
# File 'app/models/bulkrax/csv_entry.rb', line 136
# Adds the 'model' metadata from whichever model columns the record contains.
#
# Every key listed in parser.model_field_mappings that is present in the record
# is passed to #add_metadata under the name 'model'.
def establish_factory_class
  parser.model_field_mappings.each do |model_key|
    next unless record.key?(model_key)

    add_metadata('model', record[model_key])
  end
end
|
#find_collection_ids ⇒ Object
412
413
414
415
416
417
418
419
420
421
422
423
|
# File 'app/models/bulkrax/csv_entry.rb', line 412
# Returns the collection ids for this entry, looking up any that are missing.
#
# When #collections_created? is true the current ids are returned untouched.
# Otherwise each collection identifier is resolved with #find_collection and its
# id appended unless the collection is blank or already listed.
#
# @return [Array] collection_ids
def find_collection_ids
  return collection_ids if collections_created?

  collection_identifiers.each do |identifier|
    collection = find_collection(identifier)
    next if collection.blank? || collection_ids.include?(collection.id)

    collection_ids << collection.id
  end

  collection_ids
end
|
#handle_join_on_export(key, values, join) ⇒ Object
369
370
371
372
373
374
375
376
377
378
|
# File 'app/models/bulkrax/csv_entry.rb', line 369
# Writes a multi-valued field to parsed_metadata, joined or as numbered columns.
#
# @param key [String] column name
# @param values [Array] values to export
# @param join [Boolean] when truthy, store one column joined with
#   Bulkrax.multi_value_element_join_on; otherwise store "<key>_1", "<key>_2", ...
#   and remove any plain "<key>" column
def handle_join_on_export(key, values, join)
  unless join
    values.each_with_index { |value, position| parsed_metadata["#{key}_#{position + 1}"] = value }
    return parsed_metadata.delete(key)
  end

  parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
end
|
#key_for_export(key) ⇒ Object
On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically.
308
309
310
311
312
313
|
# File 'app/models/bulkrax/csv_entry.rb', line 308
# Translates an internal key into the column name used on export.
#
# The numeric suffix is set aside, the remaining key is replaced by the first
# 'from' value of its mapping (or kept when unmapped), and the suffix is
# appended again.
#
# @param key [String] internal key, optionally carrying a numeric suffix
# @return [String] export column name
def key_for_export(key)
  base_key = key_without_numbers(key)
  field_config = mapping[base_key]
  export_name = field_config ? field_config['from'].first : base_key
  suffix = key.sub(base_key, '')
  "#{export_name}#{suffix}"
end
|
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
# File 'app/models/bulkrax/csv_entry.rb', line 331
# Expands serialized nested objects into numbered export columns.
#
# Each object becomes "<key>_<n>" columns (n is its 1-based position); array
# values inside an object become "<key>_<n>_<m>" columns. nil objects are
# skipped but still occupy a position.
#
# @param data [Array<String>] Ruby-literal strings, each evaluating to a hash or array of hashes
def object_metadata(data)
  # NOTE(review): eval runs each string as Ruby code. This is only safe if the
  # stored values can never be influenced by untrusted input; confirm, or move
  # to a data-only parser.
  objects = data.map { |d| eval(d) }.flatten

  objects.each_with_index do |obj, index|
    next if obj.nil?

    obj.with_indifferent_access.each do |key, field_value|
      if field_value.is_a?(Array)
        field_value.each_with_index do |item, nested_index|
          parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(item)
        end
      else
        parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(field_value)
      end
    end
  end
end
|
#path_to_file(file) ⇒ Object
If only filename is given, construct the path (/files/my_file). If file contains a path separator (e.g. attachments/cat_scan.jpg), resolve relative to the CSV’s directory.
427
428
429
430
431
432
433
434
435
436
437
438
439
440
|
# File 'app/models/bulkrax/csv_entry.rb', line 427
# Resolves a file reference from the CSV to a path on disk.
#
# Resolution order: the reference itself if it exists; a reference containing
# '/' goes through #resolve_relative_file_path; otherwise the bare filename is
# looked up inside the parser's path_to_files directory.
#
# @param file [String] filename or path taken from the CSV
# @return [String] path of the file
# @raise [RuntimeError] when the files directory is unknown or the file is not in it
def path_to_file(file)
  return file if File.exist?(file)
  return resolve_relative_file_path(file) if file.include?('/')

  files_dir = importerexporter.parser.path_to_files
  raise "Could not determine path to files directory. Ensure the import package contains a zip or a valid import_file_path." if files_dir.nil?

  candidate = File.join(files_dir, file)
  raise "File not found: #{candidate}. Check the file column in your CSV and ensure the file exists in the import package or path_to_files directory." unless File.exist?(candidate)

  candidate
end
|
#prepare_export_data(datum) ⇒ Object
323
324
325
326
327
328
329
|
# File 'app/models/bulkrax/csv_entry.rb', line 323
# Converts a single value for export: ActiveTriples resources are exported as
# their URI string, every other value is passed through unchanged.
#
# @param datum [Object] value to export
# @return [Object] the URI string for a resource, otherwise +datum+ itself
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end
|
#prepare_export_data_with_join(data) ⇒ Object
315
316
317
318
319
320
321
|
# File 'app/models/bulkrax/csv_entry.rb', line 315
# Converts a value to a single export string.
#
# Non-enumerable values are stringified; an empty collection yields "";
# otherwise each element goes through #prepare_export_data and the results are
# joined with Bulkrax.multi_value_element_join_on.
#
# @param data [Object] a single value or a collection of values
# @return [String]
def prepare_export_data_with_join(data)
  if !data.is_a?(Enumerable)
    data.to_s
  elsif data.empty?
    ""
  else
    exported = data.map { |datum| prepare_export_data(datum) }
    exported.join(Bulkrax.multi_value_element_join_on).to_s
  end
end
|
#record ⇒ Object
380
381
382
|
# File 'app/models/bulkrax/csv_entry.rb', line 380
# The raw record this entry works from: raw_metadata, memoized in @record.
# (Because of ||=, a nil or false raw_metadata is re-read on the next call.)
def record
@record ||= raw_metadata
end
|
#validate_record ⇒ Object
124
125
126
127
128
129
130
|
# File 'app/models/bulkrax/csv_entry.rb', line 124
# Checks that a record exists and carries every element the parser requires.
#
# @return [nil] when the record is valid
# @raise [RecordNotFound] when there is no record
# @raise [MissingMetadata] when required elements are absent; the message lists them
def validate_record
  raise RecordNotFound, 'Record not found' if record.nil?

  entry_parser = importerexporter.parser
  return if entry_parser.required_elements?(record)

  missing = entry_parser.missing_elements(record).join(', ')
  raise MissingMetadata, "Missing required elements, missing element(s) are: #{missing}"
end
|