Class: Textus::Action::Ingest

Inherits:
Base
  • Object
show all
Defined in:
lib/textus/action/ingest.rb

Constant Summary collapse

SOURCE_KINDS =
%w[url file asset].freeze
CONTENT_HASH_ALGO =
"sha256"
TOMBSTONE_RETAIN =
%w[ingested_at].freeze

Class Method Summary collapse

Methods inherited from Base

inherited, proposal_from

Methods included from Contract::DSL

#arg, #cli, #cli_stdin, #contract, #contract?, #summary, #surfaces, #verb, #view

Class Method Details

.build_structured(timestamp, container, now, content_hash, kind:, url:, path:, label:, zone:) ⇒ Object

rubocop:disable Metrics/ParameterLists



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/textus/action/ingest.rb', line 94

# Assembles the structured content stored for a newly ingested entry.
#
# Every result starts with "ingested_at" and "content_hash"; the remaining
# keys depend on +kind+:
#   "url"   - "source" (kind, url, label) and a nil "body"
#   "file"  - "source" (kind, path, label) and "body" set to File.read(path)
#   "asset" - "source" (kind, label), "asset" (the relative path returned by
#             copy_asset_file) and a nil "body"
# When +label+ is nil it falls back to the URL, or to the basename of +path+.
#
# @param timestamp [Object] value stored under "ingested_at"
# @param container [Object] forwarded to copy_asset_file (asset kind only)
# @param now [Time] forwarded to copy_asset_file (asset kind only)
# @param content_hash [String] value stored under "content_hash"
# @param kind [String] "url", "file" or "asset"
# @param url [String, nil] source URL (url kind)
# @param path [String, nil] source path (file and asset kinds)
# @param label [String, nil] optional human-readable label
# @param zone [String, nil] forwarded to copy_asset_file (asset kind only)
# @return [Hash, nil] the structured content, or nil when +kind+ matches none
#   of the three branches
def self.build_structured(timestamp, container, now, content_hash, kind:, url:, path:, label:, zone:) # rubocop:disable Metrics/ParameterLists
  header = { "ingested_at" => timestamp, "content_hash" => content_hash }

  if kind == "url"
    origin = { "kind" => "url", "url" => url, "label" => label || url }
    header.merge("source" => origin, "body" => nil)
  elsif kind == "file"
    # Read the file before building the source hash so an unreadable path
    # fails before anything else is evaluated.
    text = File.read(path)
    origin = { "kind" => "file", "path" => path, "label" => label || File.basename(path) }
    header.merge("source" => origin, "body" => text)
  elsif kind == "asset"
    # The asset is copied into the container first; only its relative
    # location is recorded in the entry.
    relative = copy_asset_file(container, now, path: path, zone: zone)
    origin = { "kind" => "asset", "label" => label || File.basename(path) }
    header.merge("source" => origin, "asset" => relative, "body" => nil)
  end
end

.call(container:, call:, kind:, slug:, url: nil, path: nil, zone: nil, label: nil) ⇒ Object

rubocop:disable Metrics/ParameterLists



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/textus/action/ingest.rb', line 27

# Ingests a url, file or asset source as a raw entry.
#
# Steps, in order: validate the inputs, derive the date-based key, compute the
# content hash, build the structured content, then look up an existing entry
# with the same content hash (or, for urls, the same url). A duplicate stored
# under a different key is superseded by the new entry; otherwise the entry is
# written under the derived key and the index is rebuilt.
#
# @param container [Object] provides manifest.resolver, job_store and is
#   passed on to the writer and helpers
# @param call [Object] passed through to Textus::Store::Envelope::Writer.from
# @param kind [String] one of SOURCE_KINDS
# @param slug [String] suffix used in the derived key
# @param url [String, nil] checked by validate_inputs for the "url" kind
# @param path [String, nil] checked by validate_inputs for "file" and "asset"
# @param zone [String, nil] checked by validate_inputs for the "asset" kind
# @param label [String, nil] optional label stored in the entry's source
# @return the Failure produced by validate_inputs, returned unchanged, or
#   Success wrapping whatever supersede_entry / write_raw_entry returned.
#   Additional keyword arguments are accepted and ignored.
def self.call(container:, call:, kind:, slug:, url: nil, path: nil, zone: nil, label: nil, **) # rubocop:disable Metrics/ParameterLists
  validation = validate_inputs(kind:, url:, path:, zone:)
  return validation if validation.is_a?(Dry::Monads::Result::Failure)

  # A single timestamp drives the key, the "ingested_at" value and the asset
  # directory used by build_structured.
  now = Time.now.utc
  key = derive_key(now, kind:, slug:)

  content_hash = compute_content_hash(kind:, url:, path:)
  writer = Textus::Store::Envelope::Writer.from(container: container, call: call)
  # The manifest entry for the key is resolved before build_structured runs,
  # i.e. before any asset file is copied.
  mentry = container.manifest.resolver.resolve(key).entry
  ts = now.iso8601
  # For the "asset" kind this call copies the file into the container, so the
  # copy has already happened by the time duplicates are looked up below.
  structured = build_structured(ts, container, now, content_hash, kind:, url:, path:, label:, zone:)

  store = container.job_store
  index = Textus::Store::Index::Lookup.new(store: store)
  duplicate_key = find_duplicate(index, content_hash, kind:, url:)

  # A duplicate found under the very same key is not superseded; the entry is
  # simply written again under that key.
  result = if duplicate_key && duplicate_key != key
             supersede_entry(duplicate_key, key, structured, container, call, store: store, kind:, zone:)
           else
             env = write_raw_entry(key, structured, mentry, writer)
             rebuild_index(container, store)
             env
           end
  Success(result)
end

.compute_content_hash(kind:, url:, path:) ⇒ Object



83
84
85
86
87
88
89
90
91
92
# File 'lib/textus/action/ingest.rb', line 83

# Computes the content hash used for duplicate detection.
#
# For the "url" kind the URL string itself is hashed; for "file" and "asset"
# the contents of the file at +path+ are hashed. Any other kind yields the
# digest of no input at all.
#
# @param kind [String] "url", "file" or "asset"
# @param url [String, nil] hashed when kind is "url"
# @param path [String, nil] file whose contents are hashed for "file"/"asset"
# @return [String] CONTENT_HASH_ALGO, a colon, then the SHA-256 hex digest
def self.compute_content_hash(kind:, url:, path:)
  hasher =
    case kind
    when "url" then Digest::SHA256.new.update(url)
    when "file", "asset" then Digest::SHA256.file(path)
    else Digest::SHA256.new
    end
  [CONTENT_HASH_ALGO, hasher.hexdigest].join(":")
end

.copy_asset_file(container, now, path:, zone:) ⇒ Object



181
182
183
184
185
186
187
188
189
# File 'lib/textus/action/ingest.rb', line 181

# Copies an asset into the container's dated asset tree.
#
# The file lands at <root>/assets/raw/YYYY/MM/DD/<zone>/<basename>, where the
# date comes from +now+. The .gitignore sentinel under <root>/assets is
# ensured afterwards via create_gitignore_sentinel.
#
# @param container [Object] must respond to #root
# @param now [Time] supplies the date segments of the target directory
# @param path [String] file to copy
# @param zone [String] last directory segment of the target
# @return [String] the copy's location relative to <root>/assets
def self.copy_asset_file(container, now, path:, zone:)
  day = now.strftime("%Y/%m/%d")
  name = File.basename(path)
  segments = ["raw", day, zone]

  target_dir = File.join(container.root, "assets", *segments)
  FileUtils.mkdir_p(target_dir)
  FileUtils.cp(path, File.join(target_dir, name))
  create_gitignore_sentinel(container)

  [*segments, name].join("/")
end

.create_gitignore_sentinel(container) ⇒ Object



191
192
193
194
195
196
# File 'lib/textus/action/ingest.rb', line 191

# Ensures <root>/assets exists and contains a .gitignore file.
#
# A missing .gitignore is created with the single pattern "*"; an existing
# one is left untouched.
#
# @param container [Object] must respond to #root
# @return [Integer, nil] bytes written when the file was created, nil when it
#   already existed
def self.create_gitignore_sentinel(container)
  ignore_file = File.join(container.root, "assets", ".gitignore")
  FileUtils.mkdir_p(File.dirname(ignore_file))
  return if File.exist?(ignore_file)

  File.write(ignore_file, "*\n")
end

.derive_key(now, kind:, slug:) ⇒ Object



78
79
80
81
# File 'lib/textus/action/ingest.rb', line 78

# Builds the entry key "raw.YYYY.MM.DD.<kind>-<slug>" from the date in +now+.
#
# @param now [Time] supplies the date portion of the key
# @param kind [String] source kind placed before the dash
# @param slug [String] caller-supplied slug placed after the dash
# @return [String] the derived key
def self.derive_key(now, kind:, slug:)
  format("raw.%s.%s-%s", now.strftime("%Y.%m.%d"), kind, slug)
end

.dispatch_key(kind:, slug:) ⇒ Object

Key derivation for Gate pre-dispatch auth. Must match the runtime derivation in #call so the same key is checked by auth and used by the action body.



74
75
76
# File 'lib/textus/action/ingest.rb', line 74

# Derives the key checked by Gate pre-dispatch auth, using the same
# derive_key helper as .call so both arrive at the same key format.
#
# NOTE(review): the timestamp here is taken independently of the one in
# .call; presumably the two could fall on different UTC dates around
# midnight — confirm whether that matters to the gate.
#
# @param kind [String] source kind
# @param slug [String] caller-supplied slug
# @return [String] the key derived for the current UTC date; additional
#   keyword arguments are accepted and ignored
def self.dispatch_key(kind:, slug:, **)
  current_utc = Time.now.utc
  derive_key(current_utc, kind: kind, slug: slug)
end

.find_duplicate(index, content_hash, kind:, url:) ⇒ Object



121
122
123
124
125
126
127
128
# File 'lib/textus/action/ingest.rb', line 121

# Looks for an already-indexed entry that duplicates the one being ingested.
#
# The content hash is tried first. Only for the "url" kind, and only when the
# hash lookup found nothing, is the URL itself looked up as well.
#
# @param index [Object] must respond to #find_by_hash and #find_by_url
# @param content_hash [String] hash of the new entry
# @param kind [String] source kind
# @param url [String, nil] URL of the new entry (used for the "url" kind)
# @return the lookup result, or nil when nothing matched
def self.find_duplicate(index, content_hash, kind:, url:)
  by_hash = index.find_by_hash(content_hash)
  if by_hash
    by_hash
  elsif kind == "url"
    index.find_by_url(url)
  end
end

.move_asset_file(container, old_asset_rel, zone:) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/textus/action/ingest.rb', line 163

# Moves a previously stored asset into today's dated directory for +zone+.
#
# The asset is looked up at <root>/assets/<old_asset_rel>. Nothing happens
# when it does not exist or is already at the target location. The target is
# <root>/assets/raw/YYYY/MM/DD/<zone>/<basename>, dated with the current UTC
# time. ENOENT and EACCES raised along the way are reported with a warning
# instead of propagating.
#
# @param container [Object] must respond to #root
# @param old_asset_rel [String] asset location relative to <root>/assets
# @param zone [String] last directory segment of the target
# @return the result of FileUtils.mv, or nil when nothing was moved
def self.move_asset_file(container, old_asset_rel, zone:)
  source = File.join(container.root, "assets", old_asset_rel)
  return unless File.exist?(source)

  target_dir = File.join(container.root, "assets", "raw", Time.now.utc.strftime("%Y/%m/%d"), zone)
  target = File.join(target_dir, File.basename(source))
  return if target == source

  FileUtils.mkdir_p(target_dir)
  FileUtils.mv(source, target)
rescue Errno::ENOENT, Errno::EACCES => e
  warn "[textus ingest] could not move asset #{old_asset_rel}: #{e.message}"
end

.rebuild_index(container, store) ⇒ Object



130
131
132
# File 'lib/textus/action/ingest.rb', line 130

# Rebuilds the index held in +store+ using the container's manifest resolver.
#
# @param container [Object] provides manifest.resolver
# @param store [Object] handed to Textus::Store::Index::Builder
# @return whatever Builder#rebuild! returns
def self.rebuild_index(container, store)
  builder = Textus::Store::Index::Builder.new(store: store)
  builder.rebuild!(resolver: container.manifest.resolver)
end

.supersede_entry(old_key, new_key, structured, container, call, store:, kind:, zone:) ⇒ Object

rubocop:disable Metrics/ParameterLists



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/textus/action/ingest.rb', line 134

# Replaces an existing entry with a tombstone and writes the new entry.
#
# The entry at +old_key+ is overwritten with a tombstone that keeps only the
# TOMBSTONE_RETAIN keys, the old source kind, and a "superseded_by" pointer to
# +new_key+. The new entry is then written under +new_key+ with a "supersedes"
# pointer back to +old_key+, the old asset file (asset kind only) is moved,
# and the index is rebuilt.
#
# @param old_key [String] key of the entry being superseded
# @param new_key [String] key the new entry is written under
# @param structured [Hash] content of the new entry; mutated in place to add
#   the "supersedes" key
# @param container [Object] provides manifest.resolver and is passed to the
#   reader, writer and helpers
# @param call [Object] passed through to Textus::Store::Envelope::Writer.from
# @param store [Object] index store handed to rebuild_index
# @param kind [String] source kind of the new entry
# @param zone [String, nil] zone forwarded to move_asset_file
# @return whatever write_raw_entry returned for the new entry
def self.supersede_entry(old_key, new_key, structured, container, call, store:, kind:, zone:) # rubocop:disable Metrics/ParameterLists
  old_mentry = container.manifest.resolver.resolve(old_key).entry
  writer = Textus::Store::Envelope::Writer.from(container: container, call: call)

  reader = Textus::Store::Envelope::Reader.from(container: container)
  old_env = reader.read(old_key)
  # Fall back to an empty hash when the old envelope or its content is missing.
  old_content = old_env&.content || {}
  tombstone = {}
  # Carry over only the whitelisted keys that the old content actually has.
  TOMBSTONE_RETAIN.each do |k|
    tombstone[k] = old_content[k] if old_content.key?(k)
  end
  # Of the old source, only its kind is kept.
  source_kind = old_content.dig("source", "kind")
  tombstone["source"] = { "kind" => source_kind } if source_kind
  tombstone["superseded_by"] = new_key

  # Overwrite the old entry with the tombstone before writing the new one.
  writer.put(old_key, mentry: old_mentry,
                      payload: Textus::Store::Envelope::Writer::Payload.new(
                        meta: nil, body: nil, content: tombstone,
                      ))

  # Mutates the caller's hash so the new entry links back to the old key.
  structured["supersedes"] = old_key
  env = write_raw_entry(new_key, structured, container.manifest.resolver.resolve(new_key).entry, writer)

  # Only asset entries whose old content recorded an asset path have a file to move.
  move_asset_file(container, old_content["asset"], zone:) if kind == "asset" && old_content["asset"]

  rebuild_index(container, store)
  env
end

.validate_inputs(kind:, url:, path:, zone:) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/textus/action/ingest.rb', line 54

# Checks that +kind+ is supported and that the arguments it needs are present.
#
# Requirements per kind:
#   "url"   - +url+
#   "file"  - +path+
#   "asset" - +path+ and +zone+
# A value counts as missing when it is nil/false or is empty or
# whitespace-only once converted to a string. Blank values are rejected here
# so they produce a usage error instead of being hashed (blank URL), failing
# later in file I/O (blank path), or producing an asset directory with an
# empty segment (blank zone).
#
# @param kind [String] requested source kind
# @param url [String, nil] source URL
# @param path [String, nil] source file path
# @param zone [String, nil] asset zone
# @return [nil] when the inputs are valid; otherwise the value returned by
#   Failure(code: :usage_error, message: ...)
def self.validate_inputs(kind:, url:, path:, zone:)
  unless SOURCE_KINDS.include?(kind)
    return Failure(code: :usage_error,
                   message: "ingest kind must be one of #{SOURCE_KINDS.join("|")}, got #{kind.inspect}")
  end

  # Treat an omitted flag and a blank string the same way.
  missing = ->(value) { !value || value.to_s.strip.empty? }

  case kind
  when "url"
    return Failure(code: :usage_error, message: "ingest url requires --url") if missing.call(url)
  when "file"
    return Failure(code: :usage_error, message: "ingest file requires --path") if missing.call(path)
  when "asset"
    return Failure(code: :usage_error, message: "ingest asset requires --path") if missing.call(path)
    return Failure(code: :usage_error, message: "ingest asset requires --zone") if missing.call(zone)
  end
  nil
end

.write_raw_entry(key, structured, mentry, writer) ⇒ Object



114
115
116
117
118
119
# File 'lib/textus/action/ingest.rb', line 114

# Writes +structured+ as the content of the entry at +key+.
#
# The payload carries no meta and no body; only the content is set.
#
# @param key [String] key the entry is stored under
# @param structured [Hash] content of the entry
# @param mentry [Object] manifest entry passed through to the writer
# @param writer [Object] must respond to #put(key, mentry:, payload:)
# @return whatever writer.put returns
def self.write_raw_entry(key, structured, mentry, writer)
  payload = Textus::Store::Envelope::Writer::Payload.new(meta: nil, body: nil, content: structured)
  writer.put(key, mentry: mentry, payload: payload)
end