Class: Textus::Action::Ingest
Constant Summary collapse
- SOURCE_KINDS =
%w[url file asset].freeze
- CONTENT_HASH_ALGO =
"sha256"
- TOMBSTONE_RETAIN =
%w[ingested_at].freeze
Class Method Summary collapse
-
.build_structured(timestamp, container, now, content_hash, kind:, url:, path:, label:, zone:) ⇒ Object
rubocop:disable Metrics/ParameterLists.
-
.call(container:, call:, kind:, slug:, url: nil, path: nil, zone: nil, label: nil) ⇒ Object
rubocop:disable Metrics/ParameterLists.
- .compute_content_hash(kind:, url:, path:) ⇒ Object
- .copy_asset_file(container, now, path:, zone:) ⇒ Object
- .create_gitignore_sentinel(container) ⇒ Object
- .derive_key(now, kind:, slug:) ⇒ Object
-
.dispatch_key(kind:, slug:) ⇒ Object
Key derivation for Gate pre-dispatch auth.
- .find_duplicate(index, content_hash, kind:, url:) ⇒ Object
- .move_asset_file(container, old_asset_rel, zone:) ⇒ Object
- .rebuild_index(container, store) ⇒ Object
-
.supersede_entry(old_key, new_key, structured, container, call, store:, kind:, zone:) ⇒ Object
rubocop:disable Metrics/ParameterLists.
- .validate_inputs(kind:, url:, path:, zone:) ⇒ Object
- .write_raw_entry(key, structured, mentry, writer) ⇒ Object
Methods inherited from Base
Methods included from Contract::DSL
#arg, #cli, #cli_stdin, #contract, #contract?, #summary, #surfaces, #verb, #view
Class Method Details
.build_structured(timestamp, container, now, content_hash, kind:, url:, path:, label:, zone:) ⇒ Object
rubocop:disable Metrics/ParameterLists
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/textus/action/ingest.rb', line 94
#
# Assembles the content hash that is written for a newly ingested source.
#
# Every result carries "ingested_at" and "content_hash"; the remaining keys
# depend on +kind+:
# - "url":   a "source" entry holding the URL (label defaults to the URL)
#            and a nil "body".
# - "file":  a "source" entry holding the path (label defaults to the file's
#            basename) and the file's full contents as "body".
# - "asset": copies the file through .copy_asset_file, stores the returned
#            relative path under "asset", and leaves "body" nil.
#
# @param timestamp [String] value stored under "ingested_at" (.call passes
#   an ISO 8601 string)
# @param container [Object] forwarded to .copy_asset_file for asset ingests
# @param now [Time] forwarded to .copy_asset_file for asset ingests
# @param content_hash [String] value stored under "content_hash"
# @param kind [String] "url", "file" or "asset"
# @param url [String, nil] source URL, read only for kind "url"
# @param path [String, nil] source path, read for kinds "file" and "asset"
# @param label [String, nil] optional human-readable label
# @param zone [String, nil] forwarded to .copy_asset_file for asset ingests
# @return [Hash, nil] the structured content, or nil when +kind+ matches
#   none of the branches above
def self.build_structured(timestamp, container, now, content_hash, kind:, url:, path:, label:, zone:) # rubocop:disable Metrics/ParameterLists
  base = { "ingested_at" => timestamp, "content_hash" => content_hash }

  case kind
  when "url"
    base.merge("source" => { "kind" => "url", "url" => url, "label" => label || url },
               "body" => nil)
  when "file"
    body_content = File.read(path)
    base.merge("source" => { "kind" => "file", "path" => path, "label" => label || File.basename(path) },
               "body" => body_content)
  when "asset"
    # Side effect: the asset is copied into the container before the entry
    # itself is written.
    asset_rel = copy_asset_file(container, now, path: path, zone: zone)
    base.merge("source" => { "kind" => "asset", "label" => label || File.basename(path) },
               "asset" => asset_rel,
               "body" => nil)
  end
end
.call(container:, call:, kind:, slug:, url: nil, path: nil, zone: nil, label: nil) ⇒ Object
rubocop:disable Metrics/ParameterLists
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/textus/action/ingest.rb', line 27
#
# Entry point of the ingest action.
#
# Validates the inputs, derives today's key and the content hash, builds the
# structured content, then either supersedes an existing entry that has the
# same hash (or, for URLs, the same URL) under a different key, or writes a
# fresh raw entry and rebuilds the index.
#
# @param container [Object] provides +manifest+, +job_store+ and is passed
#   on to the writer/reader factories
# @param call [Object] passed to the envelope writer factory
# @param kind [String] one of SOURCE_KINDS
# @param slug [String] suffix used when deriving the entry key
# @param url [String, nil] required for kind "url"
# @param path [String, nil] required for kinds "file" and "asset"
# @param zone [String, nil] required for kind "asset"
# @param label [String, nil] optional label stored with the source
# @return [Object] the Failure from .validate_inputs, or Success wrapping
#   the value returned by the write/supersede step
def self.call(container:, call:, kind:, slug:, url: nil, path: nil, zone: nil, label: nil, **) # rubocop:disable Metrics/ParameterLists
  validation = validate_inputs(kind: kind, url: url, path: path, zone: zone)
  return validation if validation.is_a?(Dry::Monads::Result::Failure)

  now = Time.now.utc
  key = derive_key(now, kind: kind, slug: slug)
  content_hash = compute_content_hash(kind: kind, url: url, path: path)

  writer = Textus::Store::Envelope::Writer.from(container: container, call: call)
  mentry = container.manifest.resolver.resolve(key).entry

  structured = build_structured(
    now.iso8601, container, now, content_hash,
    kind: kind, url: url, path: path, label: label, zone: zone
  )

  store = container.job_store
  index = Textus::Store::Index::Lookup.new(store: store)
  duplicate_key = find_duplicate(index, content_hash, kind: kind, url: url)

  # A duplicate stored under another key is replaced by a tombstone that
  # points at the new entry.
  if duplicate_key && duplicate_key != key
    superseded = supersede_entry(duplicate_key, key, structured, container, call,
                                 store: store, kind: kind, zone: zone)
    return Success(superseded)
  end

  env = write_raw_entry(key, structured, mentry, writer)
  rebuild_index(container, store)
  Success(env)
end
.compute_content_hash(kind:, url:, path:) ⇒ Object
83 84 85 86 87 88 89 90 91 92 |
# File 'lib/textus/action/ingest.rb', line 83
#
# Computes the content hash recorded for an ingested source.
#
# For kind "url" the URL string itself is hashed; for "file" and "asset" the
# bytes of the file at +path+ are hashed. Any other kind leaves the digest
# untouched, which yields the hash of empty input.
#
# @param kind [String] source kind
# @param url [String, nil] hashed when kind is "url"
# @param path [String, nil] file hashed when kind is "file" or "asset"
# @return [String] "<CONTENT_HASH_ALGO>:<hex digest>"
def self.compute_content_hash(kind:, url:, path:)
  hasher = Digest::SHA256.new

  if kind == "url"
    hasher.update(url)
  elsif %w[file asset].include?(kind)
    hasher.file(path)
  end

  [CONTENT_HASH_ALGO, hasher.hexdigest].join(":")
end
.copy_asset_file(container, now, path:, zone:) ⇒ Object
181 182 183 184 185 186 187 188 189 |
# File 'lib/textus/action/ingest.rb', line 181
#
# Copies an asset into the container under
# <root>/assets/raw/<YYYY>/<MM>/<DD>/<zone>/<basename>, creating the
# directory if needed, and makes sure the assets .gitignore sentinel exists.
#
# @param container [Object] must respond to +root+
# @param now [Time] supplies the date portion of the destination
# @param path [String] file to copy; only its basename is kept
# @param zone [String] final directory component
# @return [String] destination path relative to <root>/assets
def self.copy_asset_file(container, now, path:, zone:)
  dated = now.strftime("%Y/%m/%d")
  name = File.basename(path)

  target_dir = File.join(container.root, "assets", "raw", dated, zone)
  FileUtils.mkdir_p(target_dir)
  FileUtils.cp(path, File.join(target_dir, name))

  create_gitignore_sentinel(container)

  "raw/#{dated}/#{zone}/#{name}"
end
.create_gitignore_sentinel(container) ⇒ Object
191 192 193 194 195 196 |
# File 'lib/textus/action/ingest.rb', line 191
#
# Ensures <root>/assets exists and contains a .gitignore whose content is
# "*\n". An existing .gitignore is left untouched.
#
# @param container [Object] must respond to +root+
# @return [Integer, nil] bytes written, or nil when the file already existed
def self.create_gitignore_sentinel(container)
  assets_root = File.join(container.root, "assets")
  FileUtils.mkdir_p(assets_root)

  ignore_file = File.join(assets_root, ".gitignore")
  return if File.exist?(ignore_file)

  File.write(ignore_file, "*\n")
end
.derive_key(now, kind:, slug:) ⇒ Object
78 79 80 81 |
# File 'lib/textus/action/ingest.rb', line 78
#
# Builds the entry key "raw.<YYYY>.<MM>.<DD>.<kind>-<slug>".
#
# @param now [Time] supplies the date portion
# @param kind [String] source kind
# @param slug [String] caller-supplied suffix
# @return [String] the derived key
def self.derive_key(now, kind:, slug:)
  format("raw.%s.%s-%s", now.strftime("%Y.%m.%d"), kind, slug)
end
.dispatch_key(kind:, slug:) ⇒ Object
Key derivation for Gate pre-dispatch auth. Must match the runtime derivation in #call so the same key is checked by auth and used by the action body.
74 75 76 |
# File 'lib/textus/action/ingest.rb', line 74
#
# Derives the key checked by Gate pre-dispatch auth, using the same
# .derive_key routine as .call. Extra keyword arguments are accepted and
# ignored.
#
# NOTE(review): this method and .call each read the clock on their own, so
# a request that straddles UTC midnight could derive two different keys —
# confirm whether that matters.
#
# @param kind [String] source kind
# @param slug [String] caller-supplied suffix
# @return [String] the key for the current UTC date
def self.dispatch_key(kind:, slug:, **)
  current = Time.now.utc
  derive_key(current, kind: kind, slug: slug)
end
.find_duplicate(index, content_hash, kind:, url:) ⇒ Object
121 122 123 124 125 126 127 128 |
# File 'lib/textus/action/ingest.rb', line 121
#
# Looks for an existing entry that duplicates the one being ingested.
#
# A match on the content hash wins. Only when there is none, and the source
# is a URL, is the index searched by URL as well.
#
# @param index [Object] must respond to +find_by_hash+ and +find_by_url+
# @param content_hash [String] hash of the incoming content
# @param kind [String] source kind
# @param url [String, nil] URL used for the fallback lookup
# @return [Object, nil] whatever the index lookup returns, or nil
def self.find_duplicate(index, content_hash, kind:, url:)
  by_hash = index.find_by_hash(content_hash)

  if by_hash
    by_hash
  elsif kind == "url"
    index.find_by_url(url)
  end
end
.move_asset_file(container, old_asset_rel, zone:) ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/textus/action/ingest.rb', line 163
#
# Relocates a previously stored asset into today's dated directory,
# <root>/assets/raw/<YYYY>/<MM>/<DD>/<zone>/<basename>.
#
# The move is best-effort: nothing happens when the old file is missing or
# already sits at the destination, and Errno::ENOENT / Errno::EACCES raised
# along the way are reported with +warn+ instead of propagating.
#
# The destination date comes from the current UTC time read here, not from
# a timestamp passed in by the caller.
#
# @param container [Object] must respond to +root+
# @param old_asset_rel [String] current asset path relative to <root>/assets
# @param zone [String] final directory component of the destination
# @return [Object, nil] nil when no move is performed or a rescued error
#   was warned about
def self.move_asset_file(container, old_asset_rel, zone:)
  old_path = File.join(container.root, "assets", old_asset_rel)
  return unless File.exist?(old_path)

  now = Time.now.utc
  date_path = now.strftime("%Y/%m/%d")
  filename = File.basename(old_path)
  new_dir = File.join(container.root, "assets", "raw", date_path, zone)
  new_path = File.join(new_dir, filename)
  return if old_path == new_path

  FileUtils.mkdir_p(new_dir)
  FileUtils.mv(old_path, new_path)
rescue Errno::ENOENT, Errno::EACCES => e
  warn "[textus ingest] could not move asset #{old_asset_rel}: #{e.message}"
end
.rebuild_index(container, store) ⇒ Object
130 131 132 |
# File 'lib/textus/action/ingest.rb', line 130
#
# Rebuilds the store index using the container's manifest resolver.
#
# @param container [Object] supplies +manifest.resolver+
# @param store [Object] store handed to the index builder
# @return [Object] whatever Builder#rebuild! returns
def self.rebuild_index(container, store)
  builder = Textus::Store::Index::Builder.new(store: store)
  builder.rebuild!(resolver: container.manifest.resolver)
end
.supersede_entry(old_key, new_key, structured, container, call, store:, kind:, zone:) ⇒ Object
rubocop:disable Metrics/ParameterLists
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# File 'lib/textus/action/ingest.rb', line 134
#
# Replaces an existing entry with a new one.
#
# The old entry is overwritten with a tombstone that keeps only the
# TOMBSTONE_RETAIN fields, the source kind (when present) and a
# "superseded_by" pointer to the new key. The new entry is then written with
# a "supersedes" back-reference, the old asset file is moved for asset
# ingests, and the index is rebuilt.
#
# Note: +structured+ is mutated in place ("supersedes" is added to it).
#
# @param old_key [String] key of the entry being superseded
# @param new_key [String] key of the replacement entry
# @param structured [Hash] content for the new entry
# @param container [Object] supplies +manifest.resolver+ and is passed to
#   the writer/reader factories
# @param call [Object] passed to the envelope writer factory
# @param store [Object] store whose index is rebuilt afterwards
# @param kind [String] source kind of the new entry
# @param zone [String, nil] zone forwarded to .move_asset_file
# @return [Object] the value returned by writing the new entry
def self.supersede_entry(old_key, new_key, structured, container, call, store:, kind:, zone:) # rubocop:disable Metrics/ParameterLists
  old_mentry = container.manifest.resolver.resolve(old_key).entry
  writer = Textus::Store::Envelope::Writer.from(container: container, call: call)
  reader = Textus::Store::Envelope::Reader.from(container: container)

  previous = reader.read(old_key)
  old_content = previous&.content || {}

  # Keep only the whitelisted fields of the old entry.
  tombstone = TOMBSTONE_RETAIN.each_with_object({}) do |field, kept|
    kept[field] = old_content[field] if old_content.key?(field)
  end
  source_kind = old_content.dig("source", "kind")
  tombstone["source"] = { "kind" => source_kind } if source_kind
  tombstone["superseded_by"] = new_key

  tombstone_payload = Textus::Store::Envelope::Writer::Payload.new(
    meta: nil, body: nil, content: tombstone
  )
  writer.put(old_key, mentry: old_mentry, payload: tombstone_payload)

  structured["supersedes"] = old_key
  new_mentry = container.manifest.resolver.resolve(new_key).entry
  env = write_raw_entry(new_key, structured, new_mentry, writer)

  if kind == "asset"
    stale_asset = old_content["asset"]
    move_asset_file(container, stale_asset, zone: zone) if stale_asset
  end

  rebuild_index(container, store)
  env
end
.validate_inputs(kind:, url:, path:, zone:) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/textus/action/ingest.rb', line 54
#
# Checks that +kind+ is supported and that the options it needs are present.
#
# Requirements: "url" needs +url+; "file" needs +path+; "asset" needs +path+
# and +zone+ (a missing path is reported before a missing zone).
#
# @param kind [String] requested source kind
# @param url [String, nil]
# @param path [String, nil]
# @param zone [String, nil]
# @return [Object, nil] a Failure carrying code :usage_error and a message,
#   or nil when the inputs are acceptable
def self.validate_inputs(kind:, url:, path:, zone:)
  unless SOURCE_KINDS.include?(kind)
    return Failure(code: :usage_error,
                   message: "ingest kind must be one of #{SOURCE_KINDS.join("|")}, got #{kind.inspect}")
  end

  # First unmet requirement for this kind, if any.
  problem =
    case kind
    when "url"
      "ingest url requires --url" unless url
    when "file"
      "ingest file requires --path" unless path
    when "asset"
      if !path
        "ingest asset requires --path"
      elsif !zone
        "ingest asset requires --zone"
      end
    end

  problem && Failure(code: :usage_error, message: problem)
end
.write_raw_entry(key, structured, mentry, writer) ⇒ Object
114 115 116 117 118 119 |
# File 'lib/textus/action/ingest.rb', line 114
#
# Writes +structured+ as the content of the entry at +key+, with nil meta
# and nil body.
#
# @param key [String] entry key
# @param structured [Hash] content to store
# @param mentry [Object] manifest entry resolved for +key+
# @param writer [Object] envelope writer; must respond to +put+
# @return [Object] whatever +writer.put+ returns
def self.write_raw_entry(key, structured, mentry, writer)
  payload = Textus::Store::Envelope::Writer::Payload.new(
    meta: nil,
    body: nil,
    content: structured
  )
  writer.put(key, mentry: mentry, payload: payload)
end