Class: Storazzo::SearchEngine
- Inherits:
-
Object
- Object
- Storazzo::SearchEngine
- Includes:
- Common
- Defined in:
- lib/storazzo/search_engine.rb
Constant Summary collapse
- DB_PATH =
File.expand_path("~/.storazzo_index.db")
Constants included from Colors
Instance Method Summary collapse
- #create_tables ⇒ Object
- #find_or_create_disk(disk_name, opts = {}) ⇒ Object
- #ingest_stats_file(file_path, disk_name) ⇒ Object
-
#initialize ⇒ SearchEngine
constructor
A new instance of SearchEngine.
- #query(string) ⇒ Object
- #sync_all_from_gcs ⇒ Object
Methods included from Common
#bug, #deb, #err, #fatal, #if_deb?, #linux?, #mac?, #ppp, #pverbose, #slugify, #warn
Methods included from Colors
#azure, #blue, #deb2, #gray, #green, #orange, #pgreen, #pred, #purple, #pwhite, #pyellow, #red, #white, #yellow
Constructor Details
#initialize ⇒ SearchEngine
Returns a new instance of SearchEngine.
13 14 15 16 17 |
# File 'lib/storazzo/search_engine.rb', line 13

# Opens (or creates) the SQLite index at DB_PATH, enables hash-style result
# rows, and ensures the schema exists.
def initialize
  @db = SQLite3::Database.new(DB_PATH).tap { |db| db.results_as_hash = true }
  create_tables
end
Instance Method Details
#create_tables ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/storazzo/search_engine.rb', line 19

# Creates the `disks` and `files` tables when missing (idempotent DDL).
# `files` rows are unique per (disk_id, path) and reference their disk.
def create_tables
  disks_ddl = <<-SQL
    CREATE TABLE IF NOT EXISTS disks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      name TEXT,
      slug TEXT UNIQUE,
      type TEXT,
      uuid TEXT,
      llm_description TEXT,
      llm_storage_type TEXT,
      last_scanned_at DATETIME
    );
  SQL
  files_ddl = <<-SQL
    CREATE TABLE IF NOT EXISTS files (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      md5 VARCHAR(32),
      size INTEGER,
      path TEXT,
      disk_id INTEGER,
      file_mtime DATETIME,
      content_type TEXT,
      ingested_at DATETIME,
      FOREIGN KEY(disk_id) REFERENCES disks(id),
      UNIQUE(disk_id, path)
    );
  SQL
  [disks_ddl, files_ddl].each { |ddl| @db.execute(ddl) }
end
#find_or_create_disk(disk_name, opts = {}) ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/storazzo/search_engine.rb', line 99

# Looks up a disk by its slug, inserting it first when absent, and refreshes
# optional metadata (uuid / llm description / llm storage type) when supplied.
#
# @param disk_name [String] human-readable disk name; slugified for lookup
# @param opts [Hash] :type (defaults to 'local'), :uuid, :llm_description,
#   :llm_storage_type — nil values leave existing columns untouched (COALESCE)
# @return [Integer] the disk's row id
def find_or_create_disk(disk_name, opts = {})
  slug         = slugify(disk_name)
  disk_type    = opts[:type] || 'local'
  uuid         = opts[:uuid]
  llm_desc     = opts[:llm_description]
  llm_storage  = opts[:llm_storage_type]

  insert_params = [disk_name, slug, disk_type, uuid, llm_desc, llm_storage]
  @db.execute <<-SQL, insert_params
    INSERT OR IGNORE INTO disks (name, slug, type, uuid, llm_description, llm_storage_type)
    VALUES (?, ?, ?, ?, ?, ?)
  SQL

  # Refresh metadata on an existing row only when the caller provided any.
  if [uuid, llm_desc, llm_storage].any?
    @db.execute <<-SQL, [uuid, llm_desc, llm_storage, slug]
      UPDATE disks SET
        uuid = COALESCE(?, uuid),
        llm_description = COALESCE(?, llm_description),
        llm_storage_type = COALESCE(?, llm_storage_type)
      WHERE slug = ?
    SQL
  end

  @db.get_first_value("SELECT id FROM disks WHERE slug = ?", [slug])
end
#ingest_stats_file(file_path, disk_name) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/storazzo/search_engine.rb', line 124

# Parses a ricdisk stats file and upserts each entry into the `files` table
# inside a single transaction, then stamps the disk's last_scanned_at.
#
# Expected line format (lines starting with '#' and blank lines are skipped):
#   [file_v1.2] md5 mode type datetime size [content_type] filename with spaces
#
# @param file_path [String] local path to the stats file
# @param disk_name [String] logical disk name; created in `disks` if missing
def ingest_stats_file(file_path, disk_name)
  disk_id = find_or_create_disk(disk_name)
  ingested_at = Time.now.iso8601
  puts "Ingesting #{file_path} (disk_id: #{disk_id})..."

  @db.transaction do
    File.readlines(file_path).each do |line|
      next if line.start_with?('#') || line.strip.empty?

      parts = line.split(' ')

      # Locate the bracketed `[content_type]` token. The leading `[file_v1.2]`
      # tag also matches the bracket test, hence the `p != parts.first` guard.
      content_type_idx = parts.find_index do |p|
        p.start_with?('[') && p.end_with?(']') && p != parts.first
      end
      next unless content_type_idx

      md5          = parts[1]
      file_mtime   = parts[4] # Standardized creation/mod time
      size         = parts[content_type_idx - 1].to_i
      content_type = parts[content_type_idx].gsub(/[\[\]]/, '')
      # Everything after the content-type token is the (space-containing) path.
      path = parts[(content_type_idx + 1)..-1].join(' ')

      begin
        @db.execute <<-SQL, [md5, size, path, disk_id, file_mtime, content_type, ingested_at]
          INSERT OR REPLACE INTO files (md5, size, path, disk_id, file_mtime, content_type, ingested_at)
          VALUES (?, ?, ?, ?, ?, ?, ?)
        SQL
      rescue SQLite3::Exception => e
        # FIX: was `e.` (invalid syntax) — report the exception message.
        puts "Error inserting #{path}: #{e.message}"
      end
    end
  end

  @db.execute("UPDATE disks SET last_scanned_at = ? WHERE id = ?", [ingested_at, disk_id])
end
#query(string) ⇒ Object
90 91 92 93 94 95 96 97 |
# File 'lib/storazzo/search_engine.rb', line 90

# Substring search across file paths, disk names, and disk slugs.
# Returns matching `files` rows augmented with disk_name / disk_slug columns.
def query(string)
  pattern = "%#{string}%"
  sql = <<-SQL
    SELECT f.*, d.name as disk_name, d.slug as disk_slug
    FROM files f JOIN disks d ON f.disk_id = d.id
    WHERE f.path LIKE ? OR d.name LIKE ? OR d.slug LIKE ?
  SQL
  @db.execute(sql, [pattern] * 3)
end
#sync_all_from_gcs ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/storazzo/search_engine.rb', line 49

# Downloads every `.rds` catalog under `backup/ricdisk-magic/` in each
# configured GCS bucket, ingests it into the local SQLite index, and removes
# the temporary download. Per-bucket errors are reported and skipped.
def sync_all_from_gcs
  client = Storazzo::GCS::Client.new
  config = Storazzo::RicDiskConfig.instance
  config.load
  buckets = config.get_bucket_paths
  puts "Syncing metadata from #{buckets.size} buckets..."

  buckets.each do |bucket_url|
    bucket_name = bucket_url.gsub('gs://', '').split('/').first
    # For now, we search in the standard 'backup/ricdisk-magic/' path
    prefix = "backup/ricdisk-magic/"

    begin
      bucket = client.storage.bucket(bucket_name)
      next unless bucket

      files = bucket.files(prefix: prefix)
      rds_files = files.select { |f| f.name.end_with?('.rds') }
      puts "--- Bucket: gs://#{bucket_name} (#{rds_files.size} catalogs found) ---"

      rds_files.each do |remote_file|
        # 1. Download to local tmp
        local_tmp_path = File.join(Dir.tmpdir, File.basename(remote_file.name))
        puts "Downloading #{remote_file.name}..."
        remote_file.download(local_tmp_path)

        # 2. Ingest into SQLite (disk name derived from the catalog filename)
        disk_name = File.basename(remote_file.name, '.rds').gsub('-ricdisk_stats_v11', '')
        ingest_stats_file(local_tmp_path, disk_name)

        # 3. Cleanup
        FileUtils.rm(local_tmp_path)
      end
    rescue => e
      # FIX: was `e.` (invalid syntax) — report the exception message.
      warn "Error syncing from gs://#{bucket_name}: #{e.message}"
    end
  end
end