Class: Storazzo::SearchEngine

Inherits:
Object
  • Object
show all
Defined in:
lib/storazzo/search_engine.rb

Constant Summary collapse

DB_PATH =
File.expand_path("~/.storazzo_index.db")

Instance Method Summary collapse

Constructor Details

#initialize ⇒ SearchEngine

Returns a new instance of SearchEngine.



11
12
13
14
15
# File 'lib/storazzo/search_engine.rb', line 11

# Opens the SQLite index at DB_PATH, configures rows to come back as hashes,
# and makes sure the schema exists via #create_tables.
def initialize
  @db = SQLite3::Database.new(DB_PATH).tap do |database|
    database.results_as_hash = true
  end
  create_tables
end

Instance Method Details

#create_tables ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/storazzo/search_engine.rb', line 17

# Creates the `files` table if it is not already present.
#
# Each row holds an md5, a size, a path and a disk name; the
# (disk, path) pair is declared UNIQUE.
#
# @return [Object] whatever `@db.execute` returns for the DDL statement
def create_tables
  files_schema = <<-SQL
    CREATE TABLE IF NOT EXISTS files (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      md5 VARCHAR(32),
      size INTEGER,
      path TEXT,
      disk TEXT,
      UNIQUE(disk, path)
    );
  SQL

  @db.execute(files_schema)
end

#ingest_stats_file(file_path, disk_name) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/storazzo/search_engine.rb', line 40

# Reads a stats file and upserts one row per entry into the `files` table.
#
# Blank lines, lines starting with `#`, and lines without a bracketed
# content-type token after the first field are skipped. Expected layout:
#
#   [file_v1.2] md5 mode type datetime size [content_type] filename
#
# The filename is everything after the content-type token and is stored
# verbatim, including any internal runs of whitespace.
#
# Insert failures are reported on stdout and do not abort the ingest.
#
# @param file_path [String] path of the stats file to read
# @param disk_name [String] value written to the `disk` column of every row
# @return [Array<String>] the lines that were read from `file_path`
def ingest_stats_file(file_path, disk_name)
  File.readlines(file_path).each do |line|
    next if line.start_with?('#') || line.strip.empty?

    parts = line.split(' ')

    # The first bracketed token after the leading `[file_vX]` tag marks the
    # content type; the size precedes it and the filename follows it.
    content_type_idx = parts.each_index.find do |i|
      i.positive? && parts[i].start_with?('[') && parts[i].end_with?(']')
    end
    next unless content_type_idx

    md5 = parts[1]
    size = parts[content_type_idx - 1].to_i

    # Re-split with a field limit so the remainder of the line (the filename)
    # is kept intact; joining `parts` would collapse consecutive whitespace
    # inside the name into single spaces. `to_s` covers a missing filename.
    path = line.chomp.split(' ', content_type_idx + 2)[content_type_idx + 1].to_s

    begin
      @db.execute(
        "INSERT OR REPLACE INTO files (md5, size, path, disk) VALUES (?, ?, ?, ?)",
        [md5, size, path, disk_name]
      )
    rescue SQLite3::Exception => e
      puts "Error inserting #{path}: #{e.message}"
    end
  end
end

#query(string) ⇒ Object



36
37
38
# File 'lib/storazzo/search_engine.rb', line 36

# Finds indexed files whose path or disk name contains +string+.
#
# The term is wrapped in `%` on both sides and bound as a parameter, so it
# is never interpolated into the SQL text itself.
#
# @param string [String] substring to look for
# @return [Object] the result of `@db.execute` for the SELECT
def query(string)
  pattern = "%#{string}%"
  sql = "SELECT * FROM files WHERE path LIKE ? OR disk LIKE ?"
  @db.execute(sql, [pattern, pattern])
end

#sync_from_gcs ⇒ Object



30
31
32
33
34
# File 'lib/storazzo/search_engine.rb', line 30

# Placeholder for the GCS metadata sync; for now it only prints a notice.
#
# TODO: use Google::Cloud::Storage to download `.rds` files from the
# designated GCS metadata bucket and feed each one to `ingest_stats_file`.
#
# @return [nil]
def sync_from_gcs
  puts 'Syncing metadata from GCS... (Stub)'
end