Class: Ragnar::CLI

Inherits:
Thor
  • Object
show all
Includes:
CLIVisualization, Thor::Interactive::Command
Defined in:
lib/ragnar/cli.rb,
lib/ragnar/cli_umap.rb

Defined Under Namespace

Classes: Umap

Instance Method Summary collapse

Methods included from CLIVisualization

#generate_topic_visualization_html

Instance Method Details

#clear_cache_command ⇒ Object



465
466
467
468
# File 'lib/ragnar/cli.rb', line 465

# Command wrapper around +clear_cache+: runs it, then confirms to the user
# that the cache was cleared.
def clear_cache_command
  clear_cache
  say("Cache cleared. Next commands will create fresh instances.", :green)
end

#config ⇒ Object



355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/ragnar/cli.rb', line 355

# Prints the current configuration, grouped into sections (paths,
# embeddings, LLM, UMAP, query). All values are read from Config.instance.
def config
  settings = Config.instance
  # Every section starts with a blank line and a cyan "<title>:" heading.
  heading = ->(title) { say "\n#{title}:", :cyan }

  say "\nConfiguration Settings:", :cyan
  say "-" * 40

  # Report which config file, if any, is in effect.
  file_line, file_color =
    if settings.config_exists?
      ["Config file: #{settings.config_file_path}", :green]
    else
      ["Config file: None (using defaults)", :yellow]
    end
  say file_line, file_color

  heading.call("Paths")
  say "  Database: #{settings.database_path}"
  say "  Models: #{settings.models_dir}"
  say "  History: #{settings.history_file}"

  heading.call("Embeddings")
  say "  Model: #{settings.embedding_model}"
  say "  Chunk size: #{settings.chunk_size}"
  say "  Chunk overlap: #{settings.chunk_overlap}"

  heading.call("LLM")
  say "  Active profile: #{settings.llm_profile_name}", :green
  say "  Provider: #{settings.llm_provider}"
  say "  Model: #{settings.llm_model}"
  # The profile list is only shown when there is more than one to choose from.
  profiles = settings.available_profiles
  say "  Available profiles: #{profiles.join(', ')}" if profiles.size > 1

  heading.call("UMAP")
  say "  Reduced dimensions: #{settings.get('umap.reduced_dimensions', Ragnar::DEFAULT_REDUCED_DIMENSIONS)}"
  say "  N neighbors: #{settings.get('umap.n_neighbors', 15)}"
  say "  Min distance: #{settings.get('umap.min_dist', 0.1)}"

  heading.call("Query")
  reranking = settings.enable_reranking?
  say "  Top K: #{settings.query_top_k}"
  say "  Query rewriting: #{settings.enable_query_rewriting?}"
  say "  Reranking: #{reranking}"
  say "  Reranker model: #{settings.reranker_model}" if reranking
end

#index(path) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/ragnar/cli.rb', line 53

# Indexes the file or directory at +path+ and prints a short summary.
#
# Values from +options+ (:db_path, :chunk_size, :chunk_overlap, :model) take
# precedence over the corresponding Config settings. Exits with status 1 when
# the path does not exist or when indexing raises.
#
# @param path [String] location to index; "~" is expanded
def index(path)
  absolute_path = File.expand_path(path)

  unless File.exist?(absolute_path)
    say "Error: Path does not exist: #{absolute_path}", :red
    exit 1
  end

  say "Indexing files from: #{path}", :green

  # Debug options in interactive mode
  puts "Debug - options: #{options.inspect}" if ENV['DEBUG']

  settings = Config.instance
  database_location = options[:db_path] || settings.database_path

  # Cached handles for this database are dropped before new content is indexed.
  @@cached_database = @@cached_query_processor = nil if @@cached_db_path == database_location

  indexer_settings = {
    db_path: database_location,
    chunk_size: options[:chunk_size] || settings.chunk_size,
    chunk_overlap: options[:chunk_overlap] || settings.chunk_overlap,
    embedding_model: options[:model] || settings.embedding_model,
    show_progress: settings.show_progress?
  }
  indexer = Indexer.new(**indexer_settings)

  begin
    summary = indexer.index_path(absolute_path)
    say "\nIndexing complete!", :green
    say "Files processed: #{summary[:files_processed]}"
    say "Chunks created: #{summary[:chunks_created]}"
    say "Errors: #{summary[:errors]}" if summary[:errors] > 0
  rescue => error
    say "Error during indexing: #{error.message}", :red
    exit 1
  end
end

#init_config ⇒ Object



625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
# File 'lib/ragnar/cli.rb', line 625

# Generates a .ragnar.yml config file, either in the home directory
# (+options[:global]+) or in the current working directory.
#
# An existing file is left untouched unless +options[:force]+ is set.
def init_config
  settings = Config.instance
  target = options[:global] ? File.expand_path('~/.ragnar.yml') : File.join(Dir.pwd, '.ragnar.yml')

  # Refuse to overwrite an existing file unless explicitly forced.
  if File.exist?(target) && !options[:force]
    say "Config file already exists at: #{target}", :yellow
    say "Use --force to overwrite, or choose a different location.", :yellow
    return
  end

  written_to = settings.generate_config_file(target)
  say "Config file created at: #{written_to}", :green
  say "Edit this file to customize Ragnar's behavior.", :cyan

  # Point out which config file the running session reports as in use.
  return unless settings.config_exists?

  say "\nNote: Currently using config from: #{settings.config_file_path}", :yellow
end

#model ⇒ Object



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# File 'lib/ragnar/cli.rb', line 398

# Prints the active LLM profile. For the 'red_candle' provider it also reports
# the embedding model and whether the model directory is present in the
# HuggingFace cache; for any other provider it reports whether an API key is
# available (from config, or from the provider's environment variable).
def model
  settings = Config.instance

  say "\nLLM Model Configuration:", :cyan
  say "-" * 40

  say "\nProfile: #{settings.llm_profile_name}", :green
  say "  Provider: #{settings.llm_provider}"
  say "  Model: #{settings.llm_model}"

  provider = settings.llm_provider

  # Only show GGUF/local file info for local providers
  if provider == 'red_candle'
    say "\nEmbedding Model: #{settings.embedding_model}"

    # The cache directory name is "models--" plus the model id with "/" -> "--".
    repo_dir = "models--#{settings.llm_model.gsub('/', '--')}"
    cached_at = File.join(File.expand_path("~/.cache/huggingface/hub"), repo_dir)
    if Dir.exist?(cached_at)
      say "\nModel cached: #{cached_at}", :green
    else
      say "\nModel not yet downloaded (will download on first use)", :yellow
    end
  else
    # Environment variable consulted per provider; unknown providers have none.
    env_var = { 'anthropic' => 'ANTHROPIC_API_KEY', 'openai' => 'OPENAI_API_KEY' }[provider]
    key_present = settings.llm_api_key || (env_var && ENV[env_var])
    status, color = key_present ? ['configured', :green] : ['not set', :red]
    say "\nAPI key: #{status}", color
  end
end

#profile(name = nil) ⇒ Object



433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/ragnar/cli.rb', line 433

# Switches the active LLM profile, or lists every profile when called
# without a name.
#
# @param name [String, nil] profile to activate; nil (the default) lists
#   the configured profiles and marks the active one
def profile(name = nil)
  settings = Config.instance

  if name
    begin
      settings.set_active_profile(name)
      # The LLM manager's cache is cleared right after the switch.
      LLMManager.instance.clear_cache
      say "Switched to profile: #{name}", :green
      say "  Provider: #{settings.llm_provider}"
      say "  Model: #{settings.llm_model}"
    rescue ArgumentError => error
      # An ArgumentError from the switch is shown as a plain red message.
      say error.message, :red
    end
  else
    say "\nLLM Profiles:", :cyan
    say "-" * 40
    settings.llm_profiles.each do |label, details|
      is_active = label == settings.llm_profile_name
      say "  #{label}#{is_active ? ' (active)' : ''}", is_active ? :green : :white
      say "    Provider: #{details['provider']}"
      say "    Model: #{details['model']}"
    end
  end
end

#query(question) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/ragnar/cli.rb', line 250

# Runs +question+ through the query processor and prints the result.
#
# Output depends on the mode: raw JSON with +options[:json]+, a compact
# answer in interactive sessions, or the full report otherwise. Any error
# raised while querying or printing is reported and ends the process with
# status 1.
#
# @param question [String] the question to answer
def query(question)
  apply_profile!
  puts "Debug - Query called with: #{question.inspect}" if ENV['DEBUG']
  puts "Debug - Options: #{options.inspect}" if ENV['DEBUG']

  db_location = options[:db_path] || Config.instance.database_path
  processor = get_cached_query_processor(db_location)
  puts "Debug - Processor: #{processor.class}" if ENV['DEBUG']

  # Output fragments shared by the interactive and the full CLI layout.
  show_confidence = lambda do |res|
    say "\nConfidence: #{res[:confidence]}%", :magenta if res[:confidence]
  end
  show_sources = lambda do |res|
    next unless res[:sources] && !res[:sources].empty?

    say "\nSources:", :blue
    res[:sources].each_with_index do |source, idx|
      say "  #{idx + 1}. #{source[:source_file]}" if source[:source_file]
    end
  end

  begin
    settings = Config.instance
    query_options = {
      top_k: options[:top_k] || settings.query_top_k,
      verbose: options[:verbose] || @@verbose_mode,
      enable_rewriting: settings.enable_query_rewriting?,
      # An explicit --rerank / --no-rerank wins over the configured default.
      enable_reranking: options[:rerank].nil? ? settings.enable_reranking? : options[:rerank]
    }
    result = processor.query(question, **query_options)
    puts "Debug - Result keys: #{result.keys}" if ENV['DEBUG']

    if options[:json]
      puts JSON.pretty_generate(result)
    elsif interactive?
      # Clean output for interactive mode - just answer, confidence, and sources
      say "" # blank line before the answer
      say result[:answer]
      show_confidence.call(result)
      show_sources.call(result)
      say "" # blank line after the block
    else
      # Full output for CLI mode
      rule = "=" * 60
      say "\n" + rule, :green
      say "Query: #{result[:query]}", :cyan
      say "Clarified: #{result[:clarified]}", :yellow if result[:clarified] != result[:query]

      say "\nAnswer:", :green
      say result[:answer]
      show_confidence.call(result)
      show_sources.call(result)

      if options[:verbose] && result[:sub_queries]
        say "\nSub-queries used:", :yellow
        result[:sub_queries].each { |sub_query| say "  - #{sub_query}" }
      end

      say rule, :green
    end
  rescue => error
    say "Error processing query: #{error.message}", :red
    puts "Debug - Full backtrace: #{error.backtrace.join("\n")}" if ENV['DEBUG']
    exit 1
  end
end

#reset ⇒ Object



476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# File 'lib/ragnar/cli.rb', line 476

# Deletes the database, the UMAP model files and/or the cache, after showing
# a preview of what will be removed and asking for confirmation.
#
# Targets are chosen through +options+ (:database, :models, :cache, :all);
# when none is selected, everything is reset. +options[:force]+ skips the
# confirmation prompt and +options[:db_path]+ overrides the configured
# database location. The "history" entry inside the cache directory is
# always kept.
#
# @return [void]
def reset
  # Determine what to reset
  reset_all = options[:all]
  reset_db = options[:database] || reset_all
  reset_models = options[:models] || reset_all
  reset_cache = options[:cache] || reset_all

  # If no specific options, default to all
  reset_db = reset_models = reset_cache = true unless reset_db || reset_models || reset_cache

  # Build confirmation message
  items_to_reset = []
  items_to_reset << "database" if reset_db
  items_to_reset << "UMAP models" if reset_models
  items_to_reset << "cache" if reset_cache

  # Get paths that will be affected
  config = Config.instance
  db_path = options[:db_path] || config.database_path
  model_path = File.join(config.models_dir, "umap_model.bin")
  # Single source of truth for both the preview and the deletion step.
  model_files = [
    model_path,
    model_path.sub(/\.bin$/, '_metadata.json'),
    model_path.sub(/\.bin$/, '_embeddings.json') # Old format, if exists
  ]

  # Show what will be deleted
  say "\nWARNING: This will delete the following:", :red
  say "-" * 40

  if reset_db
    say "Database: #{db_path}", :cyan
    if File.exist?(db_path)
      # Best effort: an unreadable database must not break the preview.
      stats = begin
        Database.new(db_path).get_stats
      rescue StandardError
        nil
      end
      if stats
        say "  (#{stats[:total_documents]} documents, #{stats[:total_chunks]} chunks)", :white
      end
    else
      say "  (does not exist)", :white
    end
  end

  if reset_models
    say "UMAP models:", :cyan
    existing_models = model_files.select { |file| File.exist?(file) }
    existing_models.each do |file|
      say "  #{file} (#{(File.size(file) / 1024.0).round(1)} KB)", :white
    end
    say "  (no models found)", :white if existing_models.empty?
  end

  if reset_cache
    # Resolved only when the cache is targeted, so other resets do not
    # depend on the home directory being resolvable.
    cache_dir = File.expand_path("~/.cache/ragnar")
    say "Cache directory: #{cache_dir}", :cyan
    if Dir.exist?(cache_dir)
      cache_size = Dir.glob(File.join(cache_dir, "**/*"))
        .select { |f| File.file?(f) }
        .sum { |f| File.size(f) } / 1024.0 / 1024.0
      say "  (#{cache_size.round(1)} MB)", :white
    else
      say "  (does not exist)", :white
    end
  end

  say "-" * 40

  # Ask for confirmation unless --force
  unless options[:force]
    message = "\nAre you sure you want to reset #{items_to_reset.join(', ')}?"

    # Check if we're in interactive mode
    if ENV['THOR_INTERACTIVE_SESSION'] == 'true'
      # In interactive mode, use a simple prompt
      say message, :yellow
      response = ask("Type 'yes' to confirm, anything else to cancel:", :yellow)
      # A nil answer (no input available) counts as "cancel" instead of
      # raising NoMethodError in the middle of a destructive command.
      confirmed = response.to_s.downcase == 'yes'
    else
      # In CLI mode, use Thor's yes? method
      confirmed = yes?(message + " (y/N)", :yellow)
    end

    unless confirmed
      say "\nReset cancelled.", :cyan
      return
    end
  end

  # Perform the reset
  say "\nResetting...", :green

  if reset_db && File.exist?(db_path)
    say "Removing database: #{db_path}"
    FileUtils.rm_rf(db_path)
    say "  ✓ Database removed", :green
  end

  if reset_models
    model_files.each do |file|
      # Existence is re-checked here; the preview may be stale by now.
      next unless File.exist?(file)

      say "Removing model file: #{file}"
      FileUtils.rm_f(file)
      say "  ✓ Removed", :green
    end
  end

  if reset_cache
    # Clear in-memory cache
    clear_cache

    # Remove cache directory contents except the history entry, which is
    # skipped in place (no need to read or rewrite it).
    if Dir.exist?(cache_dir)
      Dir.glob(File.join(cache_dir, "*")).each do |item|
        next if File.basename(item) == "history"

        if File.directory?(item)
          FileUtils.rm_rf(item)
        else
          FileUtils.rm_f(item)
        end
        say "Removed cache item: #{File.basename(item)}", :green
      end
    end
    say "  ✓ Cache cleared", :green
  end

  say "\nReset complete!", :green
  say "You can now start fresh with 'ragnar index <path>'", :cyan
end

#search(query_text) ⇒ Object



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/ragnar/cli.rb', line 212

# Embeds +query_text+ and prints the most similar chunks from the database.
#
# Reads +options[:database]+ (falling back to the configured database path),
# +options[:k]+ for the number of results and +options[:show_scores]+ to
# include the distance of each hit.
#
# @param query_text [String] text to search for
def search(query_text)
  db_location = options[:database] || Config.instance.database_path
  store = get_cached_database(db_location)
  embedder = get_cached_embedder

  # Embed the query, then look up its nearest neighbours.
  query_vector = embedder.embed_text(query_text)
  matches = store.search_similar(query_vector, k: options[:k])

  if matches.empty?
    say "No results found.", :yellow
    return
  end

  say "Found #{matches.length} results:\n", :green

  matches.each_with_index do |match, position|
    say "#{position + 1}. File: #{match[:file_path]}", :cyan
    say "   Chunk: #{match[:chunk_index]}"
    say "   Distance: #{match[:distance].round(4)}" if options[:show_scores]

    # Preview: leading slice of the chunk with whitespace runs collapsed.
    snippet = match[:chunk_text][0..200].gsub(/\s+/, ' ')
    say "   Content: #{snippet}..."
    say ""
  end
end

#stats ⇒ Object



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/ragnar/cli.rb', line 327

# Prints the statistics reported by the database's +get_stats+.
#
# Chunk-level details are shown only when the database holds at least one
# chunk. Any error is reported and ends the process with status 1.
def stats
  store = get_cached_database(options[:db_path] || Config.instance.database_path)
  summary = store.get_stats

  say "\nDatabase Statistics", :green
  say "-" * 30

  # Label => key in the stats hash, printed in this order.
  {
    "Total documents" => :total_documents,
    "Unique files" => :unique_files,
    "Total chunks" => :total_chunks,
    "With embeddings" => :with_embeddings,
    "With reduced embeddings" => :with_reduced_embeddings
  }.each { |label, key| say "#{label}: #{summary[key]}" }

  return unless summary[:total_chunks] > 0

  say "\nAverage chunk size: #{summary[:avg_chunk_size]} characters"
  say "Embedding dimensions: #{summary[:embedding_dims]}"
  say "Reduced dimensions: #{summary[:reduced_dims]}" if summary[:reduced_dims]
rescue => error
  say "Error reading database: #{error.message}", :red
  exit 1
end

#topics ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/ragnar/cli.rb', line 109

# Clusters the indexed documents into topics and prints them.
#
# Loads every document that has an embedding, prefers the stored reduced
# embeddings when the first document has one, and hands the embeddings,
# chunk texts and per-chunk metadata (file path and chunk index) to
# Ragnar::TopicModeling::Engine. With +options[:summarize]+ each topic also
# gets an LLM summary; with +options[:export]+ the topics are exported.
#
# Exits with status 1 when no embedded documents are available or when
# topic extraction raises.
def topics
  apply_profile!
  # Loaded lazily so the topic modeling code is only required by this command.
  require_relative 'topic_modeling'

  say "Extracting topics from indexed documents...", :green

  # Load embeddings and documents from database - use cache in interactive mode
  database = get_cached_database(options[:db_path] || Config.instance.database_path)

  begin
    # Get all documents with embeddings
    stats = database.get_stats
    if stats[:with_embeddings] == 0
      say "No documents with embeddings found. Please index some documents first.", :red
      exit 1
    end

    say "Loading #{stats[:with_embeddings]} documents...", :yellow

    # Get all documents with embeddings
    docs_with_embeddings = database.get_all_documents_with_embeddings

    if docs_with_embeddings.empty?
      say "Could not load documents from database. Please check your database.", :red
      exit 1
    end

    # Check if we have reduced embeddings available
    first_doc = docs_with_embeddings.first
    has_reduced = first_doc[:reduced_embedding] && !first_doc[:reduced_embedding].empty?

    if has_reduced
      embeddings = docs_with_embeddings.map { |d| d[:reduced_embedding] }
      say "Using reduced embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
      # Already reduced, so don't reduce again in the engine
      reduce_dims = false
    else
      embeddings = docs_with_embeddings.map { |d| d[:embedding] }
      say "Using original embeddings (#{embeddings.first.size} dimensions)", :yellow if options[:verbose]
      # Let the engine handle dimensionality reduction if needed
      reduce_dims = true
    end

    documents = docs_with_embeddings.map { |d| d[:chunk_text] }
    # One entry per document, in the same order as +embeddings+ and +documents+.
    metadata = docs_with_embeddings.map { |d| { file_path: d[:file_path], chunk_index: d[:chunk_index] } }

    say "Loaded #{embeddings.length} embeddings and #{documents.length} documents", :yellow if options[:verbose]

    # Initialize topic modeling engine
    engine = Ragnar::TopicModeling::Engine.new(
      min_cluster_size: options[:min_cluster_size],
      labeling_method: options[:method].to_sym,
      verbose: options[:verbose],
      reduce_dimensions: reduce_dims
    )

    # Extract topics
    say "Clustering documents...", :yellow
    topics = engine.fit(
      embeddings: embeddings,
      documents: documents,
      metadata: metadata
    )

    # Generate summaries if requested
    if options[:summarize] && topics.any?
      say "Generating topic summaries with LLM...", :yellow
      begin
        chat = LLMManager.instance.default_chat

        # Add summaries to topics
        topics.each_with_index do |topic, i|
          say "  Summarizing topic #{i+1}/#{topics.length}...", :yellow if options[:verbose]
          topic.instance_variable_set(:@summary, summarize_topic(topic, chat))
        end

        say "Topic summaries generated!", :green
      rescue => e
        # Summaries are optional: warn and continue with the plain topics.
        say "Warning: Could not generate topic summaries: #{e.message}", :yellow
        say "Proceeding without summaries...", :yellow
      end
    end

    # Display results
    display_topics(topics, show_summaries: options[:summarize])

    # Export if requested
    if options[:export]
      # Pass embeddings and cluster IDs for visualization
      export_topics(topics, options[:export], embeddings: embeddings, cluster_ids: engine.instance_variable_get(:@cluster_ids))
    end

  rescue => e
    say "Error extracting topics: #{e.message}", :red
    say e.backtrace.first(5).join("\n") if options[:verbose]
    exit 1
  end
end

#verbose ⇒ Object



459
460
461
462
# File 'lib/ragnar/cli.rb', line 459

# Flips the class-wide verbose flag and reports the new state
# (green for on, yellow for off).
def verbose
  @@verbose_mode = !@@verbose_mode
  label, color = @@verbose_mode ? ['on', :green] : ['off', :yellow]
  say "Verbose mode: #{label}", color
end

#version ⇒ Object



350
351
352
# File 'lib/ragnar/cli.rb', line 350

# Prints the Ragnar version as "Ragnar v<VERSION>".
def version
  say format('Ragnar v%s', Ragnar::VERSION)
end