Class: Legion::CLI::Eval

Inherits:
Thor
  • Object
show all
Defined in:
lib/legion/cli/eval_command.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.exit_on_failure? ⇒ Boolean

Returns:

  • (Boolean)


8
9
10
# File 'lib/legion/cli/eval_command.rb', line 8

# Tell Thor to exit the process with a non-zero status when a command fails.
def self.exit_on_failure? = true

Instance Method Details

#compare ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/legion/cli/eval_command.rb', line 108

# Compare two experiment runs and report the regression/improvement counts.
#
# Reads :run1, :run2, and :json from the Thor options hash. Raises
# CLI::Error when either experiment cannot be found. The connection is
# always shut down, even when an error is raised.
def compare
  setup_connection
  require_dataset!

  diff = Legion::Extensions::Dataset::Client.new.compare_experiments(
    exp1_name: options[:run1],
    exp2_name: options[:run2]
  )
  raise CLI::Error, 'One or both experiments not found' if diff[:error]

  out = formatter
  if options[:json]
    out.json(diff)
  else
    out.header("Compare: #{diff[:exp1]} vs #{diff[:exp2]}")
    out.spacer
    summary_rows = [['Rows compared', diff[:rows_compared].to_s],
                    ['Regressions',   diff[:regression_count].to_s],
                    ['Improvements',  diff[:improvement_count].to_s]]
    out.table(%w[metric value], summary_rows)
  end
ensure
  Connection.shutdown
end

#execute ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/legion/cli/eval_command.rb', line 23

# Run the evaluation suite against a dataset and emit a CI report.
#
# Reads :dataset, :threshold, :json, and :exit_code from the Thor options
# hash. When :exit_code is set and the average score falls below the
# threshold, the process exits with status 1 (the ensure block still runs).
def execute
  setup_connection
  require_eval!
  require_dataset!

  dataset_rows = fetch_dataset_rows(options[:dataset])
  report       = run_evaluations(dataset_rows)

  avg_score = report.dig(:summary, :avg_score) || 0.0
  passed    = avg_score >= options[:threshold]
  ci_report = build_ci_report(report, avg_score, passed)

  options[:json] ? formatter.json(ci_report) : render_human_report(ci_report, avg_score, passed)

  exit(1) if options[:exit_code] && !passed
ensure
  Connection.shutdown
end

#experiments ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/legion/cli/eval_command.rb', line 48

# List all recorded experiments.
#
# In JSON mode this now always emits valid JSON — previously an empty
# experiment list produced a plain-text warning even under --json,
# breaking machine consumers that pipe the output to a parser. Human
# mode prints a table, truncating each summary to 60 characters.
def experiments
  setup_connection
  require_dataset!

  rows = Legion::Extensions::Dataset::Client.new.list_experiments
  out  = formatter

  if options[:json]
    # Emit JSON unconditionally (an empty list when nothing is found).
    out.json(experiments: rows)
    return
  end

  if rows.empty?
    out.warn('no experiments found')
    return
  end

  out.header('Experiments')
  out.spacer
  table_rows = rows.map do |r|
    [r[:id].to_s, r[:name].to_s, r[:status].to_s, r[:created_at].to_s, r[:summary].to_s[0, 60]]
  end
  out.table(%w[id name status created summary], table_rows)
ensure
  Connection.shutdown
end

#promote ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/legion/cli/eval_command.rb', line 78

# Promote an experiment's linked prompt version by applying a tag to it.
#
# Reads :experiment, :tag, and :json from the Thor options hash. Raises
# CLI::Error when the experiment is missing or has no prompt linked.
# The connection is always shut down, even when an error is raised.
def promote
  setup_connection
  require_dataset!
  require_prompt!

  experiment = Legion::Extensions::Dataset::Client.new.get_experiment(name: options[:experiment])
  raise CLI::Error, "Experiment '#{options[:experiment]}' not found" if experiment.nil?
  raise CLI::Error, "Experiment '#{options[:experiment]}' has no prompt linked" if experiment[:prompt_name].nil?

  result = Legion::Extensions::Prompt::Client.new.tag_prompt(
    name:    experiment[:prompt_name],
    tag:     options[:tag],
    version: experiment[:prompt_version]
  )

  if options[:json]
    formatter.json(result)
  else
    formatter.success("Tagged prompt '#{experiment[:prompt_name]}' v#{experiment[:prompt_version]} as '#{options[:tag]}'")
  end
ensure
  Connection.shutdown
end