Class: Legion::CLI::Eval

Inherits:
Thor
  • Object
show all
Defined in:
lib/legion/cli/eval_command.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.exit_on_failure? ⇒ Boolean

Returns:

  • (Boolean)


8
9
10
# File 'lib/legion/cli/eval_command.rb', line 8

# Tell Thor to exit the process with a non-zero status when a command fails.
def self.exit_on_failure? = true

Instance Method Details

#compare ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/legion/cli/eval_command.rb', line 108

# Compare two experiment runs and report the regression/improvement counts.
#
# Reads :run1, :run2, and :json from the Thor options hash. Raises
# CLI::Error when either experiment cannot be found. The connection is
# always shut down, even when an error is raised.
def compare
  setup_connection
  require_dataset!

  diff = Legion::Extensions::Dataset::Client.new.compare_experiments(
    exp1_name: options[:run1],
    exp2_name: options[:run2]
  )
  raise CLI::Error, 'One or both experiments not found' if diff[:error]

  out = formatter
  if options[:json]
    out.json(diff)
  else
    out.header("Compare: #{diff[:exp1]} vs #{diff[:exp2]}")
    out.spacer
    summary_rows = [['Rows compared', diff[:rows_compared].to_s],
                    ['Regressions',   diff[:regression_count].to_s],
                    ['Improvements',  diff[:improvement_count].to_s]]
    out.table(%w[metric value], summary_rows)
  end
ensure
  Connection.shutdown
end

#execute ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/legion/cli/eval_command.rb', line 23

# Run the evaluation suite against a dataset and emit a CI report.
#
# Reads :dataset, :threshold, :json, and :exit_code from the Thor options
# hash. When :exit_code is set and the average score falls below the
# threshold, the process exits with status 1 (the ensure block still runs).
def execute
  setup_connection
  require_eval!
  require_dataset!

  dataset_rows = fetch_dataset_rows(options[:dataset])
  report       = run_evaluations(dataset_rows)

  avg_score = report.dig(:summary, :avg_score) || 0.0
  passed    = avg_score >= options[:threshold]
  ci_report = build_ci_report(report, avg_score, passed)

  options[:json] ? formatter.json(ci_report) : render_human_report(ci_report, avg_score, passed)

  exit(1) if options[:exit_code] && !passed
ensure
  Connection.shutdown
end

#experiments ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/legion/cli/eval_command.rb', line 48

# List all recorded experiments.
#
# In JSON mode this now always emits valid JSON — previously an empty
# experiment list produced a plain-text warning even under --json,
# breaking machine consumers that pipe the output to a parser. Human
# mode prints a table, truncating each summary to 60 characters.
def experiments
  setup_connection
  require_dataset!

  rows = Legion::Extensions::Dataset::Client.new.list_experiments
  out  = formatter

  if options[:json]
    # Emit JSON unconditionally (an empty list when nothing is found).
    out.json(experiments: rows)
    return
  end

  if rows.empty?
    out.warn('no experiments found')
    return
  end

  out.header('Experiments')
  out.spacer
  table_rows = rows.map do |r|
    [r[:id].to_s, r[:name].to_s, r[:status].to_s, r[:created_at].to_s, r[:summary].to_s[0, 60]]
  end
  out.table(%w[id name status created summary], table_rows)
ensure
  Connection.shutdown
end

#promote ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/legion/cli/eval_command.rb', line 78

# Promote an experiment's linked prompt version by applying a tag to it.
#
# Reads :experiment, :tag, and :json from the Thor options hash. Raises
# CLI::Error when the experiment is missing or has no prompt linked.
# The connection is always shut down, even when an error is raised.
def promote
  setup_connection
  require_dataset!
  require_prompt!

  experiment = Legion::Extensions::Dataset::Client.new.get_experiment(name: options[:experiment])
  raise CLI::Error, "Experiment '#{options[:experiment]}' not found" if experiment.nil?
  raise CLI::Error, "Experiment '#{options[:experiment]}' has no prompt linked" if experiment[:prompt_name].nil?

  result = Legion::Extensions::Prompt::Client.new.tag_prompt(
    name:    experiment[:prompt_name],
    tag:     options[:tag],
    version: experiment[:prompt_version]
  )

  if options[:json]
    formatter.json(result)
  else
    formatter.success("Tagged prompt '#{experiment[:prompt_name]}' v#{experiment[:prompt_version]} as '#{options[:tag]}'")
  end
ensure
  Connection.shutdown
end