Class: Legion::CLI::Eval
- Inherits: Thor
- Ancestor chain: Object → Thor → Legion::CLI::Eval
- Defined in:
- lib/legion/cli/eval_command.rb
Class Method Summary collapse
Instance Method Summary collapse
Class Method Details
.exit_on_failure? ⇒ Boolean
# File 'lib/legion/cli/eval_command.rb', line 8
#
# Thor hook: report command failures to the shell with a non-zero
# exit status instead of Thor's default of exiting 0.
#
# @return [Boolean] always true
def self.exit_on_failure?
  true
end
Instance Method Details
#compare ⇒ Object
# File 'lib/legion/cli/eval_command.rb', line 108
#
# Compare two experiment runs and summarize regressions/improvements.
#
# Thor options read: :run1, :run2 (experiment names), :json (output mode).
#
# @return [void]
# @raise [CLI::Error] when one or both experiments cannot be found
def compare
  setup_connection
  require_dataset!

  client = Legion::Extensions::Dataset::Client.new
  # NOTE(review): extraction had dropped the `options` receiver here,
  # leaving bare `[:run1]` array literals; restored the Thor options lookup.
  diff = client.compare_experiments(exp1_name: options[:run1], exp2_name: options[:run2])
  raise CLI::Error, 'One or both experiments not found' if diff[:error]

  out = formatter
  if options[:json]
    out.json(diff)
  else
    out.header("Compare: #{diff[:exp1]} vs #{diff[:exp2]}")
    out.spacer
    table_rows = [
      ['Rows compared', diff[:rows_compared].to_s],
      ['Regressions', diff[:regression_count].to_s],
      ['Improvements', diff[:improvement_count].to_s]
    ]
    out.table(%w[metric value], table_rows)
  end
ensure
  # Always release the connection, even when the compare raised.
  Connection.shutdown
end
#execute ⇒ Object
# File 'lib/legion/cli/eval_command.rb', line 23
#
# Run the evaluation suite over a dataset and emit a CI-style report.
#
# Thor options read: :dataset (dataset name), :threshold (minimum average
# score to pass), :json (output mode), :exit_code (exit 1 on failure).
#
# @return [void]
def execute
  setup_connection
  require_eval!
  require_dataset!

  # NOTE(review): extraction had dropped the `options` receiver on every
  # `[:symbol]` lookup in this method; restored the Thor options hash.
  rows = fetch_dataset_rows(options[:dataset])
  report = run_evaluations(rows)

  avg_score = report.dig(:summary, :avg_score) || 0.0
  passed = avg_score >= options[:threshold]
  ci_report = build_ci_report(report, avg_score, passed)

  if options[:json]
    formatter.json(ci_report)
  else
    render_human_report(ci_report, avg_score, passed)
  end

  # Propagate failure to CI pipelines only when explicitly requested.
  exit(1) if options[:exit_code] && !passed
ensure
  Connection.shutdown
end
#experiments ⇒ Object
# File 'lib/legion/cli/eval_command.rb', line 48
#
# List all recorded experiments as a table (or JSON with --json).
#
# Thor options read: :json (output mode).
#
# @return [void]
def experiments
  setup_connection
  require_dataset!

  client = Legion::Extensions::Dataset::Client.new
  rows = client.list_experiments

  out = formatter
  if rows.empty?
    out.warn('no experiments found')
    return
  end

  # NOTE(review): extraction had dropped the `options` receiver before
  # `[:json]`; restored the Thor options lookup.
  if options[:json]
    out.json(experiments: rows)
  else
    out.header('Experiments')
    out.spacer
    table_rows = rows.map do |r|
      # Truncate the summary so long descriptions don't wreck the table.
      [r[:id].to_s, r[:name].to_s, r[:status].to_s, r[:created_at].to_s, r[:summary].to_s[0, 60]]
    end
    out.table(%w[id name status created summary], table_rows)
  end
ensure
  Connection.shutdown
end
#promote ⇒ Object
# File 'lib/legion/cli/eval_command.rb', line 78
#
# Promote the prompt version linked to an experiment by tagging it
# (e.g. tag the winning prompt as "production").
#
# Thor options read: :experiment (experiment name), :tag (tag to apply),
# :json (output mode).
#
# @return [void]
# @raise [CLI::Error] when the experiment is missing or has no linked prompt
def promote
  setup_connection
  require_dataset!
  require_prompt!

  dataset_client = Legion::Extensions::Dataset::Client.new
  # NOTE(review): extraction had dropped the `options` receiver on every
  # `[:symbol]` lookup in this method; restored the Thor options hash.
  experiment = dataset_client.get_experiment(name: options[:experiment])
  raise CLI::Error, "Experiment '#{options[:experiment]}' not found" if experiment.nil?
  raise CLI::Error, "Experiment '#{options[:experiment]}' has no prompt linked" if experiment[:prompt_name].nil?

  prompt_client = Legion::Extensions::Prompt::Client.new
  result = prompt_client.tag_prompt(
    name: experiment[:prompt_name],
    tag: options[:tag],
    version: experiment[:prompt_version]
  )

  out = formatter
  if options[:json]
    out.json(result)
  else
    out.success("Tagged prompt '#{experiment[:prompt_name]}' v#{experiment[:prompt_version]} as '#{options[:tag]}'")
  end
ensure
  Connection.shutdown
end