Class: CompletionKit::Run
- Inherits: ApplicationRecord (CompletionKit::Run < ApplicationRecord < ActiveRecord::Base < Object)
- Includes:
- Turbo::Broadcastable
- Defined in:
- app/models/completion_kit/run.rb
Constant Summary
- STATUSES =
%w[pending running completed failed].freeze
Constants inherited from ApplicationRecord
ApplicationRecord::TenantScopedUniquenessValidator
Instance Method Summary
- #as_json(options = {}) ⇒ Object
- #avg_score ⇒ Object
- #generate_responses! ⇒ Object
- #judge_configured? ⇒ Boolean
- #mark_completed! ⇒ Object
- #metric_averages ⇒ Object
- #outstanding_work_zero? ⇒ Boolean
- #progress_snapshot ⇒ Object
- #replace_metrics!(metric_ids) ⇒ Object
- #start! ⇒ Object
Instance Method Details
#as_json(options = {}) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'app/models/completion_kit/run.rb', line 139 def as_json( = {}) snap = progress_snapshot { id: id, name: name, status: status, prompt_id: prompt_id, dataset_id: dataset_id, judge_model: judge_model, temperature: temperature, created_at: created_at, updated_at: updated_at, responses_count: responses.count, avg_score: avg_score, progress_current: snap[:generated_done], progress_total: snap[:generated_total], progress: { generated: { done: snap[:generated_done], total: snap[:generated_total], failed: snap[:generated_failed] }, judged: { done: snap[:judged_done], total: snap[:judged_total], failed: snap[:judged_failed] } }, failed_response_ids: responses.where(status: "failed").pluck(:id), failure_summary: failure_summary, error_message: , metric_ids: metric_ids } end |
#avg_score ⇒ Object
# Mean of all AI review scores across every response in this run,
# rounded to two decimals. Returns nil when no review has a score yet.
#
# @return [Float, nil]
def avg_score
  numeric_scores = responses
                   .flat_map(&:reviews)
                   .map(&:ai_score)
                   .compact
                   .map(&:to_f)
  return nil if numeric_scores.empty?

  (numeric_scores.sum / numeric_scores.size).round(2)
end
#generate_responses! ⇒ Object
# Backwards-compatible entry point retained for existing callers;
# all work is delegated to #start!.
#
# @return [Object] whatever #start! returns
def generate_responses!
  start!
end
#judge_configured? ⇒ Boolean
# Whether automated judging can run: a judge model is set, at least one
# metric is attached, and the API credentials for that model are valid.
#
# @return [Boolean]
def judge_configured?
  return false if judge_model.blank?

  metrics.any? && ApiConfig.valid_for_model?(judge_model)
end
#mark_completed! ⇒ Object
# Transitions the run to its terminal "completed" status and pushes the
# updated state to any subscribed UI (Turbo broadcast).
#
# @return [void]
# @raise [ActiveRecord::RecordInvalid] if the status update fails validation
def mark_completed!
  update!(status: "completed")
  broadcast_ui
end
#metric_averages ⇒ Object
# Per-metric average AI scores across all reviews in this run.
# Reviews without a score are ignored.
#
# @return [Array<Hash>] one `{ name:, avg: }` entry per metric name,
#   with the average rounded to one decimal.
def metric_averages
  scored_reviews = responses
                   .flat_map(&:reviews)
                   .select { |review| review.ai_score.present? }

  scored_reviews.group_by(&:metric_name).map do |metric_name, grouped|
    values = grouped.map { |review| review.ai_score.to_f }
    { name: metric_name, avg: (values.sum / values.length).round(1) }
  end
end
#outstanding_work_zero? ⇒ Boolean
# True when no generation or judging work remains: every response is in a
# terminal status AND every succeeded response has a terminal review for
# every configured metric. Used to decide whether the run can be finalized.
#
# @return [Boolean]
def outstanding_work_zero?
  # Any response still mid-flight means work is outstanding.
  return false if responses.where.not(status: Response::TERMINAL_STATUSES).exists?

  configured_metric_ids = metrics.pluck(:id)
  # No metrics configured -> nothing to judge.
  return true if configured_metric_ids.empty?

  succeeded_ids = responses.where(status: "succeeded").pluck(:id)
  expected = succeeded_ids.size * configured_metric_ids.size
  # Nothing succeeded -> no reviews expected.
  return true if expected.zero?

  finished = Review.where(
    response_id: succeeded_ids,
    metric_id: configured_metric_ids,
    status: Review::TERMINAL_STATUSES
  ).count

  finished >= expected
end
#progress_snapshot ⇒ Object
# Counts generation and judging progress for this run in one pass.
#
# @return [Hash] keys :generated_done/:generated_total/:generated_failed
#   and :judged_done/:judged_total/:judged_failed.
def progress_snapshot
  succeeded_count = responses.where(status: "succeeded").count
  failed_count = responses.where(status: "failed").count

  # One review is expected per succeeded response per metric.
  expected_judged = succeeded_count * metrics.count

  run_reviews = Review.joins(:response)
                      .where(completion_kit_responses: { run_id: id })

  {
    generated_done: succeeded_count,
    generated_total: progress_total,
    generated_failed: failed_count,
    judged_done: run_reviews.where(status: "succeeded").count,
    judged_total: expected_judged,
    judged_failed: run_reviews.where(status: "failed").count
  }
end
#replace_metrics!(metric_ids) ⇒ Object
# Replaces this run's metric associations with the given ids, preserving
# the caller-supplied order via 1-based positions. A nil/false argument is
# a no-op; blank entries are discarded.
#
# @param metric_ids [Array, nil] metric ids in the desired order
# @return [void]
def replace_metrics!(metric_ids)
  return unless metric_ids

  run_metrics.delete_all
  Array(metric_ids).reject(&:blank?).each.with_index(1) do |metric_id, position|
    run_metrics.create!(metric_id: metric_id, position: position)
  end
end
#start! ⇒ Object
# Kicks off a run: loads dataset rows (or a single empty row when there is
# no dataset), validates the LLM client configuration, resets prior
# responses, creates a pending response per row, and enqueues one
# GenerateRowJob per response.
#
# @return [true] on successful start
# @return [Object] the result of #fail_with_summary! when preconditions fail
def start!
  rows =
    if dataset
      CsvProcessor.process_self(self)
    else
      # No dataset: run the prompt once with no row input.
      [{}]
    end

  return fail_with_summary!("Dataset has no rows") if rows.empty?

  client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
  unless client.configured?
    return fail_with_summary!("LLM API not configured: #{client.configuration_errors.join(', ')}")
  end

  created_response_ids = []
  transaction do
    responses.destroy_all
    update!(
      status: "running",
      progress_current: 0,
      progress_total: rows.length,
      failure_summary: nil,
      error_message: nil
    )

    rows.each_with_index do |row, index|
      input = row.empty? ? nil : row.to_json
      response = responses.create!(
        status: "pending",
        row_index: index,
        input_data: input,
        expected_output: row["expected_output"]
      )
      created_response_ids << response.id
    end
  end

  # Enqueue only after the transaction commits; enqueueing inside it risks
  # a job running before the response rows are visible (RecordNotFound).
  created_response_ids.each do |response_id|
    GenerateRowJob.perform_later(id, response_id)
  end

  broadcast_ui
  broadcast_clear_responses
  true
end