Module: Phronomy::Eval

Defined in:
lib/phronomy/eval.rb,
lib/phronomy/eval/runner.rb,
lib/phronomy/eval/scorer.rb,
lib/phronomy/eval/dataset.rb,
lib/phronomy/eval/metrics.rb,
lib/phronomy/eval/eval_case.rb,
lib/phronomy/eval/comparison.rb,
lib/phronomy/eval/eval_result.rb,
lib/phronomy/eval/scorer/base.rb,
lib/phronomy/eval/scorer/llm_judge.rb,
lib/phronomy/eval/scorer/exact_match.rb,
lib/phronomy/eval/scorer/includes_scorer.rb

Overview

Namespace module for the evaluation framework.

Defined Under Namespace

Modules: Scorer

Classes: Comparison, Dataset, EvalCase, EvalResult, Metrics, Runner

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#actualObject (readonly)

the callable's output



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end

#latency_msObject (readonly)

Wall-clock time taken by the callable, in milliseconds.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end

#usageObject (readonly)

The usage field of EvalResult, an immutable record holding the outcome of evaluating one EvalCase.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end

Instance Method Details

#error=(value) ⇒ Object

Set when the scorer raised an exception; nil otherwise.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end

#eval_case=(value) ⇒ Object

the original sample



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end

#score=(value) ⇒ Object

scorer-assigned value in [0.0, 1.0]



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/phronomy/eval/eval_result.rb', line 13

# Immutable value object with six fields: eval_case, actual, score, usage,
# latency_ms and error.
EvalResult = Data.define(:eval_case, :actual, :score, :usage, :latency_ms, :error) do
  # Builds a result. Every field except +error+ is a required keyword.
  #
  # @param error [Object, nil] defaults to nil when omitted
  def initialize(eval_case:, actual:, score:, usage:, latency_ms:, error: nil)
    super(eval_case:, actual:, score:, usage:, latency_ms:, error:)
  end

  # @return [Boolean] whether the score reached the perfect mark of 1.0
  def pass? = score >= 1.0

  # @return [Boolean] whether an exception from the scorer was recorded
  def scorer_error? = !error.nil?
end