Module: SkillBench
- Defined in:
- lib/skill_bench/interactive.rb,
lib/skill_bench/cli.rb,
lib/skill_bench/task.rb,
lib/skill_bench/agent.rb,
lib/skill_bench/judge.rb,
lib/skill_bench/tools.rb,
lib/skill_bench/client.rb,
lib/skill_bench/config.rb,
lib/skill_bench/runner.rb,
lib/skill_bench/version.rb,
lib/skill_bench/criteria.rb,
lib/skill_bench/dimension.rb,
lib/skill_bench/execution.rb,
lib/skill_bench/evaluation.rb,
lib/skill_bench/tools/base.rb,
lib/skill_bench/judge/judge.rb,
lib/skill_bench/models/eval.rb,
lib/skill_bench/agent/runner.rb,
lib/skill_bench/commands/run.rb,
lib/skill_bench/config/store.rb,
lib/skill_bench/delta_report.rb,
lib/skill_bench/error_logger.rb,
lib/skill_bench/judge/prompt.rb,
lib/skill_bench/models/skill.rb,
lib/skill_bench/agent/summary.rb,
lib/skill_bench/commands/init.rb,
lib/skill_bench/models/config.rb,
lib/skill_bench/trend_tracker.rb,
lib/skill_bench/config/applier.rb,
lib/skill_bench/judge/response.rb,
lib/skill_bench/task/evaluator.rb,
lib/skill_bench/tools/registry.rb,
lib/skill_bench/cli/run_command.rb,
lib/skill_bench/config/defaults.rb,
lib/skill_bench/models/provider.rb,
lib/skill_bench/tools/read_file.rb,
lib/skill_bench/cli/eval_command.rb,
lib/skill_bench/cli/help_printer.rb,
lib/skill_bench/cli/init_command.rb,
lib/skill_bench/evaluate_command.rb,
lib/skill_bench/history_recorder.rb,
lib/skill_bench/output_formatter.rb,
lib/skill_bench/package_verifier.rb,
lib/skill_bench/task/file_reader.rb,
lib/skill_bench/tools/dispatcher.rb,
lib/skill_bench/tools/write_file.rb,
lib/skill_bench/agent/react_agent.rb,
lib/skill_bench/cli/skill_command.rb,
lib/skill_bench/commands/eval_new.rb,
lib/skill_bench/evaluation/runner.rb,
lib/skill_bench/execution/sandbox.rb,
lib/skill_bench/tools/run_command.rb,
lib/skill_bench/cli/result_printer.rb,
lib/skill_bench/commands/skill_new.rb,
lib/skill_bench/config/json_loader.rb,
lib/skill_bench/clients/base_client.rb,
lib/skill_bench/config/env_overrides.rb,
lib/skill_bench/evaluation/generator.rb,
lib/skill_bench/cli/eval/eval_options.rb,
lib/skill_bench/clients/retry_handler.rb,
lib/skill_bench/config/facade_readers.rb,
lib/skill_bench/config/facade_writers.rb,
lib/skill_bench/rails/skill_templates.rb,
lib/skill_bench/tools/argument_parser.rb,
lib/skill_bench/agent/react_agent/step.rb,
lib/skill_bench/cli/eval/eval_commands.rb,
lib/skill_bench/clients/providers/groq.rb,
lib/skill_bench/clients/provider_config.rb,
lib/skill_bench/clients/request_builder.rb,
lib/skill_bench/clients/response_parser.rb,
lib/skill_bench/services/json_formatter.rb,
lib/skill_bench/services/runner_service.rb,
lib/skill_bench/services/skill_resolver.rb,
lib/skill_bench/clients/provider_schemas.rb,
lib/skill_bench/clients/providers/gemini.rb,
lib/skill_bench/clients/providers/ollama.rb,
lib/skill_bench/clients/providers/openai.rb,
lib/skill_bench/services/junit_formatter.rb,
lib/skill_bench/clients/provider_registry.rb,
lib/skill_bench/models/criteria_validator.rb,
lib/skill_bench/trend_tracker/persistence.rb,
lib/skill_bench/clients/providers/deepseek.rb,
lib/skill_bench/clients/providers/opencode.rb,
lib/skill_bench/execution/context_hydrator.rb,
lib/skill_bench/services/template_registry.rb,
lib/skill_bench/clients/providers/anthropic.rb,
lib/skill_bench/migration/provider_migrator.rb,
lib/skill_bench/services/feedback_generator.rb,
lib/skill_bench/services/formatting_helpers.rb,
lib/skill_bench/clients/providers/openrouter.rb,
lib/skill_bench/services/iteration_formatter.rb,
lib/skill_bench/agent/react_agent/loop_runner.rb,
lib/skill_bench/clients/providers/null_client.rb,
lib/skill_bench/history_recorder/history_file.rb,
lib/skill_bench/cli/eval/eval_command_registry.rb,
lib/skill_bench/clients/providers/azure_openai.rb,
lib/skill_bench/clients/response_error_handler.rb,
lib/skill_bench/execution/source_path_resolver.rb,
lib/skill_bench/services/delta_table_formatter.rb,
lib/skill_bench/services/option_parser_service.rb,
lib/skill_bench/trend_tracker/trend_calculator.rb,
lib/skill_bench/agent/react_agent/tool_executor.rb,
lib/skill_bench/services/result_printer_service.rb,
lib/skill_bench/history_recorder/summary_service.rb,
lib/skill_bench/services/judge_score_parser_service.rb,
lib/skill_bench/services/output_persistence_service.rb,
lib/skill_bench/history_recorder/persistence_service.rb,
lib/skill_bench/history_recorder/history_path_resolver.rb,
lib/skill_bench/services/template_registry/category_data.rb
Overview
Top-level namespace for the Rails Agent Evaluator.
Defined Under Namespace
Modules: Agent, Cli, Clients, Commands, ErrorLogger, Evaluation, Execution, Interactive, Judge, Migration, Models, Rails, Services, Task, Tools
Classes: CLI, Client, Config, Criteria, DeltaReport, Dimension, EvaluateCommand, HelpRequested, HistoryRecorder, OutputFormatter, PackageVerifier, Runner, TrendTracker
Constant Summary
- VERSION =
The current gem version.
'0.1.0'
- DEFAULT_DIMENSIONS =
Canonical dimensions used when eval authors do not override descriptions. Weights (max_score) are nil here; the eval’s criteria.json provides them.
[ Dimension.new( name: 'correctness', description: 'Does the output fulfill the task requirements? Are all specified behaviors present and correct?', max_score: nil ), Dimension.new( name: 'skill_adherence', description: 'Did the agent follow the specific patterns, hard gates, and workflows defined in the skill?', max_score: nil ), Dimension.new( name: 'code_quality', description: 'Is the code clean, well-structured, free of smells, follows SRP, and avoids duplication?', max_score: nil ), Dimension.new( name: 'test_coverage', description: 'Are there meaningful tests? Do they test the right things? Are they following TDD/best practices from the skill?', max_score: nil ), Dimension.new( name: 'documentation', description: 'Is there adequate YARD documentation, clear intent, and helpful inline comments where needed?', max_score: nil ) ].freeze