Class: Clacky::Tools::FileReader
- Defined in:
- lib/clacky/tools/file_reader.rb
Constant Summary collapse
- MAX_TEXT_FILE_SIZE =
Maximum text file size (1MB)
1 * 1024 * 1024
- MAX_CONTENT_CHARS =
Maximum content size to return (~15,000 tokens = ~60,000 characters)
60_000
- MAX_LINE_CHARS =
Maximum characters per line (prevent single huge lines from bloating tokens)
1000
Instance Method Summary collapse
- #execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ Object
- #format_call(args) ⇒ Object
- #format_result(result) ⇒ Object
-
#format_result_for_llm(result) ⇒ Object
Format result for LLM - handles both text and binary (image) content. This method is called by the agent to format tool results before sending to LLM.
Methods inherited from Base
#category, #description, #name, #parameters, #to_function_definition
Instance Method Details
#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/clacky/tools/file_reader.rb', line 47 def execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) # Expand path relative to working_dir when provided = (path, working_dir: working_dir) unless File.exist?() return { path: , content: nil, error: "File not found: #{}" } end # If path is a directory, list its first-level contents (similar to filetree) if File.directory?() return list_directory_contents() end unless File.file?() return { path: , content: nil, error: "Path is not a file: #{}" } end begin # Delegate to FileProcessor for file type dispatch. FileProcessor is # the single source of truth for how a file becomes a readable form # (parser-extracted text, image base64, archive listing, plain text). # FileReader here only shapes the result for the LLM. ref = Utils::FileProcessor.process_path() case ref.type when :image # Images go to LLM as base64 via the image_inject sidecar channel. return handle_image_file() when :pdf, :document, :spreadsheet, :presentation # Parser-backed document formats. FileProcessor has already # produced a preview markdown file (or set parse_error on failure). if ref.preview_path && File.exist?(ref.preview_path) return read_text_file( , max_lines: max_lines, start_line: start_line, end_line: end_line, source_path: ref.preview_path, parsed_from: ref.type ) else return build_parser_failure_result(, ref) end when :text, :csv, :zip # FileProcessor already produced a preview (raw text copy for # text/csv, archive listing for zip/tar). Read the preview with # normal line-range + truncation rules. source = (ref.preview_path && File.exist?(ref.preview_path)) ? ref.preview_path : return read_text_file( , max_lines: max_lines, start_line: start_line, end_line: end_line, source_path: source ) else # Unknown / :file — could be an unrecognised source file, a binary # blob, or anything else. Fall back to: # 1. If FileProcessor.binary_file_path? says it's binary → report unsupported. # 2. 
Otherwise → read as plain text (covers .rb, .py, .js, .log, etc.). if Utils::FileProcessor.binary_file_path?() return handle_unsupported_binary(, ref) end return read_text_file( , max_lines: max_lines, start_line: start_line, end_line: end_line ) end rescue StandardError => e { path: , content: nil, error: "Error reading file: #{e.}" } end end |
#format_call(args) ⇒ Object
247 248 249 250 |
# File 'lib/clacky/tools/file_reader.rb', line 247
# Build the short label used to display a call to this tool.
#
# The path is read from the :path key, falling back to the 'path' String key,
# and is passed through Utils::PathHelper.safe_basename before being wrapped
# in "Read(...)".
#
# @param args [Hash] tool call arguments, keyed by Symbol or String
# @return [String] label of the form "Read(<name>)"
def format_call(args)
  target = args[:path] || args['path']
  # NOTE(review): presumably yields a display-safe base name; confirm against PathHelper.
  label = Utils::PathHelper.safe_basename(target)
  "Read(#{label})"
end
#format_result(result) ⇒ Object
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
# File 'lib/clacky/tools/file_reader.rb', line 252
# Summarise a tool result as a single human-readable status line.
#
# Every field, including :error, is looked up under its Symbol key first and
# its String key second, so symbol-keyed and string-keyed result hashes are
# summarised identically.
#
# @param result [Hash] tool result (error, directory listing, binary file or
#   text read)
# @return [Object] the stored error value when one is present; otherwise a
#   String describing the directory listing, binary file or number of lines read
def format_result(result)
  lookup = ->(key) { result[key] || result[key.to_s] }

  # Errors take precedence over every other field.
  error = lookup.call(:error)
  return error if error

  # Directory listing
  if lookup.call(:is_directory)
    entries = lookup.call(:entries_count) || 0
    dirs = lookup.call(:directories_count) || 0
    files = lookup.call(:files_count) || 0
    return "Listed #{entries} entries (#{dirs} directories, #{files} files)"
  end

  # Binary file
  if lookup.call(:binary)
    format_type = lookup.call(:format) || 'unknown'
    size = lookup.call(:size_bytes) || 0
    summary = "Binary file (#{format_type}, #{format_file_size(size)})"

    # Without base64 data the file is not in an LLM-compatible format.
    return "#{summary} - cannot be read as text" unless lookup.call(:base64_data)

    size_warning = size > Utils::FileProcessor::MAX_FILE_SIZE ? " (WARNING: large file)" : ""
    return "#{summary} - sent to LLM#{size_warning}"
  end

  # Text file reading (including parser-extracted documents)
  lines = lookup.call(:lines_read) || 0
  parsed_from = lookup.call(:parsed_from)
  suffix = parsed_from ? " (from #{parsed_from})" : ""
  "Read #{lines} lines#{suffix}#{lookup.call(:truncated) ? ' (truncated)' : ''}"
end
#format_result_for_llm(result) ⇒ Object
Format result for LLM - handles both text and binary (image) content. This method is called by the agent to format tool results before sending to LLM.
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 |
# File 'lib/clacky/tools/file_reader.rb', line 287
# Format a tool result for the LLM. Handles both text and binary (image)
# content; called by the agent before the result is sent to the LLM.
#
# @param result [Hash] symbol-keyed tool result
# @return [Hash, String] a Hash for binary content (a text part plus an
#   :image_inject sidecar for images, or a "document" descriptor otherwise),
#   the untouched result Hash for errors, nil content and directory listings,
#   or a plain String (header + content) for text files
def format_result_for_llm(result)
  # For LLM-compatible binary files with base64 data (images only — documents
  # are converted to text upstream via FileProcessor parsers).
  if result[:binary] && result[:base64_data]
    # A missing size is treated as 0 (as format_result does) so the threshold
    # comparison below cannot raise on nil.
    size = result[:size_bytes] || 0
    description = "File: #{result[:path]}\nType: #{result[:format]}\nSize: #{format_file_size(size)}"
    if size > Utils::FileProcessor::MAX_FILE_SIZE
      description += "\nWARNING: Large file (>#{Utils::FileProcessor::MAX_FILE_SIZE / 1024}KB) - may consume significant tokens"
    end

    # For images: return a plain-text tool result + a sidecar `image_inject`
    # payload that the agent will append as a follow-up `role: "user"` message.
    #
    # WHY: OpenAI-compatible APIs (including OpenRouter/Gemini) only accept
    # image_url content blocks inside `role: "user"` messages, NOT inside
    # `role: "tool"` messages. Putting base64 in a tool message causes it to
    # be JSON-encoded as a plain string, which the tokeniser treats as text —
    # blowing up token counts by 20-40x (observed: ~115k tokens for a 124 KB jpg).
    #
    # The agent detects `:image_inject` in the tool result after observe() and
    # appends a `role: "user"` system_injected message containing the image block.
    if result[:mime_type]&.start_with?("image/")
      return {
        type: "text",
        text: description,
        image_inject: {
          mime_type: result[:mime_type],
          base64_data: result[:base64_data],
          path: result[:path]
        }
      }
    end

    # No non-image binary type should reach here anymore — documents now
    # go through the parser + text path. Keep this as a defensive fallback.
    # size_bytes is passed through exactly as received (may be nil).
    return {
      type: "document",
      path: result[:path],
      format: result[:format],
      size_bytes: result[:size_bytes],
      mime_type: result[:mime_type],
      description: description
    }
  end

  # For error cases, return hash as-is
  return result if result[:error] || result[:content].nil?

  # For directory listings, return as-is (no raw file content to preserve)
  return result if result[:is_directory]

  # For plain text files (and parser-extracted documents): return a plain
  # string so the agent sends it directly to the LLM without JSON-encoding
  # (avoids \" / \n escaping).
  header = "File: #{result[:path]}"
  header += " [extracted from #{result[:parsed_from]}]" if result[:parsed_from]
  header += " (lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
  header += " [#{result[:lines_read]}/#{result[:total_lines]} lines]"
  header += " [TRUNCATED]" if result[:truncated]
  "#{header}\n\n#{result[:content]}"
end