Class: Clacky::Tools::FileReader

Inherits:
Base
  • Object
show all
Defined in:
lib/clacky/tools/file_reader.rb

Constant Summary collapse

MAX_TEXT_FILE_SIZE =

Maximum text file size (1MB)

1 * 1024 * 1024
MAX_CONTENT_CHARS =

Maximum content size to return, in characters (60,000 characters ≈ 15,000 tokens at ~4 characters per token)

60_000
MAX_LINE_CHARS =

Maximum characters per line (prevent single huge lines from bloating tokens)

1000

Instance Method Summary collapse

Methods inherited from Base

#category, #description, #name, #parameters, #to_function_definition

Instance Method Details

#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/clacky/tools/file_reader.rb', line 47

# Reads the file (or lists the directory) at +path+ and returns a result hash
# shaped for the LLM.
#
# @param path [String] path to read; expanded via +expand_path+
# @param max_lines [Integer] forwarded to +read_text_file+
# @param start_line [Integer, nil] forwarded to +read_text_file+
# @param end_line [Integer, nil] forwarded to +read_text_file+
# @param working_dir [String, nil] forwarded to +expand_path+
# @return [Object] whatever the selected handler returns; on a missing path,
#   a non-file path or a rescued StandardError, a hash with +:path+,
#   +content: nil+ and an +:error+ message
def execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil)
  target = expand_path(path, working_dir: working_dir)

  # Guard clauses: missing path, directory, or something that is not a regular file.
  return { path: target, content: nil, error: "File not found: #{target}" } unless File.exist?(target)
  # A directory is answered with a listing of its contents rather than an error.
  return list_directory_contents(target) if File.directory?(target)
  return { path: target, content: nil, error: "Path is not a file: #{target}" } unless File.file?(target)

  # Line-window options shared by every read_text_file call below.
  window = { max_lines: max_lines, start_line: start_line, end_line: end_line }

  begin
    # FileProcessor decides what kind of file this is; this method only
    # routes the reference to the matching handler.
    ref = Utils::FileProcessor.process_path(target)
    preview = ref.preview_path

    case ref.type
    when :image
      # Images are handed to the dedicated image handler.
      handle_image_file(target)
    when :pdf, :document, :spreadsheet, :presentation
      # Parser-backed formats: read the generated preview when it exists,
      # otherwise report the parser failure.
      if preview && File.exist?(preview)
        read_text_file(target, **window, source_path: preview, parsed_from: ref.type)
      else
        build_parser_failure_result(target, ref)
      end
    when :text, :csv, :zip
      # Prefer the preview file; fall back to the original path when the
      # preview is absent.
      readable =
        if preview && File.exist?(preview)
          preview
        else
          target
        end
      read_text_file(target, **window, source_path: readable)
    else
      # Unknown type: binary files are reported as unsupported, anything
      # else is read as plain text.
      if Utils::FileProcessor.binary_file_path?(target)
        handle_unsupported_binary(target, ref)
      else
        read_text_file(target, **window)
      end
    end
  rescue StandardError => e
    { path: target, content: nil, error: "Error reading file: #{e.message}" }
  end
end

#format_call(args) ⇒ Object



247
248
249
250
# File 'lib/clacky/tools/file_reader.rb', line 247

# Builds the short "Read(<basename>)" label for a tool call.
#
# @param args [Hash] call arguments; the path is read from +:path+ or,
#   failing that, from +'path'+
# @return [String] the label
def format_call(args)
  basename = Utils::PathHelper.safe_basename(args[:path] || args['path'])
  "Read(#{basename})"
end

#format_result(result) ⇒ Object



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/clacky/tools/file_reader.rb', line 252

# Produces a one-line, human-readable summary of a read result.
#
# Every field is looked up under both its symbol key and its string key.
# The error field follows the same rule, so a string-keyed error result is
# reported as an error instead of falling through to "Read 0 lines".
#
# @param result [Hash] result hash with symbol or string keys
# @return [Object] the error value as-is when one is present; otherwise a
#   summary String for a directory listing, a binary file or a text read
def format_result(result)
  # Looks a field up by symbol first, then by the equivalent string key.
  field = ->(key) { result[key] || result[key.to_s] }

  error = field.call(:error)
  return error if error

  # Directory listing
  if field.call(:is_directory)
    entries = field.call(:entries_count) || 0
    dirs = field.call(:directories_count) || 0
    files = field.call(:files_count) || 0
    return "Listed #{entries} entries (#{dirs} directories, #{files} files)"
  end

  # Binary file
  if field.call(:binary)
    format_type = field.call(:format) || 'unknown'
    size = field.call(:size_bytes) || 0

    # Without base64 data the file could not be passed on to the LLM.
    unless field.call(:base64_data)
      return "Binary file (#{format_type}, #{format_file_size(size)}) - cannot be read as text"
    end

    size_warning = size > Utils::FileProcessor::MAX_FILE_SIZE ? " (WARNING: large file)" : ""
    return "Binary file (#{format_type}, #{format_file_size(size)}) - sent to LLM#{size_warning}"
  end

  # Text file (including parser-extracted documents)
  lines = field.call(:lines_read) || 0
  parsed_from = field.call(:parsed_from)
  suffix = parsed_from ? " (from #{parsed_from})" : ""
  "Read #{lines} lines#{suffix}#{field.call(:truncated) ? ' (truncated)' : ''}"
end

#format_result_for_llm(result) ⇒ Object

Format result for LLM - handles both text and binary (image) content. This method is called by the agent to format tool results before sending them to the LLM.



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/clacky/tools/file_reader.rb', line 287

# Shapes a read result into what gets handed to the LLM.
#
# @param result [Hash] symbol-keyed result hash
# @return [Hash, String] a hash for binary results (with an +:image_inject+
#   sidecar for images); +result+ itself for errors, nil content and
#   directory listings; otherwise a plain String made of a header line,
#   a blank line and the content
def format_result_for_llm(result)
  if result[:binary] && result[:base64_data]
    summary_parts = [
      "File: #{result[:path]}",
      "Type: #{result[:format]}",
      "Size: #{format_file_size(result[:size_bytes])}"
    ]

    size_limit = Utils::FileProcessor::MAX_FILE_SIZE
    if result[:size_bytes] > size_limit
      summary_parts << "WARNING: Large file (>#{size_limit / 1024}KB) - may consume significant tokens"
    end
    summary = summary_parts.join("\n")

    # Non-image binaries are not expected here any more (documents take the
    # parser + text path); this branch is a defensive fallback.
    unless result[:mime_type]&.start_with?("image/")
      return {
        type: "document",
        path: result[:path],
        format: result[:format],
        size_bytes: result[:size_bytes],
        mime_type: result[:mime_type],
        description: summary
      }
    end

    # Images: the tool result itself stays plain text and the image data
    # travels in the `image_inject` sidecar, which the agent appends as a
    # follow-up `role: "user"` message. OpenAI-compatible APIs only accept
    # image blocks inside user messages; base64 placed in a `role: "tool"`
    # message would be tokenised as text and inflate the token count.
    return {
      type: "text",
      text: summary,
      image_inject: {
        mime_type: result[:mime_type],
        base64_data: result[:base64_data],
        path: result[:path]
      }
    }
  end

  # Errors, results without content and directory listings pass through untouched.
  return result if result[:error] || result[:content].nil? || result[:is_directory]

  # Text (and parser-extracted documents): return a plain string so the
  # agent can send it without JSON-encoding (no \" / \n escaping).
  header_parts = ["File: #{result[:path]}"]
  header_parts << " [extracted from #{result[:parsed_from]}]" if result[:parsed_from]
  header_parts << " (lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
  header_parts << " [#{result[:lines_read]}/#{result[:total_lines]} lines]"
  header_parts << " [TRUNCATED]" if result[:truncated]

  "#{header_parts.join}\n\n#{result[:content]}"
end