Class: Clacky::Tools::FileReader

Inherits:

Base

Object
Base
Clacky::Tools::FileReader

show all

Defined in: lib/clacky/tools/file_reader.rb

Constant Summary collapse

MAX_TEXT_FILE_SIZE = Maximum text file size (1MB)

1 * 1024 * 1024

MAX_CONTENT_CHARS = Maximum content size to return (60,000 characters, i.e. roughly 15,000 tokens at ~4 characters per token)

60_000

MAX_LINE_CHARS = Maximum characters per line (prevent single huge lines from bloating tokens)

Instance Method Summary collapse

#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ Object
#format_call(args) ⇒ Object
#format_result(result) ⇒ Object
#format_result_for_llm(result) ⇒ Object

Format result for LLM - handles both text and binary (image) content. This method is called by the agent to format tool results before sending to LLM.

Methods inherited from Base

#category, #description, #name, #parameters, #to_function_definition

Instance Method Details

#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ `Object`

# File 'lib/clacky/tools/file_reader.rb', line 47

# Read a path and return a result hash shaped for the LLM.
#
# Missing paths and non-regular files yield `{ path:, content: nil, error: }`.
# Directories are handed to `list_directory_contents`. Regular files are
# classified by `Utils::FileProcessor.process_path` and then routed to the
# matching helper (image, parser-backed document, text preview, binary, or
# plain-text fallback).
#
# @param path [String] path to read; resolved through `expand_path`
# @param max_lines [Integer] forwarded to `read_text_file`
# @param start_line [Integer, nil] forwarded to `read_text_file`
# @param end_line [Integer, nil] forwarded to `read_text_file`
# @param working_dir [String, nil] forwarded to `expand_path`
# @return [Object] whatever the selected helper returns, or the error hash
def execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil)
  # Resolve the requested path (relative to working_dir when one is given).
  target = expand_path(path, working_dir: working_dir)

  # All failures share one shape: the path, no content, and a message.
  failure = ->(message) { { path: target, content: nil, error: message } }

  return failure.call("File not found: #{target}") unless File.exist?(target)

  # A directory is answered with a listing of its first-level contents
  # (similar to filetree) instead of file content.
  return list_directory_contents(target) if File.directory?(target)

  return failure.call("Path is not a file: #{target}") unless File.file?(target)

  # Line-range options passed unchanged to every read_text_file call below.
  window = { max_lines: max_lines, start_line: start_line, end_line: end_line }

  begin
    # FileProcessor is the single source of truth for how a file becomes a
    # readable form (parser-extracted text, image base64, archive listing,
    # plain text); this method only shapes the result for the LLM.
    ref = Utils::FileProcessor.process_path(target)

    case ref.type
    when :image
      # Images travel to the LLM as base64 via the image_inject sidecar channel.
      handle_image_file(target)
    when :pdf, :document, :spreadsheet, :presentation
      # Parser-backed formats: FileProcessor either produced a preview
      # markdown file or recorded a parse failure on the ref.
      preview = ref.preview_path
      if preview && File.exist?(preview)
        read_text_file(target, **window, source_path: preview, parsed_from: ref.type)
      else
        build_parser_failure_result(target, ref)
      end
    when :text, :csv, :zip
      # Prefer the preview (raw text copy for text/csv, archive listing for
      # zip/tar); fall back to the file itself when no preview exists.
      preview = ref.preview_path
      source = (preview && File.exist?(preview)) ? preview : target
      read_text_file(target, **window, source_path: source)
    else
      # Unknown / :file — either a binary blob (reported as unsupported) or
      # an unrecognised source file that is read as plain text
      # (.rb, .py, .js, .log, etc.).
      if Utils::FileProcessor.binary_file_path?(target)
        handle_unsupported_binary(target, ref)
      else
        read_text_file(target, **window)
      end
    end
  rescue StandardError => e
    failure.call("Error reading file: #{e.message}")
  end
end

#format_call(args) ⇒ `Object`

# File 'lib/clacky/tools/file_reader.rb', line 247

# Build the short label shown for a read call: `Read(<name>)`, where the
# name comes from `Utils::PathHelper.safe_basename`.
#
# @param args [Hash] call arguments; the path is read from `:path`, falling
#   back to the string key `'path'`
# @return [String]
def format_call(args)
  requested = args[:path]
  requested ||= args['path']
  label = Utils::PathHelper.safe_basename(requested)
  "Read(#{label})"
end

#format_result(result) ⇒ `Object`

# File 'lib/clacky/tools/file_reader.rb', line 252

# Build the one-line, human-readable summary of a read result.
#
# Every field is looked up by symbol key first and by string key second, so
# string-keyed hashes are summarised the same way as symbol-keyed ones. This
# includes `error`: previously only `result[:error]` was checked, so a
# string-keyed error result was reported as "Read 0 lines".
#
# @param result [Hash] result hash produced by #execute
# @return [String] the error message, or a summary of the directory listing,
#   binary file, or text read
def format_result(result)
  # Symbol-first lookup with a string-key fallback.
  lookup = ->(key) { result[key] || result[key.to_s] }

  error = lookup.call(:error)
  return error if error

  # Directory listing
  if lookup.call(:is_directory)
    entries = lookup.call(:entries_count) || 0
    dirs = lookup.call(:directories_count) || 0
    files = lookup.call(:files_count) || 0
    return "Listed #{entries} entries (#{dirs} directories, #{files} files)"
  end

  # Binary file
  if lookup.call(:binary)
    format_type = lookup.call(:format) || 'unknown'
    size = lookup.call(:size_bytes) || 0

    # Without base64 data there is nothing to forward to the LLM.
    unless lookup.call(:base64_data)
      return "Binary file (#{format_type}, #{format_file_size(size)}) - cannot be read as text"
    end

    size_warning = size > Utils::FileProcessor::MAX_FILE_SIZE ? " (WARNING: large file)" : ""
    return "Binary file (#{format_type}, #{format_file_size(size)}) - sent to LLM#{size_warning}"
  end

  # Text file (including parser-extracted documents)
  lines = lookup.call(:lines_read) || 0
  parsed_from = lookup.call(:parsed_from)
  suffix = parsed_from ? " (from #{parsed_from})" : ""
  "Read #{lines} lines#{suffix}#{lookup.call(:truncated) ? ' (truncated)' : ''}"
end

#format_result_for_llm(result) ⇒ `Object`

Format result for LLM - handles both text and binary (image) content. This method is called by the agent to format tool results before sending to LLM.

# File 'lib/clacky/tools/file_reader.rb', line 287

# Shape a read result for the LLM.
#
# - Binary results carrying base64 data become a text description; images
#   additionally carry an `image_inject` sidecar payload.
# - Error results, results without content, and directory listings are
#   returned untouched.
# - Text results (including parser-extracted documents) become a plain
#   string: a header line, a blank line, then the content.
#
# @param result [Hash] result hash produced by #execute (symbol keys)
# @return [Hash, String]
def format_result_for_llm(result)
  # Binary with base64 data: images only in practice — documents are turned
  # into text upstream by the FileProcessor parsers.
  if result[:binary] && result[:base64_data]
    summary = ["File: #{result[:path]}\nType: #{result[:format]}\nSize: #{format_file_size(result[:size_bytes])}"]

    size_cap = Utils::FileProcessor::MAX_FILE_SIZE
    if result[:size_bytes] > size_cap
      summary << "\nWARNING: Large file (>#{size_cap / 1024}KB) - may consume significant tokens"
    end
    description = summary.join

    # Defensive fallback: no non-image binary type is expected here any
    # more, since documents go through the parser + text path.
    unless result[:mime_type]&.start_with?("image/")
      return {
        type: "document",
        path: result[:path],
        format: result[:format],
        size_bytes: result[:size_bytes],
        mime_type: result[:mime_type],
        description: description
      }
    end

    # Images: a plain-text tool result plus an `image_inject` sidecar that
    # the agent appends as a follow-up `role: "user"` message.
    #
    # WHY: OpenAI-compatible APIs (including OpenRouter/Gemini) accept
    # image_url content blocks only inside `role: "user"` messages, not
    # inside `role: "tool"` messages. Base64 placed in a tool message is
    # JSON-encoded as a plain string and tokenised as text, inflating token
    # counts by 20-40x (observed: ~115k tokens for a 124 KB jpg).
    #
    # The agent detects `:image_inject` in the tool result after observe()
    # and appends a `role: "user"` system_injected message with the image.
    return {
      type: "text",
      text: description,
      image_inject: {
        mime_type: result[:mime_type],
        base64_data: result[:base64_data],
        path: result[:path]
      }
    }
  end

  # Errors, content-less results and directory listings pass through as-is
  # (there is no raw file content to preserve).
  pass_through = result[:error] || result[:content].nil? || result[:is_directory]
  return result if pass_through

  # Text: return a plain string so the agent sends it to the LLM directly,
  # without JSON-encoding (avoids \" / \n escaping).
  segments = ["File: #{result[:path]}"]
  segments << " [extracted from #{result[:parsed_from]}]" if result[:parsed_from]
  segments << " (lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
  segments << " [#{result[:lines_read]}/#{result[:total_lines]} lines]"
  segments << " [TRUNCATED]" if result[:truncated]

  "#{segments.join}\n\n#{result[:content]}"
end

Class: Clacky::Tools::FileReader

Constant Summary collapse

Instance Method Summary collapse

Methods inherited from Base

Instance Method Details

#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ Object

#format_call(args) ⇒ Object

#format_result(result) ⇒ Object

#format_result_for_llm(result) ⇒ Object

#execute(path:, max_lines: 1000, start_line: nil, end_line: nil, working_dir: nil) ⇒ `Object`

#format_call(args) ⇒ `Object`

#format_result(result) ⇒ `Object`

#format_result_for_llm(result) ⇒ `Object`