Module: Ace::Core::Atoms::FileReader

Defined in:
lib/ace/core/atoms/file_reader.rb

Overview

Pure file reading functions with safety checks

Constant Summary collapse

MAX_FILE_SIZE =

Default maximum file size in bytes (1 MiB = 1,048,576 bytes)

1_048_576
BINARY_EXTENSIONS =

Binary file detection patterns

%w[
  .jpg .jpeg .png .gif .bmp .ico .webp .svg
  .pdf .doc .docx .xls .xlsx .ppt .pptx
  .zip .tar .gz .bz2 .7z .rar
  .exe .dll .so .dylib .app
  .mp3 .mp4 .avi .mov .wmv .flv
  .ttf .otf .woff .woff2 .eot
  .class .jar .war .ear
  .pyc .pyo .o .a
].freeze

Class Method Summary collapse

Class Method Details

.binary?(path) ⇒ Boolean

Check if file appears to be binary

Parameters:

  • path (String)

    Path to file

Returns:

  • (Boolean)

    true if file appears to be binary



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/ace/core/atoms/file_reader.rb', line 80

# Check if file appears to be binary.
#
# Decision order:
# 1. A nil path is never binary.
# 2. A path whose (case-insensitive) extension is listed in BINARY_EXTENSIONS
#    is binary; the file itself is not touched.
# 3. Otherwise up to the first 8KB are sampled. The file is binary when the
#    sample contains a NUL byte, or when more than 30% of its characters are
#    not plausible text.
#
# "Not plausible text" means bytes that are not valid UTF-8 and control
# characters other than tab, line feed, vertical tab, form feed and carriage
# return. Line breaks and valid multibyte UTF-8 characters therefore count as
# text, so short files such as "hi\n" and non-English UTF-8 text are not
# misclassified.
#
# @param path [String, nil] Path to file
# @return [Boolean] true if file appears to be binary; false for a nil path,
#   a non-existent path or an empty file; true when inspecting the file
#   raises (e.g. it cannot be read)
def binary?(path)
  return false if path.nil?

  # Check extension first
  ext = File.extname(path).downcase
  return true if BINARY_EXTENSIONS.include?(ext)

  # Sample first 8KB of file
  expanded_path = File.expand_path(path)
  return false unless File.exist?(expanded_path)

  sample_size = [File.size(expanded_path), 8192].min
  sample = File.read(expanded_path, sample_size, mode: "rb")

  # An empty file carries no evidence of being binary
  return false if sample.empty?

  # Null bytes are common in binary files and rare in text
  return true if sample.include?("\x00")

  # Reinterpret the raw bytes as UTF-8. Bytes that do not form a valid
  # sequence come back from #chars as individual invalid characters, so a
  # multibyte character cut off at the 8KB boundary costs a few units at most.
  chars = sample.force_encoding(Encoding::UTF_8).chars
  suspicious = chars.count do |char|
    !char.valid_encoding? ||
      char.ord == 127 ||
      (char.ord < 32 && !"\t\n\v\f\r".include?(char))
  end

  # If more than 30% looks like non-text, consider it binary
  suspicious.to_f / chars.size > 0.3
rescue
  # If we can't read it, assume it might be binary
  true
end

.metadata(path) ⇒ Hash

Get file metadata

Parameters:

  • path (String)

    Path to file

Returns:

  • (Hash)

    File metadata



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/ace/core/atoms/file_reader.rb', line 113

# Get file metadata.
#
# @param path [String, nil] Path to file
# @return [Hash] one of:
#   - +{exists: false}+ when path is nil
#   - +{exists: false, path: path}+ when nothing exists at the expanded path
#   - +{exists: false, path: path, error: String}+ when collecting the
#     metadata raised
#   - otherwise a Hash with the keys +exists+, +path+, +absolute_path+,
#     +size+, +modified+, +created+, +readable+, +writable+, +directory+,
#     +file+ and +binary+
def metadata(path)
  return {exists: false} if path.nil?

  expanded_path = File.expand_path(path)
  return {exists: false, path: path} unless File.exist?(expanded_path)

  stat = File.stat(expanded_path)

  {
    exists: true,
    path: path,
    absolute_path: expanded_path,
    size: stat.size,
    modified: stat.mtime,
    # NOTE(review): File::Stat#ctime is documented as the change time on
    # Unix-like systems rather than the creation time - confirm callers are
    # fine with that meaning of :created.
    created: stat.ctime,
    readable: File.readable?(expanded_path),
    writable: File.writable?(expanded_path),
    directory: stat.directory?,
    file: stat.file?,
    binary: binary?(expanded_path)
  }
rescue => e
  # Best effort: report the failure instead of raising to the caller
  {exists: false, path: path, error: e.message}
end

.read(path, max_size: MAX_FILE_SIZE) ⇒ Hash

Read file with size limit

Parameters:

  • path (String)

    Path to file

  • max_size (Integer) (defaults to: MAX_FILE_SIZE)

    Maximum file size in bytes

Returns:

  • (Hash)

    `{success: true, content: String, size: Integer}` on success, or `{success: false, error: String}` on failure



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/ace/core/atoms/file_reader.rb', line 31

# Read a whole text file as UTF-8, refusing anything that is missing, not a
# regular file, larger than +max_size+, detected as binary, or not valid
# UTF-8.
#
# @param path [String, nil] Path to file
# @param max_size [Integer] Maximum file size in bytes
# @return [Hash] +{success: true, content: String, size: Integer}+ on
#   success, +{success: false, error: String}+ otherwise; no StandardError
#   is propagated to the caller
def read(path, max_size: MAX_FILE_SIZE)
  return {success: false, error: "Path cannot be nil"} if path.nil?

  expanded_path = File.expand_path(path)

  unless File.exist?(expanded_path)
    return {success: false, error: "File not found: #{path}"}
  end

  unless File.file?(expanded_path)
    return {success: false, error: "Not a file: #{path}"}
  end

  file_size = File.size(expanded_path)
  if file_size > max_size
    return {
      success: false,
      error: "File too large: #{file_size} bytes (max: #{max_size})"
    }
  end

  if binary?(expanded_path)
    return {
      success: false,
      error: "Binary file detected: #{path}"
    }
  end

  content = File.read(expanded_path, encoding: "UTF-8")

  # Reading with an external encoding only labels the bytes as UTF-8; it does
  # not validate them. Check explicitly so malformed content is reported
  # instead of being handed back as a successful read.
  unless content.valid_encoding?
    return {success: false, error: "File contains invalid UTF-8: #{path}"}
  end

  {success: true, content: content, size: file_size}
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
  # Still reachable when the read transcodes (e.g. Encoding.default_internal is set)
  {success: false, error: "File contains invalid UTF-8: #{path}"}
rescue => e
  {success: false, error: "Failed to read file: #{e.message}"}
end

.read_lines(path, limit: 100, offset: 0) ⇒ Hash

Read lines from file with line limit

Parameters:

  • path (String)

    Path to file

  • limit (Integer) (defaults to: 100)

    Maximum number of lines

  • offset (Integer) (defaults to: 0)

    Starting line number (0-based)

Returns:

  • (Hash)

    `{success: true, lines: Array, total_lines: Integer, offset: Integer, limit: Integer}` on success, or `{success: false, error: String}` on failure



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/ace/core/atoms/file_reader.rb', line 146

# Read a window of lines from a UTF-8 text file.
#
# The whole file is iterated so that +total_lines+ reflects the full line
# count, but only lines from +offset+ onwards, up to +limit+ of them, are
# kept. Line terminators are removed from the returned lines.
#
# @param path [String, nil] Path to file
# @param limit [Integer] Maximum number of lines
# @param offset [Integer] Starting line number (0-based)
# @return [Hash] +{success: true, lines: Array<String>, total_lines: Integer,
#   offset: Integer, limit: Integer}+ on success,
#   +{success: false, error: String}+ otherwise; no StandardError is
#   propagated to the caller
def read_lines(path, limit: 100, offset: 0)
  return {success: false, error: "Path cannot be nil"} if path.nil?

  expanded_path = File.expand_path(path)

  unless File.exist?(expanded_path)
    return {success: false, error: "File not found: #{path}"}
  end

  # Same guard as the whole-file reader: report directories and other
  # non-regular files as such instead of as binary files.
  unless File.file?(expanded_path)
    return {success: false, error: "Not a file: #{path}"}
  end

  if binary?(expanded_path)
    return {success: false, error: "Binary file detected: #{path}"}
  end

  lines = []
  total_lines = 0

  File.foreach(expanded_path, encoding: "UTF-8").with_index do |line, index|
    # The external encoding only labels each line as UTF-8 without validating
    # it, so malformed input has to be detected explicitly.
    unless line.valid_encoding?
      return {success: false, error: "File contains invalid UTF-8: #{path}"}
    end

    total_lines = index + 1
    lines << line.chomp if index >= offset && lines.size < limit
  end

  {
    success: true,
    lines: lines,
    total_lines: total_lines,
    offset: offset,
    limit: limit
  }
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
  # Still reachable when the read transcodes (e.g. Encoding.default_internal is set)
  {success: false, error: "File contains invalid UTF-8: #{path}"}
rescue => e
  {success: false, error: "Failed to read lines: #{e.message}"}
end

.readable?(path) ⇒ Boolean

Check if file exists and is readable

Parameters:

  • path (String)

    Path to file

Returns:

  • (Boolean)

    true if file exists and is readable



70
71
72
73
74
75
# File 'lib/ace/core/atoms/file_reader.rb', line 70

# Tell whether +path+ names an existing regular file that can be read.
#
# @param path [String, nil] Path to file
# @return [Boolean] true if file exists and is readable; false for nil
def readable?(path)
  return false if path.nil?

  target = File.expand_path(path)

  # Checks run in order and stop at the first one that fails
  %i[exist? file? readable?].all? { |check| File.public_send(check, target) }
end