Module: Clacky::Utils::FileIgnoreHelper

Defined in:
lib/clacky/utils/file_ignore_helper.rb

Overview

Helper module for file ignoring functionality shared between tools

Defined Under Namespace

Classes: WalkBudgetExceeded

Constant Summary collapse

DEFAULT_IGNORED_PATTERNS =

Default patterns to ignore when .gitignore is not available

[
  'node_modules',
  'vendor/bundle',
  '.git',
  '.svn',
  'tmp',
  'log',
  'coverage',
  'dist',
  'build',
  '.bundle',
  '.sass-cache',
  '.DS_Store',
  '*.log'
].freeze
CONFIG_FILE_PATTERNS =

Config file patterns that should always be searchable/visible

[
  /\.env/,
  /\.ya?ml$/,
  /\.json$/,
  /\.toml$/,
  /\.ini$/,
  /\.conf$/,
  /\.config$/,
].freeze
ALWAYS_IGNORED_DIRS =

Directories that are always ignored regardless of .gitignore rules

['.git', '.svn', '.hg'].freeze
MAX_DIRS_VISITED =

Hard ceiling on directories visited in a single walk. Prevents indefinite traversal across huge trees (e.g. /root, $HOME, /mnt/c on WSL).

20_000
WALK_TIMEOUT_SECONDS =

Wall-clock budget for a single walk, in seconds.

15

Class Method Summary collapse

Class Method Details

.dangerous_root?(path) ⇒ Boolean

Paths considered too broad to recursively walk by default. Searching from these would commonly traverse millions of files (system roots, $HOME with many workspaces, WSL Windows mounts). Tools should refuse such requests and ask for a narrower base_path.

Returns:

  • (Boolean)


123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/clacky/utils/file_ignore_helper.rb', line 123

# Reports whether +path+ is too broad to be walked recursively by default.
#
# The path is expanded with File.expand_path and then flagged when it is:
# * the filesystem root or one of a fixed list of top-level system directories,
# * a direct child of /Users or /home (a per-user home directory),
# * a single-letter entry directly under /mnt (e.g. /mnt/c),
# * the directory named by the HOME environment variable.
#
# @param path [String, nil] path to inspect; nil or empty is never dangerous
# @return [Boolean] true when the expanded path matches one of the cases above
def self.dangerous_root?(path)
  return false if path.nil? || path.empty?

  absolute = File.expand_path(path)

  # Filesystem root plus well-known top-level directories.
  broad_roots = ["/", "/root", "/home", "/Users", "/mnt", "/media", "/var", "/etc", "/usr", "/opt"]
  return true if broad_roots.include?(absolute)

  # Exactly one path segment below /Users or /home.
  per_user_home = ["/Users/", "/home/"].any? do |parent|
    next false unless absolute.start_with?(parent)

    remainder = absolute.delete_prefix(parent)
    !remainder.empty? && !remainder.include?("/")
  end
  return true if per_user_home

  # Single-letter entries under /mnt (drive-letter style mounts).
  return true if absolute.match?(%r{\A/mnt/[a-zA-Z]\z})

  home_dir = ENV["HOME"]
  return false if home_dir.nil? || home_dir.empty?

  absolute == File.expand_path(home_dir)
end

.find_gitignore(path) ⇒ Object

Find the .gitignore file in the search path or one of its parent directories. Only searches within the search path and up to the current working directory.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/clacky/utils/file_ignore_helper.rb', line 37

# Locates the nearest .gitignore for +path+.
#
# Starts in +path+ (or its parent directory when +path+ is not a directory)
# and walks upwards. The walk stops at the process working directory when the
# starting directory is the working directory or lies inside it; otherwise
# only the starting directory itself is checked. This keeps searches in
# unrelated locations (e.g. temporary directories used by tests) from picking
# up .gitignore files that belong to some parent directory.
#
# @param path [String] file or directory to start from
# @return [String, nil] path of the .gitignore that was found, or nil
def self.find_gitignore(path)
  search_path = File.directory?(path) ? path : File.dirname(path)

  current = File.expand_path(search_path)
  cwd = File.expand_path(Dir.pwd) # intentional: gitignore boundary uses process cwd as fallback
  root = File.expand_path('/')

  # Containment must be decided on whole path components: a plain
  # start_with?(cwd) would treat "/x/project2" as being inside "/x/proj",
  # and the upward walk would then never meet the limit and continue to root.
  cwd_prefix = cwd.end_with?(File::SEPARATOR) ? cwd : "#{cwd}#{File::SEPARATOR}"
  inside_cwd = current == cwd || current.start_with?(cwd_prefix)
  search_limit = inside_cwd ? cwd : current

  loop do
    candidate = File.join(current, '.gitignore')
    return candidate if File.exist?(candidate)

    # Stop if we've reached the search limit or root
    break if current == search_limit || current == root

    parent = File.dirname(current)
    # File.dirname reached a fixed point; nothing further up to inspect.
    break if parent == current

    current = parent
  end

  nil
end

.is_config_file?(file) ⇒ Boolean

Check if file is a config file (should not be ignored even if in .gitignore)

Returns:

  • (Boolean)


115
116
117
# File 'lib/clacky/utils/file_ignore_helper.rb', line 115

# Tells whether +file+ looks like a configuration file, i.e. whether it
# matches at least one of the regular expressions in CONFIG_FILE_PATTERNS.
#
# @param file [String] file name or path to test
# @return [Boolean] true when any pattern matches
def self.is_config_file?(file)
  CONFIG_FILE_PATTERNS.any? do |config_pattern|
    file.match?(config_pattern)
  end
end

.should_ignore_file?(file, base_path, gitignore) ⇒ Boolean

Check if file should be ignored based on .gitignore or default patterns

Returns:

  • (Boolean)


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/clacky/utils/file_ignore_helper.rb', line 70

# Decides whether +file+ should be ignored.
#
# The decision is made on the path of +file+ relative to +base_path+ (or to
# the directory containing +base_path+ when it is an existing regular file).
# Files outside the base are judged by their basename only.
#
# Order of checks:
# 1. Anything at or below a top-level ALWAYS_IGNORED_DIRS entry is ignored.
# 2. When +gitignore+ is given, its #ignored? answer for the relative path
#    is returned.
# 3. Otherwise DEFAULT_IGNORED_PATTERNS are applied: literal patterns match
#    whole path components at any depth; glob patterns are matched with
#    File.fnmatch against the relative path and, when the glob has no "/",
#    against the basename as well so that e.g. "*.log" also covers nested
#    files (mirroring how the literal patterns and .gitignore behave).
#
# @param file [String] path of the file to check
# @param base_path [String] directory (or file) the walk/search started from
# @param gitignore [#ignored?, nil] rule set queried with the relative path
# @return [Boolean] true when the file should be skipped (when +gitignore+ is
#   given, this is whatever its #ignored? returns)
def self.should_ignore_file?(file, base_path, gitignore)
  # expand_path makes both paths absolute and normalises "." / ".."
  # segments; it does not resolve symlinks.
  expanded_file = File.expand_path(file)
  expanded_base = File.expand_path(base_path)

  # For files, use the directory as base
  expanded_base = File.dirname(expanded_base) if File.file?(expanded_base)

  # Compare on whole path components. A bare start_with?(expanded_base)
  # would treat "/a/x-tmp/f" as living under "/a/x" and slice out a bogus
  # relative path; it would also drop a character when the base is "/".
  base_prefix = expanded_base.end_with?(File::SEPARATOR) ? expanded_base : "#{expanded_base}#{File::SEPARATOR}"
  relative_path =
    if expanded_file.start_with?(base_prefix) && expanded_file.length > base_prefix.length
      expanded_file[base_prefix.length..]
    else
      # File is the base itself or outside the base path - use just the filename
      File.basename(expanded_file)
    end

  # Always ignore version control directories regardless of .gitignore rules
  under_vcs_dir = ALWAYS_IGNORED_DIRS.any? do |dir|
    relative_path == dir || relative_path.start_with?("#{dir}/")
  end
  return true if under_vcs_dir

  # Use .gitignore rules when available
  return gitignore.ignored?(relative_path) if gitignore

  # Fall back to the default ignore patterns
  basename = File.basename(relative_path)
  glob_flags = File::FNM_PATHNAME | File::FNM_DOTMATCH

  DEFAULT_IGNORED_PATTERNS.any? do |pattern|
    if pattern.include?('*')
      File.fnmatch(pattern, relative_path, glob_flags) ||
        (!pattern.include?('/') && File.fnmatch(pattern, basename, glob_flags))
    else
      # Match pattern as a path component (not substring of absolute path)
      relative_path == pattern ||
        basename == pattern ||
        relative_path.start_with?("#{pattern}/") ||
        relative_path.include?("/#{pattern}/")
    end
  end
end

.walk_files(base_path, gitignore: nil, skipped: nil, status: nil, max_dirs_visited: MAX_DIRS_VISITED, timeout_seconds: WALK_TIMEOUT_SECONDS, &block) ⇒ Object

Walk a directory tree, pruning ignored directories early. Yields each non-ignored file path. Supports nested .gitignore files.

Parameters:

  • skipped (Hash, nil) (defaults to: nil)

    If provided, increments :ignored for each gitignore-skipped entry.

  • status (Hash, nil) (defaults to: nil)

    If provided, populated with :truncated and :truncation_reason when the walk is aborted due to dir-count or wall-clock budget.



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/clacky/utils/file_ignore_helper.rb', line 167

# Walks the tree below +base_path+ and yields file paths to the block,
# delegating the traversal itself to _walk_recursive.
#
# Without a block an Enumerator over the same walk is returned.
#
# @param base_path [String] directory the walk starts from
# @param gitignore [Object, nil] rule set for the root; when omitted, one is
#   built with Clacky::GitignoreParser from the file located by find_gitignore
#   (nil when none is found)
# @param skipped [Hash, nil] passed through to _walk_recursive unchanged
# @param status [Hash, nil] when given and the walk is cut short by
#   WalkBudgetExceeded, receives :truncated => true and :truncation_reason
#   (the exception's reason as a String)
# @param max_dirs_visited [Integer] stored in the budget as :max_dirs
# @param timeout_seconds [Numeric, nil] time budget measured on the monotonic
#   clock; nil leaves the budget's :deadline unset
# @return [Enumerator] when no block is given; otherwise the return value is
#   not meaningful to callers
def self.walk_files(base_path, gitignore: nil, skipped: nil, status: nil,
                    max_dirs_visited: MAX_DIRS_VISITED,
                    timeout_seconds: WALK_TIMEOUT_SECONDS,
                    &block)
  if block.nil?
    return enum_for(:walk_files, base_path,
                    gitignore: gitignore, skipped: skipped, status: status,
                    max_dirs_visited: max_dirs_visited, timeout_seconds: timeout_seconds)
  end

  # Fall back to the nearest .gitignore when the caller supplied none.
  effective_gitignore = gitignore
  unless effective_gitignore
    located = find_gitignore(base_path)
    effective_gitignore = located ? Clacky::GitignoreParser.new(located) : nil
  end

  # The deadline is an absolute monotonic timestamp, or nil for "no limit".
  deadline = nil
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout_seconds if timeout_seconds

  walk_budget = { dirs_visited: 0, max_dirs: max_dirs_visited, deadline: deadline }

  begin
    _walk_recursive(base_path, base_path, effective_gitignore, skipped, walk_budget, &block)
  rescue WalkBudgetExceeded => exceeded
    # Budget exhaustion is not an error for callers; it is only reported
    # through +status+ when one was supplied.
    return nil unless status

    status[:truncated] = true
    status[:truncation_reason] = exceeded.reason.to_s
  end
end