Module: MarkdownServer::Helpers::SearchHelpers

Defined in:
lib/markdown_server/helpers/search_helpers.rb

Constant Summary collapse

BINARY_EXTENSIONS =
%w[
  .png .jpg .jpeg .gif .bmp .ico .svg .webp
  .pdf .epub .mobi
  .zip .gz .tar .bz2 .7z .rar
  .exe .dll .so .dylib .o
  .mp3 .mp4 .avi .mov .wav .flac .ogg
  .woff .woff2 .ttf .eot .otf
  .pyc .class .beam
  .sqlite .db
].freeze
MAX_SEARCH_FILES =
100
MAX_FILE_READ_BYTES =

500 KiB (500 × 1024 = 512,000 bytes)

512_000
CONTEXT_LINES =

lines before/after match to send

2
MAX_LINE_DISPLAY =

chars before truncating a line

200

Instance Method Summary collapse

Instance Method Details

#collect_matching_lines(lines, regexes) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/markdown_server/helpers/search_helpers.rb', line 106

# Finds every line matched by at least one regex and returns the matches
# grouped together with their surrounding context lines.
#
# Each match contributes the window of CONTEXT_LINES lines before and after
# it (clipped to the bounds of +lines+). Windows that overlap or touch are
# merged into a single group.
#
# @param lines [Array<String>] the file content split into lines
# @param regexes [Array<Regexp>] patterns; a line matches if ANY of them does
# @return [Array<Hash>] one hash per group, shaped as
#   { lines: [{ number:, text:, distance: }, ...] } where +number+ is the
#   1-based line number, +text+ is the line without its trailing newline and
#   +distance+ is the number of lines to the nearest match (0 for a match).
def collect_matching_lines(lines, regexes)
  # each_index walks in ascending order, so matched_indices is already sorted.
  matched_indices = lines.each_index.select do |i|
    regexes.any? { |re| re.match?(lines[i]) }
  end
  match_set = matched_indices.to_set

  # Build context groups, merging windows that overlap or are adjacent.
  groups = []
  matched_indices.each do |idx|
    range_start = [idx - CONTEXT_LINES, 0].max
    range_end = [idx + CONTEXT_LINES, lines.length - 1].min

    if groups.last && range_start <= groups.last[:end] + 1
      groups.last[:end] = range_end
    else
      groups << { start: range_start, end: range_end }
    end
  end

  groups.map do |group|
    context_lines = (group[:start]..group[:end]).map do |i|
      distance =
        if match_set.include?(i)
          0
        else
          # Every line in a group lies within CONTEXT_LINES of some match, so
          # probing outwards locally finds the nearest one without scanning
          # the complete list of matches for every context line.
          (1..CONTEXT_LINES).find { |d| match_set.include?(i - d) || match_set.include?(i + d) }
        end
      { number: i + 1, text: lines[i].to_s.chomp, distance: distance }
    end
    { lines: context_lines }
  end
end

#compile_regexes(query) ⇒ Object



176
177
178
179
180
181
182
# File 'lib/markdown_server/helpers/search_helpers.rb', line 176

# Compiles a whitespace-separated search query into one case-insensitive
# Regexp per word.
#
# NOTE(review): each word is compiled verbatim as a regular expression. If
# +query+ comes from an untrusted client, a pathological pattern could be
# expensive to evaluate — confirm whether a match timeout is needed.
#
# @param query [String, nil] the raw query text
# @return [Array<Regexp>, nil] the compiled patterns, or nil when the query
#   is nil or contains no words
# @raise [RegexpError] when a word is not a valid regular expression
def compile_regexes(query)
  # to_s lets a missing (nil) query behave like an empty one instead of
  # raising NoMethodError.
  words = query.to_s.split(/\s+/).reject(&:empty?)
  return nil if words.empty?

  # RegexpError from an invalid word propagates unchanged to the caller.
  words.map { |word| Regexp.new(word, Regexp::IGNORECASE) }
end

#highlight_search_line(text, regexes, is_match) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/markdown_server/helpers/search_helpers.rb', line 138

# Renders one result line as HTML, wrapping every regex match in a
# <span class="highlight-match"> and truncating over-long lines.
#
# Lines longer than MAX_LINE_DISPLAY characters are cut to that length; for a
# matching line the cut is centred on the first match. Truncated ends are
# marked with a <span class="truncated">...</span>.
#
# All text is passed through +h+ before being emitted (a helper defined
# outside this method; presumably HTML-escaping — confirm).
#
# @param text [String] the line to render
# @param regexes [Array<Regexp>] the search patterns
# @param is_match [Boolean] true when the line itself matched (highlighted),
#   false for a plain context line
# @return [String] the HTML fragment
def highlight_search_line(text, regexes, is_match)
  # Rewrite greedy quantifiers as lazy ones so highlights stay short. The
  # rewrite is purely textual, so fall back to the original pattern if the
  # rewritten source no longer compiles.
  lazy_patterns = regexes.map do |re|
    begin
      Regexp.new(re.source.gsub(/(?<!\\)([*+}])(?!\?)/, '\1?'), re.options)
    rescue RegexpError
      re
    end
  end
  combined = Regexp.union(lazy_patterns)

  # Truncate long lines, centering around the first match.
  prefix_trunc = false
  suffix_trunc = false
  if text.length > MAX_LINE_DISPLAY
    start = 0
    if is_match && (first = combined.match(text))
      center = first.begin(0) + first[0].length / 2
      start = (center - MAX_LINE_DISPLAY / 2).clamp(0, text.length - MAX_LINE_DISPLAY)
    end
    prefix_trunc = start > 0
    suffix_trunc = (start + MAX_LINE_DISPLAY) < text.length
    text = text[start, MAX_LINE_DISPLAY]
  end

  html = +""
  html << '<span class="truncated">...</span>' if prefix_trunc
  if is_match
    # Walk the matches by position instead of pairing split with scan:
    # capture groups in a user pattern change what both of those return, and
    # split drops trailing empty pieces, which loses consecutive matches at
    # the end of the line.
    cursor = 0
    text.scan(combined) do
      match = Regexp.last_match
      next if match[0].empty? # nothing to highlight for a zero-width match

      html << h(text[cursor...match.begin(0)])
      html << %(<span class="highlight-match">#{h(match[0])}</span>)
      cursor = match.end(0)
    end
    html << h(text[cursor..-1])
  else
    html << h(text)
  end
  html << '<span class="truncated">...</span>' if suffix_trunc
  html
end

#search_files(dir_path, regexes) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/markdown_server/helpers/search_helpers.rb', line 37

# Searches the files yielded by walk_directory(dir_path) and collects those
# in which every regex matches somewhere in the file.
#
# At most MAX_FILE_READ_BYTES bytes of each file are read. Files that cannot
# be read, are empty, or are not valid UTF-8 are skipped. The search stops
# once MAX_SEARCH_FILES results have been collected.
#
# @param dir_path [String] directory to search
# @param regexes [Array<Regexp>] patterns that must ALL match the file content
# @return [Array<Hash>] one { path:, matches: } hash per matching file, where
#   +path+ has the real path of root_dir stripped and +matches+ is the result
#   of collect_matching_lines
def search_files(dir_path, regexes)
  results = []
  base = File.realpath(root_dir)

  catch(:search_limit) do
    walk_directory(dir_path) do |file_path|
      throw :search_limit if results.length >= MAX_SEARCH_FILES

      content =
        begin
          File.binread(file_path, MAX_FILE_READ_BYTES)
        rescue StandardError
          nil # best effort: unreadable files are skipped
        end
      # binread with a positive length returns nil (not "") for an empty
      # file; skip it rather than crash on nil below.
      next if content.nil?

      content.force_encoding("utf-8")
      next unless content.valid_encoding?

      # All regexes must match somewhere in the file
      next unless regexes.all? { |re| re.match?(content) }

      results << {
        path: file_path.sub("#{base}/", ""),
        matches: collect_matching_lines(content.lines, regexes)
      }
    end
  end

  results
end

#search_single_file(file_path, regexes) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/markdown_server/helpers/search_helpers.rb', line 20

# Searches a single file and reports it when every regex matches somewhere
# in its content.
#
# At most MAX_FILE_READ_BYTES bytes are read. A file that cannot be read, is
# empty, or is not valid UTF-8 produces no result.
#
# @param file_path [String] path of the file to search
# @param regexes [Array<Regexp>] patterns that must ALL match the file content
# @return [Array<Hash>] either an empty array or a one-element array holding
#   { path:, matches: }, where +path+ has the real path of root_dir stripped
#   and +matches+ is the result of collect_matching_lines
def search_single_file(file_path, regexes)
  base = File.realpath(root_dir)

  content =
    begin
      File.binread(file_path, MAX_FILE_READ_BYTES)
    rescue StandardError
      nil # best effort: an unreadable file yields no result
    end
  # binread with a positive length returns nil (not "") for an empty file;
  # treat that as "nothing to search" rather than crash on nil below.
  return [] if content.nil?

  content.force_encoding("utf-8")
  return [] unless content.valid_encoding?
  return [] unless regexes.all? { |re| re.match?(content) }

  [{
    path: file_path.sub("#{base}/", ""),
    matches: collect_matching_lines(content.lines, regexes)
  }]
end

#walk_directory(dir_path, parent_segs = nil, parent_mode = nil, &block) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/markdown_server/helpers/search_helpers.rb', line 63

# Recursively walks +dir_path+ in sorted entry order and calls +block+ with
# the path of every regular file that passes the visibility checks and whose
# extension is not listed in BINARY_EXTENSIONS.
#
# On the outermost call (parent_segs is nil) the path segments of +dir_path+
# relative to the real path of root_dir are replayed through
# MarkdownServer::Unhide.step; the walk is abandoned if any segment is
# rejected. Recursive calls pass the accumulated segments and mode along.
#
# @param dir_path [String] directory to walk
# @param parent_segs [Array<String>, nil] path segments from the root to
#   +dir_path+; nil on the outermost call
# @param parent_mode [Object, nil] mode value threaded through the Unhide
#   steps; starts as :open on the outermost call
# @yieldparam path [String] +dir_path+ joined with the entry names below it
def walk_directory(dir_path, parent_segs = nil, parent_mode = nil, &block)
  unhide_rules = Array(settings.unhide_rules)
  allowed_bases = permitted_bases

  if parent_segs.nil?
    root = File.realpath(root_dir)
    resolved_dir =
      begin
        File.realpath(dir_path)
      rescue StandardError
        dir_path
      end

    root_prefix = "#{root}/"
    parent_segs =
      if resolved_dir != root && resolved_dir.start_with?(root_prefix)
        resolved_dir.sub(root_prefix, "").split("/")
      else
        []
      end

    parent_mode = :open
    parent_segs.each_with_index do |segment, depth|
      admitted, parent_mode = MarkdownServer::Unhide.step(parent_mode, parent_segs, depth, segment, unhide_rules)
      return unless admitted
    end
  end

  Dir.entries(dir_path).sort.each do |name|
    next if %w[. ..].include?(name)

    visible, child_mode = MarkdownServer::Unhide.entry_step(parent_mode, parent_segs, name, unhide_rules)
    next unless visible

    entry_path = File.join(dir_path, name)
    resolved =
      begin
        File.realpath(entry_path)
      rescue StandardError
        next
      end
    next unless MarkdownServer::PermittedBases.base_for(resolved, allowed_bases)

    # Restricted entries that are symlinks: only descend / index when
    # the symlink's realpath is explicitly in --follow-link. Mirrors
    # entry_admitted? in path_helpers; prevents unhide from
    # double-walking internal-aliased dotfile symlinks.
    if MarkdownServer::Unhide.restricted?(name) && File.symlink?(entry_path) &&
       !Array(settings.followed_links).include?(resolved)
      next
    end

    if File.directory?(entry_path)
      walk_directory(entry_path, parent_segs + [name], child_mode, &block)
    elsif File.file?(entry_path) && !BINARY_EXTENSIONS.include?(File.extname(name).downcase)
      block.call(entry_path)
    end
  end
end