Module: MarkdownServer::Helpers::SearchHelpers

Defined in:
lib/markdown_server/helpers/search_helpers.rb

Constant Summary collapse

BINARY_EXTENSIONS =
%w[
  .png .jpg .jpeg .gif .bmp .ico .svg .webp
  .pdf .epub .mobi
  .zip .gz .tar .bz2 .7z .rar
  .exe .dll .so .dylib .o
  .mp3 .mp4 .avi .mov .wav .flac .ogg
  .woff .woff2 .ttf .eot .otf
  .pyc .class .beam
  .sqlite .db
].freeze
MAX_SEARCH_FILES =
100
MAX_FILE_READ_BYTES =

500 KiB (500 × 1024 bytes): the maximum number of bytes read from each file

512_000
CONTEXT_LINES =

number of context lines before/after each match to send

2
MAX_LINE_DISPLAY =

chars before truncating a line

200

Instance Method Summary collapse

Instance Method Details

#collect_matching_lines(lines, regexes) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/markdown_server/helpers/search_helpers.rb', line 78

# Finds the lines matched by any of the given regexes and returns them
# together with surrounding context, merged into non-overlapping groups.
#
# @param lines [Array<String>] the lines to scan, in file order
# @param regexes [Array<Regexp>] a line is a match when any of these matches it
# @return [Array<Hash>] one `{ lines: [...] }` hash per context group; each
#   entry is `{ number:, text:, distance: }` where `number` is the 1-based
#   line number, `text` is the line without its trailing newline, and
#   `distance` is how many lines away the nearest matching line is
#   (0 for a matching line itself)
def collect_matching_lines(lines, regexes)
  # Indices are produced in ascending order, so no separate sort is needed.
  sorted = lines.each_index.select { |i| regexes.any? { |re| re.match?(lines[i]) } }
  match_indices = Set.new(sorted)

  # Build context groups: every match claims CONTEXT_LINES on either side
  # (clamped to the bounds of `lines`); windows that overlap or touch merge.
  groups = []
  sorted.each do |idx|
    range_start = [idx - CONTEXT_LINES, 0].max
    range_end = [idx + CONTEXT_LINES, lines.length - 1].min

    if groups.last && range_start <= groups.last[:end] + 1
      groups.last[:end] = range_end
    else
      groups << { start: range_start, end: range_end }
    end
  end

  groups.map do |g|
    context_lines = (g[:start]..g[:end]).map do |i|
      # Every line inside a group lies within CONTEXT_LINES of some match, so
      # probing outwards finds the nearest one in at most CONTEXT_LINES + 1
      # steps instead of measuring the distance to every match in the file.
      distance = (0..CONTEXT_LINES).find do |d|
        match_indices.include?(i - d) || match_indices.include?(i + d)
      end
      { number: i + 1, text: lines[i].to_s.chomp, distance: distance }
    end
    { lines: context_lines }
  end
end

#compile_regexes(query) ⇒ Object



148
149
150
151
152
153
154
# File 'lib/markdown_server/helpers/search_helpers.rb', line 148

# Compiles a whitespace-separated search query into one case-insensitive
# regex per word.
#
# @param query [String, nil] the raw query; every word is used as a regex pattern
# @return [Array<Regexp>, nil] one regex per word, or nil when the query
#   contains no words (nil, empty, or whitespace only)
# @raise [RegexpError] when a word is not a valid regular expression
def compile_regexes(query)
  words = query.to_s.split(/\s+/).reject(&:empty?)
  return nil if words.empty?

  # Regexp.new already raises RegexpError for an invalid pattern, so the
  # error is left to propagate with its original message and backtrace.
  words.map { |w| Regexp.new(w, Regexp::IGNORECASE) }
end

#highlight_search_line(text, regexes, is_match) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/markdown_server/helpers/search_helpers.rb', line 110

# Renders one search-result line as an HTML fragment: over-long lines are
# shortened to MAX_LINE_DISPLAY characters and, on matching lines, every
# match is wrapped in a highlight span.
#
# @param text [String] the raw line text
# @param regexes [Array<Regexp>] the compiled query patterns
# @param is_match [Boolean] true for a matching line; only matching lines are
#   highlighted and centered on their first match when shortened
# @return [String] HTML; all line text is passed through `h` (defined
#   elsewhere; presumably the HTML-escaping helper)
def highlight_search_line(text, regexes, is_match)
  # Build a combined regex with non-greedy quantifiers for shorter highlights.
  lazy_regexes = regexes.map do |r|
    begin
      Regexp.new(r.source.gsub(/(?<!\\)([*+}])(?!\?)/, '\1?'), r.options)
    rescue RegexpError
      # The textual rewrite is only cosmetic; if it produces an invalid
      # pattern, highlight with the original regex instead of failing.
      r
    end
  end
  combined = Regexp.union(lazy_regexes)

  # Truncate long lines, centering the window around the first match.
  prefix_trunc = false
  suffix_trunc = false
  if text.length > MAX_LINE_DISPLAY
    start = 0
    if is_match && (m = combined.match(text))
      center = m.begin(0) + m[0].length / 2
      half = MAX_LINE_DISPLAY / 2
      start = [[center - half, 0].max, [text.length - MAX_LINE_DISPLAY, 0].max].min
    end
    prefix_trunc = start > 0
    suffix_trunc = (start + MAX_LINE_DISPLAY) < text.length
    text = text[start, MAX_LINE_DISPLAY]
  end

  html = +""
  html << '<span class="truncated">...</span>' if prefix_trunc
  if is_match
    # Walk the matches in one pass using the MatchData offsets. Pairing
    # String#split with String#scan breaks when the pattern has capture
    # groups (both return the captures) and when matches sit at the end of
    # the line (split drops trailing empty pieces).
    cursor = 0
    text.scan(combined) do
      found = Regexp.last_match
      next if found[0].empty? # a zero-width match has nothing to highlight

      html << h(text[cursor...found.begin(0)])
      html << %(<span class="highlight-match">#{h(found[0])}</span>)
      cursor = found.end(0)
    end
    html << h(text[cursor..-1])
  else
    html << h(text)
  end
  html << '<span class="truncated">...</span>' if suffix_trunc
  html
end

#search_files(dir_path, regexes) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/markdown_server/helpers/search_helpers.rb', line 37

# Searches the files yielded by walk_directory(dir_path) and collects those
# whose content matches every regex, stopping once MAX_SEARCH_FILES results
# have been gathered.
#
# Only the first MAX_FILE_READ_BYTES bytes of each file are examined. Files
# that cannot be read, are empty, or are not valid UTF-8 are skipped.
#
# @param dir_path [String] directory to search
# @param regexes [Array<Regexp>] patterns that must ALL match somewhere in a file
# @return [Array<Hash>] `{ path:, matches: }` per matching file, where `path`
#   has the real path of `root_dir` stripped from its front and `matches` is
#   the result of collect_matching_lines
def search_files(dir_path, regexes)
  results = []
  base = File.realpath(root_dir)

  catch(:search_limit) do
    walk_directory(dir_path) do |file_path|
      throw :search_limit if results.length >= MAX_SEARCH_FILES

      content = begin
        File.binread(file_path, MAX_FILE_READ_BYTES)
      rescue StandardError
        nil # an unreadable file is skipped rather than failing the search
      end
      # binread with a length returns nil (not "") for an empty file.
      next if content.nil?

      content.force_encoding("utf-8")
      # The byte cap can cut a multi-byte character in half. A UTF-8
      # character is at most 4 bytes, so at most 3 dangling bytes need to go
      # before a capped read of valid text is valid again.
      if content.bytesize >= MAX_FILE_READ_BYTES
        3.times do
          break if content.valid_encoding?

          content = content.byteslice(0, content.bytesize - 1)
        end
      end
      next unless content.valid_encoding?

      # All regexes must match somewhere in the file
      next unless regexes.all? { |re| re.match?(content) }

      relative = file_path.delete_prefix("#{base}/")
      results << { path: relative, matches: collect_matching_lines(content.lines, regexes) }
    end
  end

  results
end

#search_single_file(file_path, regexes) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/markdown_server/helpers/search_helpers.rb', line 20

# Searches a single file and returns it as a one-element result list when
# its content matches every regex.
#
# Only the first MAX_FILE_READ_BYTES bytes are examined. A file that cannot
# be read, is empty, or is not valid UTF-8 produces no result.
#
# @param file_path [String] path of the file to search
# @param regexes [Array<Regexp>] patterns that must ALL match somewhere in the file
# @return [Array<Hash>] `[{ path:, matches: }]` on a match, otherwise `[]`;
#   `path` has the real path of `root_dir` stripped from its front and
#   `matches` is the result of collect_matching_lines
def search_single_file(file_path, regexes)
  base = File.realpath(root_dir)

  content = begin
    File.binread(file_path, MAX_FILE_READ_BYTES)
  rescue StandardError
    nil # an unreadable file simply yields no results
  end
  # binread with a length returns nil (not "") for an empty file.
  return [] if content.nil?

  content.force_encoding("utf-8")
  # The byte cap can cut a multi-byte character in half. A UTF-8 character is
  # at most 4 bytes, so at most 3 dangling bytes need to go before a capped
  # read of valid text is valid again.
  if content.bytesize >= MAX_FILE_READ_BYTES
    3.times do
      break if content.valid_encoding?

      content = content.byteslice(0, content.bytesize - 1)
    end
  end
  return [] unless content.valid_encoding?
  return [] unless regexes.all? { |re| re.match?(content) }

  relative = file_path.delete_prefix("#{base}/")
  [{ path: relative, matches: collect_matching_lines(content.lines, regexes) }]
end

#walk_directory(dir_path, &block) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/markdown_server/helpers/search_helpers.rb', line 63

# Recursively visits dir_path in sorted entry order and calls the block with
# the full path of every regular file that passes the filters.
#
# Skipped: entries whose name starts with "." (which also covers "." and
# ".."), entries named in EXCLUDED (defined elsewhere), and files whose
# lower-cased extension is listed in BINARY_EXTENSIONS.
#
# NOTE(review): File.directory? follows symlinks, so symlinked directories
# are descended into as well; confirm that is intended.
#
# @param dir_path [String] directory to walk
# @yieldparam path [String] dir_path-joined path of a candidate file
def walk_directory(dir_path, &block)
  visible = Dir.entries(dir_path).reject do |name|
    name.start_with?(".") || EXCLUDED.include?(name)
  end

  visible.sort.each do |name|
    path = File.join(dir_path, name)

    if File.directory?(path)
      walk_directory(path, &block)
      next
    end

    next unless File.file?(path)
    next if BINARY_EXTENSIONS.include?(File.extname(name).downcase)

    block.call(path)
  end
end