Module: MarkdownServer::Helpers::SearchHelpers
- Defined in:
- lib/markdown_server/helpers/search_helpers.rb
Constant Summary collapse
- BINARY_EXTENSIONS =
%w[ .png .jpg .jpeg .gif .bmp .ico .svg .webp .pdf .epub .mobi .zip .gz .tar .bz2 .7z .rar .exe .dll .so .dylib .o .mp3 .mp4 .avi .mov .wav .flac .ogg .woff .woff2 .ttf .eot .otf .pyc .class .beam .sqlite .db ].freeze
- MAX_SEARCH_FILES =
100
- MAX_FILE_READ_BYTES =
500KB
512_000
- CONTEXT_LINES =
lines before/after match to send
2
- MAX_LINE_DISPLAY =
chars before truncating a line
200
Instance Method Summary collapse
- #collect_matching_lines(lines, regexes) ⇒ Object
- #compile_regexes(query) ⇒ Object
- #highlight_search_line(text, regexes, is_match) ⇒ Object
- #search_files(dir_path, regexes) ⇒ Object
- #search_single_file(file_path, regexes) ⇒ Object
- #walk_directory(dir_path, &block) ⇒ Object
Instance Method Details
#collect_matching_lines(lines, regexes) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 78
#
# Finds the lines matched by any of +regexes+ and groups them together with
# up to CONTEXT_LINES lines of surrounding context.
#
# @param lines [Array<String>] the lines to inspect
# @param regexes [Array<Regexp>] a line is a match when at least one matches
# @return [Array<Hash>] one hash per context group, shaped
#   { lines: [{ number:, text:, distance: }, ...] }; +number+ is the 1-based
#   line number, +text+ is the line without its trailing newline, and
#   +distance+ is the number of lines to the nearest matching line (0 for a
#   matching line itself)
def collect_matching_lines(lines, regexes)
  # each_index walks in ascending order, so the result is already sorted.
  matched = lines.each_index.select do |i|
    regexes.any? { |re| re.match?(lines[i]) }
  end
  matched_set = Set.new(matched)

  # Build context groups, merging windows that overlap or touch.
  last_index = lines.length - 1
  groups = []
  matched.each do |idx|
    range_start = [idx - CONTEXT_LINES, 0].max
    range_end = [idx + CONTEXT_LINES, last_index].min

    if groups.last && range_start <= groups.last[:end] + 1
      groups.last[:end] = range_end
    else
      groups << { start: range_start, end: range_end }
    end
  end

  groups.map do |group|
    context_lines = (group[:start]..group[:end]).map do |i|
      # Every line in a group lies within CONTEXT_LINES of some match, so the
      # nearest match is found by probing at most CONTEXT_LINES offsets
      # instead of scanning every match index.
      distance = (0..CONTEXT_LINES).find do |d|
        matched_set.include?(i - d) || matched_set.include?(i + d)
      end
      { number: i + 1, text: lines[i].to_s.chomp, distance: distance }
    end
    { lines: context_lines }
  end
end
#compile_regexes(query) ⇒ Object
148 149 150 151 152 153 154 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 148
#
# Turns a whitespace-separated search query into one case-insensitive Regexp
# per word.
#
# NOTE(review): each word is compiled as a regular expression verbatim, so a
# query that comes from a user can contain arbitrary patterns — confirm that
# this is intended and that pathological patterns are acceptable.
#
# @param query [String, nil] the raw query text
# @return [Array<Regexp>, nil] the compiled patterns, or nil when the query
#   contains no words (nil, empty or whitespace only)
# @raise [RegexpError] when a word is not a valid regular expression
def compile_regexes(query)
  words = query.to_s.split(/\s+/).reject(&:empty?)
  return nil if words.empty?

  words.map { |word| Regexp.new(word, Regexp::IGNORECASE) }
rescue RegexpError => e
  # Re-raised as a fresh RegexpError that carries only the original message.
  raise RegexpError, e.message
end
#highlight_search_line(text, regexes, is_match) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 110
#
# Renders one search-result line as an HTML fragment: the text is escaped with
# +h+, every match of +regexes+ is wrapped in a "highlight-match" span, and
# lines longer than MAX_LINE_DISPLAY characters are cut down to a window of
# that size (centred on the first match when there is one) with "truncated"
# markers on the cut sides.
#
# NOTE(review): +h+ is not defined in this block; it is presumably the
# HTML-escaping helper of the including class — confirm.
#
# @param text [String] the raw line
# @param regexes [Array<Regexp>] the patterns to highlight
# @param is_match [Boolean] whether this line is a matching line; context
#   lines are escaped but never highlighted
# @return [String] the HTML fragment
def highlight_search_line(text, regexes, is_match)
  # Build a combined regex with non-greedy quantifiers for shorter highlights.
  # The rewrite is purely textual, so fall back to the original pattern if it
  # ever yields something that does not compile.
  lazy = regexes.map do |re|
    begin
      Regexp.new(re.source.gsub(/(?<!\\)([*+}])(?!\?)/, '\1?'), re.options)
    rescue RegexpError
      re
    end
  end
  combined = Regexp.union(lazy)

  # Truncate long lines, centering around the first match
  prefix_trunc = false
  suffix_trunc = false
  if text.length > MAX_LINE_DISPLAY
    start = 0
    if is_match && (m = combined.match(text))
      center = m.begin(0) + m[0].length / 2
      half = MAX_LINE_DISPLAY / 2
      start = [[center - half, 0].max, [text.length - MAX_LINE_DISPLAY, 0].max].min
    end
    prefix_trunc = start > 0
    suffix_trunc = (start + MAX_LINE_DISPLAY) < text.length
    text = text[start, MAX_LINE_DISPLAY]
  end

  html = +""
  html << '<span class="truncated">...</span>' if prefix_trunc

  if is_match
    # Walk the matches in one pass. Pairing String#split with String#scan
    # loses text: split drops trailing empty pieces (a line made only of
    # matches produced no output at all) and both methods change shape when
    # the pattern contains capture groups.
    cursor = 0
    text.scan(combined) do
      found = Regexp.last_match
      next if found[0].empty? # nothing visible to highlight

      html << h(text[cursor...found.begin(0)])
      html << %(<span class="highlight-match">#{h(found[0])}</span>)
      cursor = found.end(0)
    end
    html << h(text[cursor..-1])
  else
    html << h(text)
  end

  html << '<span class="truncated">...</span>' if suffix_trunc
  html
end
#search_files(dir_path, regexes) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 37
#
# Searches the files yielded by walk_directory(dir_path) and returns those in
# which every regex matches somewhere, stopping once MAX_SEARCH_FILES results
# have been collected. Only the first MAX_FILE_READ_BYTES bytes of each file
# are examined. Files that cannot be read, are empty, or are not valid UTF-8
# are skipped.
#
# @param dir_path [String] directory to search
# @param regexes [Array<Regexp>] patterns that must all match the file content
# @return [Array<Hash>] one { path:, matches: } hash per matching file, where
#   +path+ is the file path with the real path of root_dir stripped and
#   +matches+ is the result of collect_matching_lines
def search_files(dir_path, regexes)
  results = []
  base = File.realpath(root_dir)

  catch(:search_limit) do
    walk_directory(dir_path) do |file_path|
      throw :search_limit if results.length >= MAX_SEARCH_FILES

      content = begin
        File.binread(file_path, MAX_FILE_READ_BYTES)
      rescue StandardError
        nil # unreadable files are skipped, not fatal
      end
      # binread with a length returns nil (not "") for an empty file.
      next if content.nil?

      content.force_encoding("utf-8")
      if content.bytesize == MAX_FILE_READ_BYTES
        # The read may have stopped in the middle of a multibyte character;
        # drop at most three trailing bytes so an otherwise valid file is not
        # rejected as invalid.
        3.times do
          break if content.valid_encoding?

          content = content.byteslice(0, content.bytesize - 1)
        end
      end
      next unless content.valid_encoding?

      # All regexes must match somewhere in the file
      next unless regexes.all? { |re| re.match?(content) }

      results << {
        path: file_path.sub("#{base}/", ""),
        matches: collect_matching_lines(content.lines, regexes)
      }
    end
  end

  results
end
#search_single_file(file_path, regexes) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 20
#
# Searches one file and returns it as a single-element result list when every
# regex matches somewhere in its first MAX_FILE_READ_BYTES bytes. A file that
# cannot be read, is empty, or is not valid UTF-8 yields no results.
#
# @param file_path [String] path of the file to search
# @param regexes [Array<Regexp>] patterns that must all match the file content
# @return [Array<Hash>] either [] or [{ path:, matches: }], where +path+ is
#   +file_path+ with the real path of root_dir stripped and +matches+ is the
#   result of collect_matching_lines
def search_single_file(file_path, regexes)
  base = File.realpath(root_dir)

  content = begin
    File.binread(file_path, MAX_FILE_READ_BYTES)
  rescue StandardError
    nil
  end
  # binread with a length returns nil (not "") for an empty file.
  return [] if content.nil?

  content.force_encoding("utf-8")
  if content.bytesize == MAX_FILE_READ_BYTES
    # The read may have stopped in the middle of a multibyte character; drop
    # at most three trailing bytes so an otherwise valid file is not rejected.
    3.times do
      break if content.valid_encoding?

      content = content.byteslice(0, content.bytesize - 1)
    end
  end
  return [] unless content.valid_encoding?
  return [] unless regexes.all? { |re| re.match?(content) }

  [{
    path: file_path.sub("#{base}/", ""),
    matches: collect_matching_lines(content.lines, regexes)
  }]
end
#walk_directory(dir_path, &block) ⇒ Object
63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/markdown_server/helpers/search_helpers.rb', line 63
#
# Recursively walks +dir_path+ in sorted entry order and calls the block with
# the full path of every regular file found. Entries whose name starts with
# "." or is listed in EXCLUDED are skipped, as are files whose lower-cased
# extension is listed in BINARY_EXTENSIONS.
#
# @param dir_path [String] directory to walk
# @yieldparam full [String] path of a file, built by joining +dir_path+ and
#   the entry names below it
# @return [Array<String>] the sorted entry names of +dir_path+ (the value of
#   the underlying +each+)
# @raise [SystemCallError] when +dir_path+ itself cannot be listed
def walk_directory(dir_path, &block)
  Dir.entries(dir_path).sort.each do |entry|
    next if entry.start_with?(".") || EXCLUDED.include?(entry)

    full = File.join(dir_path, entry)
    if File.directory?(full)
      # An unreadable subdirectory is skipped rather than aborting the whole
      # walk, in the same spirit as unreadable files being skipped by the
      # search itself.
      walk_directory(full, &block) if File.readable?(full)
    elsif File.file?(full) && !BINARY_EXTENSIONS.include?(File.extname(entry).downcase)
      block.call(full)
    end
  end
end