4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/wayback_machine_downloader/subdom_processor.rb', line 4
# Scans the already-downloaded backup for links to subdomains of the base
# domain, queues each one for download, and optionally rewrites links.
#
# Does nothing unless @recursive_subdomains is truthy. Otherwise it:
#   1. resets @processed_domains to a Set holding only the base domain and
#      @subdomain_queue to a fresh Queue,
#   2. globs html/htm/css/js files under backup_path and hands them to
#      scan_files_for_subdomains,
#   3. pushes "https://<subdomain>.<base_domain>/" onto @subdomain_queue for
#      every result, then calls download_subdomains,
#   4. calls rewrite_subdomain_links when @rewrite is truthy.
#
# NOTE(review): scan_files_for_subdomains presumably yields bare subdomain
# labels (e.g. "blog"), since the base domain is appended here — confirm
# against that helper.
#
# @return [Object, nil] nil when disabled or when no subdomains are found;
#   otherwise whatever rewrite_subdomain_links returns (nil if @rewrite is
#   falsy)
def process_subdomains
  return unless @recursive_subdomains

  puts "Starting subdomain processing..."

  root_domain = extract_base_domain(@base_url)
  @processed_domains = Set[root_domain]
  @subdomain_queue = Queue.new

  # Only text assets that can carry links are worth scanning.
  scan_pattern = File.join(backup_path, "**/*.{html,htm,css,js}")
  candidate_files = Dir.glob(scan_pattern)
  puts "Scanning #{candidate_files.size} downloaded files for subdomain links..."

  discovered = scan_files_for_subdomains(candidate_files, root_domain)
  if discovered.empty?
    puts "No subdomains found in downloaded content."
    return
  end

  puts "Found #{discovered.size} subdomains to process: #{discovered.join(', ')}"

  # Seed the queue with the root URL of every discovered subdomain.
  discovered.each { |label| @subdomain_queue << "https://#{label}.#{root_domain}/" }

  download_subdomains(root_domain)
  rewrite_subdomain_links(root_domain) if @rewrite
end
|