Class: RockautoApi::Parsers::PartExtractor

Inherits:
Object
Defined in:
lib/rockauto_api/parsers/part_extractor.rb

Constant Summary

# Upper-case brand tokens that extract_from_row searches for in the text of a
# table row. The list is scanned front to back and the first entry that is
# found wins, so the order of the entries is significant.
#
# NOTE(review): %w splits on whitespace, so "VICTOR" and "REINZ" are two
# independent entries rather than a single "VICTOR REINZ" brand - confirm
# that this is intended.
KNOWN_BRANDS = %w[
  HONDA TOYOTA FORD CHEVROLET DODGE NISSAN
  BMW MERCEDES VOLKSWAGEN SUBARU MAZDA HYUNDAI
  KIA AUDI LEXUS JEEP GMC RAM
  CHRYSLER BOSCH DENSO NGK ACDELCO MOTORCRAFT
  DELPHI WALKER MONROE GATES DAYCO CONTINENTAL
  TIMKEN SKF FEL-PRO MAHLE VICTOR REINZ
  MOOG DORMAN CARDONE STANDARD BECK/ARNLEY MEVOTECH
].freeze
# Matches a run of six or more upper-case ASCII letters and/or digits anywhere
# in a string. extract_from_row uses it to pick the cell that holds the part
# number. The pattern is unanchored and does not require a digit, so a long
# upper-case word matches as well.
PART_NUMBER_REGEX = /[A-Z0-9]{6,}/
# Matches a dollar amount anywhere in a string: a "$" followed by a run of
# digits, commas and periods (for example "$5", "$1,234.56" or "$.99").
#
# The lookahead demands at least one digit inside that run, so a stray "$."
# or "$," is not mistaken for a price. For any text that does contain a digit
# the matched substring is the same as with the plain /\$[\d,.]+/ pattern.
PRICE_REGEX = /\$(?=[\d,.]*\d)[\d,.]+/

Class Method Summary

Class Method Details

.clean_name(text, price, part_number, brand) ⇒ Object



70
71
72
73
74
75
76
# File 'lib/rockauto_api/parsers/part_extractor.rb', line 70

# Strips the price, part number and brand out of a piece of text so that only
# the descriptive part name is left.
#
# @param text [#to_s] raw text; +nil+ is treated as an empty string
# @param price [String, nil] price text to delete (literal match); skipped when nil
# @param part_number [String, nil] part number text to delete (literal match);
#   skipped when nil
# @param brand [String, nil] brand to delete, case-insensitively; skipped when nil
# @return [String] the remaining text with whitespace runs collapsed to one
#   space and leading/trailing whitespace removed
def self.clean_name(text, price, part_number, brand)
  result = text.to_s
  result = result.gsub(price, "") if price
  result = result.gsub(part_number, "") if part_number

  if brand
    # Delete the brand only where it stands alone. A plain substring match
    # would also eat the middle of ordinary words, e.g. brand "RAM" inside
    # "Ceramic" or "Frame". Lookarounds are used instead of \b because some
    # brands contain punctuation ("FEL-PRO", "BECK/ARNLEY").
    standalone_brand = /(?<![[:alnum:]])#{Regexp.escape(brand)}(?![[:alnum:]])/i
    result = result.gsub(standalone_brand, "")
  end

  result.gsub(/\s+/, " ").strip
end

.extract_from_row(row_node) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/rockauto_api/parsers/part_extractor.rb', line 17

# Builds a Models::PartInfo from one table row using text heuristics.
#
# The stripped, non-empty texts of the row's <td> cells are classified as:
# * price       - the first text matching PRICE_REGEX (the whole cell text)
# * brand       - the first KNOWN_BRANDS entry that occurs as a standalone
#                 token (case-insensitive) in any text
# * part number - the first text matching PART_NUMBER_REGEX that is neither a
#                 price nor just a known brand name
# * name        - the longest remaining text, falling back to the first text
#                 and finally to "Unknown"; it is passed through clean_name
#
# Links whose href contains "moreinfo" supply info_url, other links whose
# href contains "catalog" supply url (the last matching link wins), and the
# first <img> with a src supplies image_url.
#
# @param row_node [#css, #at_css] node for the row; presumably a Nokogiri
#   element - confirm against the caller
# @return [Models::PartInfo, nil] the extracted part, or nil when the row has
#   no <td> cells or any StandardError is raised while extracting
def self.extract_from_row(row_node)
  cells = row_node.css("td")
  return nil if cells.empty?

  texts = cells.map { |cell| cell.text.strip }.reject(&:empty?)

  price = texts.find { |text| text.match?(PRICE_REGEX) }

  # Match brands as whole tokens only. A substring test would detect "RAM" in
  # "Ceramic" or "FORD" in "Affordable" and then throw the description cell
  # away as if it were the brand cell. Lookarounds are used instead of \b
  # because some brands contain punctuation ("FEL-PRO", "BECK/ARNLEY").
  token_pattern = ->(token) { /(?<![[:alnum:]])#{Regexp.escape(token)}(?![[:alnum:]])/i }
  brand = KNOWN_BRANDS.find do |candidate|
    pattern = token_pattern.call(candidate)
    texts.any? { |text| text.match?(pattern) }
  end
  brand_pattern = brand && token_pattern.call(brand)

  # A cell holding nothing but a brand name (e.g. "MOTORCRAFT") also satisfies
  # PART_NUMBER_REGEX, so such cells are skipped here.
  part_number = texts.find do |text|
    text.match?(PART_NUMBER_REGEX) &&
      !text.match?(PRICE_REGEX) &&
      !KNOWN_BRANDS.include?(text.upcase)
  end

  name_candidates = texts.reject do |text|
    text == price || text == part_number || (brand_pattern && text.match?(brand_pattern))
  end
  name = name_candidates.max_by(&:length) || texts.first || "Unknown"

  url = nil
  info_url = nil
  row_node.css("a").each do |link|
    href = link["href"]
    next unless href

    if href.include?("moreinfo")
      info_url = Parsers::HtmlHelpers.make_absolute_url(href)
    elsif href.include?("catalog")
      url = Parsers::HtmlHelpers.make_absolute_url(href)
    end
  end

  img = row_node.at_css("img")
  image_url = Parsers::HtmlHelpers.make_absolute_url(img["src"]) if img && img["src"]

  Models::PartInfo.new(
    name: clean_name(name, price, part_number, brand),
    part_number: part_number || "Unknown",
    brand: brand,
    price: price,
    url: url,
    image_url: image_url,
    info_url: info_url
  )
rescue StandardError
  # Best-effort parsing: a row that cannot be processed yields nil instead of
  # raising to the caller.
  nil
end