Class: Identifiers::ISBN

Inherits:
Object
  • Object
show all
Defined in:
lib/identifiers/isbn.rb

Constant Summary collapse

# Matches an ISBN-13 candidate: thirteen digits starting with 978 or 979.
#
# A single separator character (any dash punctuation or space separator) may
# follow the prefix; if one is present, only that same character may follow
# each of the next nine digits (enforced with the \2 backreference).
#
# Capture 1 is the whole candidate, separators included.
# Capture 2 is the separator character, or nil when the candidate has none.
#
# The pattern does not check how many separators were used nor the check
# digit; extract_thirteen_digit_isbns and valid_isbn_13? do that afterwards.
ISBN_13_REGEXP =
/
  (?<!\p{Pd})         # Not part of a longer hyphen-separated run of digits
  \b
  (
    97[89]            # ISBN (GS1) Bookland prefix
    ([\p{Pd}\p{Zs}])? # Optional hyphenation
    (?:
      \d              # Digit
      \2?             # Optional hyphenation
    ){9}
    \d                # Check digit
  )
  \b
  (?!\p{Pd})          # Not part of a longer hyphen-separated run of digits
/x.freeze
# Matches an ISBN-10 candidate: one to five leading digits, four to eight
# further digits, and a final digit or 'X'.
#
# A single separator character (any dash punctuation or space separator) may
# follow the leading digits; if one is present, only that same character may
# follow each of the further digits (enforced with the \2 backreference).
# The first lookbehind rejects text that directly follows "978"/"979" plus a
# separator, i.e. the tail of a hyphenated or spaced ISBN-13.
#
# Capture 1 is the whole candidate, separators included.
# Capture 2 is the separator character, or nil when the candidate has none.
#
# The quantifiers alone allow between 6 and 14 digits, so this pattern does
# not guarantee exactly ten; valid_isbn_10? checks the length and check digit.
ISBN_10_REGEXP =
/
  (?<!              # Don't match a hyphenated or spaced ISBN-13
    97[89]
    [\p{Pd}\p{Zs}]
  )
  (?<!\p{Pd})         # Not part of a longer hyphen-separated run of digits
  \b
  (
    \d{1,5}           # Registration group identifier
    ([\p{Pd}\p{Zs}])? # Optional hyphenation
    (?:
      \d              # Digit
      \2?             # Optional hyphenation
    ){4,8}
    [\dX]             # Check digit
  )
  \b
  (?!\p{Pd})          # Not part of a longer hyphen-separated run of digits
/x.freeze
# Matches the part of an ISBN-A that follows the "10." directory indicator:
# "978." or "979.", two to eight digits, a slash, then one to seven digits.
#
# The "10." itself is only asserted with a lookbehind, so it is not part of
# the matched text. The pattern has no capture groups, so String#scan yields
# plain strings.
#
# NOTE: isbn_a_candidate_matcher rewrites this pattern by replacing the
# literal source text "(?<=10\.)"; keep that fragment spelled exactly as is.
ISBN_A_REGEXP =
%r{
  \b
  (?<=10\.) # Directory indicator (always 10)
  97[89]\.  # ISBN (GS1) Bookland prefix
  \d{2,8}   # ISBN registration group element and publisher prefix
  /         # Prefix/suffix divider
  \d{1,7}   # ISBN title enumerator and check digit
  \b
}x.freeze
NON_CANONICAL_DASHES_REGEXP =

Dashes other than the ASCII hyphen we normalise to, including U+2212 MINUS SIGN, which is not part of \p{Pd}

/[[\p{Pd}−]&&[^-]]/.freeze

Class Method Summary collapse

Class Method Details

.digits_of(isbn) ⇒ Object



125
126
127
# File 'lib/identifiers/isbn.rb', line 125

# Converts an ISBN into an enumerator over the numeric value of each character.
#
# The argument is stringified first. An uppercase 'X' (the ISBN-10 check
# character) counts as 10; every other character is parsed with Integer().
#
# @param isbn [#to_s] the ISBN characters, without separators
# @return [Enumerator] the digit values, in their original order
# @raise [ArgumentError] if a character is neither 'X' nor accepted by Integer()
def self.digits_of(isbn)
  values = isbn.to_s.chars.map do |symbol|
    next 10 if symbol == 'X'

    Integer(symbol)
  end

  # Callers chain Enumerator-only methods (e.g. with_index), so hand back an
  # enumerator rather than the array itself.
  values.to_enum
end

.extract(str, prefixes = []) ⇒ Object



52
53
54
55
56
57
58
59
# File 'lib/identifiers/isbn.rb', line 52

# Extracts ISBNs from free text.
#
# Without prefixes, the result is the concatenation of the ISBN-A matches,
# the ISBN-13 matches and the ISBN-10 matches, in that order. With prefixes,
# extraction is delegated to extract_with_prefix.
#
# @param str [#to_s] the text to search
# @param prefixes [Array] prefixes handed to extract_with_prefix when non-empty
# @return [Array<String>] the extracted ISBNs
def self.extract(str, prefixes = [])
  text = str.to_s
  # Normalise dashes to a single ASCII hyphen so one ISBN can mix them; the
  # string is only rewritten when it actually contains such a dash.
  text = text.gsub(NON_CANONICAL_DASHES_REGEXP, '-') if text.match?(NON_CANONICAL_DASHES_REGEXP)

  if prefixes.any?
    extract_with_prefix(text, prefixes)
  else
    [
      extract_isbn_as(text),
      extract_thirteen_digit_isbns(text),
      extract_ten_digit_isbns(text)
    ].reduce(:+)
  end
end

.extract_isbn_as(str) ⇒ Object



69
70
71
# File 'lib/identifiers/isbn.rb', line 69

# Extracts ISBNs written in ISBN-A form.
#
# Every ISBN_A_REGEXP match has its '.' and '/' characters removed, the
# results are put on separate lines, and that text is run through
# extract_thirteen_digit_isbns.
#
# @param str [#to_s] the text to search
# @return [Array<String>] whatever extract_thirteen_digit_isbns accepts
def self.extract_isbn_as(str)
  isbn_as = str.to_s.scan(ISBN_A_REGEXP)
  digit_runs = isbn_as.map { |isbn_a| isbn_a.delete('/.') }

  # One candidate per line keeps neighbouring digit runs from merging
  extract_thirteen_digit_isbns(digit_runs.join("\n"))
end

.extract_ten_digit_isbns(str) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/identifiers/isbn.rb', line 82

# Extracts ISBN-10s from text and returns them converted to 13-digit form.
#
# A candidate that uses a separator must contain exactly three of them. The
# separators are removed, the result must pass valid_isbn_10?, and then the
# ISBN-10 check character is dropped, "978" is put in front and the check
# digit from isbn_13_check_digit is appended.
#
# @param str [#to_s] the text to search
# @return [Array<String>] the converted ISBNs, in order of appearance
def self.extract_ten_digit_isbns(str)
  str.to_s.scan(ISBN_10_REGEXP).filter_map do |candidate, separator|
    next if separator && candidate.count(separator) != 3

    isbn_10 = candidate.delete(separator.to_s)
    next unless valid_isbn_10?(isbn_10)

    # Swap the ISBN-10 check character for the "978" prefix ...
    isbn_13 = isbn_10.chop.prepend('978')
    # ... and recompute the check digit over the twelve resulting digits
    isbn_13 << isbn_13_check_digit(isbn_13).to_s
  end
end

.extract_thirteen_digit_isbns(str) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/identifiers/isbn.rb', line 73

# Extracts ISBN-13s from text, with any separators removed.
#
# A candidate that uses a separator must contain exactly four of them, and
# the separator-free result must pass valid_isbn_13?.
#
# @param str [#to_s] the text to search
# @return [Array<String>] the accepted ISBNs, in order of appearance
def self.extract_thirteen_digit_isbns(str)
  str.to_s.scan(ISBN_13_REGEXP).filter_map do |candidate, separator|
    next if separator && candidate.count(separator) != 4

    isbn = candidate.delete(separator.to_s)
    isbn if valid_isbn_13?(isbn)
  end
end

.extract_with_prefix(str, prefixes) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/identifiers/isbn.rb', line 61

# Extracts only the ISBNs that directly follow one of the given prefixes.
#
# The text is searched three times: for ISBN-A, ISBN-13 and ISBN-10
# candidates, in that order. The results are merged with duplicates removed,
# keeping the first occurrence.
#
# @param str [#to_s] the text to search
# @param prefixes [Array] prefixes passed to generate_prefix_regexp
# @return [Array<String>] the unique ISBNs found after a prefix
def self.extract_with_prefix(str, prefixes)
  prefix_matcher = generate_prefix_regexp(prefixes)
  isbn_matchers = [isbn_a_candidate_matcher, ISBN_13_REGEXP, ISBN_10_REGEXP]

  isbn_matchers
    .flat_map { |isbn_matcher| isbn_with_prefix_candidates(str, prefix_matcher, isbn_matcher) }
    .uniq
end

.generate_prefix_regexp(prefixes) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/identifiers/isbn.rb', line 140

# Builds the regexp that recognises the text between a prefix and an ISBN.
#
# The prefixes are combined with Regexp.union and asserted with a lookbehind,
# so the prefix itself is not part of the match; what is matched is an
# optional colon followed by optional whitespace. The regexp is compiled
# case-insensitively and in extended mode.
#
# @param prefixes [Array] the accepted prefixes, as taken by Regexp.union
# @return [Regexp] the prefix matcher
def self.generate_prefix_regexp(prefixes)
  alternatives = Regexp.union(prefixes).source
  options = Regexp::IGNORECASE | Regexp::EXTENDED
  pattern =
    "(?<=                 # Lookbehind for a prefix
       #{alternatives} # ie:p1|p2|p3
     )
     :?                   # Optional colon. If you want to use a different separator, you can add it as a prefix
     \\s*                 # Optional whitespaces
     "

  Regexp.new(pattern, options)
end

.isbn_13_check_digit(isbn) ⇒ Object



98
99
100
101
102
103
104
105
106
107
# File 'lib/identifiers/isbn.rb', line 98

# Computes the ISBN-13 check digit for a run of digits.
#
# Digits are weighted 1, 3, 1, 3, ... from the left; the result is the
# amount that brings the weighted sum up to a multiple of ten (0 when it
# already is one).
#
# @param isbn [#to_s] the digits to protect, as accepted by digits_of
# @return [Integer] the check digit, between 0 and 9
def self.isbn_13_check_digit(isbn)
  weighted_sum = digits_of(isbn)
    .each_with_index
    .map { |digit, position| position.even? ? digit : digit * 3 }
    .reduce(:+)

  # The outer modulo folds the "10" case back to 0
  (10 - (weighted_sum % 10)) % 10
end

.isbn_a_candidate_matcher ⇒ Object



153
154
155
156
# File 'lib/identifiers/isbn.rb', line 153

# Builds a variant of ISBN_A_REGEXP whose match includes the leading "10.".
#
# ISBN_A_REGEXP only asserts the "10." with a lookbehind. Here that
# lookbehind is replaced by a consumed literal, so the matched text still
# carries the "10." that ISBN_A_REGEXP needs when isbn_with_prefix_candidates
# passes the match back through extract.
#
# @return [Regexp] a case-insensitive, extended-mode ISBN-A matcher
def self.isbn_a_candidate_matcher
  lookbehind_indicator = '(?<=10\\.)'
  consumed_indicator = '10\.'
  source = ISBN_A_REGEXP.source.gsub(lookbehind_indicator, consumed_indicator)

  Regexp.new(source, Regexp::IGNORECASE | Regexp::EXTENDED)
end

.isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp) ⇒ Object



129
130
131
132
133
134
135
136
137
138
# File 'lib/identifiers/isbn.rb', line 129

# Finds ISBNs of one kind that directly follow a prefix.
#
# The prefix matcher and the ISBN matcher are concatenated into one regexp.
# For every match, the first capture (or the whole match when the regexp has
# no captures) is passed to extract, and the first ISBN it yields is kept.
#
# @param str [#to_s] the text to search
# @param prefix_regexp [Regexp] the matcher built by generate_prefix_regexp
# @param isbn_regexp [Regexp] the ISBN pattern to look for after the prefix
# @return [Array<String>] one ISBN per successful match; matches that yield none are skipped
def self.isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp)
  options = Regexp::IGNORECASE | Regexp::EXTENDED
  prefixed_isbn_regexp = Regexp.new("#{prefix_regexp}#{isbn_regexp}", options)

  str.to_s.scan(prefixed_isbn_regexp).each_with_object([]) do |match, isbns|
    # scan yields arrays of captures, or plain strings when there are none
    candidate = Array(match).first
    isbn = extract(candidate)&.first
    isbns << isbn if isbn
  end
end

.valid_isbn_10?(isbn) ⇒ Boolean

Returns:

  • (Boolean)


117
118
119
120
121
122
123
# File 'lib/identifiers/isbn.rb', line 117

# Checks whether a value is a well-formed ISBN-10 with a correct check digit.
#
# The value is converted to a String once, so the format check and the
# checksum always look at the same text. It must consist of exactly nine
# digits followed by a digit or an uppercase 'X'; anything else (separators,
# surrounding text, other lengths) is rejected with false rather than being
# passed on to digits_of, which raises on non-digit characters.
#
# @param isbn [#to_s] the candidate, without separators
# @return [Boolean] true when the weighted sum of the digits (weights 1 to 10,
#   'X' counting as 10) is divisible by 11
def self.valid_isbn_10?(isbn)
  isbn = String(isbn)
  # Anchored to the whole string: an unanchored match would also accept a
  # shorter ISBN-like run embedded in ten characters of other text.
  return false unless isbn.match?(/\A[0-9]{9}[0-9X]\z/)

  result = digits_of(isbn).with_index.map { |digit, weight| digit * weight.succ }.reduce(:+)

  (result % 11).zero?
end

.valid_isbn_13?(isbn) ⇒ Boolean

Returns:

  • (Boolean)


109
110
111
112
113
114
115
# File 'lib/identifiers/isbn.rb', line 109

# Checks whether a value is a well-formed ISBN-13 with a correct check digit.
#
# The value is converted to a String once, so the length check, the format
# check and the checksum all look at the same text. Non-String input such as
# an Integer is therefore validated by its digits instead of depending on
# whether the object happens to respond to =~.
#
# @param isbn [#to_s] the candidate, without separators
# @return [Boolean] true when the value is thirteen characters long, matches
#   ISBN_13_REGEXP and its digits, weighted 1, 3, 1, 3, ..., sum to a multiple
#   of ten
def self.valid_isbn_13?(isbn)
  isbn = String(isbn)
  return false unless isbn.length == 13 && isbn.match?(ISBN_13_REGEXP)

  result = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }.reduce(:+)

  (result % 10).zero?
end