Class: Identifiers::ISBN
- Inherits:
-
Object
- Object
- Identifiers::ISBN
- Defined in:
- lib/identifiers/isbn.rb
Constant Summary collapse
# Matches a 13-digit ISBN carrying the 978/979 Bookland prefix, written either
# as a bare run of digits or with one separator character (a dash or a space
# separator) used consistently throughout.
#
# Capture 1 is the ISBN as written; capture 2 is the separator character, or
# nil when the ISBN is unseparated. The pattern relies on extended mode, so
# every inline comment must end at a line break.
ISBN_13_REGEXP = /
  (?<!\p{Pd})           # Not part of a longer hyphen-separated run of digits
  \b
  (
    97[89]              # ISBN (GS1) Bookland prefix
    ([\p{Pd}\p{Zs}])?   # Optional hyphenation
    (?:
      \d                # Digit
      \2?               # Optional hyphenation
    ){9}
    \d                  # Check digit
  )
  \b
  (?!\p{Pd})            # Not part of a longer hyphen-separated run of digits
/x.freeze
# Matches a 10-character ISBN, written either as a bare run of digits or with
# one separator character (a dash or a space separator) used consistently
# throughout. The final character may be a digit or an upper-case X.
#
# Capture 1 is the ISBN as written; capture 2 is the separator character, or
# nil when the ISBN is unseparated. The pattern relies on extended mode, so
# every inline comment must end at a line break.
ISBN_10_REGEXP = /
  (?<!                  # Don't match a hyphenated or spaced ISBN-13
    97[89]
    [\p{Pd}\p{Zs}]
  )
  (?<!\p{Pd})           # Not part of a longer hyphen-separated run of digits
  \b
  (
    \d{1,5}             # Registration group identifier
    ([\p{Pd}\p{Zs}])?   # Optional hyphenation
    (?:
      \d                # Digit
      \2?               # Optional hyphenation
    ){4,8}
    [\dX]               # Check digit
  )
  \b
  (?!\p{Pd})            # Not part of a longer hyphen-separated run of digits
/x.freeze
# Matches the portion of an ISBN-A that follows the "10." directory indicator:
# the Bookland prefix, a dot, 2-8 digits, a slash and 1-7 further digits. The
# "10." itself is only asserted through a lookbehind and is not part of the
# match. The pattern has no capture groups and relies on extended mode, so
# every inline comment must end at a line break.
ISBN_A_REGEXP = %r{
  \b
  (?<=10\.)   # Directory indicator (always 10)
  97[89]\.    # ISBN (GS1) Bookland prefix
  \d{2,8}     # ISBN registration group element and publisher prefix
  /           # Prefix/suffix divider
  \d{1,7}     # ISBN title enumerator and check digit
  \b
}x.freeze
- NON_CANONICAL_DASHES_REGEXP =
Dashes other than the ASCII hyphen we normalise to, including U+2212 MINUS SIGN, which is not part of \p{Pd}
/[[\p{Pd}−]&&[^-]]/.freeze
Class Method Summary collapse
- .digits_of(isbn) ⇒ Object
- .extract(str, prefixes = []) ⇒ Object
- .extract_isbn_as(str) ⇒ Object
- .extract_ten_digit_isbns(str) ⇒ Object
- .extract_thirteen_digit_isbns(str) ⇒ Object
- .extract_with_prefix(str, prefixes) ⇒ Object
- .generate_prefix_regexp(prefixes) ⇒ Object
- .isbn_13_check_digit(isbn) ⇒ Object
- .isbn_a_candidate_matcher ⇒ Object
- .isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp) ⇒ Object
- .valid_isbn_10?(isbn) ⇒ Boolean
- .valid_isbn_13?(isbn) ⇒ Boolean
Class Method Details
.digits_of(isbn) ⇒ Object
125 126 127 |
# File 'lib/identifiers/isbn.rb', line 125
# Turns every character of +isbn+ into its numeric value, treating an
# upper-case 'X' as 10.
#
# @param isbn [#to_s] the identifier to split into digits
# @return [Enumerator] enumerator over the numeric values, in order
# @raise [ArgumentError] if a character is neither 'X' nor accepted by Integer()
def self.digits_of(isbn)
  values = isbn.to_s.chars.map do |symbol|
    next 10 if symbol == 'X'

    Integer(symbol)
  end

  # Callers chain Enumerator-only helpers such as #with_index on the result
  values.to_enum
end
.extract(str, prefixes = []) ⇒ Object
52 53 54 55 56 57 58 59 |
# File 'lib/identifiers/isbn.rb', line 52
# Extracts ISBNs from +str+.
#
# When +prefixes+ is non-empty the work is delegated to extract_with_prefix.
# Otherwise the results of the ISBN-A, 13-digit and 10-digit extractors are
# concatenated in that order.
#
# @param str [#to_s] text to search
# @param prefixes [Array] prefixes handed to extract_with_prefix
# @return [Array<String>] the extracted ISBNs
def self.extract(str, prefixes = [])
  text = str.to_s

  # Normalise dashes to a single ASCII hyphen so one ISBN can mix them
  text = text.gsub(NON_CANONICAL_DASHES_REGEXP, '-') if text.match?(NON_CANONICAL_DASHES_REGEXP)

  if prefixes.any?
    extract_with_prefix(text, prefixes)
  else
    [
      extract_isbn_as(text),
      extract_thirteen_digit_isbns(text),
      extract_ten_digit_isbns(text)
    ].flatten(1)
  end
end
.extract_isbn_as(str) ⇒ Object
69 70 71 |
# File 'lib/identifiers/isbn.rb', line 69
# Finds ISBN-As in +str+ and returns the 13-digit ISBNs they encode.
#
# @param str [#to_s] text to search
# @return [Array<String>] whatever extract_thirteen_digit_isbns accepts from
#   the stripped candidates
def self.extract_isbn_as(str)
  isbn_as = str.to_s.scan(ISBN_A_REGEXP)

  # One candidate per line, with the ISBN-A dots and slash removed, so the
  # 13-digit extractor sees plain runs of digits
  candidates = isbn_as.join("\n").tr('/.', '')

  extract_thirteen_digit_isbns(candidates)
end
.extract_ten_digit_isbns(str) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/identifiers/isbn.rb', line 82
# Finds valid ISBN-10s in +str+ and returns them converted to ISBN-13 form
# (978 prefix, recomputed check digit).
#
# @param str [#to_s] text to search
# @return [Array<String>] 13-digit ISBNs, one per valid ISBN-10 found
def self.extract_ten_digit_isbns(str)
  bare_isbns = str.to_s.scan(ISBN_10_REGEXP).filter_map do |isbn, hyphen|
    # A separated ISBN-10 must contain exactly three separators
    next if hyphen && isbn.count(hyphen) != 3

    isbn.delete(hyphen.to_s)
  end

  bare_isbns.select { |isbn| valid_isbn_10?(isbn) }.map do |isbn|
    # Drop the ISBN-10 check digit and add the Bookland prefix
    stem = "978#{isbn[0...-1]}"
    "#{stem}#{isbn_13_check_digit(stem)}"
  end
end
.extract_thirteen_digit_isbns(str) ⇒ Object
73 74 75 76 77 78 79 80 |
# File 'lib/identifiers/isbn.rb', line 73
# Finds valid 13-digit ISBNs in +str+ and returns them without separators.
#
# @param str [#to_s] text to search
# @return [Array<String>] the ISBNs that pass valid_isbn_13?
def self.extract_thirteen_digit_isbns(str)
  str.to_s.scan(ISBN_13_REGEXP).filter_map do |isbn, hyphen|
    # A separated ISBN-13 must contain exactly four separators
    next unless hyphen.nil? || isbn.count(hyphen) == 4

    bare_isbn = isbn.delete(hyphen.to_s)
    bare_isbn if valid_isbn_13?(bare_isbn)
  end
end
.extract_with_prefix(str, prefixes) ⇒ Object
61 62 63 64 65 66 67 |
# File 'lib/identifiers/isbn.rb', line 61
# Extracts only the ISBNs in +str+ that directly follow one of +prefixes+.
#
# The ISBN-A, 13-digit and 10-digit matchers are tried in that order and
# their results merged without duplicates.
#
# @param str [#to_s] text to search
# @param prefixes [Array] prefixes passed to generate_prefix_regexp
# @return [Array<String>] unique ISBNs in order of first appearance
def self.extract_with_prefix(str, prefixes)
  prefix_regexp = generate_prefix_regexp(prefixes)
  matchers = [isbn_a_candidate_matcher, ISBN_13_REGEXP, ISBN_10_REGEXP]

  matchers
    .flat_map { |isbn_regexp| isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp) }
    .uniq
end
.generate_prefix_regexp(prefixes) ⇒ Object
140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/identifiers/isbn.rb', line 140
# Builds a case-insensitive, extended-mode regexp that matches the optional
# colon and whitespace found immediately after any of +prefixes+. The prefix
# itself is only asserted through a lookbehind.
#
# @param prefixes [Array<String, Regexp>] alternatives fed to Regexp.union
# @return [Regexp]
def self.generate_prefix_regexp(prefixes)
  joined_prefixes = Regexp.union(prefixes).source

  # Extended mode: each inline comment must sit on its own line
  pattern = <<~PATTERN
    (?<=                  # Lookbehind for a prefix
      #{joined_prefixes}  # ie:p1|p2|p3
    )
    :?                    # Optional colon. If you want to use a different separator, you can add it as a prefix
    \\s*                  # Optional whitespaces
  PATTERN

  Regexp.new(pattern, Regexp::IGNORECASE | Regexp::EXTENDED)
end
.isbn_13_check_digit(isbn) ⇒ Object
98 99 100 101 102 103 104 105 106 107 |
# File 'lib/identifiers/isbn.rb', line 98
# Computes the ISBN-13 check digit for +isbn+: the digits are weighted
# alternately by 1 and 3, and the result is the amount needed to bring that
# weighted sum up to a multiple of 10.
#
# @param isbn [#to_s] the digits to compute the check digit for
# @return [Integer] a value from 0 to 9
def self.isbn_13_check_digit(isbn)
  products = digits_of(isbn).zip([1, 3].cycle).map { |digit, weight| digit * weight }
  remainder = products.reduce(:+) % 10

  # A sum that is already a multiple of 10 needs a check digit of 0, not 10
  remainder.zero? ? 0 : 10 - remainder
end
.isbn_a_candidate_matcher ⇒ Object
153 154 155 156 |
# File 'lib/identifiers/isbn.rb', line 153
# Variant of ISBN_A_REGEXP whose match includes the leading "10." directory
# indicator instead of merely asserting it with a lookbehind.
#
# @return [Regexp] case-insensitive, extended-mode regexp
def self.isbn_a_candidate_matcher
  # We capture the ISBN-A prefix for the ISBN-A regexp to work correctly when extracting ISBN-As
  consuming_source = ISBN_A_REGEXP.source.gsub('(?<=10\\.)', '10\.')
  options = Regexp::IGNORECASE | Regexp::EXTENDED

  Regexp.new(consuming_source, options)
end
.isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp) ⇒ Object
129 130 131 132 133 134 135 136 137 138 |
# File 'lib/identifiers/isbn.rb', line 129
# Scans +str+ for +isbn_regexp+ matches that directly follow +prefix_regexp+,
# then runs each hit back through extract and keeps its first result.
#
# @param str [#to_s] text to search
# @param prefix_regexp [Regexp] regexp that must match just before the ISBN
# @param isbn_regexp [Regexp] regexp locating the ISBN candidate
# @return [Array<String>] one ISBN per candidate that extract accepted
def self.isbn_with_prefix_candidates(str, prefix_regexp, isbn_regexp)
  combined = Regexp.new([prefix_regexp, isbn_regexp].join, Regexp::IGNORECASE | Regexp::EXTENDED)

  str.to_s.scan(combined).each_with_object([]) do |match, found|
    # scan yields an Array when the regexp has capture groups, a String otherwise
    candidate = Array(match).first
    isbn = extract(candidate)&.first
    found << isbn if isbn
  end
end
.valid_isbn_10?(isbn) ⇒ Boolean
117 118 119 120 121 122 123 |
# File 'lib/identifiers/isbn.rb', line 117
# Checks whether +isbn+ is a well-formed ISBN-10 with a correct check digit.
#
# The value is coerced with String() once and that string is used for every
# check, so non-String input (e.g. an Integer) is validated instead of
# failing on `=~`. The shape check is anchored to the whole string: nine
# digits followed by a digit or an upper-case X. An unanchored match would
# let 10-character strings that merely contain a shorter candidate (such as
# "12-34-5678") through to digits_of, which raises on the separator.
#
# @param isbn [#to_s] candidate ISBN-10 without separators
# @return [Boolean] true when the position-weighted sum (weights 1..10) is
#   divisible by 11
def self.valid_isbn_10?(isbn)
  isbn = String(isbn)
  return false unless isbn.match?(/\A\d{9}[\dX]\z/)

  checksum = digits_of(isbn).with_index(1).sum { |digit, position| digit * position }

  (checksum % 11).zero?
end
.valid_isbn_13?(isbn) ⇒ Boolean
109 110 111 112 113 114 115 |
# File 'lib/identifiers/isbn.rb', line 109
# Checks whether +isbn+ is a well-formed ISBN-13 with a correct check digit.
#
# The value is coerced with String() once and that string is used for both
# the length check and the pattern match, so non-String input (e.g. an
# Integer) is validated instead of failing on `=~`.
#
# @param isbn [#to_s] candidate ISBN-13 without separators
# @return [Boolean] true when the alternating 1/3 weighted sum is divisible
#   by 10
def self.valid_isbn_13?(isbn)
  isbn = String(isbn)
  return false unless isbn.length == 13 && isbn.match?(ISBN_13_REGEXP)

  checksum = digits_of(isbn).zip([1, 3].cycle).sum { |digit, weight| digit * weight }

  (checksum % 10).zero?
end