Module: Pubid::Iso::Utilities
- Defined in:
- lib/pubid/iso/utilities.rb
Overview
Utility methods for ISO identifiers
Class Method Summary collapse
-
.parse_from_title(title) ⇒ Identifier?
Parse identifier from document title.
Class Method Details
.parse_from_title(title) ⇒ Identifier?
Parse identifier from document title
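This method attempts to extract an ISO identifier from a document title by trying several strategies in turn: an identifier at the very start of the title, an identifier embedded elsewhere in the title, and fallback candidates built from groups of the title's words and from separator-delimited segments. The first candidate that parses as a valid identifier is returned; nil is returned when none does.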
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/pubid/iso/utilities.rb', line 22
#
# Parse an ISO identifier out of a document title.
#
# Candidate substrings are collected with four strategies, in priority order,
# and the first one accepted by Pubid::Iso.parse is returned:
#
# 1. an identifier-shaped prefix of the title ("ISO 9001:2015 - Quality...")
# 2. every ISO/IEC identifier-shaped match found inside the title
# 3. word groups, longest first: the title with trailing words removed, then
#    the title with leading words removed (at most 8 words are kept)
# 4. segments between " - ", en/em dash or ": " separators, last segment first
#
# Only Parslet::ParseFailed is treated as "candidate rejected"; any other error
# raised by the parser propagates to the caller.
#
# @param title [String, nil] document title; surrounding whitespace is ignored
# @return [Identifier, nil] the parsed identifier, or nil when no candidate parses
def parse_from_title(title)
  text = title.to_s.strip
  return nil if text.empty?

  candidates = []

  # Strategy 1: identifier at the very start of the title. \A (not ^) so that
  # only the start of the string counts, not the start of any later line.
  leading_pattern = %r{\A([A-Z]{2,4}(?:/[A-Z]{2,4})?\s+\d+(?:-\d+)?(?::\d{4})?(?:/[A-Z]+\s+\d+(?::\d{4})?)?)}i
  candidates << text[leading_pattern, 1]

  # Strategy 2: identifier anywhere in the title, e.g. "Document on ISO 9001 requirements".
  embedded_pattern = %r{
    \b
    (?:ISO|IEC)
    (?:/[A-Z]{2,4})?                # optional copublisher
    \s+
    \d+(?:-\d+)?                    # number and optional part
    (?::\d{4})?                     # optional year
    (?:/[A-Z]+\s+\d+(?::\d{4})?)?   # optional supplement
    \b
  }ix
  # The pattern has no capture groups, so scan yields the matched strings.
  candidates.concat(text.scan(embedded_pattern))

  # Strategy 3: word groups, longest first, so the most complete identifier
  # wins. Prefixes implement "remove trailing words"; suffixes cover titles
  # that end with the identifier.
  words = text.split(/\s+/)
  sizes = [words.length, 8].min.downto(2).to_a
  candidates.concat(sizes.map { |size| words.first(size).join(" ") })
  candidates.concat(sizes.map { |size| words.last(size).join(" ") })

  # Strategy 4: split on title separators only (a dash surrounded by
  # whitespace, or a colon followed by whitespace) so that the hyphen and
  # colon inside "ISO 9001-1:2015" are left intact.
  segments = text.split(/\s+[-\u2013\u2014]\s+|:\s+/).map(&:strip)
  candidates.concat(segments.select { |segment| segment.length >= 5 }.reverse)

  # Several strategies often produce the same substring; parse each only once.
  candidates.compact.uniq.each do |candidate|
    return Pubid::Iso.parse(candidate)
  rescue Parslet::ParseFailed
    next
  end

  nil
end