Class: Jekyll::Site
- Inherits:
-
Object
- Object
- Jekyll::Site
- Defined in:
- lib/jekyll/polyglot/patches/jekyll/site.rb
Instance Attribute Summary collapse
-
#active_lang ⇒ Object
Returns the value of attribute active_lang.
-
#default_lang ⇒ Object
readonly
Returns the value of attribute default_lang.
-
#exclude_from_localization ⇒ Object
readonly
Returns the value of attribute exclude_from_localization.
-
#fallback_canonical_to_default_lang ⇒ Object
readonly
Returns the value of attribute fallback_canonical_to_default_lang.
-
#file_langs ⇒ Object
Returns the value of attribute file_langs.
-
#lang_from_path ⇒ Object
readonly
Returns the value of attribute lang_from_path.
-
#lang_vars ⇒ Object
readonly
Returns the value of attribute lang_vars.
-
#languages ⇒ Object
readonly
Returns the value of attribute languages.
Instance Method Summary collapse
-
#absolute_url_regex(url, disabled = false) ⇒ Object
A regex that matches absolute URLs in an HTML document. Matches href="baseurl/foo/bar-baz" and others like it; avoids matching excluded files.
- #assignPageLanguagePermalinks(doc, docs) ⇒ Object
- #assignPageRedirects(doc, docs) ⇒ Object
-
#coordinate_documents(docs) ⇒ Object
assigns natural permalinks to documents and prioritizes documents with active_lang languages over others.
- #correct_nonrelativized_absolute_urls(doc, regex, url) ⇒ Object
- #correct_nonrelativized_urls(doc, regex) ⇒ Object
- #derive_lang_from_path(doc) ⇒ Object
-
#document_url_regex ⇒ Object
A regex that matches URLs or permalinks with i18n prefixes or suffixes. Matches /en/foo, .en/foo, foo.en/ and other similar default URLs made by Jekyll when parsing documents without explicitly set permalinks.
- #fetch_languages ⇒ Object
-
#find_translations(page_id, normalized_permalink, candidate_docs = nil) ⇒ Object
Returns a hash of { lang => permalink } for all docs that are translations of the given page.
-
#glob_to_regex(pattern) ⇒ Object
Convert a glob pattern to a regex pattern: `*` matches any characters except /; `?` matches any single character except /.
-
#normalized_permalink_for_doc(doc) ⇒ Object
Returns the doc’s permalink with its own language prefix stripped, so it can be matched against sibling docs that share the same un-prefixed permalink.
- #prepare ⇒ Object
- #process ⇒ Object
- #process_active_language ⇒ Object
- #process_default_language ⇒ Object
-
#process_documents(docs) ⇒ Object
performs any necessary operations on the documents before rendering them.
- #process_language(lang) ⇒ Object
- #process_orig ⇒ Object
-
#relative_url_regex(disabled = false) ⇒ Object
A regex that matches relative URLs in an HTML document. Matches href="baseurl/foo/bar-baz", href="/foo/bar-baz" and others like it; avoids matching excluded files.
- #relativize_absolute_urls(doc, regex, url) ⇒ Object
- #relativize_urls(doc, regex) ⇒ Object
- #site_payload ⇒ Object
- #site_payload_orig ⇒ Object
- #split_on_multiple_delimiters(string) ⇒ Object
Instance Attribute Details
#active_lang ⇒ Object
Returns the value of attribute active_lang.
7 8 9 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 7 def active_lang @active_lang end |
#default_lang ⇒ Object (readonly)
Returns the value of attribute default_lang.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def default_lang @default_lang end |
#exclude_from_localization ⇒ Object (readonly)
Returns the value of attribute exclude_from_localization.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def exclude_from_localization @exclude_from_localization end |
#fallback_canonical_to_default_lang ⇒ Object (readonly)
Returns the value of attribute fallback_canonical_to_default_lang.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def fallback_canonical_to_default_lang @fallback_canonical_to_default_lang end |
#file_langs ⇒ Object
Returns the value of attribute file_langs.
7 8 9 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 7 def file_langs @file_langs end |
#lang_from_path ⇒ Object (readonly)
Returns the value of attribute lang_from_path.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def lang_from_path @lang_from_path end |
#lang_vars ⇒ Object (readonly)
Returns the value of attribute lang_vars.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def lang_vars @lang_vars end |
#languages ⇒ Object (readonly)
Returns the value of attribute languages.
6 7 8 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6 def languages @languages end |
Instance Method Details
#absolute_url_regex(url, disabled = false) ⇒ Object
A regex that matches absolute URLs in an HTML document. Matches href="baseurl/foo/bar-baz" and others like it; avoids matching excluded files. prepare makes sure that all @exclude dirs have a trailing slash.
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 353

# Builds the regex used to locate absolute links ("#{url}#{@baseurl}/...")
# inside rendered HTML. Capture group 1 holds everything after
# "#{url}#{@baseurl}/" up to the closing double quote.
#
# disabled = false: targets `href` attributes, adds one negative lookahead per
# @exclude glob and per "<language>/" prefix so those paths are left alone, and
# adds a negative lookbehind so the href that directly follows
# hreflang="<default_lang>" or hreflang="x-default" is not matched.
#
# disabled = true: targets the placeholder attribute `ferh` with no lookaheads
# and no lookbehind (presumably written by markup that opted out of
# relativization - confirm against the tag that emits it).
#
# @param url [Object] site url, interpolated verbatim into the pattern
# @param disabled [Boolean] select the `ferh` placeholder variant
# @return [Regexp]
def absolute_url_regex(url, disabled = false)
  attribute = 'ferh'
  skip = ''
  hreflang_guard = ""
  unless disabled
    attribute = 'href'
    excluded = @exclude.map { |glob| "(?!#{glob_to_regex(glob)})" }
    localized = @languages.map { |code| "(?!#{Regexp.escape(code)}/)" }
    skip = excluded.join + localized.join
    # hreflang links for the default language and x-default must keep pointing
    # at the un-prefixed URL, so they are excluded from the match.
    hreflang_guard = "(?<!hreflang=\"#{@default_lang}\" |hreflang=\"x-default\" )"
  end
  %r{#{hreflang_guard}#{attribute}="?#{url}#{@baseurl}/((?:#{skip}[^,'"\s/?.]+\.?)*(?:/[^\]\[)("'\s]*)?)"}
end
#assignPageLanguagePermalinks(doc, docs) ⇒ Object
240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 240

# Records on `doc` which languages it is available in.
#
# Writes three front-matter keys:
#   permalink_lang      - the { lang => permalink } hash from find_translations
#   available_languages - the keys of that hash
#   missing_languages   - configured languages (default + @languages) without a
#                         translation; empty when there is at most one
#                         translation, because a single-source page falls back
#                         identically for every language.
#
# @param doc  [#data] document being annotated
# @param docs [Array<#data>] candidate documents searched for translations
# @return [Array] the value stored in 'missing_languages'
def assignPageLanguagePermalinks(doc, docs)
  by_lang = find_translations(doc.data['page_id'], normalized_permalink_for_doc(doc), docs)
  found = by_lang.keys

  doc.data['permalink_lang'] = by_lang
  doc.data['available_languages'] = found

  doc.data['missing_languages'] =
    if by_lang.size > 1
      ([@default_lang] + @languages).uniq - found
    else
      []
    end
end
#assignPageRedirects(doc, docs) ⇒ Object
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 201

# Computes the final `redirect_from` list for `doc` and stores it in the doc's
# front matter (the key is left untouched when the list ends up empty).
#
# The list is the union of:
#   1. user-defined `redirect_from` entries (a scalar is wrapped in an array).
#      For a doc whose language differs from @default_lang each entry is
#      normalized to start with "/" and prefixed with "/<lang>" unless it
#      already starts with "/<lang>/".
#   2. the permalinks of every doc in `docs` that shares this doc's `page_id`
#      and whose permalink differs from this doc's own.
#
# Sibling docs that have no permalink are skipped: pushing their nil permalink
# would leave a nil entry in `redirect_from`.
#
# @param doc  [#data] document to update
# @param docs [Array<#data>] all candidate documents
# @return [Array, nil] the stored list, or nil when nothing was stored
def assignPageRedirects(doc, docs)
  # Preserve and normalize user-defined redirect_from
  user_redirects = doc.data['redirect_from'] || []
  user_redirects = [user_redirects] unless user_redirects.is_a?(Array)

  # Determine document language
  doc_lang = doc.data['lang'] || derive_lang_from_path(doc) || @default_lang

  # Scope user-defined redirects to the document's language if non-default
  if doc_lang != @default_lang && !user_redirects.empty?
    lang_prefix = "/#{doc_lang}/"
    user_redirects = user_redirects.map do |redirect_path|
      # Normalize path to start with /
      redirect_path = "/#{redirect_path}" unless redirect_path.start_with?('/')
      # Only prefix if not already prefixed with this language
      redirect_path.start_with?(lang_prefix) ? redirect_path : "/#{doc_lang}#{redirect_path}"
    end
  end

  # Compute page_id based redirects (cross-language)
  computed_redirects = []
  page_id = doc.data['page_id']
  unless page_id.nil? || page_id.empty?
    own_permalink = doc.data['permalink']
    computed_redirects = docs
      .select { |dd| dd.data['page_id'] == page_id }
      .map { |dd| dd.data['permalink'] }
      .reject { |permalink| permalink.nil? || permalink == own_permalink }
  end

  # Merge user-defined and computed redirects, removing duplicates
  all_redirects = (user_redirects + computed_redirects).uniq
  doc.data['redirect_from'] = all_redirects unless all_redirects.empty?
end
#coordinate_documents(docs) ⇒ Object
Assigns natural permalinks to documents and prioritizes documents in the active_lang language over others. If lang is not set in front matter, this tries to derive it from the path when lang_from_path is set; otherwise it assigns the document to the default_lang.
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 146 def coordinate_documents(docs) regex = document_url_regex approved = {} # Build set of valid languages (default + configured) valid_languages = ([@default_lang] + @languages).uniq docs.each do |doc| # Get the explicitly declared language (frontmatter or path-derived) explicit_lang = doc.data['lang'] || derive_lang_from_path(doc) lang = explicit_lang || @default_lang # FILTER: Skip documents whose explicit lang is not in configured languages. # Check the explicit value (not the fallback) so that documents with an # unconfigured lang like 'de' are excluded even if normalization would # map them to default_lang. Compare case-insensitively so case-mismatched # frontmatter (e.g. 'pt-br' vs configured 'pt-BR') is normalized below # rather than rejected here. if explicit_lang && valid_languages.none? { |l| l.downcase == explicit_lang.downcase } Jekyll.logger.warn "Polyglot:", "Skipping #{doc.relative_path} - lang '#{explicit_lang}' not in configured languages #{valid_languages.inspect}" next end # If the doc lang matches a config language case-insensitively, use the config case config_lang = @languages.find { |l| l.downcase == lang.downcase } lang = config_lang if config_lang doc.data['lang'] = lang if doc.data['lang'] && config_lang lang_exclusive = doc.data['lang-exclusive'] || [] url = doc.url.gsub(regex, '/') page_id = doc.data['page_id'] || url doc.data['permalink'] = url if doc.data['permalink'].to_s.empty? && !doc.data['lang'].to_s.empty? # Set rendered_lang to indicate what language this page is actually rendered in # This allows templates to detect fallback pages (rendered_lang != active_lang) doc.data['rendered_lang'] = lang # skip entirely if nothing to check next if @file_langs.nil? 
# skip this document if it has already been processed next if @file_langs[page_id] == @active_lang # skip this document if it has a fallback and it isn't assigned to the active language next if @file_langs[page_id] == @default_lang && lang != @active_lang # skip this document if it has lang-exclusive defined and the active_lang is not included next if !lang_exclusive.empty? && !lang_exclusive.include?(@active_lang) approved[page_id] = doc @file_langs[page_id] = lang end approved.each_value do |doc| assignPageRedirects(doc, docs) assignPageLanguagePermalinks(doc, docs) end approved.values end |
#correct_nonrelativized_absolute_urls(doc, regex, url) ⇒ Object
388 389 390 391 392 393 394 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 388

# Rewrites every match of `regex` in the doc's rendered output to
# href="<url><@baseurl>/<capture 1>", i.e. without a language prefix.
# Docs with no output are left alone. The previous output string is not
# mutated; doc.output is reassigned to a new string.
#
# @param doc   [#output, #output=] rendered document
# @param regex [Regexp] pattern whose first capture group is the path
# @param url   [String] site url placed before @baseurl
# @return [String, nil] the new output, or nil when doc.output is nil
def correct_nonrelativized_absolute_urls(doc, regex, url)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{url}#{@baseurl}/\\1\"")
end
#correct_nonrelativized_urls(doc, regex) ⇒ Object
396 397 398 399 400 401 402 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 396

# Rewrites every match of `regex` in the doc's rendered output to
# href="<@baseurl>/<capture 1>", i.e. without a language prefix.
# Docs with no output are left alone. The previous output string is not
# mutated; doc.output is reassigned to a new string.
#
# @param doc   [#output, #output=] rendered document
# @param regex [Regexp] pattern whose first capture group is the path
# @return [String, nil] the new output, or nil when doc.output is nil
def correct_nonrelativized_urls(doc, regex)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{@baseurl}/\\1\"")
end
#derive_lang_from_path(doc) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 128

# Looks for a configured language code among the "." / "/" separated pieces of
# doc.path. Matching is case-insensitive and the configured spelling is
# returned. The first path piece that matches wins.
#
# @param doc [#path]
# @return [String, nil] the matching entry of @languages; nil when
#   @lang_from_path is off or no piece matches
def derive_lang_from_path(doc)
  return nil unless @lang_from_path

  # First configured spelling wins for each case-folded code.
  by_folded = @languages.each_with_object({}) { |code, table| table[code.downcase] ||= code }

  hit = split_on_multiple_delimiters(doc.path).map(&:downcase).find { |piece| by_folded.key?(piece) }
  by_folded[hit]
end
#document_url_regex ⇒ Object
A regex that matches URLs or permalinks with i18n prefixes or suffixes. Matches /en/foo, .en/foo, foo.en/ and other similar default URLs made by Jekyll when parsing documents without explicitly set permalinks.
320 321 322 323 324 325 326 327 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 320

# Builds a regex matching a configured language code delimited on both sides
# by "/" or ".", e.g. "/en/", ".en/" or ".en." - the i18n prefixes/suffixes
# found in default document urls.
#
# Language codes are regex-escaped so they only match literally. When no
# languages are configured the method returns a regex that never matches; an
# empty pattern would match at every position and make a gsub insert the
# replacement between every character.
#
# @return [Regexp] one capture group per configured language
def document_url_regex
  langs = @languages || []
  return /(?!)/ if langs.empty?

  alternatives = langs.map { |lang| "([/.]#{Regexp.escape(lang.to_s)}[/.])" }
  Regexp.new(alternatives.join('|'))
end
#fetch_languages ⇒ Object
24 25 26 27 28 29 30 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 24

# Loads the language settings from the site config:
#   @default_lang - 'default_lang' (default 'en')
#   @languages    - 'languages' (default ['en']), de-duplicated
#   @active_lang  - starts out as the default language
#   @lang_vars    - 'lang_vars' (default [])
# Every non-default language is also appended to @keep_files.
#
# @return [Array] the value stored in @lang_vars
def fetch_languages
  settings = config

  @default_lang = settings.fetch('default_lang', 'en')
  @active_lang = @default_lang

  @languages = settings.fetch('languages', ['en']).uniq
  translated = @languages.reject { |code| code == @default_lang }
  @keep_files += translated

  @lang_vars = settings.fetch('lang_vars', [])
end
#find_translations(page_id, normalized_permalink, candidate_docs = nil) ⇒ Object
Returns a hash of { lang => permalink } for all docs that are translations of the given page. Matches by page_id when present, otherwise by normalized permalink. Filters out languages not in the configured languages list. candidate_docs defaults to site.collections + site.pages so the helper can be called from Liquid render contexts where the caller doesn’t already hold a docs array.
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 261

# Returns { lang => permalink } for every candidate doc that is a translation
# of the given page.
#
# Candidates are matched by `page_id` when it is non-empty, otherwise by
# normalized permalink; with neither, the result is empty. A doc's language is
# its front-matter 'lang', else derive_lang_from_path, else @default_lang.
# Docs whose explicit language is not one of the configured languages
# (default + @languages) are left out. When two docs resolve to the same
# language, the later one wins.
#
# @param page_id [Object] shared page identifier, may be nil/empty
# @param normalized_permalink [String, nil] language-stripped permalink
# @param candidate_docs [Array, nil] docs to search; defaults to all
#   collection docs plus pages
# @return [Hash{String => Object}]
def find_translations(page_id, normalized_permalink, candidate_docs = nil)
  pool = candidate_docs || (collections.values.flat_map(&:docs) + pages)
  known = ([@default_lang] + @languages).uniq

  siblings =
    if page_id.to_s.empty? && normalized_permalink.to_s.empty?
      []
    elsif page_id.to_s.empty?
      pool.select { |entry| normalized_permalink_for_doc(entry) == normalized_permalink }
    else
      pool.select { |entry| entry.data['page_id'] == page_id }
    end

  result = {}
  siblings.each do |entry|
    declared = entry.data['lang'] || derive_lang_from_path(entry)
    next if declared && !known.include?(declared)

    result[declared || @default_lang] = entry.data['permalink']
  end
  result
end
#glob_to_regex(pattern) ⇒ Object
Convert glob pattern to regex pattern
`*` matches any characters except /
`?` matches any single character except /
Note: the implementation shown below emits an unrestricted `.*` and `.`, which also match /.
121 122 123 124 125 126 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 121

# Converts a glob pattern into regex source text.
#
# The pattern is regex-escaped first, then each escaped `*` becomes `.*` and
# each escaped `?` becomes `.`. Both replacements are unrestricted, so as
# written they also match "/".
#
# @param pattern [String] glob pattern, e.g. "*.pdf"
# @return [String] regex source (not a Regexp)
def glob_to_regex(pattern)
  wildcards = { '\\*' => '.*', '\\?' => '.' }
  Regexp.escape(pattern).gsub(/\\[*?]/, wildcards)
end
#normalized_permalink_for_doc(doc) ⇒ Object
Returns the doc’s permalink with its own language prefix stripped, so it can be matched against sibling docs that share the same un-prefixed permalink. Returns nil when no usable permalink is present.
286 287 288 289 290 291 292 293 294 295 296 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 286

# Returns the doc's permalink with its own "/<lang>/" prefix removed, so that
# translations of one page compare equal.
#
# The permalink comes from front matter, falling back to doc.url when the doc
# responds to it. The result always starts with "/". Only the doc's own
# front-matter 'lang' is stripped; without one the rooted permalink is
# returned unchanged.
#
# @param doc [#data] document, optionally responding to #url
# @return [String, nil] nil when no usable permalink is present
def normalized_permalink_for_doc(doc)
  raw = doc.data['permalink'] || (doc.url if doc.respond_to?(:url))
  return nil if raw.to_s.empty?

  rooted = raw.start_with?('/') ? raw : "/#{raw}"

  code = doc.data['lang']
  return rooted if code.to_s.empty?

  prefix = "/#{code}/"
  remainder = rooted.start_with?(prefix) ? rooted[prefix.length..-1] : rooted
  return remainder if remainder.start_with?('/')

  "/#{remainder}"
end
#prepare ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 9

# Resets per-build state and loads the polyglot settings from the site config.
#
# Clears @file_langs, calls fetch_languages, then reads:
#   parallel_localization              (default true)
#   lang_from_path                     (default false)
#   fallback_canonical_to_default_lang (default false)
#   exclude_from_localization          (default [])
# Excluded entries that are existing directories get a trailing "/" appended
# if they lack one; all other entries are kept as-is.
#
# @return [Array] the normalized @exclude_from_localization list
def prepare
  @file_langs = {}
  fetch_languages

  settings = config
  @parallel_localization = settings.fetch('parallel_localization', true)
  @lang_from_path = settings.fetch('lang_from_path', false)
  @fallback_canonical_to_default_lang = settings.fetch('fallback_canonical_to_default_lang', false)

  excluded = settings.fetch('exclude_from_localization', [])
  @exclude_from_localization = excluded.map do |entry|
    bare_directory = File.directory?(entry) && !entry.end_with?('/')
    bare_directory ? "#{entry}/" : entry
  end
end
#process ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 33 def process prepare all_langs = ([@default_lang] + @languages).uniq if @parallel_localization nproc = Etc.nprocessors pids = {} begin all_langs.each do |lang| pids[lang] = fork do process_language lang end while pids.length >= (lang == all_langs[-1] ? 1 : nproc) sleep 0.1 pids.map do |pid_lang, pid| next unless waitpid pid, Process::WNOHANG pids.delete pid_lang raise "Polyglot subprocess #{pid} (#{pid_lang}) failed (#{$?.exitstatus})" unless $?.success? end end end rescue Interrupt all_langs.each do |lang| next unless pids.key? lang puts "Killing #{pids[lang]} : #{lang}" kill('INT', pids[lang]) end end else all_langs.each do |lang| process_language lang end end Jekyll::Hooks.trigger :polyglot, :post_write, self end |
#process_active_language ⇒ Object
101 102 103 104 105 106 107 108 109 110 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 101

# Builds the site for the (non-default) active language.
#
# For the duration of process_orig the destination becomes
# "<dest>/<active_lang>" and @exclude is extended with
# @exclude_from_localization; @file_langs is reset beforehand. Afterwards
# @dest and @exclude are put back to their previous values.
#
# @return [Object] the restored @exclude value
def process_active_language
  saved_dest = @dest
  saved_exclude = @exclude

  @file_langs = {}
  @dest = "#{saved_dest}/#{@active_lang}"
  @exclude = saved_exclude + @exclude_from_localization

  process_orig

  @dest = saved_dest
  @exclude = saved_exclude
end
#process_default_language ⇒ Object
95 96 97 98 99 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 95

# Builds the site for the default language by running process_orig, then
# puts @include back to the value it had before the run.
#
# @return [Object] the restored @include value
def process_default_language
  include_before = @include
  process_orig
  @include = include_before
end
#process_documents(docs) ⇒ Object
performs any necessary operations on the documents before rendering them
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 299

# Post-processes the links of each doc before it is written.
#
# For every doc, in this order:
#   1. relative links are language-prefixed (skipped for the default language)
#   2. relative `ferh` placeholders are turned back into plain `href` links
#   3. if the site has a 'url': absolute links get the same two treatments
# The placeholder correction runs for the default language too, which is why
# the method does not return early there.
#
# @param docs [Array] documents passed through to the rewrite helpers
# @return [Array] docs
def process_documents(docs)
  site_url = config.fetch('url', false)
  localizing = @active_lang != @default_lang

  localized_rel = relative_url_regex(false)
  localized_abs = absolute_url_regex(site_url, false)
  placeholder_rel = relative_url_regex(true)
  placeholder_abs = absolute_url_regex(site_url, true)

  docs.each do |doc|
    relativize_urls(doc, localized_rel) if localizing
    correct_nonrelativized_urls(doc, placeholder_rel)
    next unless site_url

    relativize_absolute_urls(doc, localized_abs, site_url) if localizing
    correct_nonrelativized_absolute_urls(doc, placeholder_abs, site_url)
  end
end
#process_language(lang) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 82

# Makes `lang` the active language and builds the site for it.
#
# The language is stored in @active_lang, in config['active_lang'] and in
# every config key named by lang_vars. The default language is then built via
# process_default_language, any other via process_active_language.
#
# @param lang [String] language code to build
# @return [Object] whatever the selected build method returns
def process_language(lang)
  @active_lang = lang

  config['active_lang'] = lang
  lang_vars.each { |name| config[name] = lang }

  return process_default_language if lang == @default_lang

  process_active_language
end
#process_orig ⇒ Object
32 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 32 alias process_orig process |
#relative_url_regex(disabled = false) ⇒ Object
A regex that matches relative URLs in an HTML document. Matches href="baseurl/foo/bar-baz", href="/foo/bar-baz" and others like it; avoids matching excluded files. prepare makes sure that all @exclude dirs have a trailing slash.
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 333

# Builds the regex used to locate relative links ("#{@baseurl}/...") inside
# rendered HTML. Capture group 1 holds everything after "#{@baseurl}/" up to
# the closing double quote.
#
# disabled = false: targets `href` attributes and adds one negative lookahead
# per @exclude glob and per "<language>/" prefix so those paths are left alone.
#
# disabled = true: targets the placeholder attribute `ferh` with no lookaheads
# (presumably written by markup that opted out of relativization - confirm
# against the tag that emits it).
#
# @param disabled [Boolean] select the `ferh` placeholder variant
# @return [Regexp]
def relative_url_regex(disabled = false)
  attribute = 'ferh'
  skip = ''
  unless disabled
    attribute = 'href'
    excluded = @exclude.map { |glob| "(?!#{glob_to_regex(glob)})" }
    localized = @languages.map { |code| "(?!#{Regexp.escape(code)}/)" }
    skip = excluded.join + localized.join
  end
  %r{#{attribute}="?#{@baseurl}/((?:#{skip}[^,'"\s/?.]+\.?)*(?:/[^\]\[)("'\s]*)?)"}
end
#relativize_absolute_urls(doc, regex, url) ⇒ Object
380 381 382 383 384 385 386 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 380

# Rewrites every match of `regex` in the doc's rendered output to
# href="<url><@baseurl>/<@active_lang>/<capture 1>", i.e. prefixed with the
# active language. Docs with no output are left alone. The previous output
# string is not mutated; doc.output is reassigned to a new string.
#
# @param doc   [#output, #output=] rendered document
# @param regex [Regexp] pattern whose first capture group is the path
# @param url   [String] site url placed before @baseurl
# @return [String, nil] the new output, or nil when doc.output is nil
def relativize_absolute_urls(doc, regex, url)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{url}#{@baseurl}/#{@active_lang}/\\1\"")
end
#relativize_urls(doc, regex) ⇒ Object
372 373 374 375 376 377 378 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 372

# Rewrites every match of `regex` in the doc's rendered output to
# href="<@baseurl>/<@active_lang>/<capture 1>", i.e. prefixed with the active
# language. Docs with no output are left alone. The previous output string is
# not mutated; doc.output is reassigned to a new string.
#
# @param doc   [#output, #output=] rendered document
# @param regex [Regexp] pattern whose first capture group is the path
# @return [String, nil] the new output, or nil when doc.output is nil
def relativize_urls(doc, regex)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{@baseurl}/#{@active_lang}/\\1\"")
end
#site_payload ⇒ Object
71 72 73 74 75 76 77 78 79 80 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 71

# Extends the original site payload with the language settings so they are
# available under `site`: default_lang, languages, active_lang, plus one
# key per lang_vars entry holding the active language.
#
# @return [Object] the payload returned by site_payload_orig, after mutation
def site_payload
  site_payload_orig.tap do |payload|
    site = payload['site']
    site['default_lang'] = default_lang
    site['languages'] = languages
    site['active_lang'] = active_lang
    lang_vars.each { |name| site[name] = active_lang }
  end
end
#site_payload_orig ⇒ Object
70 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 70 alias site_payload_orig site_payload |
#split_on_multiple_delimiters(string) ⇒ Object
112 113 114 115 116 |
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 112

# Splits a string on every "." and "/" character.
#
# @param string [String] typically a document path
# @return [Array<String>] the pieces between the delimiters
def split_on_multiple_delimiters(string)
  string.split(%r{[./]})
end