Class: Jekyll::Site

Inherits:
Object
  • Object
show all
Defined in:
lib/jekyll/polyglot/patches/jekyll/site.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#active_langObject

Returns the value of attribute active_lang.



7
8
9
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 7

# Reader for @active_lang, the language code the site is currently being
# built for. It is initialised to the default language in #fetch_languages
# and reassigned at the start of every #process_language call.
def active_lang
  @active_lang
end

#default_langObject (readonly)

Returns the value of attribute default_lang.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @default_lang. #fetch_languages populates it from the
# 'default_lang' config key, falling back to 'en' when the key is absent.
def default_lang
  @default_lang
end

#exclude_from_localizationObject (readonly)

Returns the value of attribute exclude_from_localization.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @exclude_from_localization. #prepare builds it from the
# 'exclude_from_localization' config key (default: empty list), appending a
# trailing '/' to entries that are existing directories.
def exclude_from_localization
  @exclude_from_localization
end

#fallback_canonical_to_default_langObject (readonly)

Returns the value of attribute fallback_canonical_to_default_lang.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @fallback_canonical_to_default_lang. #prepare reads it from the
# config key of the same name; it is false when the key is absent.
def fallback_canonical_to_default_lang
  @fallback_canonical_to_default_lang
end

#file_langsObject

Returns the value of attribute file_langs.



7
8
9
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 7

# Reader for @file_langs, a hash reset to {} by #prepare and
# #process_active_language. #coordinate_documents records in it, per page_id,
# the language of the document it approved for that page.
def file_langs
  @file_langs
end

#lang_from_pathObject (readonly)

Returns the value of attribute lang_from_path.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @lang_from_path. #prepare reads it from the 'lang_from_path'
# config key (false when absent); #derive_lang_from_path returns nil unless
# it is truthy.
def lang_from_path
  @lang_from_path
end

#lang_varsObject (readonly)

Returns the value of attribute lang_vars.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @lang_vars, taken from the 'lang_vars' config key (default: []).
# Each name in the list is set to the active language, both in the config
# (#process_language) and in the site payload (#site_payload).
def lang_vars
  @lang_vars
end

#languagesObject (readonly)

Returns the value of attribute languages.



6
7
8
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 6

# Reader for @languages. #fetch_languages populates it from the 'languages'
# config key (default: ['en']) with duplicates removed.
def languages
  @languages
end

Instance Method Details

#absolute_url_regex(url, disabled = false) ⇒ Object

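Builds a regex that matches absolute URLs in an HTML document, e.g. href="url/baseurl/foo/bar-baz" and others like it. It avoids matching excluded files and links that already carry a language prefix. #prepare makes sure that every directory listed in exclude_from_localization (which is appended to @exclude while a non-default language is built) has a trailing slash.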



353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 353

# Builds the regex that finds absolute links (site url + baseurl) in rendered
# HTML. Capture group 1 is the path that follows "<url><baseurl>/".
#
# url      - the configured site URL; callers may pass false when no URL is
#            configured, so it is stringified before use.
# disabled - when false, the regex targets `href="..."` and refuses paths that
#            start with an entry of @exclude or with a configured language
#            followed by '/'. It also skips links preceded by the hreflang
#            attribute of the default language or of x-default.
#            When true, it targets the `ferh="..."` marker instead and applies
#            none of those restrictions.
#
# url, @baseurl and @default_lang are regex-escaped so that characters such as
# the dots of a host name only match themselves.
#
# Returns a Regexp.
def absolute_url_regex(url, disabled = false)
  lookaheads = ''
  unless disabled
    lookaheads = @exclude.map { |x| "(?!#{glob_to_regex(x)})" }.join +
                 @languages.map { |x| "(?!#{Regexp.escape(x)}/)" }.join
  end
  start = disabled ? 'ferh' : 'href'
  # Negative lookbehind: hreflang tags for the default language and x-default
  # must keep pointing at the un-prefixed URL, so they are never relativized.
  neglookbehind = disabled ? '' : "(?<!hreflang=\"#{Regexp.escape(@default_lang.to_s)}\" |hreflang=\"x-default\" )"
  escaped_url = Regexp.escape(url.to_s)
  escaped_baseurl = Regexp.escape(@baseurl.to_s)
  %r{#{neglookbehind}#{start}="?#{escaped_url}#{escaped_baseurl}/((?:#{lookaheads}[^,'"\s/?.]+\.?)*(?:/[^\]\[)("'\s]*)?)"}
end


240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 240

# Annotates doc with information about its translations.
#
# doc  - the document to annotate (its data hash is mutated).
# docs - the candidate documents searched for translations.
#
# Sets three keys on doc.data:
#   'permalink_lang'      - { lang => permalink } as returned by #find_translations
#   'available_languages' - the languages present in that hash
#   'missing_languages'   - configured languages without a translation; empty
#                           when at most one translation exists, because a
#                           single-source page falls back identically for
#                           every language.
def assignPageLanguagePermalinks(doc, docs)
  data = doc.data
  translations = find_translations(data['page_id'], normalized_permalink_for_doc(doc), docs)
  found_langs = translations.keys
  all_langs = ([@default_lang] + @languages).uniq

  data['permalink_lang'] = translations
  data['available_languages'] = found_langs
  data['missing_languages'] =
    if translations.size > 1
      all_langs - found_langs
    else
      []
    end
end

#assignPageRedirects(doc, docs) ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 201

# Merges user-defined and computed redirects into doc.data['redirect_from'].
#
# doc  - the document being prepared (its data hash is mutated).
# docs - all candidate documents; those sharing doc's page_id contribute
#        their permalinks as redirect sources.
#
# User-defined redirects are normalised to an array. For documents in a
# non-default language each one is rooted with '/' and prefixed with
# "/<lang>" unless it already starts with "/<lang>/".
#
# Sibling documents that have no permalink are skipped, so nil or empty
# entries never end up in redirect_from.
#
# 'redirect_from' is left untouched when the merged list is empty.
def assignPageRedirects(doc, docs)
  # Preserve and normalize user-defined redirect_from
  user_redirects = doc.data['redirect_from'] || []
  user_redirects = [user_redirects] unless user_redirects.is_a?(Array)

  # Determine document language
  doc_lang = doc.data['lang'] || derive_lang_from_path(doc) || @default_lang

  # Scope user-defined redirects to the document's language if non-default
  if doc_lang != @default_lang
    lang_prefix = "/#{doc_lang}/"
    user_redirects = user_redirects.map do |redirect_path|
      redirect_path = "/#{redirect_path}" unless redirect_path.start_with?('/')
      redirect_path.start_with?(lang_prefix) ? redirect_path : "/#{doc_lang}#{redirect_path}"
    end
  end

  # Compute page_id based redirects (cross-language)
  computed_redirects = []
  page_id = doc.data['page_id']
  unless page_id.nil? || page_id.empty?
    own_permalink = doc.data['permalink']
    computed_redirects = docs
      .select { |dd| dd.data['page_id'] == page_id }
      .map { |dd| dd.data['permalink'] }
      .reject { |permalink| permalink.to_s.empty? || permalink == own_permalink }
  end

  # Merge user-defined and computed redirects, removing duplicates
  all_redirects = (user_redirects + computed_redirects).uniq
  doc.data['redirect_from'] = all_redirects unless all_redirects.empty?
end

#coordinate_documents(docs) ⇒ Object

Assigns natural permalinks to documents and prioritizes documents written in the active_lang over others. If lang is not set in the front matter, the language is derived from the path when lang_from_path is enabled; otherwise the document is assigned to the default_lang.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 146

# Chooses, for every page_id, which document is rendered for the active
# language and annotates the chosen documents.
#
# docs - the documents to coordinate; each must expose data, url and
#        relative_path.
#
# For each document the language is taken from the 'lang' front matter, then
# from the path (#derive_lang_from_path), then from @default_lang. Documents
# whose explicit language is not configured are skipped with a warning.
# Surviving documents get 'rendered_lang' set and, when they declare a lang
# but no permalink, a 'permalink' derived from their url with the language
# prefix/suffix removed.
#
# @file_langs remembers, per page_id, the language of the document approved
# so far; it drives the `next` guards below and is updated as a side effect.
#
# Returns the approved documents, after #assignPageRedirects and
# #assignPageLanguagePermalinks have been applied to each of them.
def coordinate_documents(docs)
  regex = document_url_regex
  approved = {}
  # Build set of valid languages (default + configured)
  valid_languages = ([@default_lang] + @languages).uniq

  docs.each do |doc|
    # Get the explicitly declared language (frontmatter or path-derived)
    explicit_lang = doc.data['lang'] || derive_lang_from_path(doc)
    lang = explicit_lang || @default_lang

    # FILTER: Skip documents whose explicit lang is not in configured languages.
    # Check the explicit value (not the fallback) so that documents with an
    # unconfigured lang like 'de' are excluded even if normalization would
    # map them to default_lang. Compare case-insensitively so case-mismatched
    # frontmatter (e.g. 'pt-br' vs configured 'pt-BR') is normalized below
    # rather than rejected here.
    if explicit_lang && valid_languages.none? { |l| l.downcase == explicit_lang.downcase }
      Jekyll.logger.warn "Polyglot:", "Skipping #{doc.relative_path} - lang '#{explicit_lang}' not in configured languages #{valid_languages.inspect}"
      next
    end

    # If the doc lang matches a config language case-insensitively, use the config case
    config_lang = @languages.find { |l| l.downcase == lang.downcase }
    lang = config_lang if config_lang
    # Only rewrite the front matter value when the document declared one itself.
    doc.data['lang'] = lang if doc.data['lang'] && config_lang

    lang_exclusive = doc.data['lang-exclusive'] || []

    # The url with any language prefix/suffix collapsed to '/' doubles as the
    # page identity when no explicit page_id is given.
    url = doc.url.gsub(regex, '/')
    page_id = doc.data['page_id'] || url
    doc.data['permalink'] = url if doc.data['permalink'].to_s.empty? && !doc.data['lang'].to_s.empty?
    # Set rendered_lang to indicate what language this page is actually rendered in
    # This allows templates to detect fallback pages (rendered_lang != active_lang)
    doc.data['rendered_lang'] = lang

    # skip entirely if nothing to check
    next if @file_langs.nil?
    # skip this document if it has already been processed
    next if @file_langs[page_id] == @active_lang
    # skip this document if it has a fallback and it isn't assigned to the active language
    next if @file_langs[page_id] == @default_lang && lang != @active_lang
    # skip this document if it has lang-exclusive defined and the active_lang is not included
    next if !lang_exclusive.empty? && !lang_exclusive.include?(@active_lang)

    # A later document for the same page_id replaces an earlier approval.
    approved[page_id] = doc
    @file_langs[page_id] = lang
  end
  approved.each_value do |doc|
    assignPageRedirects(doc, docs)
    assignPageLanguagePermalinks(doc, docs)
  end
  approved.values
end

#correct_nonrelativized_absolute_urls(doc, regex, url) ⇒ Object



388
389
390
391
392
393
394
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 388

# Rewrites every match of regex in doc.output to a plain absolute link,
# `href="<url><baseurl>/<capture 1>"`, without a language prefix.
#
# doc   - object exposing output / output=; nothing happens when output is nil.
# regex - pattern whose first capture group is the path after the baseurl.
# url   - the site URL placed in front of @baseurl.
def correct_nonrelativized_absolute_urls(doc, regex, url)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{url}#{@baseurl}/\\1\"")
end

#correct_nonrelativized_urls(doc, regex) ⇒ Object



396
397
398
399
400
401
402
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 396

# Rewrites every match of regex in doc.output to a plain relative link,
# `href="<baseurl>/<capture 1>"`, without a language prefix.
#
# doc   - object exposing output / output=; nothing happens when output is nil.
# regex - pattern whose first capture group is the path after the baseurl.
def correct_nonrelativized_urls(doc, regex)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{@baseurl}/\\1\"")
end

#derive_lang_from_path(doc) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 128

# Looks for a configured language code among the segments of doc.path.
#
# doc - object exposing path.
#
# The path is split with #split_on_multiple_delimiters and the segments are
# checked in order; the comparison ignores case.
#
# Returns the matching entry of @languages (in its configured spelling) for
# the first segment that matches, or nil when @lang_from_path is falsy or no
# segment matches.
def derive_lang_from_path(doc)
  return nil unless @lang_from_path

  split_on_multiple_delimiters(doc.path).each do |piece|
    wanted = piece.downcase
    hit = @languages.find { |code| code.downcase == wanted }
    return hit if hit
  end

  nil
end

#document_url_regexObject

Builds a regex that matches URLs or permalinks with i18n prefixes or suffixes. It matches /en/foo, .en/foo, foo.en/ and other similar default URLs made by Jekyll when parsing documents without explicitly set permalinks.



320
321
322
323
324
325
326
327
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 320

# Builds the regex that recognises a language marker inside a url or
# permalink: a configured language code with '/' or '.' on both sides
# (e.g. "/en/", ".en/", ".en.").
#
# Language codes are regex-escaped so they only match themselves. When no
# languages are configured the result never matches; an empty pattern would
# match at every position and corrupt any string passed through gsub.
#
# Returns a Regexp with one capture group per configured language.
def document_url_regex
  escaped_langs = (@languages || []).map { |lang| Regexp.escape(lang.to_s) }
  return /(?!)/ if escaped_langs.empty?

  Regexp.new(escaped_langs.map { |lang| "([/.]#{lang}[/.])" }.join('|'))
end

#fetch_languagesObject



24
25
26
27
28
29
30
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 24

# Loads the language settings from the site config.
#
# Reads 'default_lang' (default 'en'), 'languages' (default ['en'],
# de-duplicated) and 'lang_vars' (default []). The active language starts out
# as the default language, and every non-default language code is appended
# to @keep_files.
#
# Returns the value assigned to @lang_vars.
def fetch_languages
  settings = config
  @default_lang = settings.fetch('default_lang', 'en')
  @active_lang = @default_lang
  @languages = settings.fetch('languages', ['en']).uniq
  non_default_langs = @languages - [@default_lang]
  @keep_files = @keep_files + non_default_langs
  @lang_vars = settings.fetch('lang_vars', [])
end

#find_translations(page_id, normalized_permalink, candidate_docs = nil) ⇒ Object

Returns a hash of { lang => permalink } for all docs that are translations of the given page. Matches by page_id when present, otherwise by normalized permalink. Filters out languages not in the configured languages list. candidate_docs defaults to site.collections + site.pages so the helper can be called from Liquid render contexts where the caller doesn’t already hold a docs array.



261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 261

# Collects the permalinks of all translations of one page.
#
# page_id              - identifier shared by the translations; used for
#                        matching when non-empty.
# normalized_permalink - language-stripped permalink, used for matching only
#                        when page_id is empty.
# candidate_docs       - documents to search; defaults to every collection
#                        document plus all pages.
#
# A document's language is its 'lang' front matter, else the language derived
# from its path, else @default_lang. Documents whose explicit language is not
# among the default/configured languages are left out.
#
# Returns a Hash of { lang => permalink }; empty when neither page_id nor
# normalized_permalink is usable.
def find_translations(page_id, normalized_permalink, candidate_docs = nil)
  candidate_docs ||= collections.values.flat_map(&:docs) + pages
  known_langs = ([@default_lang] + @languages).uniq

  siblings =
    if page_id.to_s.empty? && normalized_permalink.to_s.empty?
      []
    elsif page_id.to_s.empty?
      candidate_docs.select { |d| normalized_permalink_for_doc(d) == normalized_permalink }
    else
      candidate_docs.select { |d| d.data['page_id'] == page_id }
    end

  permalinks = {}
  siblings.each do |sibling|
    declared = sibling.data['lang'] || derive_lang_from_path(sibling)
    next if declared && !known_langs.include?(declared)

    permalinks[declared || @default_lang] = sibling.data['permalink']
  end
  permalinks
end

#glob_to_regex(pattern) ⇒ Object

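Converts a glob pattern into a regex source string. The pattern is regex-escaped first, then the glob wildcards are translated:

  * becomes .* (any run of characters)

  ? becomes . (any single character)

Note: neither translation excludes /, so both wildcards can match across path separators.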



121
122
123
124
125
126
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 121

# Turns a glob pattern into regex source text.
#
# pattern - the glob String.
#
# The whole pattern is regex-escaped, after which the escaped wildcards are
# swapped for their regex counterparts: `*` becomes `.*` and `?` becomes `.`.
#
# Returns a String suitable for interpolation into a Regexp.
def glob_to_regex(pattern)
  wildcards = { "\\*" => '.*', "\\?" => '.' }
  wildcards.reduce(Regexp.escape(pattern)) do |source, (escaped_glob, replacement)|
    source.gsub(escaped_glob, replacement)
  end
end

#normalized_permalink_for_doc(doc) ⇒ Object

Returns the doc’s permalink with its own language prefix stripped, so it can be matched against sibling docs that share the same un-prefixed permalink. Returns nil when no usable permalink is present.



286
287
288
289
290
291
292
293
294
295
296
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 286

# Computes a language-independent permalink for doc.
#
# doc - object exposing data and, optionally, url.
#
# The 'permalink' front matter is used when present, otherwise doc.url (if
# the object responds to it). The value is rooted with a leading '/', and a
# leading "/<lang>/" is removed when the document declares a 'lang'.
#
# Returns the normalized permalink String, or nil when neither a permalink
# nor a url is available.
def normalized_permalink_for_doc(doc)
  raw = doc.data['permalink']
  raw = (doc.respond_to?(:url) ? doc.url : nil) unless raw
  return nil if raw.to_s.empty?

  rooted = raw.start_with?('/') ? raw : "/#{raw}"
  code = doc.data['lang']
  return rooted if code.to_s.empty?

  remainder = rooted.delete_prefix("/#{code}/")
  return remainder if remainder.start_with?('/')

  "/#{remainder}"
end

#prepareObject



9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 9

# Resets per-build state and loads the plugin settings from the config.
#
# Clears @file_langs, delegates to #fetch_languages, then reads
# 'parallel_localization' (default true), 'lang_from_path' (default false),
# 'fallback_canonical_to_default_lang' (default false) and
# 'exclude_from_localization' (default []). Entries of the exclusion list
# that name an existing directory get a trailing '/' if they lack one.
def prepare
  @file_langs = {}
  fetch_languages
  @parallel_localization = config.fetch('parallel_localization', true)
  @lang_from_path = config.fetch('lang_from_path', false)
  @fallback_canonical_to_default_lang = config.fetch('fallback_canonical_to_default_lang', false)
  @exclude_from_localization = config.fetch('exclude_from_localization', []).map do |entry|
    needs_slash = File.directory?(entry) && !entry.end_with?('/')
    needs_slash ? "#{entry}/" : entry
  end
end

#processObject



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 33

# Builds the site once per language (the default language plus every
# configured language, de-duplicated), then fires the :polyglot :post_write
# hook.
#
# With @parallel_localization enabled each language is built in a forked
# child process, with at most Etc.nprocessors children alive at a time.
# Otherwise the languages are built one after another in this process.
#
# Raises RuntimeError when a child process exits unsuccessfully.
#
# NOTE(review): waitpid and kill are called without a receiver; presumably
# Process is mixed in elsewhere in the file - confirm.
def process
  prepare
  all_langs = ([@default_lang] + @languages).uniq
  if @parallel_localization
    nproc = Etc.nprocessors
    # lang => pid of the child that is building it
    pids = {}
    begin
      all_langs.each do |lang|
        pids[lang] = fork do
          process_language lang
        end
        # Throttle: block while the process limit is reached. After the last
        # language has been forked the threshold drops to 1, so the loop
        # keeps polling until every child has been reaped.
        while pids.length >= (lang == all_langs[-1] ? 1 : nproc)
          sleep 0.1
          pids.map do |pid_lang, pid|
            # WNOHANG: returns nil immediately while the child is still running
            next unless waitpid pid, Process::WNOHANG

            pids.delete pid_lang
            raise "Polyglot subprocess #{pid} (#{pid_lang}) failed (#{$?.exitstatus})" unless $?.success?
          end
        end
      end
    rescue Interrupt
      # Forward the interrupt to every child that has not been reaped yet.
      all_langs.each do |lang|
        next unless pids.key? lang

        puts "Killing #{pids[lang]} : #{lang}"
        kill('INT', pids[lang])
      end
    end
  else
    all_langs.each do |lang|
      process_language lang
    end
  end
  Jekyll::Hooks.trigger :polyglot, :post_write, self
end

#process_active_languageObject



101
102
103
104
105
106
107
108
109
110
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 101

# Builds the site for a non-default language.
#
# While process_orig runs, the destination is redirected to
# "<dest>/<active_lang>", the exclude_from_localization entries are appended
# to @exclude, and @file_langs is reset. @dest and @exclude are restored in an
# ensure clause, so a failing build cannot leave the site pointing at the
# language sub-directory or carrying the extra exclusions.
#
# Returns whatever process_orig returns; its errors propagate.
def process_active_language
  old_dest = @dest
  old_exclude = @exclude
  @file_langs = {}
  @dest = "#{@dest}/#{@active_lang}"
  @exclude += @exclude_from_localization
  process_orig
ensure
  @dest = old_dest
  @exclude = old_exclude
end

#process_default_languageObject



95
96
97
98
99
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 95

# Builds the site for the default language.
#
# @include is captured before process_orig runs and put back afterwards. The
# restore lives in an ensure clause so it also happens when the build raises.
#
# Returns whatever process_orig returns; its errors propagate.
def process_default_language
  old_include = @include
  process_orig
ensure
  @include = old_include
end

#process_documents(docs) ⇒ Object

performs any necessary operations on the documents before rendering them



299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 299

# Post-processes the links of every document in docs.
#
# docs - the documents whose output is rewritten.
#
# For builds of a non-default language, links are first given the language
# prefix (#relativize_urls / #relativize_absolute_urls). The "corrector"
# passes (#correct_nonrelativized_urls / #correct_nonrelativized_absolute_urls)
# always run. The absolute-URL passes run only when the config has a 'url'.
#
# Returns docs.
def process_documents(docs)
  url = config.fetch('url', false)
  rel_regex = relative_url_regex(false)
  abs_regex = absolute_url_regex(url, false)
  non_rel_regex = relative_url_regex(true)
  non_abs_regex = absolute_url_regex(url, true)
  localized_build = @active_lang != @default_lang

  docs.each do |doc|
    relativize_urls(doc, rel_regex) if localized_build
    correct_nonrelativized_urls(doc, non_rel_regex)
    next unless url

    relativize_absolute_urls(doc, abs_regex, url) if localized_build
    correct_nonrelativized_absolute_urls(doc, non_abs_regex, url)
  end
end

#process_language(lang) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 82

# Builds the site for a single language.
#
# lang - the language code to build.
#
# Records lang as the active language on the site and in the config (under
# 'active_lang' and under every name listed in lang_vars), then dispatches to
# #process_default_language or #process_active_language depending on whether
# lang is the default language.
#
# Returns the result of the method it dispatched to.
def process_language(lang)
  @active_lang = lang
  config['active_lang'] = lang
  lang_vars.each { |name| config[name] = lang }

  return process_default_language if lang == @default_lang

  process_active_language
end

#process_origObject



32
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 32

# Keeps the implementation of #process that existed before this file
# redefines it reachable as #process_orig. #process_default_language and
# #process_active_language call it to run the actual build.
alias process_orig process

#relative_url_regex(disabled = false) ⇒ Object

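Builds a regex that matches relative URLs in an HTML document, e.g. href="baseurl/foo/bar-baz", href="/foo/bar-baz" and others like it. It avoids matching excluded files and links that already carry a language prefix. #prepare makes sure that every directory listed in exclude_from_localization (which is appended to @exclude while a non-default language is built) has a trailing slash.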



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 333

# Builds the regex that finds site-relative links (baseurl + path) in
# rendered HTML. Capture group 1 is the path that follows "<baseurl>/".
#
# disabled - when false, the regex targets `href="..."` and refuses paths that
#            start with an entry of @exclude or with a configured language
#            followed by '/'. When true, it targets the `ferh="..."` marker
#            instead and applies none of those restrictions.
#
# @baseurl is regex-escaped so that characters such as '.' only match
# themselves.
#
# Returns a Regexp.
def relative_url_regex(disabled = false)
  lookaheads = ''
  unless disabled
    lookaheads = @exclude.map { |x| "(?!#{glob_to_regex(x)})" }.join +
                 @languages.map { |x| "(?!#{Regexp.escape(x)}/)" }.join
  end
  start = disabled ? 'ferh' : 'href'
  escaped_baseurl = Regexp.escape(@baseurl.to_s)
  %r{#{start}="?#{escaped_baseurl}/((?:#{lookaheads}[^,'"\s/?.]+\.?)*(?:/[^\]\[)("'\s]*)?)"}
end

#relativize_absolute_urls(doc, regex, url) ⇒ Object



380
381
382
383
384
385
386
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 380

# Rewrites every match of regex in doc.output so the absolute link points
# into the active language:
# `href="<url><baseurl>/<active_lang>/<capture 1>"`.
#
# doc   - object exposing output / output=; nothing happens when output is nil.
# regex - pattern whose first capture group is the path after the baseurl.
# url   - the site URL placed in front of @baseurl.
def relativize_absolute_urls(doc, regex, url)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{url}#{@baseurl}/#{@active_lang}/\\1\"")
end

#relativize_urls(doc, regex) ⇒ Object



372
373
374
375
376
377
378
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 372

# Rewrites every match of regex in doc.output so the relative link points
# into the active language: `href="<baseurl>/<active_lang>/<capture 1>"`.
#
# doc   - object exposing output / output=; nothing happens when output is nil.
# regex - pattern whose first capture group is the path after the baseurl.
def relativize_urls(doc, regex)
  html = doc.output
  return if html.nil?

  doc.output = html.gsub(regex, "href=\"#{@baseurl}/#{@active_lang}/\\1\"")
end

#site_payloadObject



71
72
73
74
75
76
77
78
79
80
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 71

# Extends the payload produced by site_payload_orig with language data.
#
# Under payload['site'] it sets 'default_lang', 'languages' and 'active_lang',
# plus one entry per name in lang_vars holding the active language.
#
# Returns the payload.
def site_payload
  payload = site_payload_orig
  additions = {
    'default_lang' => default_lang,
    'languages' => languages,
    'active_lang' => active_lang
  }
  lang_vars.each { |name| additions[name] = active_lang }
  additions.each { |key, value| payload['site'][key] = value }
  payload
end

#site_payload_origObject



70
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 70

# Keeps the implementation of #site_payload that existed before this file
# redefines it reachable as #site_payload_orig; the redefined #site_payload
# calls it and then adds the language entries.
alias site_payload_orig site_payload

#split_on_multiple_delimiters(string) ⇒ Object



112
113
114
115
116
# File 'lib/jekyll/polyglot/patches/jekyll/site.rb', line 112

# Splits string on every '.' and '/'.
#
# string - the String to split (e.g. a document path).
#
# Returns an Array of the segments between the delimiters.
def split_on_multiple_delimiters(string)
  string.split(Regexp.union('.', '/'))
end