Class: AlExtPosts::ExternalPostsGenerator

Inherits:
Jekyll::Generator
  • Object
show all
Defined in:
lib/al_ext_posts.rb

Instance Method Summary collapse

Instance Method Details

#create_document(site, source_name, url, content, src = {}) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/al_ext_posts.rb', line 49

# Builds an in-memory Jekyll document for one external post and appends it to
# the site's 'posts' collection.
#
# The file name (slug) is derived from the title. When the title contains no
# ASCII word characters (blank, punctuation-only, written in a non-Latin
# script, or missing altogether) the slug falls back to
# "<source name>-<last URL segment>".
#
# @param site [Object] Jekyll site; must respond to `in_source_dir` and `collections`
# @param source_name [String] name of the external source, stored as 'external_source'
# @param url [String] link to the original post, stored as 'redirect'
# @param content [Hash] read via :title, :content, :summary and :published
# @param src [Hash] source configuration; non-empty Array values under
#   'categories' and 'tags' are copied onto the document
# @return [Array] the docs array of the 'posts' collection, now including the new document
def create_document(site, source_name, url, content, src = {})
  # An entry may carry no title at all; treat nil like a blank title instead
  # of raising NoMethodError on nil.
  title = content[:title].to_s

  # \w only matches ASCII word characters here, so a title without any of
  # them would produce an empty or dash-only file name.
  slug =
    if title.gsub(/[^\w]/, '').empty?
      source_slug = source_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      "#{source_slug}-#{url.split('/').last}"
    else
      title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
    end

  path = site.in_source_dir("_posts/#{slug}.md")
  doc = Jekyll::Document.new(
    path, { :site => site, :collection => site.collections['posts'] }
  )
  doc.data['external_source'] = source_name
  doc.data['title'] = content[:title]
  doc.data['feed_content'] = content[:content]
  doc.data['description'] = content[:summary]
  doc.data['date'] = content[:published]
  doc.data['redirect'] = url

  # Apply default categories and tags from source configuration
  %w[categories tags].each do |key|
    values = src[key]
    doc.data[key] = values if values.is_a?(Array) && !values.empty?
  end

  doc.content = content[:content]
  site.collections['posts'].docs << doc
end

#fetch_content_from_url(url) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/al_ext_posts.rb', line 120

# Downloads a web page and extracts a title, a description and the text of
# its paragraphs.
#
# @param url [String] address of the page to fetch
# @return [Hash] with keys :title (String, '' when the page has no
#   <title>), :content (text of all <p> elements concatenated) and :summary
#   (first description meta tag found, or nil)
def fetch_content_from_url(url)
  html = HTTParty.get(url).body
  parsed_html = Nokogiri::HTML(html)

  # `&.` only guards a single call, so every link in the chain needs it;
  # otherwise a page without <title> ends up calling `strip` on nil.
  title = parsed_html.at('head title')&.text&.strip || ''

  # Try the plain description first, then the two Open Graph spellings.
  description = parsed_html.at('head meta[name="description"]')&.attr('content')
  description ||= parsed_html.at('head meta[name="og:description"]')&.attr('content')
  description ||= parsed_html.at('head meta[property="og:description"]')&.attr('content')

  body_content = parsed_html.search('p').map(&:text).join

  {
    title: title,
    content: body_content,
    summary: description
    # Note: The published date is now added in the fetch_from_urls method.
  }
end

#fetch_from_rss(site, src) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/al_ext_posts.rb', line 25

# Fetches the feed at src['rss_url'], parses it and passes its entries to
# process_entries.
#
# Returns nil without processing anything when the response has no body or
# when parsing raises; a parse failure is reported on standard output.
#
# @param site [Object] passed through to process_entries
# @param src [Hash] source configuration; 'rss_url' is read here
# @return [Object, nil] whatever process_entries returns, or nil when skipped
def fetch_from_rss(site, src)
  rss_url = src['rss_url']
  payload = HTTParty.get(rss_url).body
  return if payload.nil?

  # Only the parse step is guarded; download and processing errors propagate.
  parsed =
    begin
      Feedjira.parse(payload)
    rescue StandardError => error
      puts "Error parsing RSS feed from #{rss_url} - #{error.message}"
      return
    end

  process_entries(site, src, parsed.entries)
end

#fetch_from_urls(site, src) ⇒ Object



83
84
85
86
87
88
89
90
# File 'lib/al_ext_posts.rb', line 83

# Creates one document per entry listed under src['posts'].
#
# Each page is downloaded with fetch_content_from_url, stamped with the
# parsed 'published_date' from the configuration, and handed to
# create_document together with the metadata merged for that post.
#
# @param site [Object] passed through to create_document
# @param src [Hash] source configuration; 'name' and 'posts' are read here
# @return [Object] the value of src['posts'] (the result of `each`)
def fetch_from_urls(site, src)
  src['posts'].each do |post|
    puts "...fetching #{post['url']}"
    content = fetch_content_from_url(post['url'])
    content[:published] = parse_published_date(post['published_date'])
    create_document(site, src['name'], post['url'], content, metadata_for_post(src, post))
  end
end

#generate(site) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/al_ext_posts.rb', line 12

# Walks the 'external_sources' list from the site configuration and fetches
# each source, preferring its RSS feed over an explicit list of posts.
# Sources with neither 'rss_url' nor 'posts' are only announced.
#
# @param site [Object] must respond to `config`; passed on to the fetchers
# @return [Object, nil] the configured sources list, or nil when none is configured
def generate(site)
  sources = site.config['external_sources']
  return if sources.nil?

  sources.each do |source|
    puts "Fetching external posts from #{source['name']}:"
    if source['rss_url']
      fetch_from_rss(site, source)
    elsif source['posts']
      fetch_from_urls(site, source)
    end
  end
end

#metadata_for_post(src, post) ⇒ Object



92
93
94
95
96
97
98
99
# File 'lib/al_ext_posts.rb', line 92

# Merges per-post metadata over the source-level defaults.
#
# Starts from a shallow copy of the source configuration and overrides
# 'categories' and 'tags' with the post's own values when the post supplies
# a non-empty value for them. The original `src` is not modified.
#
# @param src [Hash] source configuration providing the defaults
# @param post [Object] post description; read through metadata_value
# @return [Hash] copy of `src` with post-level overrides applied
def metadata_for_post(src, post)
  metadata = src.dup
  %w[categories tags].each do |key|
    value = metadata_value(post, key)
    # Skip nil/false and anything that reports itself as empty.
    metadata[key] = value if value && !(value.respond_to?(:empty?) && value.empty?)
  end
  metadata
end

#metadata_value(post, key) ⇒ Object



101
102
103
104
105
106
107
# File 'lib/al_ext_posts.rb', line 101

# Reads one metadata field from a post, whatever shape the post has.
#
# Objects responding to `key?` are looked up by the string key first and
# then by the symbol key. Other objects are asked for a public method named
# after the key. Anything else yields nil.
#
# @param post [Object] hash-like object or object exposing the field as a method
# @param key [String] field name, e.g. 'categories'
# @return [Object, nil] the field's value, or nil when it cannot be read
def metadata_value(post, key)
  if post.respond_to?(:key?)
    post[key] || post[key.to_sym]
  elsif post.respond_to?(key)
    post.public_send(key)
  end
end

#parse_published_date(published_date) ⇒ Object



109
110
111
112
113
114
115
116
117
118
# File 'lib/al_ext_posts.rb', line 109

# Normalizes a configured publication date to a UTC Time.
#
# @param published_date [String, Time, Date] date as read from the source
#   configuration; a Time is accepted because a configuration loader may
#   already have turned a full timestamp into one
# @return [Time] the same instant expressed in UTC
# @raise [RuntimeError] when the value is none of the supported types
# @raise [ArgumentError] when a String cannot be parsed by Time.parse
def parse_published_date(published_date)
  case published_date
  when String
    Time.parse(published_date).utc
  when Time
    # getutc returns a new object; Time#utc would convert the caller's value in place.
    published_date.getutc
  when Date
    published_date.to_time.utc
  else
    raise "Invalid date format for #{published_date}"
  end
end

#process_entries(site, src, entries) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
# File 'lib/al_ext_posts.rb', line 37

# Creates one document per feed entry.
#
# Each entry's url, title, content, summary and published fields are passed
# to create_document, together with the metadata merged for that entry by
# metadata_for_post.
#
# @param site [Object] passed through to create_document
# @param src [Hash] source configuration; 'name' is read here
# @param entries [Enumerable] feed entries responding to url, title,
#   content, summary and published
# @return [Object] `entries` (the result of `each`)
def process_entries(site, src, entries)
  entries.each do |e|
    puts "...fetching #{e.url}"
    create_document(site, src['name'], e.url, {
      title: e.title,
      content: e.content,
      summary: e.summary,
      published: e.published
    }, metadata_for_post(src, e))
  end
end