Class: AlExtPosts::ExternalPostsGenerator
- Inherits:
-
Jekyll::Generator
- Object
- Jekyll::Generator
- AlExtPosts::ExternalPostsGenerator
- Defined in:
- lib/al_ext_posts.rb
Instance Method Summary
- #create_document(site, source_name, url, content, src = {}) ⇒ Object
- #fetch_content_from_url(url) ⇒ Object
- #fetch_from_rss(site, src) ⇒ Object
- #fetch_from_urls(site, src) ⇒ Object
- #generate(site) ⇒ Object
- #metadata_for_post(src, post) ⇒ Object
- #metadata_value(post, key) ⇒ Object
- #parse_published_date(published_date) ⇒ Object
- #process_entries(site, src, entries) ⇒ Object
Instance Method Details
#create_document(site, source_name, url, content, src = {}) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/al_ext_posts.rb', line 49
#
# Builds a Jekyll document for one external post and appends it to the
# site's `posts` collection.
#
# The slug is derived from the post title. When the title is missing or
# contains no ASCII word characters (only whitespace, punctuation or
# non-Latin text), the slug falls back to the slugified source name joined
# with the last segment of the URL.
#
# @param site [Object] site object; must respond to `in_source_dir` and `collections`
# @param source_name [String] name of the external source
# @param url [String] URL of the external post (stored as `redirect`)
# @param content [Hash] hash with `:title`, `:content`, `:summary`, `:published`
# @param src [Hash] source configuration; non-empty Array values under
#   'categories' and 'tags' are copied onto the document
# @return [Array] the `docs` array of the posts collection, with the new document appended
def create_document(site, source_name, url, content, src = {})
  slugify = ->(text) { text.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '') }

  # to_s guards against feeds/pages that provide no title at all.
  title = content[:title].to_s
  slug =
    if title.match?(/\w/)
      # At least one word character survives slugification, so the slug is never empty here.
      slugify.call(title)
    else
      "#{slugify.call(source_name.to_s)}-#{url.to_s.split('/').last}"
    end

  posts = site.collections['posts']
  path = site.in_source_dir("_posts/#{slug}.md")
  doc = Jekyll::Document.new(path, { site: site, collection: posts })

  doc.data['external_source'] = source_name
  doc.data['title'] = content[:title]
  doc.data['feed_content'] = content[:content]
  doc.data['description'] = content[:summary]
  doc.data['date'] = content[:published]
  doc.data['redirect'] = url

  # Apply default categories and tags from the source configuration.
  %w[categories tags].each do |key|
    value = src[key]
    doc.data[key] = value if value.is_a?(Array) && !value.empty?
  end

  doc.content = content[:content]
  posts.docs << doc
end
#fetch_content_from_url(url) ⇒ Object
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/al_ext_posts.rb', line 120
#
# Downloads a page and extracts the pieces needed to build a post.
#
# The title comes from `<head><title>` (empty string when absent). The
# summary is the first available of: meta name="description",
# meta name="og:description", meta property="og:description". The content is
# the text of all `<p>` elements concatenated without a separator.
#
# @param url [String] address of the page to fetch
# @return [Hash] `{ title: String, content: String, summary: String or nil }`;
#   the published date is added by the caller
def fetch_content_from_url(url)
  html = HTTParty.get(url).body
  parsed_html = Nokogiri::HTML(html)

  # Safe navigation must cover `strip` too: a page without <title> yields nil here.
  title = parsed_html.at('head title')&.text&.strip || ''

  description_selectors = [
    'head meta[name="description"]',
    'head meta[name="og:description"]',
    'head meta[property="og:description"]'
  ]
  description = nil
  description_selectors.each do |selector|
    description ||= parsed_html.at(selector)&.attr('content')
  end

  body_content = parsed_html.search('p').map(&:text).join

  {
    title: title,
    content: body_content,
    summary: description
    # Note: The published date is now added in the fetch_from_urls method.
  }
end
#fetch_from_rss(site, src) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/al_ext_posts.rb', line 25
#
# Downloads the feed at `src['rss_url']`, parses it and hands its entries to
# `process_entries`.
#
# A feed that cannot be parsed is reported on stdout and skipped, so it does
# not abort processing of the remaining sources. Errors raised by the HTTP
# request itself are not rescued here.
#
# @param site [Object] site object, passed through to `process_entries`
# @param src [Hash] source configuration; reads 'rss_url'
# @return [Object, nil] result of `process_entries`, or nil when the body is
#   nil or the feed could not be parsed
def fetch_from_rss(site, src)
  xml = HTTParty.get(src['rss_url']).body
  return if xml.nil?

  begin
    feed = Feedjira.parse(xml)
  rescue StandardError => e
    puts "Error parsing RSS feed from #{src['rss_url']} - #{e.message}"
    return
  end

  process_entries(site, src, feed.entries)
end
#fetch_from_urls(site, src) ⇒ Object
83 84 85 86 87 88 89 90 |
# File 'lib/al_ext_posts.rb', line 83
#
# Creates one document per entry of `src['posts']` by scraping each post URL.
#
# For every post the page is fetched with `fetch_content_from_url`, the
# configured 'published_date' is parsed and stored under `:published`, and
# the document is created with the source configuration merged with any
# per-post categories/tags (see `metadata_for_post`).
#
# @param site [Object] site object, passed through to `create_document`
# @param src [Hash] source configuration; reads 'name' and 'posts'
#   (each post is read via 'url' and 'published_date')
# @return [Object] the value returned by `src['posts'].each`
def fetch_from_urls(site, src)
  src['posts'].each do |post|
    post_url = post['url']
    puts "...fetching #{post_url}"

    content = fetch_content_from_url(post_url)
    content[:published] = parse_published_date(post['published_date'])

    create_document(site, src['name'], post_url, content, metadata_for_post(src, post))
  end
end
#generate(site) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/al_ext_posts.rb', line 12
#
# Generator entry point: walks `site.config['external_sources']` and imports
# posts from each configured source.
#
# A source with 'rss_url' is read as a feed; otherwise a source with 'posts'
# is read as an explicit list of URLs. A source with neither key is only
# announced on stdout and then skipped.
#
# @param site [Object] site object; must respond to `config`
# @return [Object, nil] nil when no external sources are configured
def generate(site)
  sources = site.config['external_sources']
  return unless sources

  sources.each do |src|
    puts "Fetching external posts from #{src['name']}:"
    if src['rss_url']
      fetch_from_rss(site, src)
    elsif src['posts']
      fetch_from_urls(site, src)
    end
  end
end
#metadata_for_post(src, post) ⇒ Object
92 93 94 95 96 97 98 99 |
# File 'lib/al_ext_posts.rb', line 92
#
# Returns the source configuration with per-post 'categories' and 'tags'
# layered on top.
#
# The source hash is shallow-copied, so the caller's configuration is not
# modified. A post-level value replaces the source-level one only when it is
# present and not empty.
#
# @param src [Hash] source configuration (defaults)
# @param post [Object] a post entry; read through `metadata_value`
# @return [Hash] a copy of `src`, possibly with 'categories'/'tags' replaced
def metadata_for_post(src, post)
  metadata = src.dup

  %w[categories tags].each do |key|
    value = metadata_value(post, key)
    metadata[key] = value if value && !(value.respond_to?(:empty?) && value.empty?)
  end

  metadata
end
#metadata_value(post, key) ⇒ Object
101 102 103 104 105 106 107 |
# File 'lib/al_ext_posts.rb', line 101
#
# Reads a named value from a post entry, whatever its shape.
#
# Hash-like objects (anything responding to `key?`) are looked up by the key
# as given and then by its symbol form. Other objects are asked through a
# public reader method of the same name, if they have one.
#
# @param post [Object] hash-like entry or object with reader methods
# @param key [String] name of the value to read
# @return [Object, nil] the value, or nil when the entry does not provide it
def metadata_value(post, key)
  if post.respond_to?(:key?)
    post[key] || post[key.to_sym]
  elsif post.respond_to?(key)
    post.public_send(key)
  end
end
#parse_published_date(published_date) ⇒ Object
109 110 111 112 113 114 115 116 117 118 |
# File 'lib/al_ext_posts.rb', line 109
#
# Normalizes a configured publication date to a UTC `Time`.
#
# Strings are parsed with `Time.parse`; `Date` values (including `DateTime`)
# are converted with `to_time`. `Time` values are accepted as well, because
# YAML loads timestamps that carry a time part as `Time`, not `String`.
#
# @param published_date [String, Date, Time] the configured date
# @return [Time] the same instant expressed in UTC
# @raise [RuntimeError] when the value is none of the supported types
# @raise [ArgumentError] when a String cannot be parsed by `Time.parse`
def parse_published_date(published_date)
  case published_date
  when String
    Time.parse(published_date).utc
  when Date
    published_date.to_time.utc
  when Time
    # getutc returns a new object; Time#utc would convert the caller's value in place.
    published_date.getutc
  else
    raise "Invalid date format for #{published_date}"
  end
end
#process_entries(site, src, entries) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/al_ext_posts.rb', line 37
#
# Creates one document per feed entry.
#
# Each entry's title, content, summary and published date are passed to
# `create_document`, together with the source configuration merged with any
# categories/tags the entry itself provides (see `metadata_for_post`).
#
# @param site [Object] site object, passed through to `create_document`
# @param src [Hash] source configuration; reads 'name'
# @param entries [Enumerable] feed entries responding to `url`, `title`,
#   `content`, `summary` and `published`
# @return [Object] the value returned by `entries.each`
def process_entries(site, src, entries)
  entries.each do |entry|
    puts "...fetching #{entry.url}"

    content = {
      title: entry.title,
      content: entry.content,
      summary: entry.summary,
      published: entry.published
    }
    create_document(site, src['name'], entry.url, content, metadata_for_post(src, entry))
  end
end