Class: Post

Inherits:
Object
  • Object
show all
Defined in:
lib/Post.rb

Defined Under Namespace

Classes: PostInfo

Constant Summary collapse

# Absolute path of the PostViewerEdgeContentQuery GraphQL document, resolved
# relative to this file's directory (__dir__) and frozen. Read by
# .postViewerEdgeContentQueryString.
POST_VIEWER_EDGE_QUERY_PATH =
File.expand_path('Queries/PostViewerEdgeContentQuery.graphql', __dir__).freeze
# Absolute path of the PostPageQuery GraphQL document, resolved relative to
# this file's directory (__dir__) and frozen. Read by .postPageQueryString.
POST_PAGE_QUERY_PATH =
File.expand_path('Queries/PostPageQuery.graphql', __dir__).freeze

Class Method Summary collapse

Class Method Details

.fetchPostParagraphs(postID) ⇒ Object



28
29
30
31
32
# File 'lib/Post.rb', line 28

# Runs the PostViewerEdgeContentQuery GraphQL operation for a post and
# returns the value found at [0].data.post.viewerEdge.fullContent.
#
# @param postID [String] sent as the "postId" query variable
# @return [Object, nil] the "fullContent" value, or nil when the request
#   produced no response or any key along that path is missing
def self.fetchPostParagraphs(postID)
  variables = { "postId" => postID }
  response = postGraphQL("PostViewerEdgeContentQuery", postViewerEdgeContentQueryString, variables)
  return nil if response.nil?

  response.dig(0, "data", "post", "viewerEdge", "fullContent")
end

.getPostIDFromPostURLString(postURLString) ⇒ Object



18
19
20
21
# File 'lib/Post.rb', line 18

# Extracts the post ID from a post URL: the text after the last '-' in the
# last segment of the URL path.
#
# @param postURLString [String] URL of the post
# @return [String, nil] the trailing '-'-separated token of the last path
#   segment, or nil when the URL has no path segment (e.g. "https://medium.com")
# @raise [URI::InvalidURIError] when postURLString cannot be parsed as a URI
def self.getPostIDFromPostURLString(postURLString)
  # `to_s` covers URIs whose path is nil; an empty or "/" path splits to [],
  # so `last` yields nil instead of a segment.
  slug = URI.parse(postURLString).path.to_s.split('/').last
  # Safe navigation keeps a missing segment from raising NoMethodError.
  slug&.split('-')&.last
end

.getPostPathFromPostURLString(postURLString) ⇒ Object



23
24
25
26
# File 'lib/Post.rb', line 23

# Returns the last segment of the path component of a post URL.
#
# @param postURLString [String] URL of the post
# @return [String, nil] the last '/'-separated path segment, or nil when the
#   path contains no segments
def self.getPostPathFromPostURLString(postURLString)
  segments = URI.parse(postURLString).path.split('/')
  segments[-1]
end

.parsePostInfo(postID, pathPolicy) ⇒ Object

Fetches post-level metadata (title, tags, creator, dates, preview image, collection) directly from Medium’s PostPageQuery GraphQL operation. Replaces the previous approach of scraping `window.__APOLLO_STATE__` out of the post HTML page, which Medium has been progressively dismantling.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/Post.rb', line 38

# Fetches post-level metadata through the PostPageQuery GraphQL operation and
# copies it into a new PostInfo.
#
# Fields filled from the "postResult" node: description (previewContent
# subtitle), title, tags (each tag's "normalizedTagSlug"), creator name,
# collection name, firstPublishedAt and latestPublishedAt. Non-printable
# characters are stripped from description and title. When a preview image id
# is present the image is downloaded, and previewImage is set only if
# ImageDownloader.download returns a truthy value.
#
# @param postID [String] sent as the "postId" query variable; also used as
#   the sub-directory name for the preview image
# @param pathPolicy [PathPolicy] base policy used to derive the absolute and
#   relative locations of the post's directory
# @return [PostInfo, nil] the populated PostInfo, or nil when the request
#   produced no response or the response has no "postResult"
def self.parsePostInfo(postID, pathPolicy)
  json = postGraphQL("PostPageQuery", postPageQueryString,
                     { "postId" => postID,
                       "postMeteringOptions" => { "referrer" => "https://medium.com/me/stories" },
                       "includeShouldFollowPost" => false })
  return nil if json.nil?

  result = json.dig(0, "data", "postResult")
  return nil if result.nil?

  postInfo = PostInfo.new
  postInfo.description = result.dig("previewContent", "subtitle")&.gsub(/[^[:print:]]/, '')
  postInfo.title = result["title"]&.gsub(/[^[:print:]]/, '')
  postInfo.tags = result["tags"]&.map { |tag| tag["normalizedTagSlug"] }
  postInfo.creator = result.dig("creator", "name")
  postInfo.collectionName = result.dig("collection", "name")

  # Timestamps are passed to Time.at as a millisecond offset from the epoch.
  firstPublishedAt = result["firstPublishedAt"]
  postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond) if firstPublishedAt

  latestPublishedAt = result["latestPublishedAt"]
  postInfo.latestPublishedAt = Time.at(0, latestPublishedAt, :millisecond) if latestPublishedAt

  previewImageFileName = result.dig("previewImage", "id")
  if previewImageFileName
    # The image is stored under a per-post directory derived from the base policy.
    imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
    absolutePath = imagePathPolicy.getAbsolutePath(previewImageFileName)

    # The image host can be overridden through the MIRO_MEDIUM_HOST environment variable.
    miro_host = ENV.fetch('MIRO_MEDIUM_HOST', 'https://miro.medium.com')
    imageURL = "#{miro_host}/#{previewImageFileName}"

    if ImageDownloader.download(absolutePath, imageURL)
      postInfo.previewImage = imagePathPolicy.getRelativePath(previewImageFileName)
    end
  end

  postInfo
end

.postGraphQL(operationName, queryString, variables) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/Post.rb', line 85

# Sends one GraphQL operation, wrapped in a single-element array, as a POST
# request and returns the parsed JSON response.
#
# The endpoint is taken from the MEDIUM_HOST environment variable and falls
# back to https://medium.com/_/graphql.
#
# @param operationName [String] value of the "operationName" field
# @param queryString [String] value of the "query" field
# @param variables [Hash] value of the "variables" field
# @return [Object, nil] the result of JSON.parse on the response body, or nil
#   when Request.body returns nil
def self.postGraphQL(operationName, queryString, variables)
  endpoint = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
  operation = {
    "operationName" => operationName,
    "variables" => variables,
    "query" => queryString
  }

  raw = Request.body(Request.URL(endpoint, 'POST', [operation]))
  raw.nil? ? nil : JSON.parse(raw)
end

.postPageQueryString ⇒ Object



81
82
83
# File 'lib/Post.rb', line 81

# Returns the text of the file at POST_PAGE_QUERY_PATH. The file is read on
# the first call and the result is cached in @postPageQueryString.
#
# @return [String] contents of the PostPageQuery GraphQL document
def self.postPageQueryString
  return @postPageQueryString if @postPageQueryString

  @postPageQueryString = File.read(POST_PAGE_QUERY_PATH)
end

.postViewerEdgeContentQueryString ⇒ Object



77
78
79
# File 'lib/Post.rb', line 77

# Returns the text of the file at POST_VIEWER_EDGE_QUERY_PATH. The file is
# read on the first call and the result is cached in
# @postViewerEdgeContentQueryString.
#
# @return [String] contents of the PostViewerEdgeContentQuery GraphQL document
def self.postViewerEdgeContentQueryString
  return @postViewerEdgeContentQueryString if @postViewerEdgeContentQueryString

  @postViewerEdgeContentQueryString = File.read(POST_VIEWER_EDGE_QUERY_PATH)
end