Class: Hacker::News::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/hacker/news/client.rb

Overview

Client for the Hacker News Firebase API.

Examples:

client = Hacker::News::Client.new
item = client.item(1)
puts item.title if item.is_a?(Hacker::News::Story)

Constant Summary collapse

DEFAULT_BASE_URL =
'https://hacker-news.firebaseio.com/v0'
DEFAULT_TIMEOUT =
10.0
DEFAULT_CONCURRENCY =
10
DEFAULT_USER_AGENT =
"hn-client-ruby/#{VERSION}".freeze
DEFAULT_STORIES_LIMIT =
30

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(base_url: nil, timeout: DEFAULT_TIMEOUT, concurrency: DEFAULT_CONCURRENCY, user_agent: DEFAULT_USER_AGENT, transport: nil) ⇒ Client

Returns a new instance of Client.

Parameters:

  • base_url (String, nil) (defaults to: nil)

    overrides the API root. Defaults to ENV['HN_BASE'] when that variable is set and non-empty, otherwise to the Firebase URL (DEFAULT_BASE_URL).

  • timeout (Numeric) (defaults to: DEFAULT_TIMEOUT)

    per-request budget in seconds.

  • concurrency (Integer) (defaults to: DEFAULT_CONCURRENCY)

    batch fan-out cap.

  • user_agent (String) (defaults to: DEFAULT_USER_AGENT)
  • transport (#call, nil) (defaults to: nil)

    optional callable transport.call(url, timeout, user_agent) returning an object responding to .code and .body. Used in tests to mock HTTP.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/hacker/news/client.rb', line 40

# Builds a client, normalising every setting so that obviously unusable
# values fall back to the documented defaults instead of failing later.
#
# @param base_url [String, nil] API root override; nil or "" means "unset"
# @param timeout [Numeric] per-request budget in seconds
# @param concurrency [Integer] batch fan-out cap
# @param user_agent [String] stored as given
# @param transport [#call, nil] optional callable used instead of real HTTP
def initialize(base_url: nil, timeout: DEFAULT_TIMEOUT, concurrency: DEFAULT_CONCURRENCY,
               user_agent: DEFAULT_USER_AGENT, transport: nil)
  # First non-blank candidate wins: explicit argument, then HN_BASE, then
  # the built-in default. Blank strings count as unset for both the
  # argument and the environment variable (HN_BASE="" is common in .env
  # files, and an empty base would produce host-less request URLs).
  candidates = [base_url, ENV.fetch('HN_BASE', nil), DEFAULT_BASE_URL]
  root = candidates.find { |candidate| candidate && !candidate.empty? }
  @base_url = root.sub(%r{/+$}, '') # drop trailing slashes

  # Reject obviously-wrong timeout / concurrency values so users get a
  # default instead of a hang (timeout <= 0) or an empty-batch silent
  # failure (concurrency <= 0). Values that cannot even answer
  # #positive? (nil, strings, ...) are treated the same way rather than
  # raising NoMethodError.
  @timeout = usable_positive?(timeout) ? timeout : DEFAULT_TIMEOUT
  @concurrency = usable_positive?(concurrency) ? concurrency : DEFAULT_CONCURRENCY
  @user_agent = user_agent
  @transport = transport
end

# True when +value+ responds to #positive? and reports itself as positive.
private def usable_positive?(value)
  value.respond_to?(:positive?) && value.positive?
end

Instance Attribute Details

#base_url ⇒ Object (readonly)

Returns the value of attribute base_url.



32
33
34
# File 'lib/hacker/news/client.rb', line 32

# Read-only accessor.
#
# @return [Object] the value held in @base_url
def base_url = @base_url

#concurrency ⇒ Object (readonly)

Returns the value of attribute concurrency.



32
33
34
# File 'lib/hacker/news/client.rb', line 32

# Read-only accessor.
#
# @return [Object] the value held in @concurrency
def concurrency = @concurrency

#timeout ⇒ Object (readonly)

Returns the value of attribute timeout.



32
33
34
# File 'lib/hacker/news/client.rb', line 32

# Read-only accessor.
#
# @return [Object] the value held in @timeout
def timeout = @timeout

#user_agent ⇒ Object (readonly)

Returns the value of attribute user_agent.



32
33
34
# File 'lib/hacker/news/client.rb', line 32

# Read-only accessor.
#
# @return [Object] the value held in @user_agent
def user_agent = @user_agent

Instance Method Details

#ask_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



154
155
156
# File 'lib/hacker/news/client.rb', line 154

# Hands the id list from #ask_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def ask_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = ask_story_ids
  hydrate(story_ids, limit)
end

#ask_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


137
138
# File 'lib/hacker/news/client.rb', line 137

# Returns the result of +id_list+ for the /askstories.json endpoint.
#
# @return [Array<Integer>]
def ask_story_ids
  id_list('/askstories.json')
end

#best_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



151
152
153
# File 'lib/hacker/news/client.rb', line 151

# Hands the id list from #best_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def best_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = best_story_ids
  hydrate(story_ids, limit)
end

#best_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


135
136
# File 'lib/hacker/news/client.rb', line 135

# Returns the result of +id_list+ for the /beststories.json endpoint.
#
# @return [Array<Integer>]
def best_story_ids
  id_list('/beststories.json')
end

#comment_tree(id) ⇒ CommentTreeNode?

Recursively fetch a comment tree rooted at id. Uses one global SizedQueue-based semaphore bounding in-flight HTTP requests. Deleted nodes pruned. Fails fast.

Fan-out uses a bounded worker pool to prevent unbounded Thread creation on large trees (a story with 500 top-level kids × 50 replies each previously spawned 25k+ threads).

Parameters:

  • id (Integer)

Returns:



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/hacker/news/client.rb', line 171

# Fetches the comment tree rooted at +id+ and returns it as nested
# CommentTreeNode objects.
#
# Work is done in two phases:
#
# 1. A fixed pool of @concurrency worker threads drains a shared job queue
#    of item ids. Each worker fetches one item at a time, so both in-flight
#    HTTP requests and live threads are capped at @concurrency no matter
#    how wide or deep the tree is. Kids discovered by a fetch are pushed
#    back onto the same queue.
# 2. Once every job has finished, the tree is assembled from the collected
#    payloads, keeping each node's replies in the order of its 'kids' list.
#
# Missing (nil) and deleted items are pruned together with their subtrees.
# Fail-fast: after the first error no further HTTP request is started;
# already-queued ids are drained without fetching and the error is
# re-raised to the caller.
#
# @param id [Integer] id of the root item
# @return [CommentTreeNode, nil] nil when the root is missing or deleted
# @raise [JsonError] when an item payload is neither nil nor a Hash
# @raise [StandardError] the first error raised while fetching any node
def comment_tree(id)
  bodies = {}       # node id => payload Hash, live (non-deleted) nodes only
  jobs = Queue.new
  lock = Mutex.new  # guards bodies, outstanding and first_error
  outstanding = 1   # jobs queued or in progress; the root is queued below
  first_error = nil

  # Ids are wrapped in a one-element array so that a nil/false id can never
  # be mistaken for the nil that Queue#pop returns on a closed, empty queue.
  jobs << [id]

  workers = Array.new(@concurrency) do
    Thread.new do
      while (job = jobs.pop)
        node_id = job.first
        begin
          # Fail-fast: once a peer has failed, drain without fetching.
          next if lock.synchronize { first_error }

          body = get_json("/item/#{node_id}.json")
          next if body.nil?
          raise JsonError, "hn: item #{node_id} expected Hash, got #{body.class}" unless body.is_a?(Hash)
          next if body['deleted'] == true

          kids = body['kids'] || []
          # Count the kids as outstanding BEFORE they are queued so the
          # counter cannot reach zero while work is still being published.
          lock.synchronize do
            bodies[node_id] = body
            outstanding += kids.size
          end
          kids.each { |kid| jobs << [kid] }
        rescue StandardError => e
          lock.synchronize { first_error ||= e }
        ensure
          # Runs for every job (including the `next` paths). Whoever
          # finishes the last outstanding job closes the queue, which
          # releases all workers blocked in pop.
          jobs.close if lock.synchronize { (outstanding -= 1).zero? }
        end
      end
    end
  end
  workers.each(&:join)
  raise first_error if first_error

  # Build nodes top-down from the collected payloads; ids without a payload
  # were missing or deleted and are dropped along with their descendants.
  assemble = lambda do |node_id|
    body = bodies[node_id]
    next nil unless body

    kids = body['kids'] || []
    CommentTreeNode.new(
      {
        'id' => body['id'],
        'type' => 'comment',
        'by' => body['by'],
        'time' => body['time'],
        'parent' => body['parent'],
        'text' => body['text'],
        'dead' => body['dead'] == true,
        'kids' => kids
      },
      replies: kids.filter_map { |kid| assemble.call(kid) }
    )
  end

  assemble.call(id)
end

#item(id) ⇒ Item?

Fetch a single item.

Parameters:

  • id (Integer)

Returns:

  • (Item, nil)

    nil for unknown ids and deleted stubs.

Raises:



59
60
61
62
63
64
65
# File 'lib/hacker/news/client.rb', line 59

# Fetches /item/<id>.json and converts the payload with Item.from_hash.
#
# @param id [Integer]
# @return [Item, nil] nil when the payload is nil or is a Hash whose
#   'deleted' entry is exactly true
def item(id)
  payload = get_json("/item/#{id}.json")
  tombstone = payload.is_a?(Hash) && payload['deleted'] == true
  Item.from_hash(payload) unless payload.nil? || tombstone
end

#items(ids) ⇒ Array<Item>

Fetch many items with bounded concurrency. Fail-fast: any error aborts remaining fetches and is re-raised.

Parameters:

  • ids (Array<Integer>)

Returns:

  • (Array<Item>)

    nulls/deleted dropped; surviving order preserved.



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/hacker/news/client.rb', line 71

# Fetches many items through #item using a small pool of threads.
#
# Every id is queued together with its position so results land in the
# slot matching the input order; nil results are dropped at the end. The
# pool has min(@concurrency, ids.size) workers.
#
# Fail-fast: the first StandardError raised by #item is remembered, workers
# stop picking up further ids once they see it, and it is re-raised after
# all workers have been joined.
#
# @param ids [Array<Integer>]
# @return [Array<Item>] non-nil results in input order
def items(ids)
  return [] if ids.empty?

  slots = Array.new(ids.size)
  lock = Mutex.new
  failure = nil

  jobs = Queue.new
  ids.each_with_index { |item_id, position| jobs << [position, item_id] }
  jobs.close # pop now yields nil once the queue has been drained

  pool_size = [@concurrency, ids.size].min
  pool = Array.new(pool_size) do
    Thread.new do
      while (job = jobs.pop)
        break if lock.synchronize { failure }

        position, item_id = job
        begin
          slots[position] = item(item_id)
        rescue StandardError => e
          lock.synchronize { failure ||= e }
          break
        end
      end
    end
  end
  pool.each(&:join)

  raise failure if failure

  slots.compact
end

#job_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



160
# File 'lib/hacker/news/client.rb', line 160

# Hands the id list from #job_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def job_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = job_story_ids
  hydrate(story_ids, limit)
end

#job_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


141
# File 'lib/hacker/news/client.rb', line 141

# Returns the result of +id_list+ for the /jobstories.json endpoint.
#
# @return [Array<Integer>]
def job_story_ids
  id_list('/jobstories.json')
end

#max_item ⇒ Integer

Returns current largest item id.

Returns:

  • (Integer)

    current largest item id.

Raises:



115
116
117
118
119
120
# File 'lib/hacker/news/client.rb', line 115

# Reads /maxitem.json and returns it after checking that it is an Integer.
#
# @return [Integer] current largest item id
# @raise [JsonError] when the payload is not an Integer
def max_item
  payload = get_json('/maxitem.json')
  return payload if payload.is_a?(Integer)

  raise JsonError, "hn: maxitem expected Integer, got #{payload.class}"
end

#new_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



148
149
150
# File 'lib/hacker/news/client.rb', line 148

# Hands the id list from #new_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def new_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = new_story_ids
  hydrate(story_ids, limit)
end

#new_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


133
134
# File 'lib/hacker/news/client.rb', line 133

# Returns the result of +id_list+ for the /newstories.json endpoint.
#
# @return [Array<Integer>]
def new_story_ids
  id_list('/newstories.json')
end

#show_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



157
158
159
# File 'lib/hacker/news/client.rb', line 157

# Hands the id list from #show_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def show_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = show_story_ids
  hydrate(story_ids, limit)
end

#show_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


139
140
# File 'lib/hacker/news/client.rb', line 139

# Returns the result of +id_list+ for the /showstories.json endpoint.
#
# @return [Array<Integer>]
def show_story_ids
  id_list('/showstories.json')
end

#top_stories(limit: DEFAULT_STORIES_LIMIT) ⇒ Array<Item>

Parameters:

  • limit (Integer) (defaults to: DEFAULT_STORIES_LIMIT)

Returns:



145
146
147
# File 'lib/hacker/news/client.rb', line 145

# Hands the id list from #top_story_ids, together with +limit+, to
# +hydrate+ and returns whatever +hydrate+ produces.
#
# @param limit [Integer] forwarded to +hydrate+ as its second argument
# @return [Array<Item>]
def top_stories(limit: DEFAULT_STORIES_LIMIT)
  story_ids = top_story_ids
  hydrate(story_ids, limit)
end

#top_story_ids ⇒ Array<Integer>

Returns:

  • (Array<Integer>)


131
132
# File 'lib/hacker/news/client.rb', line 131

# Returns the result of +id_list+ for the /topstories.json endpoint.
#
# @return [Array<Integer>]
def top_story_ids
  id_list('/topstories.json')
end

#updates ⇒ Updates

Returns:

Raises:



123
124
125
126
127
128
# File 'lib/hacker/news/client.rb', line 123

# Reads /updates.json and wraps its 'items' and 'profiles' entries in an
# Updates value; an absent (or nil/false) entry becomes an empty array.
#
# @return [Updates]
# @raise [JsonError] when the payload is not a Hash
def updates
  payload = get_json('/updates.json')
  unless payload.is_a?(Hash)
    raise JsonError, "hn: updates expected Hash, got #{payload.class}"
  end

  changed_items = payload['items'] || []
  changed_profiles = payload['profiles'] || []
  Updates.new(items: changed_items, profiles: changed_profiles)
end

#user(username) ⇒ User?

Fetch a user profile. nil for unknown users.

Parameters:

  • username (String)

Returns:



107
108
109
110
111
112
# File 'lib/hacker/news/client.rb', line 107

# Reads /user/<username>.json and converts the payload with User.from_hash.
# The username is interpolated into the path as given.
#
# @param username [String]
# @return [User, nil] nil when the payload is nil
def user(username)
  profile = get_json("/user/#{username}.json")
  User.from_hash(profile) unless profile.nil?
end