Class: Rospatent::Client

Inherits:
Object
  • Object
show all
Includes:
InputValidator
Defined in:
lib/rospatent/client.rb

Overview

Main client for interacting with the Rospatent API

Instance Method Summary collapse

Methods included from InputValidator

#validate_array, #validate_date, #validate_enum, #validate_hash, #validate_params, #validate_patent_id, #validate_positive_integer, #validate_required_date, #validate_required_string, #validate_string, #validate_text_with_word_count

Constructor Details

#initialize(token: nil, logger: nil, cache: nil) ⇒ Client

Create a new client instance

Parameters:

  • token (String) (defaults to: nil)

    JWT token for authentication (optional if set in configuration)

  • logger (Rospatent::Logger) (defaults to: nil)

    Custom logger instance (optional)

  • cache (Rospatent::Cache) (defaults to: nil)

    Custom cache instance (optional)

Raises:

  • (Errors::MissingTokenError)

    if no token is passed and none is set in configuration



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rospatent/client.rb', line 19

# Set up a client: resolve the API token, then attach logger, cache and counters.
#
# @param token [String, nil] JWT token; when omitted, Rospatent.configuration.token is used
# @param logger [Object, nil] logger to use; when omitted, one is built by create_logger
# @param cache [Object, nil] cache to use; when omitted, one is built by create_cache
# @raise [Errors::MissingTokenError] if neither the argument nor the configuration yields a token
def initialize(token: nil, logger: nil, cache: nil)
  @token = token || Rospatent.configuration.token
  raise Errors::MissingTokenError, "API token is required" unless @token

  # Collaborators: an explicit argument wins over the default.
  @logger = logger || create_logger
  @cache = cache || create_cache

  # Request counters, both starting from zero.
  @request_count, @total_duration = 0, 0.0
end

Instance Method Details

#batch_patents(document_ids, batch_size: 10) ⇒ Enumerator

Batch process multiple patents

Parameters:

  • document_ids (Array<String>)

    Array of document IDs

  • batch_size (Integer) (defaults to: 10)

    Number of patents to process concurrently

Returns:

  • (Enumerator)

    Enumerator that yields patent documents



449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# File 'lib/rospatent/client.rb', line 449

# Fetch many patents, running each slice of IDs on parallel threads.
#
# Without a block an Enumerator for the same call is returned. A failed lookup
# does not abort the batch: it is logged and yielded as a hash with :error and
# :document_id keys.
#
# @param document_ids [Array<String>] IDs to fetch (validated with max_size: 1000)
# @param batch_size [Integer] threads started per slice (validated with max_value: 50)
# @yield [Object] the #patent result, or the error hash, for each ID in input order
# @return [Enumerator] when no block is given
def batch_patents(document_ids, batch_size: 10)
  unless block_given?
    return enum_for(:batch_patents, document_ids, batch_size: batch_size)
  end

  validate_array(document_ids, "document_ids", max_size: 1000)
  slice_size = validate_positive_integer(batch_size, "batch_size", max_value: 50)

  document_ids.each_slice(slice_size) do |slice|
    workers = slice.map do |current_id|
      Thread.new do
        begin
          patent(current_id)
        rescue StandardError => failure
          @logger.log_error(failure, { document_id: current_id, operation: "batch_patents" })
          { error: failure.message, document_id: current_id }
        end
      end
    end

    # Thread#value joins the thread, so results are yielded in input order.
    workers.each { |worker| yield worker.value }
  end
end

#classification_code(classifier_id, code:, lang: "ru") ⇒ Hash

Get detailed information about a specific classification code

Examples:

Get information about IPC code

info = client.classification_code("ipc", code: "F02K9/00", lang: "ru")

Parameters:

  • classifier_id (String)

    Classification system identifier (“ipc” or “cpc”)

  • code (String)

    Classification code to look up

  • lang (String) (defaults to: "ru")

    Language for the description (“ru” or “en”)

Returns:

  • (Hash)

    Detailed information about the classification code

Raises:



354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/rospatent/client.rb', line 354

# Look up one classification code, serving repeated lookups from the cache.
#
# @param classifier_id [String] classification system, "ipc" or "cpc"
# @param code [String] code to look up (validated with max_length: 50)
# @param lang [String] description language, "ru" or "en"
# @return [Hash] the cached entry when present, otherwise the POST result
def classification_code(classifier_id, code:, lang: "ru")
  classifier = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  lookup_code = validate_string(code, "code", max_length: 50)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:code:#{classifier}:#{lookup_code}:#{language}"
  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  ttl = 3600 # one hour; kept longer because classification codes rarely change
  body = { code: lookup_code, lang: language }
  post("/patsearch/v0.2/classification/#{classifier}/code", body).tap do |response|
    @cache.set(key, response, ttl: ttl)
    @logger.log_cache("set", key, ttl: ttl)
  end
end

#classification_search(classifier_id, query:, lang: "ru") ⇒ Hash

Search within a classification system (IPC or CPC) using natural language

Examples:

Search for rocket-related IPC codes

results = client.classification_search("ipc", query: "ракета", lang: "ru")

Parameters:

  • classifier_id (String)

    Classification system identifier (“ipc” or “cpc”)

  • query (String)

    Search query in natural language

  • lang (String) (defaults to: "ru")

    Language for the search (“ru” or “en”)

Returns:

  • (Hash)

    Search results containing classification codes and descriptions

Raises:



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/rospatent/client.rb', line 313

# Search a classification system with a free-text query, caching the answer.
#
# @param classifier_id [String] classification system, "ipc" or "cpc"
# @param query [String] search text (validated with max_length: 1000)
# @param lang [String] search language, "ru" or "en"
# @return [Hash] the cached entry when present, otherwise the POST result
def classification_search(classifier_id, query:, lang: "ru")
  scheme = validate_enum(classifier_id, %w[ipc cpc], "classifier_id").to_s
  terms = validate_string(query, "query", max_length: 1000)
  language = validate_enum(lang, %w[ru en], "lang").to_s

  key = "classification:search:#{scheme}:#{terms}:#{language}"
  stored = @cache.get(key)
  if stored
    @logger.log_cache("hit", key)
    return stored
  end
  @logger.log_cache("miss", key)

  found = post("/patsearch/v0.2/classification/#{scheme}/search",
               { query: terms, lang: language })

  half_hour = 1800
  @cache.set(key, found, ttl: half_hour)
  @logger.log_cache("set", key, ttl: half_hour)
  found
end

#datasets_tree ⇒ Array<Hash>

Get the list of available search datasets (collections)

Returns:

  • (Array<Hash>)

    List of available datasets organized in a tree structure



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/rospatent/client.rb', line 190

# Return the tree of available search datasets, caching it for an hour.
#
# @return [Array<Hash>] the cached tree when present, otherwise the GET result
def datasets_tree
  key = "datasets:tree"

  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  # Datasets rarely change, hence the one-hour TTL.
  get("/patsearch/v0.2/datasets/tree").tap do |tree|
    @cache.set(key, tree, ttl: 3600)
    @logger.log_cache("set", key, ttl: 3600)
  end
end

#get(endpoint, params = {}) ⇒ Hash

Execute a GET request to the API

Parameters:

  • endpoint (String)

    API endpoint

  • params (Hash) (defaults to: {})

    Query parameters (optional)

Returns:

  • (Hash)

    Response data



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
# File 'lib/rospatent/client.rb', line 390

# Perform a GET request against the API and return the processed response.
#
# Elapsed time is measured with the monotonic clock, so a wall-clock
# adjustment during the request cannot distort (or make negative) the logged
# duration or the accumulated total.
#
# @param endpoint [String] API endpoint path
# @param params [Hash] query parameters (optional)
# @return [Object] whatever handle_response returns for the response, or
#   whatever handle_error returns when a Faraday::Error is rescued
def get(endpoint, params = {})
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("GET", endpoint, params, connection.headers)
  @request_count += 1

  response = connection.get(endpoint, params) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
  end

  # Only requests that produced a response contribute to the running total.
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("GET", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  handle_response(response, request_id)
rescue Faraday::Error => e
  # Faraday failures are logged with their request context, then delegated.
  @logger.log_error(e, { endpoint: endpoint, params: params, request_id: request_id })
  handle_error(e)
end

#parse_abstract(patent_data, format: :text, language: "ru") ⇒ String?

Extract and parse the abstract content from a patent document. Delegates to PatentParser.parse_abstract.

Examples:

Get plain text abstract

abstract = client.parse_abstract(patent_doc)

Get HTML abstract in English

abstract_html = client.parse_abstract(patent_doc, format: :html, language: "en")

Parameters:

  • patent_data (Hash)

    The patent document data returned by #patent method

  • format (Symbol) (defaults to: :text)

    The desired output format (:text or :html)

  • language (String) (defaults to: "ru")

    The language code (e.g., “ru”, “en”)

Returns:

  • (String, nil)

    The parsed abstract content in the requested format or nil if not found



276
277
278
279
280
281
282
# File 'lib/rospatent/client.rb', line 276

# Validate the options, then hand abstract extraction to PatentParser.
#
# @param patent_data [Hash] patent document data, as returned by #patent
# @param format [Symbol] output format, :text or :html
# @param language [String, nil] language code; only validated when given
# @return [Object] whatever PatentParser.parse_abstract returns
def parse_abstract(patent_data, format: :text, language: "ru")
  validate_enum(format, %i[text html], "format")
  validate_string(language, "language", max_length: 5) if language

  # The caller's original values (not the validator results) are forwarded.
  parser_options = { format: format, language: language }
  PatentParser.parse_abstract(patent_data, **parser_options)
end

#parse_description(patent_data, format: :text, language: "ru") ⇒ String, ...

Extract and parse the description content from a patent document. Delegates to PatentParser.parse_description.

Examples:

Get plain text description

description = client.parse_description(patent_doc)

Get HTML description

description_html = client.parse_description(patent_doc, format: :html)

Get description split into sections

sections = client.parse_description(patent_doc, format: :sections)

Parameters:

  • patent_data (Hash)

    The patent document data returned by #patent method

  • format (Symbol) (defaults to: :text)

    The desired output format (:text, :html, or :sections)

  • language (String) (defaults to: "ru")

    The language code (e.g., “ru”, “en”)

Returns:

  • (String, Array, nil)

    The parsed description content in the requested format or nil if not found



297
298
299
300
301
302
303
# File 'lib/rospatent/client.rb', line 297

# Validate the options, then hand description extraction to PatentParser.
#
# @param patent_data [Hash] patent document data, as returned by #patent
# @param format [Symbol] output format: :text, :html or :sections
# @param language [String, nil] language code; only validated when given
# @return [Object] whatever PatentParser.parse_description returns
def parse_description(patent_data, format: :text, language: "ru")
  validate_enum(format, %i[text html sections], "format")
  validate_string(language, "language", max_length: 5) if language

  # The caller's original values (not the validator results) are forwarded.
  parser_options = { format: format, language: language }
  PatentParser.parse_description(patent_data, **parser_options)
end

#patent(document_id) ⇒ Hash

Fetch a specific patent by its document ID using the dedicated endpoint. The document_id must follow one of these formats:

  • Published documents: <country code><publication number><document type code>_<publication date as YYYYMMDD>. Example: RU134694U1_20131120

  • Unpublished applications: <country code><number><document type code>_<date as YYYYMMDD>

Parameters:

  • document_id (String)

    The document ID to retrieve

Returns:

  • (Hash)

    The patent document data

Raises:



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rospatent/client.rb', line 55

# Fetch one patent document by ID, serving repeated lookups from the cache.
#
# @param document_id [String] document ID, checked by validate_patent_id
# @return [Hash] the cached document when present, otherwise the GET result
def patent(document_id)
  doc_id = validate_patent_id(document_id)
  key = "patent:#{doc_id}"

  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  one_hour = 3600
  get("/patsearch/v0.2/docs/#{doc_id}").tap do |document|
    @cache.set(key, document, ttl: one_hour)
    @logger.log_cache("set", key, ttl: one_hour)
  end
end

#patent_by_components(country_code, number, doc_type, date) ⇒ Hash

Retrieve document by document components

Parameters:

  • country_code (String)

    Country code (e.g., “RU”)

  • number (String)

    Patent number

  • doc_type (String)

    Document type (e.g., “A1”)

  • date (String, Date)

    Publication date

Returns:

  • (Hash)

    Document data

Raises:



86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/rospatent/client.rb', line 86

# Retrieve a document from its individual ID components.
#
# All four components are needed to build the document ID, so they are checked
# with the validate_required_* helpers, the same ones #patent_media uses for
# these components.
# NOTE(review): this assumes the required variants reject nil while the plain
# validate_string / validate_date may pass it through (which would make
# strftime fail on nil) - confirm against InputValidator.
#
# @param country_code [String] country code (e.g., "RU")
# @param number [String] patent number
# @param doc_type [String] document type (e.g., "A1")
# @param date [String, Date] publication date
# @return [Hash] document data, as returned by #patent
def patent_by_components(country_code, number, doc_type, date)
  validated_country = validate_required_string(country_code, "country_code", max_length: 2)
  validated_number = validate_required_string(number, "number")
  validated_doc_type = validate_required_string(doc_type, "doc_type", max_length: 3)
  validated_date = validate_required_date(date, "date")

  # Document ID layout: country + number + type + "_" + date as YYYYMMDD.
  formatted_date = validated_date.strftime("%Y%m%d")
  document_id = "#{validated_country}#{validated_number}#{validated_doc_type}_#{formatted_date}"

  patent(document_id)
end

#patent_media(collection_id, country_code, doc_type, pub_date, pub_number, filename) ⇒ String

Retrieve media data (PDF, images, 3D objects) for a patent document

Parameters:

  • collection_id (String)

    Dataset/collection identifier (e.g., “National”)

  • country_code (String)

    Country code of publication (e.g., “RU”)

  • doc_type (String)

    Document type code (e.g., “U1”)

  • pub_date (String, Date)

    Publication date in format YYYY/MM/DD

  • pub_number (String)

    Publication number

  • filename (String)

    Media file name (e.g., “document.pdf”)

Returns:

  • (String)

    Binary content of the requested file

Raises:



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/rospatent/client.rb', line 220

# Download a media file (PDF, image, 3D object) attached to a patent document.
#
# @param collection_id [String] dataset/collection identifier (e.g., "National")
# @param country_code [String] publication country code (e.g., "RU")
# @param doc_type [String] document type code (e.g., "U1")
# @param pub_date [String, Date] publication date
# @param pub_number [String] publication number
# @param filename [String] media file name (e.g., "document.pdf")
# @return [Object] whatever #get returns for the media path
def patent_media(collection_id, country_code, doc_type, pub_date, pub_number,
                 filename)
  collection = validate_required_string(collection_id, "collection_id")
  country = validate_required_string(country_code, "country_code", max_length: 2)
  type_code = validate_required_string(doc_type, "doc_type", max_length: 3)
  published_on = validate_required_date(pub_date, "pub_date")
  number = validate_required_string(pub_number, "pub_number")
  file = validate_required_string(filename, "filename")

  # Path layout: /media/<collection>/<country>/<type>/<YYYY>/<MM>/<DD>/<number>/<file>
  date_path = published_on.strftime("%Y/%m/%d")
  location = "/media/#{collection}/#{country}/#{type_code}"
  get("#{location}/#{date_path}/#{number}/#{file}")
end

#patent_media_by_id(document_id, collection_id, filename) ⇒ String

Simplified method to retrieve media data by patent ID and collection ID

Parameters:

  • document_id (String)

    The patent document ID (e.g., “RU134694U1_20131120”)

  • collection_id (String)

    Dataset/collection identifier (e.g., “National”)

  • filename (String)

    Media file name (e.g., “document.pdf”)

Returns:

  • (String)

    Binary content of the requested file

Raises:



249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/rospatent/client.rb', line 249

# Download a media file using a full patent document ID instead of components.
#
# The ID is split by parse_patent_id and the pieces are passed on to
# #patent_media.
#
# @param document_id [String] patent document ID (e.g., "RU134694U1_20131120")
# @param collection_id [String] dataset/collection identifier (e.g., "National")
# @param filename [String] media file name (e.g., "document.pdf")
# @return [Object] whatever #patent_media returns
def patent_media_by_id(document_id, collection_id, filename)
  doc_id = validate_patent_id(document_id)
  collection = validate_required_string(collection_id, "collection_id")
  file = validate_required_string(filename, "filename")

  parts = parse_patent_id(doc_id)

  # Rewrite YYYYMMDD as YYYY/MM/DD; a value in any other shape is left untouched.
  slash_date = parts[:date].gsub(/^(\d{4})(\d{2})(\d{2})$/, '\1/\2/\3')

  patent_media(collection, parts[:country_code], parts[:doc_type],
               slash_date, parts[:number], file)
end

#post(endpoint, payload) ⇒ Hash

Execute a POST request to the API

Parameters:

  • endpoint (String)

    API endpoint

  • payload (Hash)

    Request payload

Returns:

  • (Hash)

    Response data



419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/rospatent/client.rb', line 419

# Perform a POST request with a JSON body and return the processed response.
#
# Elapsed time is measured with the monotonic clock, so a wall-clock
# adjustment during the request cannot distort (or make negative) the logged
# duration or the accumulated total.
#
# @param endpoint [String] API endpoint path
# @param payload [Hash] request payload, serialized with to_json
# @return [Object] whatever handle_response returns for the response, or
#   whatever handle_error returns when a Faraday::Error is rescued
def post(endpoint, payload)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  request_id = generate_request_id

  @logger.log_request("POST", endpoint, payload, connection.headers)
  @request_count += 1

  response = connection.post(endpoint) do |req|
    req.headers["Accept"] = "application/json"
    req.headers["Content-Type"] = "application/json"
    req.headers["X-Request-ID"] = request_id
    req.body = payload.to_json
  end

  # Only requests that produced a response contribute to the running total.
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  @total_duration += duration

  @logger.log_response("POST", endpoint, response.status, duration,
                       response_size: response.body&.bytesize, request_id: request_id)

  handle_response(response, request_id)
rescue Faraday::Error => e
  # Faraday failures are logged with their request context, then delegated.
  @logger.log_error(e, { endpoint: endpoint, payload: payload, request_id: request_id })
  handle_error(e)
end

#search(**params) ⇒ Rospatent::SearchResult

Execute a search against the Rospatent API

Parameters:

  • params (Hash)

    Search parameters

Returns:

  • (Rospatent::SearchResult)



37
38
39
40
41
# File 'lib/rospatent/client.rb', line 37

# Run a search: validate the keyword parameters, then execute them through a
# Search object bound to this client.
#
# @param params [Hash] search parameters, checked by validate_search_params
# @return [Object] whatever Search#execute returns
def search(**params)
  checked = validate_search_params(params)
  runner = Search.new(self)
  runner.execute(**checked)
end

#similar_patents_by_id(document_id, count: 100) ⇒ Hash

Find patents similar to a given document ID. This method uses the Rospatent API’s similar search endpoint to find patents similar to the given document ID. The document ID should be in the format ‘XX12345Y1_YYYYMMDD’, where ‘XX’ is the country code, ‘12345’ is the publication number, ‘Y1’ is the document type, and ‘YYYYMMDD’ is the publication date.

The method returns a hash containing the similar search results, which includes the patent IDs, titles, and other relevant information.

If the document ID is not provided, the method raises an InvalidRequestError. If the API request fails, the method raises an ApiError.

Parameters:

  • document_id (String)

    The document ID to find similar patents to

  • count (Integer) (defaults to: 100)

    Maximum number of results to return (default: 100)

Returns:

  • (Hash)

    The similar search results

Raises:



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/rospatent/client.rb', line 117

# Find patents similar to the document with the given ID, caching the answer.
#
# @param document_id [String] document ID, checked by validate_patent_id
# @param count [Integer] maximum number of results (validated with max_value: 1000)
# @return [Hash] the cached entry when present, otherwise the similar-search result
def similar_patents_by_id(document_id, count: 100)
  doc_id = validate_patent_id(document_id)
  limit = validate_positive_integer(count, "count", max_value: 1000)

  key = "similar:id:#{doc_id}:#{limit}"
  if (hit = @cache.get(key))
    @logger.log_cache("hit", key)
    return hit
  end
  @logger.log_cache("miss", key)

  request_body = { type_search: "id_search", pat_id: doc_id, count: limit }
  matches = post_with_redirects("/patsearch/v0.2/similar_search", request_body)

  half_hour = 1800
  @cache.set(key, matches, ttl: half_hour)
  @logger.log_cache("set", key, ttl: half_hour)
  matches
end

#similar_patents_by_text(text, count: 100) ⇒ Hash

Find patents similar to a given text

Parameters:

  • text (String)

    The text to find similar patents to (minimum 50 words required)

  • count (Integer) (defaults to: 100)

    Maximum number of results to return (default: 100)

Returns:

  • (Hash)

    The similar search results

Raises:



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/rospatent/client.rb', line 154

# Find patents similar to a piece of text, caching the answer.
#
# The cache key embeds a SHA-256 digest of the text. Unlike String#hash, which
# Ruby seeds differently in every process, the digest is identical across
# processes (so a shared cache passed to the constructor can actually hit) and
# is not prone to accidental collisions that would return results for a
# different text.
#
# @param text [String] text to match against (validated with min_words: 50,
#   max_length: 10_000)
# @param count [Integer] maximum number of results (validated with max_value: 1000)
# @return [Hash] the cached entry when present, otherwise the similar-search result
def similar_patents_by_text(text, count: 100)
  # Standard library; loaded here so this method does not depend on load order.
  require "digest"

  # Validate inputs - text must have at least 50 words for the API
  validated_text = validate_text_with_word_count(text, "search_text", min_words: 50,
                                                                      max_length: 10_000)
  validated_count = validate_positive_integer(count, "count", max_value: 1000)

  # Check cache first, keyed by a stable digest of the text
  text_digest = Digest::SHA256.hexdigest(validated_text)
  cache_key = "similar:text:#{text_digest}:#{validated_count}"
  cached_result = @cache.get(cache_key)
  if cached_result
    @logger.log_cache("hit", cache_key)
    return cached_result
  end

  @logger.log_cache("miss", cache_key)

  # Build the payload according to API spec
  payload = {
    type_search: "text_search",
    pat_text: validated_text,
    count: validated_count
  }

  # Make the API request with redirect handling
  result = post_with_redirects("/patsearch/v0.2/similar_search", payload)

  # Cache the result for 30 minutes
  @cache.set(cache_key, result, ttl: 1800)
  @logger.log_cache("set", cache_key, ttl: 1800)

  result
end

#statistics ⇒ Hash

Get client statistics

Returns:

  • (Hash)

    Client usage statistics



471
472
473
474
475
476
477
478
479
480
481
482
# File 'lib/rospatent/client.rb', line 471

# Summarize this client's usage so far.
#
# @return [Hash] :requests_made, :total_duration_seconds and
#   :average_request_time (both rounded to 3 places; the average is 0 before
#   any request), plus :cache_stats taken from the cache's own statistics
def statistics
  average =
    if @request_count.positive?
      (@total_duration / @request_count).round(3)
    else
      0
    end

  {
    requests_made: @request_count,
    total_duration_seconds: @total_duration.round(3),
    average_request_time: average,
    cache_stats: @cache.statistics
  }
end