Class: Retab::Extractions

Inherits:
Object
  • Object
show all
Defined in:
lib/retab/extractions.rb

Instance Method Summary collapse

Constructor Details

#initialize(client) ⇒ Extractions

Returns a new instance of Extractions.



9
10
11
# File 'lib/retab/extractions.rb', line 9

# Builds the extractions resource around an API client.
#
# @param client [Object] stored as +@client+; the other instance methods
#   call +request+ on it to reach the API
def initialize(client)
  @client = client
end

Instance Method Details

#create(document:, json_schema:, model: nil, image_resolution_dpi: nil, instructions: nil, n_consensus: nil, metadata: nil, additional_messages: nil, bust_cache: nil, stream: nil, request_options: {}) ⇒ Retab::Extraction

Create Extraction

Parameters:

  • document (Retab::MimeData, Pathname, IO, String, Hash)
  • json_schema (Hash{String => Object})

    JSON schema describing the structured output

  • model (String, nil) (defaults to: nil)

    The model to use for the extraction

  • image_resolution_dpi (Integer, nil) (defaults to: nil)

    Resolution of the image sent to the LLM

  • instructions (String, nil) (defaults to: nil)

    Free-form instructions appended to the system prompt to steer the extraction.

  • n_consensus (Integer, nil) (defaults to: nil)

    Number of consensus extraction runs to perform. Uses a deterministic single pass when set to 1.

  • metadata (Hash{String => String}, nil) (defaults to: nil)

    User-defined metadata to associate with this extraction

  • additional_messages (Array<Hash{String => Object}>, nil) (defaults to: nil)

    Additional chat messages forwarded to the extraction model.

  • bust_cache (Boolean, nil) (defaults to: nil)

    If true, skip the LLM cache and force a fresh completion

  • stream (Boolean, nil) (defaults to: nil)
  • request_options (Hash) (defaults to: {})

    (see Retab::Types::RequestOptions)

Returns:

  • (Retab::Extraction)



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/retab/extractions.rb', line 98

# Create an extraction by sending POST /v1/extractions.
#
# Keyword arguments left as nil are removed from the request body with
# Hash#compact, so only explicitly supplied options are sent. Note that
# +false+ values are kept.
#
# @param document [Retab::MimeData, Pathname, IO, String, Hash] passed through
#   Retab::MimeData.coerce unless nil
# @param json_schema [Hash{String => Object}] JSON schema describing the structured output
# @param model [String, nil] the model to use for the extraction
# @param image_resolution_dpi [Integer, nil] resolution of the image sent to the LLM
# @param instructions [String, nil] free-form instructions appended to the system prompt
# @param n_consensus [Integer, nil] number of consensus extraction runs to perform
# @param metadata [Hash{String => String}, nil] user-defined metadata to associate with this extraction
# @param additional_messages [Array<Hash{String => Object}>, nil] additional chat messages
#   forwarded to the extraction model
# @param bust_cache [Boolean, nil] if true, skip the LLM cache and force a fresh completion
# @param stream [Boolean, nil] forwarded as the 'stream' body field
# @param request_options [Hash] (see Retab::Types::RequestOptions)
# @return [Retab::Extraction] built from the response body, with +last_response+ set
def create(
  document:,
  json_schema:,
  model: nil,
  image_resolution_dpi: nil,
  instructions: nil,
  n_consensus: nil,
  metadata: nil,
  additional_messages: nil,
  bust_cache: nil,
  stream: nil,
  request_options: {}
)
  document = Retab::MimeData.coerce(document) unless document.nil?
  body = {
    'document' => document,
    'json_schema' => json_schema,
    'model' => model,
    'image_resolution_dpi' => image_resolution_dpi,
    'instructions' => instructions,
    'n_consensus' => n_consensus,
    'metadata' => metadata,
    'additional_messages' => additional_messages,
    'bust_cache' => bust_cache,
    'stream' => stream
  }.compact
  response = @client.request(
    method: :post,
    path: '/v1/extractions',
    auth: true,
    body: body,
    request_options: request_options
  )
  result = Retab::Extraction.new(response.body)
  # Attach HTTP-level details of the call to the returned model.
  result.last_response = Retab::Types::ApiResponse.new(
    http_status: response.code.to_i,
    http_headers: response.each_header.to_h,
    request_id: response["x-request-id"]
  )
  result
end

#delete(extraction_id:, request_options: {}) ⇒ void

This method returns an undefined value.

Delete Extraction

Parameters:

  • extraction_id (String)
  • request_options (Hash) (defaults to: {})

    (see Retab::Types::RequestOptions)



159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/retab/extractions.rb', line 159

# Delete an extraction by sending DELETE /v1/extractions/{extraction_id}.
#
# The value returned by the client is not inspected; the method always
# returns nil.
#
# @param extraction_id [String] passed through Retab::Util.encode_path before
#   being interpolated into the request path
# @param request_options [Hash] (see Retab::Types::RequestOptions)
# @return [nil]
def delete(
  extraction_id:,
  request_options: {}
)
  @client.request(
    method: :delete,
    path: "/v1/extractions/#{Retab::Util.encode_path(extraction_id)}",
    auth: true,
    request_options: request_options
  )
  nil
end

#get(extraction_id:, request_options: {}) ⇒ Retab::Extraction

Get Extraction

Parameters:

  • extraction_id (String)
  • request_options (Hash) (defaults to: {})

    (see Retab::Types::RequestOptions)

Returns:

  • (Retab::Extraction)



140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/retab/extractions.rb', line 140

# Fetch a single extraction via GET /v1/extractions/{extraction_id}.
#
# @param extraction_id [String] passed through Retab::Util.encode_path before
#   being interpolated into the request path
# @param request_options [Hash] (see Retab::Types::RequestOptions)
# @return [Retab::Extraction] built from the response body, with +last_response+ set
def get(
  extraction_id:,
  request_options: {}
)
  encoded_id = Retab::Util.encode_path(extraction_id)
  http_response = @client.request(
    method: :get,
    path: "/v1/extractions/#{encoded_id}",
    auth: true,
    request_options: request_options
  )
  Retab::Extraction.new(http_response.body).tap do |extraction|
    # Attach HTTP-level details of the call to the returned model.
    extraction.last_response = Retab::Types::ApiResponse.new(
      http_status: http_response.code.to_i,
      http_headers: http_response.each_header.to_h,
      request_id: http_response["x-request-id"]
    )
  end
end

#list(before: nil, after: nil, limit: 10, order: 'desc', filename: nil, filename_regex: nil, filename_contains: nil, document_type: nil, from_date: nil, to_date: nil, metadata: nil, request_options: {}) ⇒ Retab::Types::ListStruct<Retab::Extraction>

List Extractions

Parameters:

  • before (String, nil) (defaults to: nil)
  • after (String, nil) (defaults to: nil)
  • limit (Integer, nil) (defaults to: 10)
  • order (Retab::Types::ExtractionsOrder, nil) (defaults to: 'desc')
  • filename (String, nil) (defaults to: nil)
  • filename_regex (String, nil) (defaults to: nil)

    Deprecated alias for prefix filename filtering. Regex patterns are rejected.

  • filename_contains (String, nil) (defaults to: nil)

    Plain-text filename search, powered by Atlas Search when available.

  • document_type (Array<String>, nil) (defaults to: nil)

    Filter by document type. Can be repeated. Accepted values: bmp, csv, doc, docm, docx, dotm, dotx, eml, gif, heic, heif, htm, html, jpeg, jpg, json, md, mhtml, msg, odp, ods, odt, ots, ott, pdf, png, ppt, pptx, rtf, svg, tif, tiff, tsv, txt, webp, xlam, xls, xlsb, xlsm, xlsx, xltm, xltx, xml, yaml, yml.

  • from_date (String, nil) (defaults to: nil)
  • to_date (String, nil) (defaults to: nil)
  • metadata (String, nil) (defaults to: nil)
  • request_options (Hash) (defaults to: {})

    (see Retab::Types::RequestOptions)

Returns:

  • (Retab::Types::ListStruct<Retab::Extraction>)



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/retab/extractions.rb', line 27

# List extractions via GET /v1/extractions.
#
# Filters left as nil are removed from the query parameters with
# Hash#compact. The returned list is given a +fetch_next+ lambda that
# re-issues the same query with +after+ replaced by the supplied cursor.
#
# @param before [String, nil] forwarded as the 'before' query parameter
# @param after [String, nil] forwarded as the 'after' query parameter
# @param limit [Integer, nil] forwarded as the 'limit' query parameter
# @param order [Retab::Types::ExtractionsOrder, nil] forwarded as the 'order' query parameter
# @param filename [String, nil] forwarded as the 'filename' query parameter
# @param filename_regex [String, nil] deprecated alias for prefix filename
#   filtering; regex patterns are rejected
# @param filename_contains [String, nil] plain-text filename search
# @param document_type [Array<String>, nil] filter by document type
# @param from_date [String, nil] forwarded as the 'from_date' query parameter
# @param to_date [String, nil] forwarded as the 'to_date' query parameter
# @param metadata [String, nil] forwarded as the 'metadata' query parameter
# @param request_options [Hash] (see Retab::Types::RequestOptions)
# @return [Retab::Types::ListStruct<Retab::Extraction>]
def list(
  before: nil,
  after: nil,
  limit: 10,
  order: 'desc',
  filename: nil,
  filename_regex: nil,
  filename_contains: nil,
  document_type: nil,
  from_date: nil,
  to_date: nil,
  metadata: nil,
  request_options: {}
)
  params = {
    'before' => before,
    'after' => after,
    'limit' => limit,
    'order' => order,
    'filename' => filename,
    'filename_regex' => filename_regex,
    'filename_contains' => filename_contains,
    'document_type' => document_type,
    'from_date' => from_date,
    'to_date' => to_date,
    'metadata' => metadata
  }.compact
  response = @client.request(
    method: :get,
    path: '/v1/extractions',
    auth: true,
    params: params,
    request_options: request_options
  )
  # Same query as this call, with only the +after+ cursor swapped.
  fetch_next = ->(cursor) {
    list(
      before: before,
      after: cursor,
      limit: limit,
      order: order,
      filename: filename,
      filename_regex: filename_regex,
      filename_contains: filename_contains,
      document_type: document_type,
      from_date: from_date,
      to_date: to_date,
      metadata: metadata,
      request_options: request_options
    )
  }
  # +after+ is deliberately not part of the filters handed to the list struct.
  Retab::Types::ListStruct.from_response(
    response,
    model: Retab::Extraction,
    filters: {
      before: before,
      limit: limit,
      order: order,
      filename: filename,
      filename_regex: filename_regex,
      filename_contains: filename_contains,
      document_type: document_type,
      from_date: from_date,
      to_date: to_date,
      metadata: metadata
    },
    fetch_next: fetch_next
  )
end

#sources(extraction_id:, request_options: {}) ⇒ Retab::SourcesResponse

Get Extraction Sources

Parameters:

  • extraction_id (String)
  • request_options (Hash) (defaults to: {})

    (see Retab::Types::RequestOptions)

Returns:

  • (Retab::SourcesResponse)



176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/retab/extractions.rb', line 176

# Fetch the sources of an extraction via
# GET /v1/extractions/{extraction_id}/sources.
#
# @param extraction_id [String] passed through Retab::Util.encode_path before
#   being interpolated into the request path
# @param request_options [Hash] (see Retab::Types::RequestOptions)
# @return [Retab::SourcesResponse] built from the response body, with +last_response+ set
def sources(
  extraction_id:,
  request_options: {}
)
  encoded_id = Retab::Util.encode_path(extraction_id)
  http_response = @client.request(
    method: :get,
    path: "/v1/extractions/#{encoded_id}/sources",
    auth: true,
    request_options: request_options
  )
  Retab::SourcesResponse.new(http_response.body).tap do |sources_response|
    # Attach HTTP-level details of the call to the returned model.
    sources_response.last_response = Retab::Types::ApiResponse.new(
      http_status: http_response.code.to_i,
      http_headers: http_response.each_header.to_h,
      request_id: http_response["x-request-id"]
    )
  end
end