Class: ContextDev::Models::WebWebCrawlMdParams

Inherits:
Internal::Type::BaseModel show all
Extended by:
Internal::Type::RequestParameters::Converter
Includes:
Internal::Type::RequestParameters
Defined in:
lib/context_dev/models/web_web_crawl_md_params.rb

Overview

Defined Under Namespace

Classes: Pdf

Instance Attribute Summary collapse

Attributes included from Internal::Type::RequestParameters

#request_options

Method Summary

Methods included from Internal::Type::RequestParameters::Converter

dump_request

Methods included from Internal::Type::RequestParameters

included

Methods inherited from Internal::Type::BaseModel

==, #==, #[], coerce, #deconstruct_keys, #deep_to_h, dump, fields, hash, #hash, inherited, #initialize, inspect, #inspect, known_fields, optional, recursively_to_h, required, #to_h, #to_json, #to_s, to_sorbet_type, #to_yaml

Methods included from Internal::Type::Converter

#coerce, coerce, #dump, dump, #inspect, inspect, meta_info, new_coerce_state, type_info

Methods included from Internal::Util::SorbetRuntimeSupport

#const_missing, #define_sorbet_constant!, #sorbet_constant_defined?, #to_sorbet_type, to_sorbet_type

Constructor Details

This class inherits a constructor from ContextDev::Internal::Type::BaseModel

Instance Attribute Details

#follow_subdomains ⇒ Boolean?

When true, follow links on subdomains of the starting URL’s domain (e.g. docs.example.com when starting from example.com). www and apex are always treated as equivalent.

Returns:

  • (Boolean, nil)


22
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 22

optional :follow_subdomains, ContextDev::Internal::Type::Boolean, api_name: :followSubdomains

#include_frames ⇒ Boolean?

When true, the contents of iframes are rendered to Markdown for each crawled page.

Returns:

  • (Boolean, nil)


29
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 29

optional :include_frames, ContextDev::Internal::Type::Boolean, api_name: :includeFrames

#include_images ⇒ Boolean?

Include image references in the Markdown output

Returns:

  • (Boolean, nil)


35
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 35

optional :include_images, ContextDev::Internal::Type::Boolean, api_name: :includeImages

#include_links ⇒ Boolean?

Preserve hyperlinks in the Markdown output

Returns:

  • (Boolean, nil)


41
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 41

optional :include_links, ContextDev::Internal::Type::Boolean, api_name: :includeLinks

#max_age_ms ⇒ Integer?

Return a cached result if a prior scrape for the same parameters exists and is younger than this many milliseconds. Defaults to 1 day (86400000 ms) when omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.

Returns:

  • (Integer, nil)


49
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 49

optional :max_age_ms, Integer, api_name: :maxAgeMs

#max_depth ⇒ Integer?

Maximum link depth from the starting URL (0 = only the starting page)

Returns:

  • (Integer, nil)


55
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 55

optional :max_depth, Integer, api_name: :maxDepth

#max_pages ⇒ Integer?

Maximum number of pages to crawl. Hard cap: 500.

Returns:

  • (Integer, nil)


61
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 61

optional :max_pages, Integer, api_name: :maxPages

#pdf ⇒ ContextDev::Models::WebWebCrawlMdParams::Pdf?

PDF parsing controls. Use start/end to limit text extraction and OCR to an inclusive 1-based page range.



68
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 68

optional :pdf, -> { ContextDev::WebWebCrawlMdParams::Pdf }

#shorten_base64_images ⇒ Boolean?

Truncate base64-encoded image data in the Markdown output

Returns:

  • (Boolean, nil)


74
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 74

optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images

#stop_after_ms ⇒ Integer?

Soft time budget for the crawl in milliseconds. After each scrape, the crawler checks the elapsed time and, if exceeded, returns the pages collected so far instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2 min).

Returns:

  • (Integer, nil)


83
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 83

optional :stop_after_ms, Integer, api_name: :stopAfterMs

#timeout_ms ⇒ Integer?

Optional timeout in milliseconds for the request. If the request takes longer than this value, it will be aborted with a 408 status code. Maximum allowed value is 300000ms (5 minutes).

Returns:

  • (Integer, nil)


91
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 91

optional :timeout_ms, Integer, api_name: :timeoutMS

#url ⇒ String

The starting URL for the crawl (must include http:// or https:// protocol)

Returns:

  • (String)


14
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 14

required :url, String

#url_regex ⇒ String?

Regex pattern. Only URLs matching this pattern will be followed and scraped.

Returns:

  • (String, nil)


97
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 97

optional :url_regex, String, api_name: :urlRegex

#use_main_content_only ⇒ Boolean?

Extract only the main content, stripping headers, footers, sidebars, and navigation

Returns:

  • (Boolean, nil)


104
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 104

optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly

#wait_for_ms ⇒ Integer?

Optional browser wait time in milliseconds after initial page load for each crawled page. Min: 0. Max: 30000 (30 seconds).

Returns:

  • (Integer, nil)


111
# File 'lib/context_dev/models/web_web_crawl_md_params.rb', line 111

optional :wait_for_ms, Integer, api_name: :waitForMs