Kreuzcrawl.CrawlConfig (kreuzcrawl v0.3.0-rc.43)

Copy Markdown

Configuration for crawl, scrape, and map operations.

Summary

Types

t()

Configuration for crawl, scrape, and map operations.

Types

t()

# Struct type for the configuration used by crawl, scrape, and map operations.
@type t() :: %Kreuzcrawl.CrawlConfig{
  allow_subdomains: boolean(),
  # NOTE(review): as written, the list *elements* may be nil, not the list
  # itself — confirm this is intended (other list fields use [String.t()]).
  asset_types: [String.t() | nil],
  # Was `String.t() | nil | nil`; the duplicated union member is redundant.
  auth: String.t() | nil,
  browser: map(),
  browser_profile: String.t() | nil,
  capture_screenshot: boolean(),
  content: map(),
  cookies_enabled: boolean(),
  custom_headers: map(),
  document_max_size: non_neg_integer() | nil,
  document_mime_types: [String.t()],
  download_assets: boolean(),
  download_documents: boolean(),
  exclude_paths: [String.t()],
  include_paths: [String.t()],
  map_limit: non_neg_integer() | nil,
  map_search: String.t() | nil,
  max_asset_size: non_neg_integer() | nil,
  max_body_size: non_neg_integer() | nil,
  max_concurrent: non_neg_integer() | nil,
  max_depth: non_neg_integer() | nil,
  max_pages: non_neg_integer() | nil,
  max_redirects: non_neg_integer(),
  proxy: map() | nil,
  rate_limit_ms: non_neg_integer() | nil,
  remove_tags: [String.t()],
  request_timeout: non_neg_integer(),
  respect_robots_txt: boolean(),
  retry_codes: [non_neg_integer()],
  retry_count: non_neg_integer(),
  save_browser_profile: boolean(),
  soft_http_errors: boolean(),
  stay_on_domain: boolean(),
  user_agent: String.t() | nil,
  user_agents: [String.t()],
  warc_output: String.t() | nil
}

Configuration for crawl, scrape, and map operations.