From 6191590a10a54703475ed0628eca5235ee5ce7bf Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 11:59:57 -0700 Subject: [PATCH 1/6] feat(brightdata): add Bright Data integration with 8 tools Add complete Bright Data integration supporting Web Unlocker, SERP API, Discover API, and Web Scraper dataset operations. Includes scrape URL, SERP search, discover, sync scrape, scrape dataset, snapshot status, download snapshot, and cancel snapshot tools. Co-Authored-By: Claude Opus 4.6 --- apps/docs/components/icons.tsx | 15 + apps/docs/components/ui/icon-mapping.ts | 2 + .../docs/content/docs/en/tools/brightdata.mdx | 201 ++++++++++ apps/docs/content/docs/en/tools/meta.json | 1 + .../integrations/data/icon-mapping.ts | 2 + .../integrations/data/integrations.json | 51 +++ apps/sim/blocks/blocks/brightdata.ts | 346 ++++++++++++++++++ apps/sim/blocks/registry.ts | 2 + apps/sim/components/icons.tsx | 15 + apps/sim/tools/brightdata/cancel_snapshot.ts | 67 ++++ apps/sim/tools/brightdata/discover.ts | 158 ++++++++ .../sim/tools/brightdata/download_snapshot.ts | 116 ++++++ apps/sim/tools/brightdata/index.ts | 19 + apps/sim/tools/brightdata/scrape_dataset.ts | 97 +++++ apps/sim/tools/brightdata/scrape_url.ts | 103 ++++++ apps/sim/tools/brightdata/serp_search.ts | 181 +++++++++ apps/sim/tools/brightdata/snapshot_status.ts | 74 ++++ apps/sim/tools/brightdata/sync_scrape.ts | 131 +++++++ apps/sim/tools/brightdata/types.ts | 145 ++++++++ apps/sim/tools/registry.ts | 18 + 20 files changed, 1744 insertions(+) create mode 100644 apps/docs/content/docs/en/tools/brightdata.mdx create mode 100644 apps/sim/blocks/blocks/brightdata.ts create mode 100644 apps/sim/tools/brightdata/cancel_snapshot.ts create mode 100644 apps/sim/tools/brightdata/discover.ts create mode 100644 apps/sim/tools/brightdata/download_snapshot.ts create mode 100644 apps/sim/tools/brightdata/index.ts create mode 100644 apps/sim/tools/brightdata/scrape_dataset.ts create mode 100644 
apps/sim/tools/brightdata/scrape_url.ts create mode 100644 apps/sim/tools/brightdata/serp_search.ts create mode 100644 apps/sim/tools/brightdata/snapshot_status.ts create mode 100644 apps/sim/tools/brightdata/sync_scrape.ts create mode 100644 apps/sim/tools/brightdata/types.ts diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index bde3013c6a..d7ae05105d 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -2087,6 +2087,21 @@ export function BrandfetchIcon(props: SVGProps) { ) } +export function BrightDataIcon(props: SVGProps) { + return ( + + + + + ) +} + export function BrowserUseIcon(props: SVGProps) { return ( = { attio: AttioIcon, box: BoxCompanyIcon, brandfetch: BrandfetchIcon, + brightdata: BrightDataIcon, browser_use: BrowserUseIcon, calcom: CalComIcon, calendly: CalendlyIcon, diff --git a/apps/docs/content/docs/en/tools/brightdata.mdx b/apps/docs/content/docs/en/tools/brightdata.mdx new file mode 100644 index 0000000000..49ede0ef4a --- /dev/null +++ b/apps/docs/content/docs/en/tools/brightdata.mdx @@ -0,0 +1,201 @@ +--- +title: Bright Data +description: Scrape websites, search engines, and extract structured data +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + + + +## Usage Instructions + +Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction. + + + +## Tools + +### `brightdata_scrape_url` + +Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `zone` | string | Yes | Web Unlocker zone name from your Bright Data dashboard \(e.g., "web_unlocker1"\) | +| `url` | string | Yes | The URL to scrape \(e.g., "https://example.com/page"\) | +| `format` | string | No | Response format: "raw" for HTML or "json" for parsed content. Defaults to "raw" | +| `country` | string | No | Two-letter country code for geo-targeting \(e.g., "us", "gb"\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `content` | string | The scraped page content \(HTML or JSON depending on format\) | +| `url` | string | The URL that was scraped | +| `statusCode` | number | HTTP status code of the response | + +### `brightdata_serp_search` + +Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `zone` | string | Yes | SERP API zone name from your Bright Data dashboard \(e.g., "serp_api1"\) | +| `query` | string | Yes | The search query \(e.g., "best project management tools"\) | +| `searchEngine` | string | No | Search engine to use: "google", "bing", "duckduckgo", or "yandex". Defaults to "google" | +| `country` | string | No | Two-letter country code for localized results \(e.g., "us", "gb"\) | +| `language` | string | No | Two-letter language code \(e.g., "en", "es"\) | +| `numResults` | number | No | Number of results to return \(e.g., 10, 20\). 
Defaults to 10 | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `results` | array | Array of search results | +| ↳ `title` | string | Title of the search result | +| ↳ `url` | string | URL of the search result | +| ↳ `description` | string | Snippet or description of the result | +| ↳ `rank` | number | Position in search results | +| `query` | string | The search query that was executed | +| `searchEngine` | string | The search engine that was used | + +### `brightdata_discover` + +AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `query` | string | Yes | The search query \(e.g., "competitor pricing changes enterprise plan"\) | +| `numResults` | number | No | Number of results to return, up to 1000. Defaults to 10 | +| `intent` | string | No | Describes what the agent is trying to accomplish, used to rank results by relevance \(e.g., "find official pricing pages and change notes"\) | +| `includeContent` | boolean | No | Whether to include cleaned page content in results | +| `format` | string | No | Response format: "json" or "markdown". Defaults to "json" | +| `language` | string | No | Search language code \(e.g., "en", "es", "fr"\). 
Defaults to "en" | +| `country` | string | No | Two-letter ISO country code for localized results \(e.g., "us", "gb"\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `results` | array | Array of discovered web results ranked by intent relevance | +| ↳ `url` | string | URL of the discovered page | +| ↳ `title` | string | Page title | +| ↳ `description` | string | Page description or snippet | +| ↳ `relevanceScore` | number | AI-calculated relevance score for intent-based ranking | +| ↳ `content` | string | Cleaned page content in the requested format \(when includeContent is true\) | +| `query` | string | The search query that was executed | +| `totalResults` | number | Total number of results returned | + +### `brightdata_sync_scrape` + +Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) | +| `urls` | string | Yes | JSON array of URL objects to scrape, up to 20 \(e.g., \[\{"url": "https://example.com/product"\}\]\) | +| `format` | string | No | Output format: "json", "ndjson", or "csv". 
Defaults to "json" | +| `includeErrors` | boolean | No | Whether to include error reports in results | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `data` | array | Array of scraped result objects with fields specific to the dataset scraper used | +| `snapshotId` | string | Snapshot ID returned if the request exceeded the 1-minute timeout and switched to async processing | +| `isAsync` | boolean | Whether the request fell back to async mode \(true means use snapshot ID to retrieve results\) | + +### `brightdata_scrape_dataset` + +Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more. + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) | +| `urls` | string | Yes | JSON array of URL objects to scrape \(e.g., \[\{"url": "https://example.com/product"\}\]\) | +| `format` | string | No | Output format: "json" or "csv". Defaults to "json" | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `snapshotId` | string | The snapshot ID to retrieve results later | +| `status` | string | Status of the scraping job \(e.g., "triggered", "running"\) | + +### `brightdata_snapshot_status` + +Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `snapshotId` | string | The snapshot ID that was queried | +| `datasetId` | string | The dataset ID associated with this snapshot | +| `status` | string | Current status of the snapshot: "starting", "running", "ready", or "failed" | + +### `brightdata_download_snapshot` + +Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status "ready". + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) | +| `format` | string | No | Output format: "json", "ndjson", "jsonl", or "csv". Defaults to "json" | +| `compress` | boolean | No | Whether to compress the results | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `data` | array | Array of scraped result records | +| `format` | string | The content type of the downloaded data | +| `snapshotId` | string | The snapshot ID that was downloaded | + +### `brightdata_cancel_snapshot` + +Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress. 
+ +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `apiKey` | string | Yes | Bright Data API token | +| `snapshotId` | string | Yes | The snapshot ID of the collection to cancel \(e.g., "s_m4x7enmven8djfqak"\) | + +#### Output + +| Parameter | Type | Description | +| --------- | ---- | ----------- | +| `snapshotId` | string | The snapshot ID that was cancelled | +| `cancelled` | boolean | Whether the cancellation was successful | + + diff --git a/apps/docs/content/docs/en/tools/meta.json b/apps/docs/content/docs/en/tools/meta.json index d11a49bfd7..6f77170985 100644 --- a/apps/docs/content/docs/en/tools/meta.json +++ b/apps/docs/content/docs/en/tools/meta.json @@ -18,6 +18,7 @@ "attio", "box", "brandfetch", + "brightdata", "browser_use", "calcom", "calendly", diff --git a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts index 1eb1d47659..0dec44c268 100644 --- a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts +++ b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts @@ -23,6 +23,7 @@ import { BoxCompanyIcon, BrainIcon, BrandfetchIcon, + BrightDataIcon, BrowserUseIcon, CalComIcon, CalendlyIcon, @@ -215,6 +216,7 @@ export const blockTypeToIconMap: Record = { attio: AttioIcon, box: BoxCompanyIcon, brandfetch: BrandfetchIcon, + brightdata: BrightDataIcon, browser_use: BrowserUseIcon, calcom: CalComIcon, calendly: CalendlyIcon, diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index ec437abe8d..ac09e122fc 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -1743,6 +1743,57 @@ "integrationTypes": ["sales", "analytics"], "tags": ["enrichment", "marketing"] }, + { + "type": "brightdata", + "slug": "bright-data", + "name": "Bright Data", + "description": "Scrape 
websites, search engines, and extract structured data", + "longDescription": "Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.", + "bgColor": "#FFFFFF", + "iconName": "BrightDataIcon", + "docsUrl": "https://docs.sim.ai/tools/brightdata", + "operations": [ + { + "name": "Scrape URL", + "description": "Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically." + }, + { + "name": "SERP Search", + "description": "Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API." + }, + { + "name": "Discover", + "description": "AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification." + }, + { + "name": "Sync Scrape", + "description": "Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout." + }, + { + "name": "Scrape Dataset", + "description": "Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more." + }, + { + "name": "Snapshot Status", + "description": "Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed." + }, + { + "name": "Download Snapshot", + "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status \"ready\"." + }, + { + "name": "Cancel Snapshot", + "description": "Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress." 
+ } + ], + "operationCount": 8, + "triggers": [], + "triggerCount": 0, + "authType": "api-key", + "category": "tools", + "integrationTypes": ["search", "developer-tools"], + "tags": ["web-scraping", "automation"] + }, { "type": "browser_use", "slug": "browser-use", diff --git a/apps/sim/blocks/blocks/brightdata.ts b/apps/sim/blocks/blocks/brightdata.ts new file mode 100644 index 0000000000..ffc0dc1c38 --- /dev/null +++ b/apps/sim/blocks/blocks/brightdata.ts @@ -0,0 +1,346 @@ +import { BrightDataIcon } from '@/components/icons' +import type { BlockConfig } from '@/blocks/types' +import { AuthMode, IntegrationType } from '@/blocks/types' +import type { BrightDataResponse } from '@/tools/brightdata/types' + +export const BrightDataBlock: BlockConfig = { + type: 'brightdata', + name: 'Bright Data', + description: 'Scrape websites, search engines, and extract structured data', + authMode: AuthMode.ApiKey, + longDescription: + 'Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.', + docsLink: 'https://docs.sim.ai/tools/brightdata', + category: 'tools', + integrationType: IntegrationType.Search, + tags: ['web-scraping', 'automation'], + bgColor: '#FFFFFF', + icon: BrightDataIcon, + subBlocks: [ + { + id: 'operation', + title: 'Operation', + type: 'dropdown', + options: [ + { label: 'Scrape URL', id: 'scrape_url' }, + { label: 'SERP Search', id: 'serp_search' }, + { label: 'Discover', id: 'discover' }, + { label: 'Sync Scrape', id: 'sync_scrape' }, + { label: 'Scrape Dataset', id: 'scrape_dataset' }, + { label: 'Snapshot Status', id: 'snapshot_status' }, + { label: 'Download Snapshot', id: 'download_snapshot' }, + { label: 'Cancel Snapshot', id: 'cancel_snapshot' }, + ], + value: () => 'scrape_url', + }, + { + id: 'zone', + title: 'Zone', + type: 'short-input', + placeholder: 'e.g., web_unlocker1', + 
condition: { field: 'operation', value: ['scrape_url', 'serp_search'] }, + required: { field: 'operation', value: ['scrape_url', 'serp_search'] }, + }, + { + id: 'url', + title: 'URL', + type: 'short-input', + placeholder: 'https://example.com/page', + condition: { field: 'operation', value: 'scrape_url' }, + required: { field: 'operation', value: 'scrape_url' }, + }, + { + id: 'format', + title: 'Format', + type: 'dropdown', + options: [ + { label: 'Raw HTML', id: 'raw' }, + { label: 'JSON', id: 'json' }, + ], + value: () => 'raw', + condition: { field: 'operation', value: 'scrape_url' }, + }, + { + id: 'country', + title: 'Country', + type: 'short-input', + placeholder: 'e.g., us, gb', + mode: 'advanced', + condition: { field: 'operation', value: ['scrape_url', 'serp_search', 'discover'] }, + }, + { + id: 'query', + title: 'Search Query', + type: 'short-input', + placeholder: 'e.g., best project management tools', + condition: { field: 'operation', value: 'serp_search' }, + required: { field: 'operation', value: 'serp_search' }, + }, + { + id: 'searchEngine', + title: 'Search Engine', + type: 'dropdown', + options: [ + { label: 'Google', id: 'google' }, + { label: 'Bing', id: 'bing' }, + { label: 'DuckDuckGo', id: 'duckduckgo' }, + { label: 'Yandex', id: 'yandex' }, + ], + value: () => 'google', + condition: { field: 'operation', value: 'serp_search' }, + }, + { + id: 'language', + title: 'Language', + type: 'short-input', + placeholder: 'e.g., en, es', + mode: 'advanced', + condition: { field: 'operation', value: ['serp_search', 'discover'] }, + }, + { + id: 'numResults', + title: 'Number of Results', + type: 'short-input', + placeholder: '10', + mode: 'advanced', + condition: { field: 'operation', value: ['serp_search', 'discover'] }, + }, + { + id: 'discoverQuery', + title: 'Search Query', + type: 'short-input', + placeholder: 'e.g., competitor pricing changes', + condition: { field: 'operation', value: 'discover' }, + required: { field: 'operation', value: 
'discover' }, + }, + { + id: 'intent', + title: 'Intent', + type: 'long-input', + placeholder: + 'Describe what you are looking for (e.g., "find official pricing pages and change notes")', + condition: { field: 'operation', value: 'discover' }, + }, + { + id: 'includeContent', + title: 'Include Page Content', + type: 'switch', + mode: 'advanced', + condition: { field: 'operation', value: 'discover' }, + }, + { + id: 'contentFormat', + title: 'Response Format', + type: 'dropdown', + options: [ + { label: 'JSON', id: 'json' }, + { label: 'Markdown', id: 'markdown' }, + ], + value: () => 'json', + mode: 'advanced', + condition: { field: 'operation', value: 'discover' }, + }, + { + id: 'syncDatasetId', + title: 'Dataset ID', + type: 'short-input', + placeholder: 'e.g., gd_l1viktl72bvl7bjuj0', + condition: { field: 'operation', value: 'sync_scrape' }, + required: { field: 'operation', value: 'sync_scrape' }, + }, + { + id: 'syncUrls', + title: 'URLs (max 20)', + type: 'long-input', + placeholder: '[{"url": "https://example.com/product"}]', + condition: { field: 'operation', value: 'sync_scrape' }, + required: { field: 'operation', value: 'sync_scrape' }, + }, + { + id: 'syncFormat', + title: 'Output Format', + type: 'dropdown', + options: [ + { label: 'JSON', id: 'json' }, + { label: 'NDJSON', id: 'ndjson' }, + { label: 'CSV', id: 'csv' }, + ], + value: () => 'json', + condition: { field: 'operation', value: 'sync_scrape' }, + }, + { + id: 'datasetId', + title: 'Dataset ID', + type: 'short-input', + placeholder: 'e.g., gd_l1viktl72bvl7bjuj0', + condition: { field: 'operation', value: 'scrape_dataset' }, + required: { field: 'operation', value: 'scrape_dataset' }, + }, + { + id: 'urls', + title: 'URLs', + type: 'long-input', + placeholder: '[{"url": "https://example.com/product"}]', + condition: { field: 'operation', value: 'scrape_dataset' }, + required: { field: 'operation', value: 'scrape_dataset' }, + }, + { + id: 'datasetFormat', + title: 'Output Format', + type: 
'dropdown', + options: [ + { label: 'JSON', id: 'json' }, + { label: 'CSV', id: 'csv' }, + ], + value: () => 'json', + condition: { field: 'operation', value: 'scrape_dataset' }, + }, + { + id: 'snapshotId', + title: 'Snapshot ID', + type: 'short-input', + placeholder: 'e.g., s_m4x7enmven8djfqak', + condition: { + field: 'operation', + value: ['snapshot_status', 'download_snapshot', 'cancel_snapshot'], + }, + required: { + field: 'operation', + value: ['snapshot_status', 'download_snapshot', 'cancel_snapshot'], + }, + }, + { + id: 'downloadFormat', + title: 'Download Format', + type: 'dropdown', + options: [ + { label: 'JSON', id: 'json' }, + { label: 'NDJSON', id: 'ndjson' }, + { label: 'CSV', id: 'csv' }, + ], + value: () => 'json', + condition: { field: 'operation', value: 'download_snapshot' }, + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input', + placeholder: 'Enter your Bright Data API token', + password: true, + required: true, + }, + ], + tools: { + access: [ + 'brightdata_scrape_url', + 'brightdata_serp_search', + 'brightdata_discover', + 'brightdata_sync_scrape', + 'brightdata_scrape_dataset', + 'brightdata_snapshot_status', + 'brightdata_download_snapshot', + 'brightdata_cancel_snapshot', + ], + config: { + tool: (params) => `brightdata_${params.operation}`, + params: (params) => { + const result: Record = { apiKey: params.apiKey } + + switch (params.operation) { + case 'scrape_url': + result.zone = params.zone + result.url = params.url + if (params.format) result.format = params.format + if (params.country) result.country = params.country + break + + case 'serp_search': + result.zone = params.zone + result.query = params.query + if (params.searchEngine) result.searchEngine = params.searchEngine + if (params.country) result.country = params.country + if (params.language) result.language = params.language + if (params.numResults) result.numResults = Number(params.numResults) + break + + case 'discover': + result.query = 
params.discoverQuery + if (params.numResults) result.numResults = Number(params.numResults) + if (params.intent) result.intent = params.intent + if (params.includeContent != null) result.includeContent = params.includeContent + if (params.contentFormat) result.format = params.contentFormat + if (params.language) result.language = params.language + if (params.country) result.country = params.country + break + + case 'sync_scrape': + result.datasetId = params.syncDatasetId + result.urls = params.syncUrls + if (params.syncFormat) result.format = params.syncFormat + break + + case 'scrape_dataset': + result.datasetId = params.datasetId + result.urls = params.urls + if (params.datasetFormat) result.format = params.datasetFormat + break + + case 'snapshot_status': + result.snapshotId = params.snapshotId + break + + case 'download_snapshot': + result.snapshotId = params.snapshotId + if (params.downloadFormat) result.format = params.downloadFormat + break + + case 'cancel_snapshot': + result.snapshotId = params.snapshotId + break + } + + return result + }, + }, + }, + inputs: { + operation: { type: 'string', description: 'Operation to perform' }, + apiKey: { type: 'string', description: 'Bright Data API token' }, + zone: { type: 'string', description: 'Bright Data zone name' }, + url: { type: 'string', description: 'URL to scrape' }, + format: { type: 'string', description: 'Response format' }, + country: { type: 'string', description: 'Country code for geo-targeting' }, + query: { type: 'string', description: 'Search query' }, + searchEngine: { type: 'string', description: 'Search engine to use' }, + language: { type: 'string', description: 'Language code' }, + numResults: { type: 'number', description: 'Number of results' }, + discoverQuery: { type: 'string', description: 'Discover search query' }, + intent: { type: 'string', description: 'Intent for ranking results' }, + includeContent: { type: 'boolean', description: 'Include page content in discover results' }, + 
contentFormat: { type: 'string', description: 'Content format for discover results' }, + syncDatasetId: { type: 'string', description: 'Dataset scraper ID for sync scrape' }, + syncUrls: { type: 'string', description: 'JSON array of URL objects for sync scrape' }, + syncFormat: { type: 'string', description: 'Output format for sync scrape' }, + datasetId: { type: 'string', description: 'Dataset scraper ID' }, + urls: { type: 'string', description: 'JSON array of URL objects to scrape' }, + datasetFormat: { type: 'string', description: 'Dataset output format' }, + snapshotId: { type: 'string', description: 'Snapshot ID for status/download/cancel' }, + downloadFormat: { type: 'string', description: 'Download output format' }, + }, + outputs: { + content: { type: 'string', description: 'Scraped page content' }, + url: { type: 'string', description: 'URL that was scraped' }, + statusCode: { type: 'number', description: 'HTTP status code' }, + results: { type: 'json', description: 'Search or discover results array' }, + query: { type: 'string', description: 'Search query executed' }, + searchEngine: { type: 'string', description: 'Search engine used' }, + totalResults: { type: 'number', description: 'Total number of discover results' }, + data: { type: 'json', description: 'Scraped data records' }, + snapshotId: { type: 'string', description: 'Snapshot ID' }, + isAsync: { type: 'boolean', description: 'Whether sync scrape fell back to async' }, + status: { type: 'string', description: 'Job status' }, + datasetId: { type: 'string', description: 'Dataset ID of the snapshot' }, + format: { type: 'string', description: 'Content type of downloaded data' }, + cancelled: { type: 'boolean', description: 'Whether cancellation was successful' }, + }, +} diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 4ab0d88a16..270bea945c 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -18,6 +18,7 @@ import { AthenaBlock } from 
'@/blocks/blocks/athena' import { AttioBlock } from '@/blocks/blocks/attio' import { BoxBlock } from '@/blocks/blocks/box' import { BrandfetchBlock } from '@/blocks/blocks/brandfetch' +import { BrightDataBlock } from '@/blocks/blocks/brightdata' import { BrowserUseBlock } from '@/blocks/blocks/browser_use' import { CalComBlock } from '@/blocks/blocks/calcom' import { CalendlyBlock } from '@/blocks/blocks/calendly' @@ -245,6 +246,7 @@ export const registry: Record = { athena: AthenaBlock, attio: AttioBlock, brandfetch: BrandfetchBlock, + brightdata: BrightDataBlock, box: BoxBlock, browser_use: BrowserUseBlock, calcom: CalComBlock, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index bde3013c6a..d7ae05105d 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -2087,6 +2087,21 @@ export function BrandfetchIcon(props: SVGProps) { ) } +export function BrightDataIcon(props: SVGProps) { + return ( + + + + + ) +} + export function BrowserUseIcon(props: SVGProps) { return ( = { + id: 'brightdata_cancel_snapshot', + name: 'Bright Data Cancel Snapshot', + description: + 'Cancel an active Bright Data scraping job using its snapshot ID. 
Terminates data collection in progress.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + snapshotId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The snapshot ID of the collection to cancel (e.g., "s_m4x7enmven8djfqak")', + }, + }, + + request: { + method: 'POST', + url: (params) => + `https://api.brightdata.com/datasets/v3/snapshot/${params.snapshotId?.trim()}/cancel`, + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Cancel snapshot failed with status ${response.status}`) + } + + return { + success: true, + output: { + snapshotId: null, + cancelled: true, + }, + } + }, + + outputs: { + snapshotId: { + type: 'string', + description: 'The snapshot ID that was cancelled', + optional: true, + }, + cancelled: { + type: 'boolean', + description: 'Whether the cancellation was successful', + }, + }, +} diff --git a/apps/sim/tools/brightdata/discover.ts b/apps/sim/tools/brightdata/discover.ts new file mode 100644 index 0000000000..8f0f8ced6b --- /dev/null +++ b/apps/sim/tools/brightdata/discover.ts @@ -0,0 +1,158 @@ +import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataDiscoverTool: ToolConfig< + BrightDataDiscoverParams, + BrightDataDiscoverResponse +> = { + id: 'brightdata_discover', + name: 'Bright Data Discover', + description: + 'AI-powered web discovery that finds and ranks results by intent. 
Returns up to 1,000 results with optional cleaned page content for RAG and verification.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + query: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The search query (e.g., "competitor pricing changes enterprise plan")', + }, + numResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Number of results to return, up to 1000. Defaults to 10', + }, + intent: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'Describes what the agent is trying to accomplish, used to rank results by relevance (e.g., "find official pricing pages and change notes")', + }, + includeContent: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to include cleaned page content in results', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Response format: "json" or "markdown". Defaults to "json"', + }, + language: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Search language code (e.g., "en", "es", "fr"). 
Defaults to "en"', + }, + country: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Two-letter ISO country code for localized results (e.g., "us", "gb")', + }, + }, + + request: { + method: 'POST', + url: 'https://api.brightdata.com/discover', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + const body: Record = { + query: params.query, + } + if (params.numResults) body.num_results = params.numResults + if (params.intent) body.intent = params.intent + if (params.includeContent != null) body.include_content = params.includeContent + if (params.format) body.format = params.format + if (params.language) body.language = params.language + if (params.country) body.country = params.country + return body + }, + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Discover request failed with status ${response.status}`) + } + + const data = await response.json() + + let results: Array<{ + url: string | null + title: string | null + description: string | null + relevanceScore: number | null + content: string | null + }> = [] + + const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? []) + + if (Array.isArray(items)) { + results = items.map((item: Record) => ({ + url: (item.link as string) ?? (item.url as string) ?? null, + title: (item.title as string) ?? null, + description: (item.description as string) ?? (item.snippet as string) ?? null, + relevanceScore: (item.relevance_score as number) ?? null, + content: + (item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? 
null, + })) + } + + return { + success: true, + output: { + results, + query: null, + totalResults: results.length, + }, + } + }, + + outputs: { + results: { + type: 'array', + description: 'Array of discovered web results ranked by intent relevance', + items: { + type: 'object', + description: 'A discovered result', + properties: { + url: { type: 'string', description: 'URL of the discovered page', optional: true }, + title: { type: 'string', description: 'Page title', optional: true }, + description: { + type: 'string', + description: 'Page description or snippet', + optional: true, + }, + relevanceScore: { + type: 'number', + description: 'AI-calculated relevance score for intent-based ranking', + optional: true, + }, + content: { + type: 'string', + description: + 'Cleaned page content in the requested format (when includeContent is true)', + optional: true, + }, + }, + }, + }, + query: { type: 'string', description: 'The search query that was executed', optional: true }, + totalResults: { type: 'number', description: 'Total number of results returned' }, + }, +} diff --git a/apps/sim/tools/brightdata/download_snapshot.ts b/apps/sim/tools/brightdata/download_snapshot.ts new file mode 100644 index 0000000000..28a0212eb9 --- /dev/null +++ b/apps/sim/tools/brightdata/download_snapshot.ts @@ -0,0 +1,116 @@ +import type { + BrightDataDownloadSnapshotParams, + BrightDataDownloadSnapshotResponse, +} from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataDownloadSnapshotTool: ToolConfig< + BrightDataDownloadSnapshotParams, + BrightDataDownloadSnapshotResponse +> = { + id: 'brightdata_download_snapshot', + name: 'Bright Data Download Snapshot', + description: + 'Download the results of a completed Bright Data scraping job using its snapshot ID. 
The snapshot must have status "ready".', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + snapshotId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'The snapshot ID returned when the collection was triggered (e.g., "s_m4x7enmven8djfqak")', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Output format: "json", "ndjson", "jsonl", or "csv". Defaults to "json"', + }, + compress: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to compress the results', + }, + }, + + request: { + method: 'GET', + url: (params) => { + const queryParams = new URLSearchParams() + if (params.format) queryParams.set('format', params.format) + if (params.compress) queryParams.set('compress', 'true') + const qs = queryParams.toString() + return `https://api.brightdata.com/datasets/v3/snapshot/${params.snapshotId?.trim()}${qs ? `?${qs}` : ''}` + }, + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + if (response.status === 409) { + throw new Error( + 'Snapshot is not ready for download. Check the snapshot status first and wait until it is "ready".' + ) + } + + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Snapshot download failed with status ${response.status}`) + } + + const contentType = response.headers.get('content-type') || '' + let data: Array> + + if (contentType.includes('application/json')) { + const parsed = await response.json() + data = Array.isArray(parsed) ? parsed : [parsed] + } else { + const text = await response.text() + try { + const parsed = JSON.parse(text) + data = Array.isArray(parsed) ? 
parsed : [parsed] + } catch { + data = [{ raw: text }] + } + } + + return { + success: true, + output: { + data, + format: contentType, + snapshotId: null, + }, + } + }, + + outputs: { + data: { + type: 'array', + description: 'Array of scraped result records', + items: { + type: 'json', + description: 'A scraped record with dataset-specific fields', + }, + }, + format: { + type: 'string', + description: 'The content type of the downloaded data', + }, + snapshotId: { + type: 'string', + description: 'The snapshot ID that was downloaded', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/index.ts b/apps/sim/tools/brightdata/index.ts new file mode 100644 index 0000000000..9e4a7fc713 --- /dev/null +++ b/apps/sim/tools/brightdata/index.ts @@ -0,0 +1,19 @@ +import { brightDataCancelSnapshotTool } from '@/tools/brightdata/cancel_snapshot' +import { brightDataDiscoverTool } from '@/tools/brightdata/discover' +import { brightDataDownloadSnapshotTool } from '@/tools/brightdata/download_snapshot' +import { brightDataScrapeDatasetTool } from '@/tools/brightdata/scrape_dataset' +import { brightDataScrapeUrlTool } from '@/tools/brightdata/scrape_url' +import { brightDataSerpSearchTool } from '@/tools/brightdata/serp_search' +import { brightDataSnapshotStatusTool } from '@/tools/brightdata/snapshot_status' +import { brightDataSyncScrapeTool } from '@/tools/brightdata/sync_scrape' + +export { + brightDataCancelSnapshotTool, + brightDataDiscoverTool, + brightDataDownloadSnapshotTool, + brightDataScrapeDatasetTool, + brightDataScrapeUrlTool, + brightDataSerpSearchTool, + brightDataSnapshotStatusTool, + brightDataSyncScrapeTool, +} diff --git a/apps/sim/tools/brightdata/scrape_dataset.ts b/apps/sim/tools/brightdata/scrape_dataset.ts new file mode 100644 index 0000000000..f53891d99f --- /dev/null +++ b/apps/sim/tools/brightdata/scrape_dataset.ts @@ -0,0 +1,97 @@ +import type { + BrightDataScrapeDatasetParams, + BrightDataScrapeDatasetResponse, +} from 
'@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataScrapeDatasetTool: ToolConfig< + BrightDataScrapeDatasetParams, + BrightDataScrapeDatasetResponse +> = { + id: 'brightdata_scrape_dataset', + name: 'Bright Data Scrape Dataset', + description: + 'Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'Dataset scraper ID from your Bright Data dashboard (e.g., "gd_l1viktl72bvl7bjuj0")', + }, + urls: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'JSON array of URL objects to scrape (e.g., [{"url": "https://example.com/product"}])', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Output format: "json" or "csv". 
Defaults to "json"', + }, + }, + + request: { + method: 'POST', + url: (params) => { + const queryParams = new URLSearchParams() + queryParams.set('dataset_id', params.datasetId) + queryParams.set('format', params.format || 'json') + return `https://api.brightdata.com/datasets/v3/trigger?${queryParams.toString()}` + }, + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + if (typeof params.urls === 'string') { + try { + return JSON.parse(params.urls) + } catch { + return [{ url: params.urls }] + } + } + return params.urls + }, + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Dataset trigger failed with status ${response.status}`) + } + + const data = await response.json() + + return { + success: true, + output: { + snapshotId: data.snapshot_id ?? data.snapshotId ?? '', + status: data.status ?? 'triggered', + }, + } + }, + + outputs: { + snapshotId: { + type: 'string', + description: 'The snapshot ID to retrieve results later', + }, + status: { + type: 'string', + description: 'Status of the scraping job (e.g., "triggered", "running")', + }, + }, +} diff --git a/apps/sim/tools/brightdata/scrape_url.ts b/apps/sim/tools/brightdata/scrape_url.ts new file mode 100644 index 0000000000..1fe284cd31 --- /dev/null +++ b/apps/sim/tools/brightdata/scrape_url.ts @@ -0,0 +1,103 @@ +import type { + BrightDataScrapeUrlParams, + BrightDataScrapeUrlResponse, +} from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataScrapeUrlTool: ToolConfig< + BrightDataScrapeUrlParams, + BrightDataScrapeUrlResponse +> = { + id: 'brightdata_scrape_url', + name: 'Bright Data Scrape URL', + description: + 'Fetch content from any URL using Bright Data Web Unlocker. 
Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + zone: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Web Unlocker zone name from your Bright Data dashboard (e.g., "web_unlocker1")', + }, + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The URL to scrape (e.g., "https://example.com/page")', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'Response format: "raw" for HTML or "json" for parsed content. Defaults to "raw"', + }, + country: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Two-letter country code for geo-targeting (e.g., "us", "gb")', + }, + }, + + request: { + method: 'POST', + url: 'https://api.brightdata.com/request', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + const body: Record = { + zone: params.zone, + url: params.url, + format: params.format || 'raw', + } + if (params.country) body.country = params.country + return body + }, + }, + + transformResponse: async (response: Response) => { + const contentType = response.headers.get('content-type') || '' + + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Request failed with status ${response.status}`) + } + + let content: string + if (contentType.includes('application/json')) { + const data = await response.json() + content = typeof data === 'string' ? 
data : JSON.stringify(data) + } else { + content = await response.text() + } + + return { + success: true, + output: { + content, + url: null, + statusCode: response.status, + }, + } + }, + + outputs: { + content: { + type: 'string', + description: 'The scraped page content (HTML or JSON depending on format)', + }, + url: { type: 'string', description: 'The URL that was scraped', optional: true }, + statusCode: { type: 'number', description: 'HTTP status code of the response', optional: true }, + }, +} diff --git a/apps/sim/tools/brightdata/serp_search.ts b/apps/sim/tools/brightdata/serp_search.ts new file mode 100644 index 0000000000..f524e2cc31 --- /dev/null +++ b/apps/sim/tools/brightdata/serp_search.ts @@ -0,0 +1,181 @@ +import type { + BrightDataSerpSearchParams, + BrightDataSerpSearchResponse, +} from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +const SEARCH_ENGINE_URLS: Record = { + google: 'https://www.google.com/search', + bing: 'https://www.bing.com/search', + duckduckgo: 'https://duckduckgo.com/', + yandex: 'https://yandex.com/search/', +} as const + +export const brightDataSerpSearchTool: ToolConfig< + BrightDataSerpSearchParams, + BrightDataSerpSearchResponse +> = { + id: 'brightdata_serp_search', + name: 'Bright Data SERP Search', + description: + 'Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + zone: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'SERP API zone name from your Bright Data dashboard (e.g., "serp_api1")', + }, + query: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'The search query (e.g., "best project management tools")', + }, + searchEngine: { + type: 'string', + required: false, + visibility: 'user-or-llm', + 
description: + 'Search engine to use: "google", "bing", "duckduckgo", or "yandex". Defaults to "google"', + }, + country: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Two-letter country code for localized results (e.g., "us", "gb")', + }, + language: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Two-letter language code (e.g., "en", "es")', + }, + numResults: { + type: 'number', + required: false, + visibility: 'user-or-llm', + description: 'Number of results to return (e.g., 10, 20). Defaults to 10', + }, + }, + + request: { + method: 'POST', + url: 'https://api.brightdata.com/request', + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + const engine = params.searchEngine || 'google' + const baseUrl = SEARCH_ENGINE_URLS[engine] || SEARCH_ENGINE_URLS.google + + const searchParams = new URLSearchParams() + searchParams.set('q', params.query) + if (params.numResults) searchParams.set('num', String(params.numResults)) + if (params.language) searchParams.set('hl', params.language) + if (params.country) searchParams.set('gl', params.country) + + searchParams.set('brd_json', '1') + + const body: Record = { + zone: params.zone, + url: `${baseUrl}?${searchParams.toString()}`, + format: 'raw', + } + if (params.country) body.country = params.country + return body + }, + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `SERP request failed with status ${response.status}`) + } + + const contentType = response.headers.get('content-type') || '' + let results: Array<{ + title: string | null + url: string | null + description: string | null + rank: number | null + }> = [] + let data: Record | null = null + + if (contentType.includes('application/json')) { + data = await response.json() + + if 
(Array.isArray(data?.organic)) { + results = data.organic.map((item: Record, index: number) => ({ + title: (item.title as string) ?? null, + url: (item.link as string) ?? (item.url as string) ?? null, + description: (item.description as string) ?? (item.snippet as string) ?? null, + rank: index + 1, + })) + } else if (Array.isArray(data)) { + results = data.map((item: Record, index: number) => ({ + title: (item.title as string) ?? null, + url: (item.link as string) ?? (item.url as string) ?? null, + description: (item.description as string) ?? (item.snippet as string) ?? null, + rank: index + 1, + })) + } + } else { + const text = await response.text() + results = [ + { + title: 'Raw SERP Response', + url: null, + description: text.slice(0, 500), + rank: 1, + }, + ] + } + + return { + success: true, + output: { + results, + query: ((data?.general as Record | undefined)?.query as string) ?? null, + searchEngine: + ((data?.general as Record | undefined)?.search_engine as string) ?? null, + }, + } + }, + + outputs: { + results: { + type: 'array', + description: 'Array of search results', + items: { + type: 'object', + description: 'A search result entry', + properties: { + title: { type: 'string', description: 'Title of the search result', optional: true }, + url: { type: 'string', description: 'URL of the search result', optional: true }, + description: { + type: 'string', + description: 'Snippet or description of the result', + optional: true, + }, + rank: { type: 'number', description: 'Position in search results', optional: true }, + }, + }, + }, + query: { type: 'string', description: 'The search query that was executed', optional: true }, + searchEngine: { + type: 'string', + description: 'The search engine that was used', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/snapshot_status.ts b/apps/sim/tools/brightdata/snapshot_status.ts new file mode 100644 index 0000000000..d6fe69bd3d --- /dev/null +++ 
b/apps/sim/tools/brightdata/snapshot_status.ts @@ -0,0 +1,74 @@ +import type { + BrightDataSnapshotStatusParams, + BrightDataSnapshotStatusResponse, +} from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataSnapshotStatusTool: ToolConfig< + BrightDataSnapshotStatusParams, + BrightDataSnapshotStatusResponse +> = { + id: 'brightdata_snapshot_status', + name: 'Bright Data Snapshot Status', + description: + 'Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + snapshotId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'The snapshot ID returned when the collection was triggered (e.g., "s_m4x7enmven8djfqak")', + }, + }, + + request: { + method: 'GET', + url: (params) => `https://api.brightdata.com/datasets/v3/progress/${params.snapshotId?.trim()}`, + headers: (params) => ({ + Authorization: `Bearer ${params.apiKey}`, + }), + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Snapshot status check failed with status ${response.status}`) + } + + const data = await response.json() + + return { + success: true, + output: { + snapshotId: data.snapshot_id ?? null, + datasetId: data.dataset_id ?? null, + status: data.status ?? 
'unknown', + }, + } + }, + + outputs: { + snapshotId: { + type: 'string', + description: 'The snapshot ID that was queried', + }, + datasetId: { + type: 'string', + description: 'The dataset ID associated with this snapshot', + optional: true, + }, + status: { + type: 'string', + description: 'Current status of the snapshot: "starting", "running", "ready", or "failed"', + }, + }, +} diff --git a/apps/sim/tools/brightdata/sync_scrape.ts b/apps/sim/tools/brightdata/sync_scrape.ts new file mode 100644 index 0000000000..8ac8d0108c --- /dev/null +++ b/apps/sim/tools/brightdata/sync_scrape.ts @@ -0,0 +1,131 @@ +import type { + BrightDataSyncScrapeParams, + BrightDataSyncScrapeResponse, +} from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +export const brightDataSyncScrapeTool: ToolConfig< + BrightDataSyncScrapeParams, + BrightDataSyncScrapeResponse +> = { + id: 'brightdata_sync_scrape', + name: 'Bright Data Sync Scrape', + description: + 'Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout.', + version: '1.0.0', + + params: { + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + datasetId: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'Dataset scraper ID from your Bright Data dashboard (e.g., "gd_l1viktl72bvl7bjuj0")', + }, + urls: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: + 'JSON array of URL objects to scrape, up to 20 (e.g., [{"url": "https://example.com/product"}])', + }, + format: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Output format: "json", "ndjson", or "csv". 
Defaults to "json"', + }, + includeErrors: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Whether to include error reports in results', + }, + }, + + request: { + method: 'POST', + url: (params) => { + const queryParams = new URLSearchParams() + queryParams.set('dataset_id', params.datasetId) + queryParams.set('format', params.format || 'json') + if (params.includeErrors) queryParams.set('include_errors', 'true') + return `https://api.brightdata.com/datasets/v3/scrape?${queryParams.toString()}` + }, + headers: (params) => ({ + 'Content-Type': 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + if (typeof params.urls === 'string') { + try { + const parsed = JSON.parse(params.urls) + return { input: Array.isArray(parsed) ? parsed : [parsed] } + } catch { + return { input: [{ url: params.urls }] } + } + } + return { input: params.urls } + }, + }, + + transformResponse: async (response: Response) => { + if (!response.ok) { + const errorText = await response.text() + throw new Error(errorText || `Sync scrape failed with status ${response.status}`) + } + + if (response.status === 202) { + const data = await response.json() + return { + success: true, + output: { + data: [], + snapshotId: data.snapshot_id ?? null, + isAsync: true, + }, + } + } + + const data = await response.json() + const results = Array.isArray(data) ? 
data : [data] + + return { + success: true, + output: { + data: results, + snapshotId: null, + isAsync: false, + }, + } + }, + + outputs: { + data: { + type: 'array', + description: + 'Array of scraped result objects with fields specific to the dataset scraper used', + items: { + type: 'json', + description: 'A scraped record with dataset-specific fields', + }, + }, + snapshotId: { + type: 'string', + description: + 'Snapshot ID returned if the request exceeded the 1-minute timeout and switched to async processing', + optional: true, + }, + isAsync: { + type: 'boolean', + description: + 'Whether the request fell back to async mode (true means use snapshot ID to retrieve results)', + }, + }, +} diff --git a/apps/sim/tools/brightdata/types.ts b/apps/sim/tools/brightdata/types.ts new file mode 100644 index 0000000000..3197826996 --- /dev/null +++ b/apps/sim/tools/brightdata/types.ts @@ -0,0 +1,145 @@ +import type { ToolResponse } from '@/tools/types' + +export interface BrightDataScrapeUrlParams { + apiKey: string + zone: string + url: string + format?: string + country?: string +} + +export interface BrightDataScrapeUrlResponse extends ToolResponse { + output: { + content: string + url: string | null + statusCode: number | null + } +} + +export interface BrightDataSerpSearchParams { + apiKey: string + zone: string + query: string + searchEngine?: string + country?: string + language?: string + numResults?: number +} + +export interface BrightDataSerpSearchResponse extends ToolResponse { + output: { + results: Array<{ + title: string | null + url: string | null + description: string | null + rank: number | null + }> + query: string | null + searchEngine: string | null + } +} + +export interface BrightDataScrapeDatasetParams { + apiKey: string + datasetId: string + urls: string + format?: string +} + +export interface BrightDataScrapeDatasetResponse extends ToolResponse { + output: { + snapshotId: string + status: string + } +} + +export interface 
BrightDataSyncScrapeParams { + apiKey: string + datasetId: string + urls: string + format?: string + includeErrors?: boolean +} + +export interface BrightDataSyncScrapeResponse extends ToolResponse { + output: { + data: Array> + snapshotId: string | null + isAsync: boolean + } +} + +export interface BrightDataSnapshotStatusParams { + apiKey: string + snapshotId: string +} + +export interface BrightDataSnapshotStatusResponse extends ToolResponse { + output: { + snapshotId: string | null + datasetId: string | null + status: string + } +} + +export interface BrightDataDownloadSnapshotParams { + apiKey: string + snapshotId: string + format?: string + compress?: boolean +} + +export interface BrightDataDownloadSnapshotResponse extends ToolResponse { + output: { + data: Array> + format: string + snapshotId: string | null + } +} + +export interface BrightDataCancelSnapshotParams { + apiKey: string + snapshotId: string +} + +export interface BrightDataCancelSnapshotResponse extends ToolResponse { + output: { + snapshotId: string | null + cancelled: boolean + } +} + +export interface BrightDataDiscoverParams { + apiKey: string + query: string + numResults?: number + intent?: string + includeContent?: boolean + format?: string + language?: string + country?: string +} + +export interface BrightDataDiscoverResponse extends ToolResponse { + output: { + results: Array<{ + url: string | null + title: string | null + description: string | null + relevanceScore: number | null + content: string | null + }> + query: string | null + totalResults: number + } +} + +export type BrightDataResponse = + | BrightDataScrapeUrlResponse + | BrightDataSerpSearchResponse + | BrightDataScrapeDatasetResponse + | BrightDataSyncScrapeResponse + | BrightDataSnapshotStatusResponse + | BrightDataDownloadSnapshotResponse + | BrightDataCancelSnapshotResponse + | BrightDataDiscoverResponse diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index c2177e1505..44144459a4 100644 --- 
a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -236,6 +236,16 @@ import { boxSignResendRequestTool, } from '@/tools/box_sign' import { brandfetchGetBrandTool, brandfetchSearchTool } from '@/tools/brandfetch' +import { + brightDataCancelSnapshotTool, + brightDataDiscoverTool, + brightDataDownloadSnapshotTool, + brightDataScrapeDatasetTool, + brightDataScrapeUrlTool, + brightDataSerpSearchTool, + brightDataSnapshotStatusTool, + brightDataSyncScrapeTool, +} from '@/tools/brightdata' import { browserUseRunTaskTool } from '@/tools/browser_use' import { calcomCancelBookingTool, @@ -2921,6 +2931,14 @@ export const tools: Record = { athena_stop_query: athenaStopQueryTool, brandfetch_get_brand: brandfetchGetBrandTool, brandfetch_search: brandfetchSearchTool, + brightdata_cancel_snapshot: brightDataCancelSnapshotTool, + brightdata_discover: brightDataDiscoverTool, + brightdata_download_snapshot: brightDataDownloadSnapshotTool, + brightdata_scrape_dataset: brightDataScrapeDatasetTool, + brightdata_scrape_url: brightDataScrapeUrlTool, + brightdata_serp_search: brightDataSerpSearchTool, + brightdata_snapshot_status: brightDataSnapshotStatusTool, + brightdata_sync_scrape: brightDataSyncScrapeTool, box_copy_file: boxCopyFileTool, box_create_folder: boxCreateFolderTool, box_delete_file: boxDeleteFileTool, From d710533f7e2878789d785a9cfc338154e97ac1e8 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 12:09:58 -0700 Subject: [PATCH 2/6] fix(brightdata): address PR review feedback - Fix truncated "Download Snapshot" description in integrations.json and docs - Map engine-specific query params (num/count/numdoc, hl/setLang/lang/kl, gl/cc/lr) per search engine instead of using Google-specific params for all - Attempt to parse snapshot_id from cancel/download response bodies instead of hardcoding null Co-Authored-By: Claude Opus 4.6 --- .../docs/content/docs/en/tools/brightdata.mdx | 2 +- .../integrations/data/integrations.json | 2 +- 
apps/sim/tools/brightdata/cancel_snapshot.ts | 3 +- .../sim/tools/brightdata/download_snapshot.ts | 2 +- apps/sim/tools/brightdata/serp_search.ts | 31 ++++++++++++------- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/apps/docs/content/docs/en/tools/brightdata.mdx b/apps/docs/content/docs/en/tools/brightdata.mdx index 49ede0ef4a..7a2205a1bf 100644 --- a/apps/docs/content/docs/en/tools/brightdata.mdx +++ b/apps/docs/content/docs/en/tools/brightdata.mdx @@ -161,7 +161,7 @@ Check the progress of an async Bright Data scraping job. Returns status: startin ### `brightdata_download_snapshot` -Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status +Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status ready. #### Input diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index ac09e122fc..533d2488fc 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -1779,7 +1779,7 @@ }, { "name": "Download Snapshot", - "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status " + "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status ready." 
}, { "name": "Cancel Snapshot", diff --git a/apps/sim/tools/brightdata/cancel_snapshot.ts b/apps/sim/tools/brightdata/cancel_snapshot.ts index ffa7dd3845..a5a2328979 100644 --- a/apps/sim/tools/brightdata/cancel_snapshot.ts +++ b/apps/sim/tools/brightdata/cancel_snapshot.ts @@ -44,10 +44,11 @@ export const brightDataCancelSnapshotTool: ToolConfig< throw new Error(errorText || `Cancel snapshot failed with status ${response.status}`) } + const data = (await response.json().catch(() => null)) as Record | null return { success: true, output: { - snapshotId: null, + snapshotId: (data?.snapshot_id as string) ?? null, cancelled: true, }, } diff --git a/apps/sim/tools/brightdata/download_snapshot.ts b/apps/sim/tools/brightdata/download_snapshot.ts index 28a0212eb9..e32ddc79e3 100644 --- a/apps/sim/tools/brightdata/download_snapshot.ts +++ b/apps/sim/tools/brightdata/download_snapshot.ts @@ -89,7 +89,7 @@ export const brightDataDownloadSnapshotTool: ToolConfig< output: { data, format: contentType, - snapshotId: null, + snapshotId: (data[0]?.snapshot_id as string) ?? 
null, }, } }, diff --git a/apps/sim/tools/brightdata/serp_search.ts b/apps/sim/tools/brightdata/serp_search.ts index f524e2cc31..9a12f47045 100644 --- a/apps/sim/tools/brightdata/serp_search.ts +++ b/apps/sim/tools/brightdata/serp_search.ts @@ -4,11 +4,14 @@ import type { } from '@/tools/brightdata/types' import type { ToolConfig } from '@/tools/types' -const SEARCH_ENGINE_URLS: Record = { - google: 'https://www.google.com/search', - bing: 'https://www.bing.com/search', - duckduckgo: 'https://duckduckgo.com/', - yandex: 'https://yandex.com/search/', +const SEARCH_ENGINE_CONFIG: Record< + string, + { url: string; queryKey: string; numKey: string; langKey: string; countryKey: string } +> = { + google: { url: 'https://www.google.com/search', queryKey: 'q', numKey: 'num', langKey: 'hl', countryKey: 'gl' }, + bing: { url: 'https://www.bing.com/search', queryKey: 'q', numKey: 'count', langKey: 'setLang', countryKey: 'cc' }, + duckduckgo: { url: 'https://duckduckgo.com/', queryKey: 'q', numKey: '', langKey: 'kl', countryKey: '' }, + yandex: { url: 'https://yandex.com/search/', queryKey: 'text', numKey: 'numdoc', langKey: 'lang', countryKey: 'lr' }, } as const export const brightDataSerpSearchTool: ToolConfig< @@ -76,19 +79,25 @@ export const brightDataSerpSearchTool: ToolConfig< }), body: (params) => { const engine = params.searchEngine || 'google' - const baseUrl = SEARCH_ENGINE_URLS[engine] || SEARCH_ENGINE_URLS.google + const config = SEARCH_ENGINE_CONFIG[engine] || SEARCH_ENGINE_CONFIG.google const searchParams = new URLSearchParams() - searchParams.set('q', params.query) - if (params.numResults) searchParams.set('num', String(params.numResults)) - if (params.language) searchParams.set('hl', params.language) - if (params.country) searchParams.set('gl', params.country) + searchParams.set(config.queryKey, params.query) + if (params.numResults && config.numKey) { + searchParams.set(config.numKey, String(params.numResults)) + } + if (params.language && config.langKey) { + 
searchParams.set(config.langKey, params.language) + } + if (params.country && config.countryKey) { + searchParams.set(config.countryKey, params.country) + } searchParams.set('brd_json', '1') const body: Record = { zone: params.zone, - url: `${baseUrl}?${searchParams.toString()}`, + url: `${config.url}?${searchParams.toString()}`, format: 'raw', } if (params.country) body.country = params.country From 12fed93c462073151f7eee4a0e05cca76171aa3f Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 12:12:13 -0700 Subject: [PATCH 3/6] lint --- .../docs/content/docs/en/tools/brightdata.mdx | 2 +- .../integrations/data/integrations.json | 2 +- apps/sim/tools/brightdata/serp_search.ts | 32 ++++++++++++++++--- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/apps/docs/content/docs/en/tools/brightdata.mdx b/apps/docs/content/docs/en/tools/brightdata.mdx index 7a2205a1bf..49ede0ef4a 100644 --- a/apps/docs/content/docs/en/tools/brightdata.mdx +++ b/apps/docs/content/docs/en/tools/brightdata.mdx @@ -161,7 +161,7 @@ Check the progress of an async Bright Data scraping job. Returns status: startin ### `brightdata_download_snapshot` -Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status ready. +Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status #### Input diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index 533d2488fc..ac09e122fc 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -1779,7 +1779,7 @@ }, { "name": "Download Snapshot", - "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status ready." 
+ "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status " }, { "name": "Cancel Snapshot", diff --git a/apps/sim/tools/brightdata/serp_search.ts b/apps/sim/tools/brightdata/serp_search.ts index 9a12f47045..5f599241cb 100644 --- a/apps/sim/tools/brightdata/serp_search.ts +++ b/apps/sim/tools/brightdata/serp_search.ts @@ -8,10 +8,34 @@ const SEARCH_ENGINE_CONFIG: Record< string, { url: string; queryKey: string; numKey: string; langKey: string; countryKey: string } > = { - google: { url: 'https://www.google.com/search', queryKey: 'q', numKey: 'num', langKey: 'hl', countryKey: 'gl' }, - bing: { url: 'https://www.bing.com/search', queryKey: 'q', numKey: 'count', langKey: 'setLang', countryKey: 'cc' }, - duckduckgo: { url: 'https://duckduckgo.com/', queryKey: 'q', numKey: '', langKey: 'kl', countryKey: '' }, - yandex: { url: 'https://yandex.com/search/', queryKey: 'text', numKey: 'numdoc', langKey: 'lang', countryKey: 'lr' }, + google: { + url: 'https://www.google.com/search', + queryKey: 'q', + numKey: 'num', + langKey: 'hl', + countryKey: 'gl', + }, + bing: { + url: 'https://www.bing.com/search', + queryKey: 'q', + numKey: 'count', + langKey: 'setLang', + countryKey: 'cc', + }, + duckduckgo: { + url: 'https://duckduckgo.com/', + queryKey: 'q', + numKey: '', + langKey: 'kl', + countryKey: '', + }, + yandex: { + url: 'https://yandex.com/search/', + queryKey: 'text', + numKey: 'numdoc', + langKey: 'lang', + countryKey: 'lr', + }, } as const export const brightDataSerpSearchTool: ToolConfig< From 703058c096e3fb3874205672f24baaf468dd35b1 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 12:17:32 -0700 Subject: [PATCH 4/6] fix(agiloft): change bgColor to white; fix docs truncation Co-Authored-By: Claude Opus 4.6 --- apps/docs/content/docs/en/tools/brightdata.mdx | 2 +- apps/sim/blocks/blocks/agiloft.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/apps/docs/content/docs/en/tools/brightdata.mdx b/apps/docs/content/docs/en/tools/brightdata.mdx index 49ede0ef4a..37681a207b 100644 --- a/apps/docs/content/docs/en/tools/brightdata.mdx +++ b/apps/docs/content/docs/en/tools/brightdata.mdx @@ -161,7 +161,7 @@ Check the progress of an async Bright Data scraping job. Returns status: startin ### `brightdata_download_snapshot` -Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status +Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status "ready". #### Input diff --git a/apps/sim/blocks/blocks/agiloft.ts b/apps/sim/blocks/blocks/agiloft.ts index 35af080fb9..36e571dad9 100644 --- a/apps/sim/blocks/blocks/agiloft.ts +++ b/apps/sim/blocks/blocks/agiloft.ts @@ -13,7 +13,7 @@ export const AgiloftBlock: BlockConfig = { category: 'tools', integrationType: IntegrationType.Productivity, tags: ['automation'], - bgColor: '#263A5C', + bgColor: '#FFFFFF', icon: AgiloftIcon, authMode: AuthMode.ApiKey, From bf0286a7e6f4588cef2ab7ac4072d8e2aac121e4 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 12:21:46 -0700 Subject: [PATCH 5/6] fix(brightdata): avoid inner quotes in description to fix docs generation The docs generator regex truncates at inner quotes. Reword the download_snapshot description to avoid embedded double quotes. 
Co-Authored-By: Claude Opus 4.6 --- apps/docs/content/docs/en/tools/brightdata.mdx | 2 +- apps/sim/app/(landing)/integrations/data/integrations.json | 4 ++-- apps/sim/tools/brightdata/download_snapshot.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/docs/content/docs/en/tools/brightdata.mdx b/apps/docs/content/docs/en/tools/brightdata.mdx index 37681a207b..65f8327e86 100644 --- a/apps/docs/content/docs/en/tools/brightdata.mdx +++ b/apps/docs/content/docs/en/tools/brightdata.mdx @@ -161,7 +161,7 @@ Check the progress of an async Bright Data scraping job. Returns status: startin ### `brightdata_download_snapshot` -Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status "ready". +Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status. #### Input diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index ac09e122fc..7a2f6cd107 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -214,7 +214,7 @@ "name": "Agiloft", "description": "Manage records in Agiloft CLM", "longDescription": "Integrate with Agiloft contract lifecycle management to create, read, update, delete, and search records. Supports file attachments, SQL-based selection, saved searches, and record locking across any table in your knowledge base.", - "bgColor": "#263A5C", + "bgColor": "#FFFFFF", "iconName": "AgiloftIcon", "docsUrl": "https://docs.sim.ai/tools/agiloft", "operations": [ @@ -1779,7 +1779,7 @@ }, { "name": "Download Snapshot", - "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status " + "description": "Download the results of a completed Bright Data scraping job using its snapshot ID. 
The snapshot must have ready status." }, { "name": "Cancel Snapshot", diff --git a/apps/sim/tools/brightdata/download_snapshot.ts b/apps/sim/tools/brightdata/download_snapshot.ts index e32ddc79e3..c62cfc4c68 100644 --- a/apps/sim/tools/brightdata/download_snapshot.ts +++ b/apps/sim/tools/brightdata/download_snapshot.ts @@ -11,7 +11,7 @@ export const brightDataDownloadSnapshotTool: ToolConfig< id: 'brightdata_download_snapshot', name: 'Bright Data Download Snapshot', description: - 'Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have status "ready".', + 'Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status.', version: '1.0.0', params: { From eb50282b4b55dca2a0c1bc10a60c6a1b5656a481 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 15 Apr 2026 12:36:35 -0700 Subject: [PATCH 6/6] fix(brightdata): disable incompatible DuckDuckGo and Yandex URL params DuckDuckGo kl expects region-language format (us-en) and Yandex lr expects numeric region IDs (213), not plain two-letter codes. Disable these URL-level params since Bright Data normalizes localization through the body-level country param. Co-Authored-By: Claude Opus 4.6 --- apps/sim/tools/brightdata/serp_search.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/tools/brightdata/serp_search.ts b/apps/sim/tools/brightdata/serp_search.ts index 5f599241cb..e9ed8ef1de 100644 --- a/apps/sim/tools/brightdata/serp_search.ts +++ b/apps/sim/tools/brightdata/serp_search.ts @@ -26,7 +26,7 @@ const SEARCH_ENGINE_CONFIG: Record< url: 'https://duckduckgo.com/', queryKey: 'q', numKey: '', - langKey: 'kl', + langKey: '', countryKey: '', }, yandex: { @@ -34,7 +34,7 @@ const SEARCH_ENGINE_CONFIG: Record< queryKey: 'text', numKey: 'numdoc', langKey: 'lang', - countryKey: 'lr', + countryKey: '', }, } as const