#!/usr/bin/env node
/**
 * Loads one http(s) URL in Playwright's Chromium and prints a JSON summary of
 * the rendered page (final URL, title, body text, meta description, HTTP
 * status, challenge flag, elapsed time) to stdout.
 *
 * Usage: node scripts/playwright-safe.js <url>
 *
 * Environment variables read by main():
 *   WAIT_TIME        Milliseconds to wait after navigation (default 5000, capped at 20000).
 *   SCREENSHOT_PATH  If set, a viewport screenshot is written to this path.
 *   SAVE_HTML        "true" to also write the page HTML to disk.
 *   HEADLESS         "false" to launch a visible browser window.
 *   USER_AGENT       Overrides the default user agent string.
 *
 * Failures are reported as a single JSON line on stderr with exit code 1.
 */
const fs = require("fs");
const path = require("path");

const DEFAULT_WAIT_MS = 5000;
const MAX_WAIT_MS = 20000;
const NAV_TIMEOUT_MS = 30000;
const EXTRA_CHALLENGE_WAIT_MS = 8000;
const CONTENT_LIMIT = 12000;
const DEFAULT_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36";

/**
 * Writes `{ error, details? }` as one JSON line to stderr and terminates the
 * process with exit code 1. This function never returns.
 *
 * @param {string} message - Short description of the failure.
 * @param {string} [details] - Optional extra detail; omitted from the payload when falsy.
 */
function fail(message, details) {
  const payload = { error: message };
  if (details) payload.details = details;
  process.stderr.write(`${JSON.stringify(payload)}\n`);
  process.exit(1);
}

/**
 * Extracts a printable description from any thrown value, so that a non-Error
 * throw (string, plain object, ...) still yields a usable detail string.
 *
 * @param {unknown} error - Whatever was thrown.
 * @returns {string} `error.message` when present, otherwise `String(error)`.
 */
function describeError(error) {
  return error?.message ?? String(error);
}

/**
 * Parses the post-navigation wait time in milliseconds.
 *
 * @param {string | undefined} raw - Raw value, e.g. from the WAIT_TIME environment variable.
 * @returns {number} The parsed integer clamped to MAX_WAIT_MS, or DEFAULT_WAIT_MS
 *   when the value is missing, empty, not an integer prefix, or negative.
 */
function parseWaitTime(raw) {
  const value = Number.parseInt(raw || `${DEFAULT_WAIT_MS}`, 10);
  if (!Number.isFinite(value) || value < 0) return DEFAULT_WAIT_MS;
  return Math.min(value, MAX_WAIT_MS);
}

/**
 * Validates the target URL given on the command line.
 * Exits the process via fail() when the URL is missing, unparsable, or not http/https.
 *
 * @param {string | undefined} rawUrl - The URL argument as typed by the user.
 * @returns {string} The URL as serialized by the WHATWG URL parser.
 */
function parseTarget(rawUrl) {
  if (!rawUrl) {
    fail("Missing URL. Usage: node scripts/playwright-safe.js <url>");
  }

  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch (error) {
    fail("Invalid URL.", describeError(error));
  }

  if (!["http:", "https:"].includes(parsed.protocol)) {
    fail("Only http and https URLs are allowed.");
  }

  return parsed.toString();
}

/**
 * Creates the parent directory of `filePath` (recursively) if it does not exist.
 * Does nothing when `filePath` is empty.
 *
 * @param {string} filePath - Path of a file that is about to be written.
 */
function ensureParentDir(filePath) {
  if (!filePath) return;
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
}

/**
 * Derives the HTML output path that sits next to a screenshot: same directory
 * and base name, with the extension replaced by (or, when there is none,
 * appended as) ".html".
 *
 * Uses path.parse() rather than a regex over the whole string so that dots in
 * directory names are never treated as the file extension, and so that a
 * screenshot path without an extension cannot map onto itself.
 *
 * @param {string} screenshotPath - Non-empty screenshot file path.
 * @returns {string} Path of the sibling ".html" file.
 */
function deriveHtmlPath(screenshotPath) {
  const { root, dir, name } = path.parse(screenshotPath);
  return path.format({ root, dir, name, ext: ".html" });
}

/**
 * Checks whether the current page looks like an interstitial challenge, based
 * on a handful of known phrases in the body text and on iframes whose `src`
 * contains "challenge" or "cloudflare".
 *
 * @param {object} page - Playwright Page.
 * @returns {Promise<boolean>} True when a marker is found; false otherwise,
 *   including when the in-page evaluation itself throws.
 */
async function detectChallenge(page) {
  try {
    // The callback executes inside the page, so it must not reference
    // anything from this module's scope.
    return await page.evaluate(() => {
      const text = (document.body?.innerText || "").toLowerCase();
      return (
        text.includes("checking your browser") ||
        text.includes("just a moment") ||
        text.includes("verify you are human") ||
        text.includes("press and hold") ||
        document.querySelector('iframe[src*="challenge"]') !== null ||
        document.querySelector('iframe[src*="cloudflare"]') !== null
      );
    });
  } catch {
    // Deliberate best-effort: a failed probe is reported as "no challenge".
    return false;
  }
}

/**
 * Closes the browser if one was launched, ignoring any error from close().
 * Cleanup problems must not mask the primary failure, nor turn an already
 * collected result into a failure.
 *
 * @param {object | undefined} browser - Playwright Browser, or undefined if launch never succeeded.
 * @returns {Promise<void>}
 */
async function closeQuietly(browser) {
  if (!browser) return;
  try {
    await browser.close();
  } catch {
    // Ignore close errors; there is nothing useful left to do with them.
  }
}

/**
 * Entry point: validates input, loads the page, optionally writes a
 * screenshot and/or HTML dump, and prints the JSON result to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const requestedUrl = parseTarget(process.argv[2]);
  const waitTime = parseWaitTime(process.env.WAIT_TIME);
  const screenshotPath = process.env.SCREENSHOT_PATH || "";
  const saveHtml = process.env.SAVE_HTML === "true";
  const headless = process.env.HEADLESS !== "false";
  const userAgent = process.env.USER_AGENT || DEFAULT_USER_AGENT;
  const startedAt = Date.now();

  let chromium;
  try {
    ({ chromium } = require("playwright"));
  } catch (error) {
    fail(
      "Playwright is not installed for this skill. Run npm install and npx playwright install chromium first.",
      describeError(error)
    );
  }

  let browser;
  let result;
  try {
    browser = await chromium.launch({
      headless,
      ignoreDefaultArgs: ["--enable-automation"],
      args: [
        "--disable-blink-features=AutomationControlled",
        "--disable-features=IsolateOrigins,site-per-process"
      ]
    });

    const context = await browser.newContext({
      userAgent,
      locale: "en-US",
      viewport: { width: 1440, height: 900 },
      extraHTTPHeaders: {
        "Accept-Language": "en-US,en;q=0.9",
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
      }
    });

    // Runs in every page of this context before the page's own scripts:
    // overrides several navigator properties, ensures window.chrome exists,
    // and answers "notifications" permission queries from Notification.permission.
    await context.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => false });
      Object.defineProperty(navigator, "languages", { get: () => ["en-US", "en"] });
      Object.defineProperty(navigator, "plugins", { get: () => [1, 2, 3, 4, 5] });
      window.chrome = window.chrome || { runtime: {} };

      const originalQuery = window.navigator.permissions?.query?.bind(window.navigator.permissions);
      if (originalQuery) {
        window.navigator.permissions.query = (parameters) => {
          if (parameters?.name === "notifications") {
            return Promise.resolve({ state: Notification.permission });
          }
          return originalQuery(parameters);
        };
      }
    });

    const page = await context.newPage();
    const response = await page.goto(requestedUrl, {
      waitUntil: "domcontentloaded",
      timeout: NAV_TIMEOUT_MS
    });

    await page.waitForTimeout(waitTime);

    // If the page still looks like a challenge, wait once more and re-check.
    let challengeDetected = await detectChallenge(page);
    if (challengeDetected) {
      await page.waitForTimeout(EXTRA_CHALLENGE_WAIT_MS);
      challengeDetected = await detectChallenge(page);
    }

    const extracted = await page.evaluate((contentLimit) => {
      const bodyText = document.body?.innerText || "";
      return {
        finalUrl: window.location.href,
        title: document.title || "",
        content: bodyText.slice(0, contentLimit),
        metaDescription:
          document.querySelector('meta[name="description"]')?.content ||
          document.querySelector('meta[property="og:description"]')?.content ||
          ""
      };
    }, CONTENT_LIMIT);

    result = {
      requestedUrl,
      finalUrl: extracted.finalUrl,
      title: extracted.title,
      content: extracted.content,
      metaDescription: extracted.metaDescription,
      status: response ? response.status() : null,
      challengeDetected,
      elapsedSeconds: ((Date.now() - startedAt) / 1000).toFixed(2)
    };

    if (screenshotPath) {
      ensureParentDir(screenshotPath);
      await page.screenshot({ path: screenshotPath, fullPage: false, timeout: 10000 });
      result.screenshot = screenshotPath;
    }

    if (saveHtml) {
      const htmlTarget = screenshotPath
        ? deriveHtmlPath(screenshotPath)
        : path.resolve(`page-${Date.now()}.html`);
      ensureParentDir(htmlTarget);
      fs.writeFileSync(htmlTarget, await page.content());
      result.htmlFile = htmlTarget;
    }
  } catch (error) {
    // fail() exits the process immediately, so the browser has to be closed first.
    await closeQuietly(browser);
    fail("Scrape failed.", describeError(error));
  }

  // Close before emitting the result so that stdout carries exactly one
  // outcome and a close error cannot follow a success payload.
  await closeQuietly(browser);
  process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
}

main().catch((error) => {
  fail("Unexpected error.", describeError(error));
});