/**
 * Shared helpers for scripts that open a listing page in a browser, open its
 * photo experience, and collect the URLs of the large images rendered there.
 */

// Default settle time after a navigation or a click, in milliseconds.
const DEFAULT_WAIT_MS = 4000;
// Timeout for page navigations, in milliseconds.
const NAV_TIMEOUT_MS = 45000;
// Default timeout for page actions such as clicks, in milliseconds.
const CLICK_TIMEOUT_MS = 15000;
// How long to wait for the network to go idle before carrying on regardless.
const NETWORK_IDLE_TIMEOUT_MS = 15000;
// Upper bound on scroll-to-bottom passes in scrollUntilSettled.
const MAX_SCROLL_PASSES = 12;
// Pause after each scroll pass so lazy-loaded content can render.
const SCROLL_PAUSE_MS = 900;
// Images with a known dimension below these are filtered out as thumbnails.
const LARGE_IMAGE_MIN_WIDTH = 300;
const LARGE_IMAGE_MIN_HEIGHT = 200;

/**
 * Writes a JSON error payload to stderr and terminates the process with
 * exit code 1. This function never returns.
 *
 * @param {string} message - Human-readable error message.
 * @param {*} [details] - Optional extra context; only included when truthy.
 */
export function fail(message, details) {
  const payload = { error: message };
  if (details) payload.details = details;
  process.stderr.write(`${JSON.stringify(payload)}\n`);
  process.exit(1);
}

/**
 * Validates a user-supplied URL and returns its serialized form.
 * Missing, unparseable, or non-http(s) URLs are reported through `fail`,
 * which exits the process.
 *
 * @param {string} rawUrl - URL as provided by the caller.
 * @returns {string} The URL as serialized by the WHATWG URL parser.
 */
export function parseTarget(rawUrl) {
  if (!rawUrl) {
    fail("Missing URL.");
  }

  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch (error) {
    fail("Invalid URL.", error instanceof Error ? error.message : String(error));
  }

  if (!["http:", "https:"].includes(parsed.protocol)) {
    fail("Only http and https URLs are allowed.");
  }

  return parsed.toString();
}

/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
export function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Dynamically imports the `cloakbrowser` package. When the import fails the
 * error is reported through `fail`, which exits the process.
 *
 * @returns {Promise<object>} The module namespace of `cloakbrowser`.
 */
export async function loadCloakBrowser() {
  try {
    return await import("cloakbrowser");
  } catch (error) {
    fail(
      "CloakBrowser is not installed for this skill. Run pnpm install in skills/web-automation/scripts first.",
      error instanceof Error ? error.message : String(error)
    );
  }
}

/**
 * Runs `fn` while `console.log` and `console.error` are redirected to stderr,
 * then restores the original console methods even if `fn` throws.
 *
 * @param {() => *} fn - Function to run; may be async.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 */
export async function runWithStderrLogs(fn) {
  const originalLog = console.log;
  const originalError = console.error;
  const writeToStderr = (...args) => process.stderr.write(`${args.join(" ")}\n`);

  console.log = writeToStderr;
  console.error = writeToStderr;
  try {
    return await fn();
  } finally {
    console.log = originalLog;
    console.error = originalError;
  }
}

/**
 * Ensures the browser binary is available, launches a browser context and
 * opens a page with the module's default timeouts applied.
 *
 * @param {{ headless?: boolean }} [options]
 * @returns {Promise<{ context: object, page: object }>} The launched context
 *   and its first page. The caller owns the context and must close it.
 */
export async function createPageSession({ headless = true } = {}) {
  const { ensureBinary, launchContext } = await loadCloakBrowser();

  await runWithStderrLogs(() => ensureBinary());
  const context = await runWithStderrLogs(() =>
    launchContext({
      headless,
      humanize: true,
      locale: "en-US",
      viewport: { width: 1440, height: 900 },
    })
  );

  try {
    const page = await context.newPage();
    page.setDefaultTimeout(CLICK_TIMEOUT_MS);
    page.setDefaultNavigationTimeout(NAV_TIMEOUT_MS);
    return { context, page };
  } catch (error) {
    // Do not leak the launched browser when page setup fails. Closing is
    // best-effort: the setup error is the one worth reporting.
    // NOTE(review): assumes the context exposes a Playwright-style close().
    try {
      await context.close?.();
    } catch {
      // Ignored on purpose; see above.
    }
    throw error;
  }
}

/**
 * Navigates to `url`, waits (best-effort) for the network to go idle, then
 * sleeps for `waitMs` to let late rendering finish.
 *
 * @param {object} page - Page object exposing `goto` and `waitForLoadState`.
 * @param {string} url - Address to open.
 * @param {number} [waitMs] - Extra settle time in milliseconds.
 */
export async function gotoListing(page, url, waitMs = DEFAULT_WAIT_MS) {
  await page.goto(url, { waitUntil: "domcontentloaded", timeout: NAV_TIMEOUT_MS });
  // A page that never reaches network idle is not treated as an error.
  await page.waitForLoadState("networkidle", { timeout: NETWORK_IDLE_TIMEOUT_MS }).catch(() => {});
  await sleep(waitMs);
}

/**
 * Tries to click away consent banners and similar overlays by looking for
 * buttons and links with common dismissal labels. Failures are ignored.
 *
 * @param {object} page - Page object exposing `getByRole`.
 */
export async function dismissCommonOverlays(page) {
  const dismissLabels = [
    /accept/i,
    /agree/i,
    /close/i,
    /got it/i,
    /continue/i,
    /dismiss/i,
    /not now/i,
  ];

  for (const label of dismissLabels) {
    const targets = [
      page.getByRole("button", { name: label }).first(),
      page.getByRole("link", { name: label }).first(),
    ];

    for (const target of targets) {
      try {
        if (await target.count()) {
          await target.click({ timeout: 2500 });
          await sleep(300);
        }
      } catch {
        // Best-effort overlay dismissal only.
      }
    }
  }
}

/**
 * Clicks the first element matching one of `labels`. For each label a button
 * is tried first, then a link, then any element with that text.
 *
 * @param {object} page - Page object exposing `getByRole` and `getByText`.
 * @param {Iterable<string|RegExp>} labels - Candidate labels, in priority order.
 * @returns {Promise<string>} String form of the label that was clicked.
 * @throws {Error} When no candidate could be clicked.
 */
export async function clickPhotoEntryPoint(page, labels) {
  for (const label of labels) {
    const targets = [
      page.getByRole("button", { name: label }).first(),
      page.getByRole("link", { name: label }).first(),
      page.getByText(label).first(),
    ];

    for (const target of targets) {
      try {
        if (await target.count()) {
          await target.scrollIntoViewIfNeeded().catch(() => {});
          await target.click({ timeout: CLICK_TIMEOUT_MS });
          return label.toString();
        }
      } catch {
        // Keep trying the next candidate.
      }
    }
  }

  throw new Error("Could not find a photo entry point.");
}

/**
 * Waits for the photo experience to appear after a click: whichever comes
 * first of a URL change, network idle, or `waitMs` elapsing, followed by a
 * further `waitMs` of settle time.
 *
 * @param {object} page - Page object exposing `waitForURL` and `waitForLoadState`.
 * @param {string} previousUrl - URL the page had before the click.
 * @param {number} [waitMs] - Settle time in milliseconds.
 */
export async function waitForPhotoExperience(page, previousUrl, waitMs = DEFAULT_WAIT_MS) {
  await Promise.race([
    page.waitForURL((url) => url.toString() !== previousUrl, { timeout: NAV_TIMEOUT_MS }).catch(() => {}),
    page.waitForLoadState("networkidle", { timeout: NETWORK_IDLE_TIMEOUT_MS }).catch(() => {}),
    sleep(waitMs),
  ]);
  await sleep(waitMs);
}

/**
 * Repeatedly scrolls to the bottom of the page until the scroll height stops
 * changing between passes (or `passes` is exhausted), then scrolls back to
 * the top.
 *
 * @param {object} page - Page object exposing `evaluate`.
 * @param {number} [passes] - Maximum number of scroll passes.
 */
export async function scrollUntilSettled(page, passes = MAX_SCROLL_PASSES) {
  let previousHeight = 0;

  for (let i = 0; i < passes; i += 1) {
    // Height is sampled before this pass scrolls, so it reflects the growth
    // caused by the previous pass.
    const currentHeight = await page.evaluate(() => {
      const root = document.scrollingElement || document.documentElement || document.body;
      return root ? root.scrollHeight : 0;
    });

    await page.evaluate(() => {
      const root = document.scrollingElement || document.documentElement || document.body;
      if (root) root.scrollTo({ top: root.scrollHeight, behavior: "instant" });
    });
    await sleep(SCROLL_PAUSE_MS);

    if (currentHeight === previousHeight) {
      break;
    }
    previousHeight = currentHeight;
  }

  await page.evaluate(() => {
    const root = document.scrollingElement || document.documentElement || document.body;
    if (root) root.scrollTo({ top: 0, behavior: "instant" });
  });
  await sleep(250);
}

// Coerces a reported dimension to a finite number; anything else means "unknown" (0).
function toDimension(value) {
  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : 0;
}

// Lower-cases filter fragments so they compare against lower-cased hosts and paths.
function toLowerCaseParts(parts) {
  return (parts || []).map((part) => String(part).toLowerCase());
}

/**
 * Filters and de-duplicates raw image candidates.
 *
 * A candidate is dropped when its URL is empty, a `data:` URL, or not an
 * absolute URL; when it fails the host/path filters; or when a known
 * (non-zero) dimension is below the minimum. Query string and fragment are
 * stripped before de-duplication, so the returned URLs carry neither.
 *
 * @param {Array<{url: string, width?: number, height?: number,
 *   naturalWidth?: number, naturalHeight?: number}>} candidates
 * @param {object} [options]
 * @param {string[]} [options.hostIncludes] - Keep only hosts containing one of these (case-insensitive).
 * @param {string[]} [options.hostExcludes] - Drop hosts containing any of these (case-insensitive).
 * @param {string[]} [options.pathnameIncludes] - Keep only paths containing one of these (case-insensitive).
 * @param {number} [options.minWidth] - Minimum width for candidates with a known width.
 * @param {number} [options.minHeight] - Minimum height for candidates with a known height.
 * @returns {Array<{url: string, width: number, height: number, host: string, pathname: string}>}
 */
export function normalizeImageCandidates(candidates, options = {}) {
  const {
    hostIncludes = [],
    hostExcludes = [],
    pathnameIncludes = [],
    minWidth = LARGE_IMAGE_MIN_WIDTH,
    minHeight = LARGE_IMAGE_MIN_HEIGHT,
  } = options;

  // Host and pathname are lower-cased below, so the filters must be as well.
  const wantedHosts = toLowerCaseParts(hostIncludes);
  const blockedHosts = toLowerCaseParts(hostExcludes);
  const wantedPaths = toLowerCaseParts(pathnameIncludes);

  const seen = new Set();
  const normalized = [];

  for (const candidate of candidates || []) {
    const rawUrl = typeof candidate?.url === "string" ? candidate.url.trim() : "";
    if (!rawUrl || /^data:/i.test(rawUrl)) continue;

    let parsed;
    try {
      parsed = new URL(rawUrl);
    } catch {
      continue;
    }

    const host = parsed.hostname.toLowerCase();
    const pathname = parsed.pathname.toLowerCase();
    const width = toDimension(candidate.width) || toDimension(candidate.naturalWidth);
    const height = toDimension(candidate.height) || toDimension(candidate.naturalHeight);

    if (wantedHosts.length && !wantedHosts.some((part) => host.includes(part))) continue;
    if (blockedHosts.some((part) => host.includes(part))) continue;
    if (wantedPaths.length && !wantedPaths.some((part) => pathname.includes(part))) continue;
    // A dimension of 0 means "unknown" and is not held against the candidate.
    if (width && width < minWidth) continue;
    if (height && height < minHeight) continue;

    parsed.hash = "";
    parsed.search = "";
    const canonical = parsed.toString();
    if (seen.has(canonical)) continue;
    seen.add(canonical);

    normalized.push({ url: canonical, width, height, host, pathname });
  }

  return normalized;
}

/**
 * Collects image URL candidates from the rendered document: `<img>` sources
 * and srcsets, `<source>` srcsets, and anchors that link to image files.
 * URLs are resolved against the document base URL, so relative and
 * protocol-relative values come back absolute.
 *
 * @param {object} page - Page object exposing `evaluate`.
 * @returns {Promise<Array<{url: string, width: number, height: number}>>}
 *   Candidates in document order; width/height are 0 when unknown.
 */
export async function collectRenderedImageCandidates(page) {
  // The callback is handed to page.evaluate, so it must be self-contained
  // and cannot reference anything from module scope.
  return page.evaluate(() => {
    const out = [];

    const addUrl = (url, width, height) => {
      if (!url) return;
      let absolute;
      try {
        // Attribute values (href, srcset) may be relative to the document.
        absolute = new URL(url, document.baseURI).href;
      } catch {
        return;
      }
      out.push({ url: absolute, width: Number(width || 0), height: Number(height || 0) });
    };

    // Extracts the URLs from a srcset value. A candidate's URL runs up to the
    // next whitespace, so commas inside a URL do not split it; a candidate
    // ends at a comma trailing the URL or following its descriptors.
    const parseSrcset = (srcset) => {
      if (!srcset) return [];
      const urls = [];
      const length = srcset.length;
      let index = 0;

      while (index < length) {
        while (index < length && /[\s,]/.test(srcset[index])) index += 1;
        if (index >= length) break;

        let end = index;
        while (end < length && !/\s/.test(srcset[end])) end += 1;
        let url = srcset.slice(index, end);
        index = end;

        if (url.endsWith(",")) {
          // "a.jpg, b.jpg": the comma terminates a descriptor-less candidate.
          url = url.replace(/,+$/, "");
        } else {
          // Skip descriptors such as "2x" or "800w" up to the separating comma.
          while (index < length && srcset[index] !== ",") index += 1;
        }

        if (url) urls.push(url);
      }

      return urls;
    };

    const all = Array.from(document.querySelectorAll("img, source"));
    for (const node of all) {
      if (node instanceof HTMLImageElement) {
        const width = node.naturalWidth || node.clientWidth;
        const height = node.naturalHeight || node.clientHeight;
        addUrl(node.currentSrc || node.src, width, height);
        for (const url of parseSrcset(node.srcset)) {
          addUrl(url, width, height);
        }
      } else if (node instanceof HTMLSourceElement) {
        for (const url of parseSrcset(node.srcset)) {
          addUrl(url, 0, 0);
        }
      }
    }

    for (const anchor of Array.from(document.querySelectorAll("a[href]"))) {
      const href = anchor.getAttribute("href") || "";
      if (/\.(?:jpg|jpeg|png|webp)(?:$|[?#])/i.test(href)) {
        addUrl(href, 0, 0);
      }
    }

    return out;
  });
}

/**
 * Assembles the result object reported by a scraping run.
 *
 * @param {object} params
 * @param {string} params.requestedUrl - URL the run was asked to open.
 * @param {object} params.page - Page object; only `page.url()` is read.
 * @param {string} params.clickedLabel - Label of the photo entry point that was clicked.
 * @param {string[]} [params.imageUrls] - Collected image URLs.
 * @param {string} params.source - Identifier of the site or scraper.
 * @param {string[]} [params.notes] - Free-form notes about the run.
 * @returns {object} Result record; `title` is always null here.
 */
export function buildResult({
  requestedUrl,
  page,
  clickedLabel,
  imageUrls = [],
  source,
  notes = [],
}) {
  return {
    source,
    requestedUrl,
    finalUrl: page.url(),
    title: null,
    clickedLabel,
    photoCount: imageUrls.length,
    imageUrls,
    notes,
  };
}