#!/usr/bin/env node
/**
 * Command-line scraper that collects the photo URLs of a Zillow listing and
 * prints a JSON report to stdout.
 *
 * The listing target is read from the first CLI argument (process.argv[2]).
 * Setting the HEADLESS environment variable to "false" passes
 * `headless: false` to createPageSession.
 */
import {
  clickPhotoEntryPoint,
  createPageSession,
  dismissCommonOverlays,
  fail,
  gotoListing,
  normalizeImageCandidates,
  parseTarget,
  scrollUntilSettled,
  sleep,
  waitForPhotoExperience,
} from "./real-estate-photo-common.js";
import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js";

// Label patterns handed to clickPhotoEntryPoint to locate the "all photos" entry point.
// NOTE(review): the second pattern is already matched by the first (its digit group is
// optional); it is kept because the matching rules of clickPhotoEntryPoint are not
// visible from this file.
const ZILLOW_LABELS = [
  /^See all(?: \d+)? photos$/i,
  /^See all photos$/i,
  /^Photos$/i,
];

// Headline passed as the first argument of every fail() call in this script.
const FAILURE_MESSAGE = "Zillow photo extraction failed.";

// URL suffix that receives the fixed top preference when ranking variants.
// NOTE(review): presumably Zillow's full-resolution variant - confirm.
const PREFERRED_VARIANT_PATTERN = /-p_d\.(?:jpe?g|webp)$/i;

// URL fragment whose captured number is used as the preference of other variants.
// NOTE(review): presumably the rendition width in pixels - confirm.
const NUMBERED_VARIANT_PATTERN = /-cc_ft_(\d+)\./i;

// Preference awarded to URLs matching PREFERRED_VARIANT_PATTERN.
const PREFERRED_VARIANT_SCORE = 10_000_000;

/**
 * Converts any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Reads the photo count announced in the page text ("See all N photos").
 *
 * @param {object} page - Page handle exposing `evaluate`.
 * @returns {Promise<number|null>} The announced count, or null when the body
 *   text contains no such phrase.
 */
async function getAnnouncedPhotoCount(page) {
  return page.evaluate(() => {
    const text = document.body?.innerText || "";
    const match = text.match(/See all\s+(\d+)\s+photos/i);
    return match ? Number(match[1]) : null;
  });
}

/**
 * Ranks one image candidate: variant preference plus a size score.
 *
 * The size score is width * height, falling back to whichever single
 * dimension is known, or 0 when neither is.
 *
 * @param {{url: string, width?: number, height?: number}} candidate
 * @returns {number} Higher means a more desirable variant.
 */
function scoreZillowPhotoCandidate(candidate) {
  const width = candidate.width || 0;
  const height = candidate.height || 0;
  const sizeScore = width * height || width || height;
  const preference = PREFERRED_VARIANT_PATTERN.test(candidate.url)
    ? PREFERRED_VARIANT_SCORE
    : Number(candidate.url.match(NUMBERED_VARIANT_PATTERN)?.[1] || 0);
  return preference + sizeScore;
}

/**
 * Keeps one candidate per photo, choosing the highest-scoring variant.
 *
 * Candidates are grouped by the part of the file name before the first "-".
 * On equal scores the candidate seen first is kept.
 *
 * @param {Array<{url: string, pathname: string, width?: number, height?: number}>} candidates
 *   Candidates as returned by normalizeImageCandidates.
 * @returns {Array<object>} The winning candidates, sorted by URL.
 */
function collapseZillowPhotos(candidates) {
  const bestByBaseId = new Map();
  for (const candidate of candidates) {
    const filename = candidate.pathname.split("/").pop() || "";
    const baseId = filename.split("-")[0];
    const score = scoreZillowPhotoCandidate(candidate);
    const best = bestByBaseId.get(baseId);
    // Strict comparison so an earlier candidate wins a tie.
    if (!best || score > best.score) {
      bestByBaseId.set(baseId, { candidate, score });
    }
  }
  return Array.from(bestByBaseId.values(), (entry) => entry.candidate).sort((a, b) =>
    a.url.localeCompare(b.url)
  );
}

/**
 * Collects image URLs from the rendered DOM (media-stream tiles and listing images).
 *
 * The callback runs inside the page, so it must stay self-contained and cannot
 * reference anything from this module's scope.
 *
 * @param {object} page - Page handle exposing `evaluate`.
 * @returns {Promise<Array<{url: string, width: number, height: number}>>}
 *   Raw candidates; the same URL may appear more than once because the
 *   selectors overlap. `<source>` entries carry 0 for both dimensions.
 */
async function collectZillowPhotoCandidates(page) {
  return page.evaluate(() => {
    const out = [];
    const add = (url, width, height) => {
      if (url) {
        out.push({ url, width: Number(width || 0), height: Number(height || 0) });
      }
    };
    // Keeps only the URL of each srcset entry, dropping the width/density descriptor.
    const parseSrcset = (srcset) =>
      (srcset || "")
        .split(",")
        .map((entry) => entry.trim().split(/\s+/)[0])
        .filter(Boolean);

    const selectors = [
      ".media-stream-tile img",
      ".media-stream-tile source",
      '[class*="media-stream"] img',
      '[class*="media-stream"] source',
      'img[alt*="image of "]',
      'img[alt*="image of this home"]',
    ];
    const nodes = selectors.flatMap((selector) => Array.from(document.querySelectorAll(selector)));

    for (const node of nodes) {
      if (node instanceof HTMLImageElement) {
        // Prefer the decoded image size; fall back to the laid-out size.
        const width = node.naturalWidth || node.clientWidth;
        const height = node.naturalHeight || node.clientHeight;
        add(node.currentSrc || node.src, width, height);
        for (const url of parseSrcset(node.srcset)) {
          add(url, width, height);
        }
      } else if (node instanceof HTMLSourceElement) {
        for (const url of parseSrcset(node.srcset)) {
          add(url, 0, 0);
        }
      }
    }
    return out;
  });
}

/**
 * Collects photo candidates from the page's `#__NEXT_DATA__` script.
 *
 * @param {object} page - Page handle exposing `locator`.
 * @returns {Promise<*>} Whatever extractZillowStructuredPhotoCandidatesFromNextDataScript
 *   returns for the script text; an empty string is passed when the script
 *   cannot be read. main spreads the result, so it is expected to be iterable.
 */
async function collectZillowStructuredPhotoCandidates(page) {
  const scriptText = await page.locator("#__NEXT_DATA__").textContent().catch(() => null);
  return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || "");
}

/**
 * Runs the extraction flow on an open page and builds the JSON report.
 *
 * @param {object} page - Page handle from createPageSession.
 * @param {*} requestedUrl - Listing target as returned by parseTarget.
 * @returns {Promise<object>} The report object printed by main.
 * @throws {Error} When no photo survives normalization; the message is the
 *   click-path error if there was one, otherwise a generic explanation.
 */
async function extractZillowListingPhotos(page, requestedUrl) {
  await gotoListing(page, requestedUrl);
  await dismissCommonOverlays(page);

  const expectedPhotoCount = await getAnnouncedPhotoCount(page);
  const beforeUrl = page.url();

  // The all-photos click path is best effort: its failure is recorded and
  // extraction continues with whatever the page already exposes.
  let clickedLabel = null;
  let clickError = null;
  try {
    clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS);
    await waitForPhotoExperience(page, beforeUrl);
    await scrollUntilSettled(page);
    await sleep(1200);
  } catch (error) {
    clickError = describeError(error);
  }

  const [structuredCandidates, renderedCandidates] = await Promise.all([
    collectZillowStructuredPhotoCandidates(page),
    collectZillowPhotoCandidates(page),
  ]);
  const normalized = normalizeImageCandidates([...structuredCandidates, ...renderedCandidates], {
    hostIncludes: ["photos.zillowstatic.com"],
    minWidth: 240,
    minHeight: 180,
  });
  const photos = collapseZillowPhotos(normalized);

  if (!photos.length) {
    throw new Error(clickError || "No Zillow image URLs were found on the rendered listing page.");
  }

  // With no announced count there is nothing to compare against, so the run
  // is reported as complete.
  const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true;

  const notes = [];
  if (clickedLabel) {
    notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs.");
  } else {
    notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path.");
  }
  if (clickError) {
    notes.push(`All-photos click path was not required: ${clickError}`);
  }

  return {
    source: "zillow",
    requestedUrl,
    finalUrl: page.url(),
    title: await page.title(),
    clickedLabel,
    expectedPhotoCount,
    complete,
    photoCount: photos.length,
    imageUrls: photos.map((photo) => photo.url),
    notes,
  };
}

/**
 * Entry point: opens a page session, extracts the photos, prints the report
 * and always closes the browser context.
 *
 * fail() is defined in ./real-estate-photo-common.js and its behaviour
 * (exit vs. throw) is not visible here. It is therefore called exactly once,
 * after the context has been closed and outside the try block, so the context
 * cannot leak and the failure cannot be caught and reported a second time.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const requestedUrl = parseTarget(process.argv[2]);
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  let failureDetail = null;
  try {
    const result = await extractZillowListingPhotos(page, requestedUrl);
    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
  } catch (error) {
    failureDetail = describeError(error);
  } finally {
    try {
      await context.close();
    } catch (closeError) {
      // A close error is reported only when nothing else went wrong; after a
      // primary failure it is secondary and dropped.
      if (failureDetail === null) {
        failureDetail = describeError(closeError);
      }
    }
  }

  if (failureDetail !== null) {
    fail(FAILURE_MESSAGE, failureDetail);
  }
}

// Errors raised before the session exists (argument parsing, browser launch)
// are reported through fail() instead of surfacing as an unhandled rejection.
main().catch((error) => {
  fail(FAILURE_MESSAGE, describeError(error));
});