Files
stef-openclaw-skills/skills/web-automation/scripts/zillow-photos.js

196 lines
6.2 KiB
JavaScript

#!/usr/bin/env node
import { pathToFileURL } from "node:url";
import {
clickPhotoEntryPoint,
createPageSession,
dismissCommonOverlays,
fail,
gotoListing,
normalizeImageCandidates,
parseTarget,
scrollUntilSettled,
sleep,
waitForPhotoExperience,
} from "./real-estate-photo-common.js";
import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js";
// Label patterns passed to clickPhotoEntryPoint() by extractZillowPhotos() to
// find the control that opens Zillow's photo view. Every pattern is anchored
// at both ends and case-insensitive. How the helper applies them is defined in
// real-estate-photo-common.js (not visible here).
const ZILLOW_LABELS = [
// Matches "See all photos" as well as "See all <N> photos" (the count is optional).
/^See all(?: \d+)? photos$/i,
// NOTE(review): every string this matches is already matched by the pattern
// above; presumably kept for explicitness — confirm before removing.
/^See all photos$/i,
// Matches the bare label "Photos".
/^Photos$/i,
];
/**
 * Looks for "See all <N> photos" in the page's visible body text and reports N.
 *
 * The callback is handed to `page.evaluate`, so it reads the global `document`
 * of whatever environment `page.evaluate` runs it in (presumably the browser
 * page — keep the callback self-contained).
 *
 * @param {{ evaluate: Function }} page - Page handle exposing `evaluate`.
 * @returns {Promise<number|null>} The announced count, or `null` when the
 *   text is not present.
 */
async function getAnnouncedPhotoCount(page) {
  const readAnnouncedCount = () => {
    const bodyText = document.body?.innerText || "";
    const found = /See all\s+(\d+)\s+photos/i.exec(bodyText);
    if (!found) {
      return null;
    }
    return Number(found[1]);
  };
  return page.evaluate(readAnnouncedCount);
}
/**
 * Ranks one image candidate; higher is better.
 *
 * The score is the sum of:
 *  - a URL preference: 10,000,000 when the URL ends in `-p_d.<jpg|jpeg|webp>`,
 *    otherwise the number N from a `-cc_ft_<N>.` segment (0 when absent) —
 *    presumably N is the rendition width; confirm against Zillow's URL scheme;
 *  - a size term: width * height, falling back to whichever single dimension
 *    is known, else 0.
 *
 * @param {{ url: string, width?: number, height?: number }} candidate
 * @returns {number}
 */
function scoreZillowPhotoCandidate(candidate) {
  const sizeScore =
    (candidate.width || 0) * (candidate.height || 0) || candidate.width || candidate.height || 0;
  const preference = /-p_d\.(?:jpe?g|webp)$/i.test(candidate.url)
    ? 10_000_000
    : Number(candidate.url.match(/-cc_ft_(\d+)\./i)?.[1] || 0);
  return preference + sizeScore;
}

/**
 * Derives the grouping key for a candidate: the part of the file name before
 * the first "-". Uses `candidate.pathname` when it is a string; otherwise the
 * path is taken from `candidate.url` with any query string/fragment removed,
 * so candidates lacking `pathname` no longer cause a TypeError.
 *
 * @param {{ url: string, pathname?: string }} candidate
 * @returns {string}
 */
function zillowPhotoBaseId(candidate) {
  const path =
    typeof candidate.pathname === "string"
      ? candidate.pathname
      : String(candidate.url).split(/[?#]/)[0];
  const filename = path.split("/").pop() || "";
  return filename.split("-")[0];
}

/**
 * Collapses multiple renditions of the same photo into the single
 * best-scoring candidate per base id.
 *
 * On equal scores the candidate seen first is kept. The result is sorted by
 * URL (`localeCompare`), not by input order.
 *
 * @param {Array<{ url: string, pathname?: string, width?: number, height?: number }>} candidates
 * @returns {Array<object>} One original candidate object per base id.
 */
function collapseZillowPhotos(candidates) {
  // Cache each winner's score so it is not recomputed on every comparison.
  const bestByBaseId = new Map();
  for (const candidate of candidates) {
    const baseId = zillowPhotoBaseId(candidate);
    const score = scoreZillowPhotoCandidate(candidate);
    const best = bestByBaseId.get(baseId);
    if (!best || score > best.score) {
      bestByBaseId.set(baseId, { candidate, score });
    }
  }
  return Array.from(bestByBaseId.values(), (entry) => entry.candidate).sort((a, b) =>
    a.url.localeCompare(b.url)
  );
}
/**
 * Gathers raw image URL candidates from the rendered document.
 *
 * For every element matched by the selectors below:
 *  - `<img>`: emits `currentSrc` (or `src`) followed by each URL in `srcset`,
 *    all tagged with the image's natural size (client size as fallback);
 *  - `<source>`: emits each URL in `srcset` with width/height 0.
 * An element matched by several selectors is emitted once per match; no
 * de-duplication happens here.
 *
 * The whole callback is passed to `page.evaluate`, so every helper it needs
 * is declared inside it (presumably it executes in the browser page — keep it
 * self-contained).
 *
 * @param {{ evaluate: Function }} page - Page handle exposing `evaluate`.
 * @returns {Promise<Array<{ url: string, width: number, height: number }>>}
 */
async function collectZillowPhotoCandidates(page) {
  return page.evaluate(() => {
    const photoSelectors = [
      ".media-stream-tile img",
      ".media-stream-tile source",
      '[class*="media-stream"] img',
      '[class*="media-stream"] source',
      'img[alt*="image of "]',
      'img[alt*="image of this home"]',
    ];
    const found = [];

    // Appends a candidate, skipping empty/missing URLs.
    const record = (url, width, height) => {
      if (!url) {
        return;
      }
      found.push({ url, width: Number(width || 0), height: Number(height || 0) });
    };

    // Returns the URL part (first whitespace-delimited token) of each srcset entry.
    const urlsFromSrcset = (srcset) => {
      const urls = [];
      for (const descriptor of (srcset || "").split(",")) {
        const [firstToken] = descriptor.trim().split(/\s+/);
        if (firstToken) {
          urls.push(firstToken);
        }
      }
      return urls;
    };

    const elements = [];
    for (const selector of photoSelectors) {
      elements.push(...Array.from(document.querySelectorAll(selector)));
    }

    elements.forEach((element) => {
      if (element instanceof HTMLImageElement) {
        const width = element.naturalWidth || element.clientWidth;
        const height = element.naturalHeight || element.clientHeight;
        const urls = [element.currentSrc || element.src, ...urlsFromSrcset(element.srcset)];
        urls.forEach((url) => record(url, width, height));
        return;
      }
      if (element instanceof HTMLSourceElement) {
        urlsFromSrcset(element.srcset).forEach((url) => record(url, 0, 0));
      }
    });

    return found;
  });
}
/**
 * Reads the text of the `#__NEXT_DATA__` element and hands it to
 * `extractZillowStructuredPhotoCandidatesFromNextDataScript`.
 *
 * A rejected `textContent()` lookup is treated the same as a missing
 * element: the extractor receives an empty string.
 *
 * @param {{ locator: Function }} page - Page handle exposing `locator`.
 * @returns {Promise<*>} Whatever the extractor returns for the script text.
 */
async function collectZillowStructuredPhotoCandidates(page) {
  // Start the lookup outside the try block so that only a rejected promise —
  // not a synchronous throw from locator()/textContent() — is swallowed.
  const pendingText = page.locator("#__NEXT_DATA__").textContent();
  let nextDataText;
  try {
    nextDataText = await pendingText;
  } catch {
    nextDataText = null;
  }
  return extractZillowStructuredPhotoCandidatesFromNextDataScript(nextDataText || "");
}
/**
 * Extracts direct Zillow photo URLs for one listing.
 *
 * Flow:
 *  1. Parse the target and open a page session (headless unless the
 *     `HEADLESS` environment variable is exactly "false").
 *  2. Load the listing, dismiss overlays, and read the announced photo count.
 *  3. Try to open the all-photos view. A failure here is recorded in
 *     `clickError` rather than aborting, because photos may still be
 *     discoverable from the page as loaded.
 *  4. Merge structured (`#__NEXT_DATA__`) and rendered-DOM candidates,
 *     normalize them, and collapse renditions to one URL per photo.
 *
 * The browser context is always closed in `finally`. Close errors are
 * ignored so they can neither mask the primary failure nor discard a result
 * that was already fully built.
 *
 * @param {string} rawUrl - Listing URL/target as accepted by `parseTarget`.
 * @returns {Promise<{
 *   source: string, requestedUrl: *, finalUrl: string, title: string,
 *   clickedLabel: *, expectedPhotoCount: (number|null), complete: boolean,
 *   photoCount: number, imageUrls: string[], notes: string[]
 * }>}
 * @throws {Error} With the original failure's message; the original error is
 *   attached as `cause` so its stack and type are not lost.
 */
export async function extractZillowPhotos(rawUrl) {
  const requestedUrl = parseTarget(rawUrl);
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
  try {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);
    const expectedPhotoCount = await getAnnouncedPhotoCount(page);
    const beforeUrl = page.url();

    let clickedLabel = null;
    let clickError = null;
    try {
      clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS);
      await waitForPhotoExperience(page, beforeUrl);
      await scrollUntilSettled(page);
      // Fixed 1.2 s pause after scrolling — presumably lets lazy-loaded images settle.
      await sleep(1200);
    } catch (error) {
      clickError = error instanceof Error ? error.message : String(error);
    }

    const [structuredCandidates, renderedCandidates] = await Promise.all([
      collectZillowStructuredPhotoCandidates(page),
      collectZillowPhotoCandidates(page),
    ]);
    const candidates = [...structuredCandidates, ...renderedCandidates];
    const normalized = normalizeImageCandidates(candidates, {
      hostIncludes: ["photos.zillowstatic.com"],
      minWidth: 240,
      minHeight: 180,
    });
    const photos = collapseZillowPhotos(normalized);
    if (!photos.length) {
      // NOTE(review): `fail` is assumed to abort (throw or exit) — confirm in
      // real-estate-photo-common.js; otherwise execution continues below.
      fail(
        "Zillow photo extraction failed.",
        clickError || "No Zillow image URLs were found on the rendered listing page."
      );
    }

    // With no announced count there is nothing to compare against, so the
    // result is reported as complete.
    const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true;

    const notes = [];
    if (clickedLabel) {
      notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs.");
    } else {
      notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path.");
    }
    if (clickError) {
      notes.push(`All-photos click path was not required: ${clickError}`);
    }

    return {
      source: "zillow",
      requestedUrl,
      finalUrl: page.url(),
      title: await page.title(),
      clickedLabel,
      expectedPhotoCount,
      complete,
      photoCount: photos.length,
      imageUrls: photos.map((photo) => photo.url),
      notes,
    };
  } catch (error) {
    // Same message as before for callers that match on it; `cause` keeps the
    // original error reachable.
    throw new Error(error instanceof Error ? error.message : String(error), { cause: error });
  } finally {
    try {
      await context.close();
    } catch {
      // Ignore close errors: cleanup is best-effort on both paths.
    }
  }
}
/**
 * CLI driver: extracts photos for the target given as the first command-line
 * argument and prints the result to stdout as 2-space-indented JSON followed
 * by a newline. Any failure is reported through `fail` with a fixed headline
 * and the error's message (or its string form for non-Error values).
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const [, , target] = process.argv;
    const extraction = await extractZillowPhotos(target);
    const serialized = JSON.stringify(extraction, null, 2);
    process.stdout.write(`${serialized}\n`);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    fail("Zillow photo extraction failed.", detail);
  }
}
// Run the CLI only when Node was launched with this file as its script
// (process.argv[1] resolves to this module's URL), not when it is imported.
if (process.argv[1]) {
  const launchedHref = pathToFileURL(process.argv[1]).href;
  if (launchedHref === import.meta.url) {
    main();
  }
}