Add Zillow and HAR photo extractors

This commit is contained in:
2026-03-27 17:35:46 -05:00
parent e7c56fe760
commit eeea0c8ef1
11 changed files with 873 additions and 8 deletions

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env node
import {
clickPhotoEntryPoint,
collectRenderedImageCandidates,
createPageSession,
dismissCommonOverlays,
fail,
gotoListing,
normalizeImageCandidates,
parseTarget,
scrollUntilSettled,
sleep,
waitForPhotoExperience,
} from "./real-estate-photo-common.js";
// Label patterns passed to clickPhotoEntryPoint() in main(). Each label is
// compiled into a whole-string (^...$), case-insensitive pattern, in the order
// listed here.
// NOTE(review): presumably matched against the visible text of the control
// that opens the all-photos view; confirm in real-estate-photo-common.js.
const HAR_LABELS = ["Show all photos", "View all photos", "All photos", "Photos"].map(
  (label) => new RegExp(`^${label}$`, "i"),
);
/**
 * Reads the photo count that the page text announces.
 *
 * Runs inside the page via `page.evaluate`, scanning `document.body.innerText`
 * for the first occurrence of "<digits> photos" (case-insensitive).
 *
 * @param {object} page - Page handle exposing `evaluate(fn)`.
 * @returns {Promise<number|null>} The announced count, or `null` when the page
 *   text contains no such phrase (or there is no body).
 */
async function getAnnouncedPhotoCount(page) {
  // Must stay self-contained: it is handed to page.evaluate, so it may only
  // touch page globals such as `document`.
  const readAnnouncedCount = () => {
    const bodyText = document.body?.innerText || "";
    const found = /(\d+)\s+photos/i.exec(bodyText);
    if (!found) {
      return null;
    }
    return Number(found[1]);
  };
  return page.evaluate(readAnnouncedCount);
}
/**
 * CLI entry point for the HAR photo extractor.
 *
 * Takes the listing target from the first command-line argument, opens it in
 * a page session (headless unless the HEADLESS environment variable is exactly
 * "false"), clicks through to the photo view, collects the rendered image
 * candidates, and writes a pretty-printed JSON summary to stdout.
 *
 * Any failure inside the session is reported through `fail()` after the
 * browser context has been closed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const requestedUrl = parseTarget(process.argv[2]);
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
  try {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);
    // Read the announced count before navigating into the photo view so it can
    // be compared against what was actually extracted.
    const expectedPhotoCount = await getAnnouncedPhotoCount(page);
    const beforeUrl = page.url();
    const clickedLabel = await clickPhotoEntryPoint(page, HAR_LABELS);
    await waitForPhotoExperience(page, beforeUrl);
    await scrollUntilSettled(page);
    // NOTE(review): fixed 1.2 s pause, presumably to let late images finish
    // rendering after scrolling; confirm it is still needed.
    await sleep(1200);
    const candidates = await collectRenderedImageCandidates(page);
    const photos = normalizeImageCandidates(candidates, {
      hostIncludes: ["pics.harstatic.com", "photos.harstatic.com"],
      minWidth: 240,
      minHeight: 180,
    });
    if (!photos.length) {
      // Throw instead of calling fail() here: the catch below closes the
      // browser context first and then reports this exact message through
      // fail(), so the context is not left open on this path.
      // NOTE(review): assumes fail() ends the process or throws; confirm in
      // real-estate-photo-common.js.
      throw new Error("No large image URLs were found after opening the HAR all-photos view.");
    }
    const result = {
      source: "har",
      requestedUrl,
      finalUrl: page.url(),
      title: await page.title(),
      clickedLabel,
      expectedPhotoCount,
      // With no announced count there is nothing to compare against, so the
      // extraction is reported as complete.
      complete: expectedPhotoCount ? photos.length >= expectedPhotoCount : true,
      photoCount: photos.length,
      imageUrls: photos.map((photo) => photo.url),
      notes: ["Opened HAR all-photos flow and extracted large rendered image URLs from the photo page."],
    };
    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
    await context.close();
  } catch (error) {
    try {
      await context.close();
    } catch {
      // Ignore close errors after the primary failure.
    }
    fail("HAR photo extraction failed.", error instanceof Error ? error.message : String(error));
  }
}
// Script entry point: start the extraction when this file is executed.
main();