Prefer structured Zillow photo data before click path

This commit is contained in:
2026-03-28 03:30:42 -05:00
parent 54854edfc6
commit 446d43cc78
5 changed files with 75 additions and 13 deletions

View File

@@ -16,7 +16,10 @@ import {
sleep,
waitForPhotoExperience,
} from "./real-estate-photo-common.js";
import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js";
import {
extractZillowStructuredPhotoCandidatesFromNextDataScript,
shouldUseStructuredZillowPhotos,
} from "./zillow-photo-data.js";
const ZILLOW_LABELS = [
/^See all(?: \d+)? photos$/i,
@@ -106,6 +109,15 @@ async function collectZillowStructuredPhotoCandidates(page) {
return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || "");
}
/**
 * Run raw Zillow image candidates through the shared normalization step and
 * then the Zillow-specific collapse step.
 *
 * The candidates are handed to `normalizeImageCandidates` with a host filter
 * of "photos.zillowstatic.com" and minimum dimensions of 240x180; whatever
 * that returns is passed straight to `collapseZillowPhotos`.
 *
 * NOTE(review): both helpers are defined outside this view; presumably the
 * first filters by host and size and the second merges variants of the same
 * photo. Confirm against their definitions.
 *
 * @param {Array} candidates - Raw image candidates gathered from the page.
 * @returns {*} The value returned by `collapseZillowPhotos`.
 */
function normalizeZillowPhotos(candidates) {
  const zillowImageFilter = {
    hostIncludes: ["photos.zillowstatic.com"],
    minWidth: 240,
    minHeight: 180,
  };
  return collapseZillowPhotos(normalizeImageCandidates(candidates, zillowImageFilter));
}
export async function extractZillowPhotos(rawUrl, options = {}) {
const requestedUrl = parseTarget(rawUrl);
const maxAttempts = 2;
@@ -125,6 +137,33 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
await dismissCommonOverlays(page);
const expectedPhotoCount = await getAnnouncedPhotoCount(page);
const initialStructuredPhotos = normalizeZillowPhotos(
await collectZillowStructuredPhotoCandidates(page)
);
if (shouldUseStructuredZillowPhotos(initialStructuredPhotos, expectedPhotoCount)) {
const notes = [
"The rendered Zillow listing shell already exposed a complete structured photo set, so extraction completed without relying on the all-photos click path.",
];
if (attempt > 1) {
notes.push(
"Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly."
);
}
return {
source: "zillow",
requestedUrl,
finalUrl: page.url(),
title: await page.title(),
clickedLabel: null,
expectedPhotoCount,
complete: true,
photoCount: initialStructuredPhotos.length,
imageUrls: initialStructuredPhotos.map((photo) => photo.url),
notes,
};
}
const beforeUrl = page.url();
let clickedLabel = null;
let clickError = null;
@@ -142,13 +181,7 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
collectZillowStructuredPhotoCandidates(page),
collectZillowPhotoCandidates(page),
]);
const candidates = [...structuredCandidates, ...renderedCandidates];
const normalized = normalizeImageCandidates(candidates, {
hostIncludes: ["photos.zillowstatic.com"],
minWidth: 240,
minHeight: 180,
});
const photos = collapseZillowPhotos(normalized);
const photos = normalizeZillowPhotos([...structuredCandidates, ...renderedCandidates]);
if (!photos.length) {
fail(