Harden assessor fallback after Zillow photo failure

This commit is contained in:
2026-03-28 03:17:51 -05:00
parent 3335e96d35
commit 54854edfc6
5 changed files with 115 additions and 74 deletions

View File

@@ -8,6 +8,7 @@ import {
dismissCommonOverlays,
fail,
gotoListing,
isPageClosedError,
normalizeImageCandidates,
parseTarget,
runWithOperationTimeout,
@@ -107,85 +108,98 @@ async function collectZillowStructuredPhotoCandidates(page) {
export async function extractZillowPhotos(rawUrl, options = {}) {
const requestedUrl = parseTarget(rawUrl);
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
const closeContext = async () => {
await context.close().catch(() => {});
};
const maxAttempts = 2;
let lastError = null;
try {
return await runWithOperationTimeout(
"Zillow photo extraction",
async () => {
await gotoListing(page, requestedUrl);
await dismissCommonOverlays(page);
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
const closeContext = async () => {
await context.close().catch(() => {});
};
const expectedPhotoCount = await getAnnouncedPhotoCount(page);
const beforeUrl = page.url();
let clickedLabel = null;
let clickError = null;
try {
return await runWithOperationTimeout(
"Zillow photo extraction",
async () => {
await gotoListing(page, requestedUrl);
await dismissCommonOverlays(page);
try {
clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS);
await waitForPhotoExperience(page, beforeUrl);
await scrollUntilSettled(page);
await sleep(1200);
} catch (error) {
clickError = error instanceof Error ? error.message : String(error);
const expectedPhotoCount = await getAnnouncedPhotoCount(page);
const beforeUrl = page.url();
let clickedLabel = null;
let clickError = null;
try {
clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS);
await waitForPhotoExperience(page, beforeUrl);
await scrollUntilSettled(page);
await sleep(1200);
} catch (error) {
clickError = error instanceof Error ? error.message : String(error);
}
const [structuredCandidates, renderedCandidates] = await Promise.all([
collectZillowStructuredPhotoCandidates(page),
collectZillowPhotoCandidates(page),
]);
const candidates = [...structuredCandidates, ...renderedCandidates];
const normalized = normalizeImageCandidates(candidates, {
hostIncludes: ["photos.zillowstatic.com"],
minWidth: 240,
minHeight: 180,
});
const photos = collapseZillowPhotos(normalized);
if (!photos.length) {
fail(
"Zillow photo extraction failed.",
clickError || "No Zillow image URLs were found on the rendered listing page."
);
}
const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true;
const notes = [];
if (clickedLabel) {
notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs.");
} else {
notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path.");
}
if (clickError) {
notes.push(`All-photos click path was not required: ${clickError}`);
}
if (attempt > 1) {
notes.push(`Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly.`);
}
return {
source: "zillow",
requestedUrl,
finalUrl: page.url(),
title: await page.title(),
clickedLabel,
expectedPhotoCount,
complete,
photoCount: photos.length,
imageUrls: photos.map((photo) => photo.url),
notes,
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
}
const [structuredCandidates, renderedCandidates] = await Promise.all([
collectZillowStructuredPhotoCandidates(page),
collectZillowPhotoCandidates(page),
]);
const candidates = [...structuredCandidates, ...renderedCandidates];
const normalized = normalizeImageCandidates(candidates, {
hostIncludes: ["photos.zillowstatic.com"],
minWidth: 240,
minHeight: 180,
});
const photos = collapseZillowPhotos(normalized);
if (!photos.length) {
fail(
"Zillow photo extraction failed.",
clickError || "No Zillow image URLs were found on the rendered listing page."
);
}
const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true;
const notes = [];
if (clickedLabel) {
notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs.");
} else {
notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path.");
}
if (clickError) {
notes.push(`All-photos click path was not required: ${clickError}`);
}
return {
source: "zillow",
requestedUrl,
finalUrl: page.url(),
title: await page.title(),
clickedLabel,
expectedPhotoCount,
complete,
photoCount: photos.length,
imageUrls: photos.map((photo) => photo.url),
notes,
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
);
} catch (error) {
lastError = error;
if (!(attempt < maxAttempts && isPageClosedError(error))) {
throw new Error(error instanceof Error ? error.message : String(error));
}
);
} catch (error) {
throw new Error(error instanceof Error ? error.message : String(error));
} finally {
await closeContext();
} finally {
await closeContext();
}
}
throw new Error(lastError instanceof Error ? lastError.message : String(lastError));
}
async function main() {