Prefer structured Zillow photo data before click path
This commit is contained in:
@@ -16,7 +16,10 @@ import {
|
||||
sleep,
|
||||
waitForPhotoExperience,
|
||||
} from "./real-estate-photo-common.js";
|
||||
import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js";
|
||||
import {
|
||||
extractZillowStructuredPhotoCandidatesFromNextDataScript,
|
||||
shouldUseStructuredZillowPhotos,
|
||||
} from "./zillow-photo-data.js";
|
||||
|
||||
const ZILLOW_LABELS = [
|
||||
/^See all(?: \d+)? photos$/i,
|
||||
@@ -106,6 +109,15 @@ async function collectZillowStructuredPhotoCandidates(page) {
|
||||
return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || "");
|
||||
}
|
||||
|
||||
/**
 * Runs raw photo candidates through the shared Zillow normalization steps.
 *
 * The candidates are passed to normalizeImageCandidates together with the
 * Zillow host and minimum-size options, and that output is then handed to
 * collapseZillowPhotos.
 *
 * @param {*} candidates - Raw candidates, forwarded untouched to
 *   normalizeImageCandidates (exact shape is defined by that helper).
 * @returns {*} Whatever collapseZillowPhotos returns; callers in this module
 *   read its `.length` and each entry's `url`.
 */
function normalizeZillowPhotos(candidates) {
  const zillowImageFilter = {
    hostIncludes: ["photos.zillowstatic.com"],
    minWidth: 240,
    minHeight: 180,
  };
  return collapseZillowPhotos(normalizeImageCandidates(candidates, zillowImageFilter));
}
|
||||
|
||||
export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
const requestedUrl = parseTarget(rawUrl);
|
||||
const maxAttempts = 2;
|
||||
@@ -125,6 +137,33 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
await dismissCommonOverlays(page);
|
||||
|
||||
const expectedPhotoCount = await getAnnouncedPhotoCount(page);
|
||||
const initialStructuredPhotos = normalizeZillowPhotos(
|
||||
await collectZillowStructuredPhotoCandidates(page)
|
||||
);
|
||||
|
||||
if (shouldUseStructuredZillowPhotos(initialStructuredPhotos, expectedPhotoCount)) {
|
||||
const notes = [
|
||||
"The rendered Zillow listing shell already exposed a complete structured photo set, so extraction completed without relying on the all-photos click path.",
|
||||
];
|
||||
if (attempt > 1) {
|
||||
notes.push(
|
||||
"Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly."
|
||||
);
|
||||
}
|
||||
return {
|
||||
source: "zillow",
|
||||
requestedUrl,
|
||||
finalUrl: page.url(),
|
||||
title: await page.title(),
|
||||
clickedLabel: null,
|
||||
expectedPhotoCount,
|
||||
complete: true,
|
||||
photoCount: initialStructuredPhotos.length,
|
||||
imageUrls: initialStructuredPhotos.map((photo) => photo.url),
|
||||
notes,
|
||||
};
|
||||
}
|
||||
|
||||
const beforeUrl = page.url();
|
||||
let clickedLabel = null;
|
||||
let clickError = null;
|
||||
@@ -142,13 +181,7 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
collectZillowStructuredPhotoCandidates(page),
|
||||
collectZillowPhotoCandidates(page),
|
||||
]);
|
||||
const candidates = [...structuredCandidates, ...renderedCandidates];
|
||||
const normalized = normalizeImageCandidates(candidates, {
|
||||
hostIncludes: ["photos.zillowstatic.com"],
|
||||
minWidth: 240,
|
||||
minHeight: 180,
|
||||
});
|
||||
const photos = collapseZillowPhotos(normalized);
|
||||
const photos = normalizeZillowPhotos([...structuredCandidates, ...renderedCandidates]);
|
||||
|
||||
if (!photos.length) {
|
||||
fail(
|
||||
|
||||
Reference in New Issue
Block a user