Trust embedded Zillow photo sets without visible count
This commit is contained in:
@@ -35,6 +35,17 @@ async function getAnnouncedPhotoCount(page) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reads the photo count advertised in the page's description metadata.
 *
 * The standard `description` meta tag is inspected first, then the Open Graph
 * `og:description` tag. The count comes from the first tag whose content
 * contains a phrase such as "24 photos" (or "1 photo"); a tag that exists but
 * does not mention a count no longer hides a count carried by the other tag.
 *
 * @param {object} page - Browser page handle exposing `evaluate(fn)`.
 * @returns {Promise<number|null>} The advertised photo count, or `null` when
 *   neither description mentions one.
 */
async function getMetaDescriptionPhotoCount(page) {
  return page.evaluate(() => {
    // NOTE(review): presumably this callback is serialized into the page
    // context by the automation library, so it must stay self-contained and
    // not close over module-level names — confirm against the driver in use.
    const selectors = [
      'meta[name="description"]',
      'meta[property="og:description"]',
    ];

    for (const selector of selectors) {
      const content = document.querySelector(selector)?.getAttribute("content") ?? "";
      // `photos?` also accepts the singular form used by single-photo listings.
      const match = content.match(/\b(\d+)\s+photos?\b/i);
      if (match) {
        return Number(match[1]);
      }
    }

    return null;
  });
}
|
||||
|
||||
function collapseZillowPhotos(candidates) {
|
||||
const byBaseId = new Map();
|
||||
|
||||
@@ -137,14 +148,29 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
await dismissCommonOverlays(page);
|
||||
|
||||
const expectedPhotoCount = await getAnnouncedPhotoCount(page);
|
||||
const fallbackPhotoCount = await getMetaDescriptionPhotoCount(page);
|
||||
const initialStructuredPhotos = normalizeZillowPhotos(
|
||||
await collectZillowStructuredPhotoCandidates(page)
|
||||
);
|
||||
|
||||
if (shouldUseStructuredZillowPhotos(initialStructuredPhotos, expectedPhotoCount)) {
|
||||
if (
|
||||
shouldUseStructuredZillowPhotos(initialStructuredPhotos, {
|
||||
expectedPhotoCount,
|
||||
fallbackPhotoCount,
|
||||
})
|
||||
) {
|
||||
const notes = [
|
||||
"The rendered Zillow listing shell already exposed a complete structured photo set, so extraction completed without relying on the all-photos click path.",
|
||||
];
|
||||
if (!expectedPhotoCount && fallbackPhotoCount) {
|
||||
notes.push(
|
||||
`Trusted the embedded Zillow photo set using the page metadata count of ${fallbackPhotoCount} photos.`
|
||||
);
|
||||
} else if (!expectedPhotoCount && !fallbackPhotoCount) {
|
||||
notes.push(
|
||||
"Trusted a substantial embedded Zillow photo set even though the page did not expose an explicit photo count."
|
||||
);
|
||||
}
|
||||
if (attempt > 1) {
|
||||
notes.push(
|
||||
"Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly."
|
||||
@@ -156,7 +182,7 @@ export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
finalUrl: page.url(),
|
||||
title: await page.title(),
|
||||
clickedLabel: null,
|
||||
expectedPhotoCount,
|
||||
expectedPhotoCount: expectedPhotoCount || fallbackPhotoCount || null,
|
||||
complete: true,
|
||||
photoCount: initialStructuredPhotos.length,
|
||||
imageUrls: initialStructuredPhotos.map((photo) => photo.url),
|
||||
|
||||
Reference in New Issue
Block a user