From ece8fc548f2e192389a0c3d249107a792388b701 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sat, 28 Mar 2026 03:40:50 -0500 Subject: [PATCH] Trust embedded Zillow photo sets without visible count --- docs/web-automation.md | 1 + skills/web-automation/SKILL.md | 1 + .../scripts/zillow-photo-data.js | 26 +++++++++++--- .../scripts/zillow-photo-data.test.mjs | 35 ++++++++++++++++++- .../web-automation/scripts/zillow-photos.js | 30 ++++++++++++++-- 5 files changed, 86 insertions(+), 7 deletions(-) diff --git a/docs/web-automation.md b/docs/web-automation.md index 8a5395d..62c0286 100644 --- a/docs/web-automation.md +++ b/docs/web-automation.md @@ -182,6 +182,7 @@ node zillow-photos.js "https://www.zillow.com/homedetails/4141-Whiteley-Dr-Corpu What it does: - opens the listing page with CloakBrowser - first checks whether the rendered listing shell already exposes a complete photo set in Zillow's embedded `__NEXT_DATA__` payload +- if the visible `See all XX photos` count is missing, still trusts the embedded set when the page metadata confirms the count or when the embedded set is already clearly substantial - only tries the `See all photos` / `See all X photos` entry point when the initial structured data is incomplete - returns direct `photos.zillowstatic.com` image URLs as JSON - fails fast with a timeout if the browser-backed extraction stalls diff --git a/skills/web-automation/SKILL.md b/skills/web-automation/SKILL.md index 6821aa2..a99649f 100644 --- a/skills/web-automation/SKILL.md +++ b/skills/web-automation/SKILL.md @@ -158,6 +158,7 @@ The discovery scripts are purpose-built for the common address-to-listing workfl The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow: - open the listing page - on Zillow, first inspect the rendered listing shell for a complete structured `__NEXT_DATA__` photo set +- if the visible page count is missing, trust the structured Zillow photo set when page metadata confirms the count or when the embedded set is already clearly substantial - only force the all-photos click path when the initial Zillow page data is incomplete - wait for the resulting photo page or scroller view when the click path is actually needed - extract direct image URLs from the rendered page diff --git a/skills/web-automation/scripts/zillow-photo-data.js b/skills/web-automation/scripts/zillow-photo-data.js index 8917aa9..f1d35ae 100644 --- a/skills/web-automation/scripts/zillow-photo-data.js +++ b/skills/web-automation/scripts/zillow-photo-data.js @@ -58,11 +58,29 @@ export function extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptT return out; } -export function shouldUseStructuredZillowPhotos(candidates, expectedPhotoCount) { +const DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT = 12; + +export function shouldUseStructuredZillowPhotos(candidates, options = {}) { const count = Array.isArray(candidates) ? candidates.length : 0; - if (!Number.isFinite(expectedPhotoCount) || expectedPhotoCount <= 0) { - return false; + const normalizedOptions = + typeof options === "number" + ? { expectedPhotoCount: options } + : options && typeof options === "object" + ? options + : {}; + const expectedPhotoCount = Number(normalizedOptions.expectedPhotoCount || 0); + const fallbackPhotoCount = Number(normalizedOptions.fallbackPhotoCount || 0); + const minimumTrustCount = Number( + normalizedOptions.minimumTrustCount || DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT + ); + + if (Number.isFinite(expectedPhotoCount) && expectedPhotoCount > 0) { + return count >= expectedPhotoCount; } - return count >= expectedPhotoCount; + if (Number.isFinite(fallbackPhotoCount) && fallbackPhotoCount > 0) { + return count >= fallbackPhotoCount; + } + + return count >= minimumTrustCount; } diff --git a/skills/web-automation/scripts/zillow-photo-data.test.mjs b/skills/web-automation/scripts/zillow-photo-data.test.mjs index 0db501f..75b8bff 100644 --- a/skills/web-automation/scripts/zillow-photo-data.test.mjs +++ b/skills/web-automation/scripts/zillow-photo-data.test.mjs @@ -84,5 +84,38 @@ test("shouldUseStructuredZillowPhotos returns false when structured photos are i url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, })); - assert.equal(shouldUseStructuredZillowPhotos(candidates, 29), false); + assert.equal(shouldUseStructuredZillowPhotos(candidates, { expectedPhotoCount: 29 }), false); +}); + +test("shouldUseStructuredZillowPhotos returns true when meta-description count matches even without visible page count", () => { + const candidates = Array.from({ length: 29 }, (_, index) => ({ + url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, + })); + + assert.equal( + shouldUseStructuredZillowPhotos(candidates, { expectedPhotoCount: null, fallbackPhotoCount: 29 }), + true + ); +}); + +test("shouldUseStructuredZillowPhotos returns true for a substantial structured set when no count signal is available", () => { + const candidates = Array.from({ length: 18 }, (_, index) => ({ + url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, + })); + + assert.equal( + shouldUseStructuredZillowPhotos(candidates, { expectedPhotoCount: null, fallbackPhotoCount: null }), + true + ); +}); + +test("shouldUseStructuredZillowPhotos returns false for a tiny structured set when no count signal is available", () => { + const candidates = Array.from({ length: 5 }, (_, index) => ({ + url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, + })); + + assert.equal( + shouldUseStructuredZillowPhotos(candidates, { expectedPhotoCount: null, fallbackPhotoCount: null }), + false + ); }); diff --git a/skills/web-automation/scripts/zillow-photos.js b/skills/web-automation/scripts/zillow-photos.js index 7a73813..7f3bd5f 100644 --- a/skills/web-automation/scripts/zillow-photos.js +++ b/skills/web-automation/scripts/zillow-photos.js @@ -35,6 +35,17 @@ async function getAnnouncedPhotoCount(page) { }); } +async function getMetaDescriptionPhotoCount(page) { + return page.evaluate(() => { + const content = + document.querySelector('meta[name="description"]')?.getAttribute("content") || + document.querySelector('meta[property="og:description"]')?.getAttribute("content") || + ""; + const match = content.match(/\b(\d+)\s+photos\b/i); + return match ? Number(match[1]) : null; + }); +} + function collapseZillowPhotos(candidates) { const byBaseId = new Map(); @@ -137,14 +148,29 @@ export async function extractZillowPhotos(rawUrl, options = {}) { await dismissCommonOverlays(page); const expectedPhotoCount = await getAnnouncedPhotoCount(page); + const fallbackPhotoCount = await getMetaDescriptionPhotoCount(page); const initialStructuredPhotos = normalizeZillowPhotos( await collectZillowStructuredPhotoCandidates(page) ); - if (shouldUseStructuredZillowPhotos(initialStructuredPhotos, expectedPhotoCount)) { + if ( + shouldUseStructuredZillowPhotos(initialStructuredPhotos, { + expectedPhotoCount, + fallbackPhotoCount, + }) + ) { const notes = [ "The rendered Zillow listing shell already exposed a complete structured photo set, so extraction completed without relying on the all-photos click path.", ]; + if (!expectedPhotoCount && fallbackPhotoCount) { + notes.push( + `Trusted the embedded Zillow photo set using the page metadata count of ${fallbackPhotoCount} photos.` + ); + } else if (!expectedPhotoCount && !fallbackPhotoCount) { + notes.push( + "Trusted a substantial embedded Zillow photo set even though the page did not expose an explicit photo count." + ); + } if (attempt > 1) { notes.push( "Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly." @@ -156,7 +182,7 @@ export async function extractZillowPhotos(rawUrl, options = {}) { finalUrl: page.url(), title: await page.title(), clickedLabel: null, - expectedPhotoCount, + expectedPhotoCount: expectedPhotoCount || fallbackPhotoCount || null, complete: true, photoCount: initialStructuredPhotos.length, imageUrls: initialStructuredPhotos.map((photo) => photo.url),