From 446d43cc78eb803f7b4f56af4a77956ca98ac511 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sat, 28 Mar 2026 03:30:42 -0500 Subject: [PATCH] Prefer structured Zillow photo data before click path --- docs/web-automation.md | 4 +- skills/web-automation/SKILL.md | 5 +- .../scripts/zillow-photo-data.js | 9 ++++ .../scripts/zillow-photo-data.test.mjs | 21 +++++++- .../web-automation/scripts/zillow-photos.js | 49 ++++++++++++++++--- 5 files changed, 75 insertions(+), 13 deletions(-) diff --git a/docs/web-automation.md b/docs/web-automation.md index d3dee51..8a5395d 100644 --- a/docs/web-automation.md +++ b/docs/web-automation.md @@ -181,8 +181,8 @@ node zillow-photos.js "https://www.zillow.com/homedetails/4141-Whiteley-Dr-Corpu What it does: - opens the listing page with CloakBrowser -- tries the `See all photos` / `See all X photos` entry point -- if Zillow keeps the click path flaky, falls back to the listing's embedded `__NEXT_DATA__` payload +- first checks whether the rendered listing shell already exposes a complete photo set in Zillow's embedded `__NEXT_DATA__` payload +- only tries the `See all photos` / `See all X photos` entry point when the initial structured data is incomplete - returns direct `photos.zillowstatic.com` image URLs as JSON - fails fast with a timeout if the browser-backed extraction stalls diff --git a/skills/web-automation/SKILL.md b/skills/web-automation/SKILL.md index 8cc18a3..6821aa2 100644 --- a/skills/web-automation/SKILL.md +++ b/skills/web-automation/SKILL.md @@ -157,8 +157,9 @@ The discovery scripts are purpose-built for the common address-to-listing workfl The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow: - open the listing page -- click the all-photos entry point -- wait for the resulting photo page or scroller view +- on Zillow, first inspect the rendered listing shell for a complete structured `__NEXT_DATA__` photo set +- only force the all-photos click path when the initial Zillow page data is incomplete +- wait for the resulting photo page or scroller view when the click path is actually needed - extract direct image URLs from the rendered page - fail fast with a timeout instead of hanging indefinitely when the browser-backed extraction stalls - support longer source-specific timeouts when a caller such as `property-assessor` imports them for slower exact-unit Zillow renders diff --git a/skills/web-automation/scripts/zillow-photo-data.js b/skills/web-automation/scripts/zillow-photo-data.js index e870929..8917aa9 100644 --- a/skills/web-automation/scripts/zillow-photo-data.js +++ b/skills/web-automation/scripts/zillow-photo-data.js @@ -57,3 +57,12 @@ export function extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptT return out; } + +export function shouldUseStructuredZillowPhotos(candidates, expectedPhotoCount) { + const count = Array.isArray(candidates) ? candidates.length : 0; + if (!Number.isFinite(expectedPhotoCount) || expectedPhotoCount <= 0) { + return false; + } + + return count >= expectedPhotoCount; +} diff --git a/skills/web-automation/scripts/zillow-photo-data.test.mjs b/skills/web-automation/scripts/zillow-photo-data.test.mjs index 15594d7..0db501f 100644 --- a/skills/web-automation/scripts/zillow-photo-data.test.mjs +++ b/skills/web-automation/scripts/zillow-photo-data.test.mjs @@ -1,7 +1,10 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js"; +import { + extractZillowStructuredPhotoCandidatesFromNextDataScript, + shouldUseStructuredZillowPhotos, +} from "./zillow-photo-data.js"; test("extractZillowStructuredPhotoCandidatesFromNextDataScript reads responsivePhotos", () => { const scriptText = JSON.stringify({ @@ -67,3 +70,19 @@ test("extractZillowStructuredPhotoCandidatesFromNextDataScript falls back to mix { url: "https://photos.zillowstatic.com/fp/photo-one-cc_ft_1536.jpg", width: 1536 }, ]); }); + +test("shouldUseStructuredZillowPhotos returns true when structured photos already match the announced count", () => { + const candidates = Array.from({ length: 29 }, (_, index) => ({ + url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, + })); + + assert.equal(shouldUseStructuredZillowPhotos(candidates, 29), true); +}); + +test("shouldUseStructuredZillowPhotos returns false when structured photos are incomplete for the announced count", () => { + const candidates = Array.from({ length: 12 }, (_, index) => ({ + url: `https://photos.zillowstatic.com/fp/photo-${index + 1}-p_d.jpg`, + })); + + assert.equal(shouldUseStructuredZillowPhotos(candidates, 29), false); +}); diff --git a/skills/web-automation/scripts/zillow-photos.js b/skills/web-automation/scripts/zillow-photos.js index a0d92e1..7a73813 100644 --- a/skills/web-automation/scripts/zillow-photos.js +++ b/skills/web-automation/scripts/zillow-photos.js @@ -16,7 +16,10 @@ import { sleep, waitForPhotoExperience, } from "./real-estate-photo-common.js"; -import { extractZillowStructuredPhotoCandidatesFromNextDataScript } from "./zillow-photo-data.js"; +import { + extractZillowStructuredPhotoCandidatesFromNextDataScript, + shouldUseStructuredZillowPhotos, +} from "./zillow-photo-data.js"; const ZILLOW_LABELS = [ /^See all(?: \d+)? photos$/i, @@ -106,6 +109,15 @@ async function collectZillowStructuredPhotoCandidates(page) { return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || ""); } +function normalizeZillowPhotos(candidates) { + const normalized = normalizeImageCandidates(candidates, { + hostIncludes: ["photos.zillowstatic.com"], + minWidth: 240, + minHeight: 180, + }); + return collapseZillowPhotos(normalized); +} + export async function extractZillowPhotos(rawUrl, options = {}) { const requestedUrl = parseTarget(rawUrl); const maxAttempts = 2; @@ -125,6 +137,33 @@ export async function extractZillowPhotos(rawUrl, options = {}) { await dismissCommonOverlays(page); const expectedPhotoCount = await getAnnouncedPhotoCount(page); + const initialStructuredPhotos = normalizeZillowPhotos( + await collectZillowStructuredPhotoCandidates(page) + ); + + if (shouldUseStructuredZillowPhotos(initialStructuredPhotos, expectedPhotoCount)) { + const notes = [ + "The rendered Zillow listing shell already exposed a complete structured photo set, so extraction completed without relying on the all-photos click path.", + ]; + if (attempt > 1) { + notes.push( + "Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly." + ); + } + return { + source: "zillow", + requestedUrl, + finalUrl: page.url(), + title: await page.title(), + clickedLabel: null, + expectedPhotoCount, + complete: true, + photoCount: initialStructuredPhotos.length, + imageUrls: initialStructuredPhotos.map((photo) => photo.url), + notes, + }; + } + const beforeUrl = page.url(); let clickedLabel = null; let clickError = null; @@ -142,13 +181,7 @@ export async function extractZillowPhotos(rawUrl, options = {}) { collectZillowStructuredPhotoCandidates(page), collectZillowPhotoCandidates(page), ]); - const candidates = [...structuredCandidates, ...renderedCandidates]; - const normalized = normalizeImageCandidates(candidates, { - hostIncludes: ["photos.zillowstatic.com"], - minWidth: 240, - minHeight: 180, - }); - const photos = collapseZillowPhotos(normalized); + const photos = normalizeZillowPhotos([...structuredCandidates, ...renderedCandidates]); if (!photos.length) { fail(