From 8fe451e8d0180841116b001f9e83ecb44b9d9b2c Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sat, 28 Mar 2026 02:28:30 -0500 Subject: [PATCH] Fix slower Zillow unit photo discovery path --- docs/property-assessor.md | 2 + docs/web-automation.md | 6 + skills/property-assessor/SKILL.md | 2 + .../src/listing-discovery.ts | 105 +++++++++++------- skills/property-assessor/src/photo-review.ts | 22 +++- .../tests/timeout-guards.test.ts | 65 +++++++++++ skills/web-automation/SKILL.md | 2 + skills/web-automation/scripts/har-discover.js | 3 +- skills/web-automation/scripts/har-photos.js | 3 +- .../web-automation/scripts/zillow-discover.js | 3 +- .../web-automation/scripts/zillow-photos.js | 3 +- 11 files changed, 167 insertions(+), 49 deletions(-) diff --git a/docs/property-assessor.md b/docs/property-assessor.md index db440e0..9a3871c 100644 --- a/docs/property-assessor.md +++ b/docs/property-assessor.md @@ -95,7 +95,9 @@ Current behavior: - keeps CAD-site selection address-driven and jurisdiction-specific; it does not hardcode one county's CAD as the global source - when a supported official CAD detail host is found, captures direct property facts such as property ID/account, owner, legal description, assessed value, exemptions, and the official property-detail URL - automatically tries to discover Zillow and HAR listing URLs from the address when no listing URL is provided +- starts Zillow and HAR listing discovery in parallel so HAR can already be in flight if Zillow misses or stalls - runs Zillow photo extraction first, then HAR as fallback when available +- gives Zillow a longer source-specific discovery/photo window than the generic fallback path, because some exact-unit Zillow pages resolve more slowly than HAR or public-record lookups - reuses the OpenClaw web-automation logic in-process instead of spawning nested helper commands - fails fast when Zillow/HAR discovery or photo extraction stalls instead of hanging indefinitely - returns a structured preliminary report payload diff --git a/docs/web-automation.md b/docs/web-automation.md index 6cb9f5d..d3dee51 100644 --- a/docs/web-automation.md +++ b/docs/web-automation.md @@ -155,6 +155,9 @@ What it does: - returns the discovered listing URL as JSON - fails fast with a timeout if the browser-backed discovery stalls +Operational note: +- when imported by `property-assessor`, Zillow discovery is allowed a longer source-specific timeout than the generic helper default, because some exact-unit Zillow pages resolve more slowly than the basic search/listing flow + ### HAR discovery ```bash @@ -183,6 +186,9 @@ What it does: - returns direct `photos.zillowstatic.com` image URLs as JSON - fails fast with a timeout if the browser-backed extraction stalls +Operational note: +- when imported by `property-assessor`, Zillow photo extraction is allowed a longer source-specific timeout than the generic helper default, because some exact-unit Zillow listings expose the correct photo set only after a slower render path + Expected success shape: - `complete: true` - `expectedPhotoCount` matches `photoCount` diff --git a/skills/property-assessor/SKILL.md b/skills/property-assessor/SKILL.md index 6855acb..4ec5447 100644 --- a/skills/property-assessor/SKILL.md +++ b/skills/property-assessor/SKILL.md @@ -115,7 +115,9 @@ scripts/property-assessor render-report --input "" --output - resolve official public-record jurisdiction automatically from the address - keep CAD discovery jurisdiction-specific from the address; do not hardcode one county CAD for every property - try to discover Zillow and HAR listing URLs from the address when no listing URL is provided +- start Zillow and HAR discovery in parallel, while still preferring Zillow first for the photo-review path - run the approval-safe Zillow/HAR photo extractor chain automatically +- allow slower exact-unit Zillow pages a longer source-specific discovery/photo window before giving up and falling back - build a purpose-aware report payload - complete the analysis without requiring recipient email(s) - only stop and ask for recipient email(s) when the user is explicitly rendering or sending the PDF diff --git a/skills/property-assessor/src/listing-discovery.ts b/skills/property-assessor/src/listing-discovery.ts index 0d4acf4..8e2ca31 100644 --- a/skills/property-assessor/src/listing-discovery.ts +++ b/skills/property-assessor/src/listing-discovery.ts @@ -10,6 +10,8 @@ export interface ListingDiscoveryResult { interface ListingDiscoveryDeps { timeoutMs?: number; + zillowTimeoutMs?: number; + harTimeoutMs?: number; discoverZillowListingFn?: typeof discoverZillowListing; discoverHarListingFn?: typeof discoverHarListing; } @@ -17,61 +19,82 @@ interface ListingDiscoveryDeps { const DEFAULT_DISCOVERY_TIMEOUT_MS = Number( process.env.PROPERTY_ASSESSOR_DISCOVERY_TIMEOUT_MS || 20_000 ); +const DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS = Number( + process.env.PROPERTY_ASSESSOR_ZILLOW_DISCOVERY_TIMEOUT_MS || 60_000 +); +const DEFAULT_HAR_DISCOVERY_TIMEOUT_MS = Number( + process.env.PROPERTY_ASSESSOR_HAR_DISCOVERY_TIMEOUT_MS || DEFAULT_DISCOVERY_TIMEOUT_MS +); + +interface SourceDiscoveryOutcome { + source: "zillow" | "har"; + url: string | null; + attempts: string[]; +} export async function discoverListingSources( address: string, deps: ListingDiscoveryDeps = {} ): Promise { - const attempts: string[] = []; - let zillowUrl: string | null = null; - let harUrl: string | null = null; const timeoutMs = deps.timeoutMs ?? DEFAULT_DISCOVERY_TIMEOUT_MS; + const zillowTimeoutMs = + deps.zillowTimeoutMs ?? + (deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS); + const harTimeoutMs = + deps.harTimeoutMs ?? + (deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_DISCOVERY_TIMEOUT_MS); const discoverZillowListingFn = deps.discoverZillowListingFn || discoverZillowListing; const discoverHarListingFn = deps.discoverHarListingFn || discoverHarListing; - try { - const result = await withTimeout( - () => discoverZillowListingFn(address), - { - operationName: "Zillow discovery", - timeoutMs + const runSource = async ( + source: "zillow" | "har", + timeoutForSourceMs: number, + operation: () => Promise<{ listingUrl: string | null; attempts: string[] }> + ): Promise => { + try { + const result = await withTimeout(operation, { + operationName: `${source === "zillow" ? "Zillow" : "HAR"} discovery`, + timeoutMs: timeoutForSourceMs + }); + return { + source, + url: result.listingUrl, + attempts: result.attempts + }; + } catch (error) { + if (error instanceof TimeoutError) { + return { + source, + url: null, + attempts: [ + `${source === "zillow" ? "Zillow" : "HAR"} discovery timed out after ${timeoutForSourceMs}ms.` + ] + }; } - ); - zillowUrl = result.listingUrl; - attempts.push(...result.attempts); - } catch (error) { - if (error instanceof TimeoutError) { - attempts.push(`Zillow discovery timed out after ${timeoutMs}ms.`); - } else { - attempts.push( - `Zillow discovery failed: ${error instanceof Error ? error.message : String(error)}` - ); - } - } - try { - const result = await withTimeout( - () => discoverHarListingFn(address), - { - operationName: "HAR discovery", - timeoutMs - } - ); - harUrl = result.listingUrl; - attempts.push(...result.attempts); - } catch (error) { - if (error instanceof TimeoutError) { - attempts.push(`HAR discovery timed out after ${timeoutMs}ms.`); - } else { - attempts.push( - `HAR discovery failed: ${error instanceof Error ? error.message : String(error)}` - ); + return { + source, + url: null, + attempts: [ + `${source === "zillow" ? "Zillow" : "HAR"} discovery failed: ${error instanceof Error ? error.message : String(error)}` + ] + }; } - } + }; + + const zillowPromise = runSource("zillow", zillowTimeoutMs, () => + discoverZillowListingFn(address, { timeoutMs: zillowTimeoutMs }) + ); + const harPromise = runSource("har", harTimeoutMs, () => + discoverHarListingFn(address, { timeoutMs: harTimeoutMs }) + ); + + const [zillowResult, harResult] = await Promise.all([zillowPromise, harPromise]); + const attempts = [...zillowResult.attempts, ...harResult.attempts]; return { attempts, - zillowUrl, - harUrl + zillowUrl: zillowResult.url, + harUrl: harResult.url }; } diff --git a/skills/property-assessor/src/photo-review.ts b/skills/property-assessor/src/photo-review.ts index 38868b5..0503fef 100644 --- a/skills/property-assessor/src/photo-review.ts +++ b/skills/property-assessor/src/photo-review.ts @@ -22,6 +22,8 @@ export interface PhotoReviewResolution { interface PhotoReviewDeps { timeoutMs?: number; + zillowTimeoutMs?: number; + harTimeoutMs?: number; extractZillowPhotosFn?: typeof extractZillowPhotos; extractHarPhotosFn?: typeof extractHarPhotos; } @@ -29,6 +31,12 @@ interface PhotoReviewDeps { const DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS = Number( process.env.PROPERTY_ASSESSOR_PHOTO_TIMEOUT_MS || 25_000 ); +const DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS = Number( + process.env.PROPERTY_ASSESSOR_ZILLOW_PHOTO_TIMEOUT_MS || 60_000 +); +const DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS = Number( + process.env.PROPERTY_ASSESSOR_HAR_PHOTO_TIMEOUT_MS || DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS +); export async function extractPhotoData( source: PhotoSource, @@ -36,15 +44,21 @@ export async function extractPhotoData( deps: PhotoReviewDeps = {} ): Promise { const timeoutMs = deps.timeoutMs ?? DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS; + const zillowTimeoutMs = + deps.zillowTimeoutMs ?? + (deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS); + const harTimeoutMs = + deps.harTimeoutMs ?? + (deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS); const extractZillowPhotosFn = deps.extractZillowPhotosFn || extractZillowPhotos; const extractHarPhotosFn = deps.extractHarPhotosFn || extractHarPhotos; if (source === "zillow") { const payload = await withTimeout( - () => extractZillowPhotosFn(url), + () => extractZillowPhotosFn(url, { timeoutMs: zillowTimeoutMs }), { operationName: "Zillow photo extraction", - timeoutMs + timeoutMs: zillowTimeoutMs } ); return { @@ -60,10 +74,10 @@ export async function extractPhotoData( } const payload = await withTimeout( - () => extractHarPhotosFn(url), + () => extractHarPhotosFn(url, { timeoutMs: harTimeoutMs }), { operationName: "HAR photo extraction", - timeoutMs + timeoutMs: harTimeoutMs } ); return { diff --git a/skills/property-assessor/tests/timeout-guards.test.ts b/skills/property-assessor/tests/timeout-guards.test.ts index a84a7aa..27d4484 100644 --- a/skills/property-assessor/tests/timeout-guards.test.ts +++ b/skills/property-assessor/tests/timeout-guards.test.ts @@ -20,6 +20,71 @@ test("discoverListingSources times out stalled Zillow and HAR discovery calls", assert.match(result.attempts.join(" "), /har discovery timed out/i); }); +test("discoverListingSources starts Zillow and HAR discovery in parallel", async () => { + let zillowStarted = false; + let harStarted = false; + + const discoveryPromise = discoverListingSources("1011 Ennis Joslin Rd APT 235, Corpus Christi, TX 78412", { + timeoutMs: 100, + discoverZillowListingFn: async () => { + zillowStarted = true; + await new Promise((resolve) => setTimeout(resolve, 50)); + return { + source: "zillow", + address: "1011 Ennis Joslin Rd APT 235, Corpus Christi, TX 78412", + searchUrl: "https://www.zillow.com/example-search", + finalUrl: "https://www.zillow.com/example-search", + title: "Example Zillow Search", + listingUrl: null, + attempts: ["Zillow did not find a confident match."] + }; + }, + discoverHarListingFn: async () => { + harStarted = true; + return { + source: "har", + address: "1011 Ennis Joslin Rd APT 235, Corpus Christi, TX 78412", + searchUrl: "https://www.har.com/example-search", + finalUrl: "https://www.har.com/example-search", + title: "Example HAR Search", + listingUrl: "https://www.har.com/homedetail/example/123", + attempts: ["HAR found a matching listing quickly."] + }; + } + }); + + await new Promise((resolve) => setTimeout(resolve, 10)); + + assert.equal(zillowStarted, true); + assert.equal(harStarted, true); + + const result = await discoveryPromise; + assert.equal(result.harUrl, "https://www.har.com/homedetail/example/123"); +}); + +test("extractPhotoData honors a longer Zillow timeout override", async () => { + const result = await extractPhotoData("zillow", "https://www.zillow.com/example", { + timeoutMs: 20, + zillowTimeoutMs: 80, + extractZillowPhotosFn: async () => { + await new Promise((resolve) => setTimeout(resolve, 40)); + return { + source: "zillow", + requestedUrl: "https://www.zillow.com/example", + finalUrl: "https://www.zillow.com/example", + expectedPhotoCount: 1, + complete: true, + photoCount: 1, + imageUrls: ["https://photos.example/1.jpg"], + notes: ["Zillow extractor succeeded after a slow page load."] + }; + } + }); + + assert.equal(result.source, "zillow"); + assert.equal(result.photoCount, 1); +}); + test("extractPhotoData times out a stalled photo extraction instead of hanging forever", async () => { await assert.rejects( async () => diff --git a/skills/web-automation/SKILL.md b/skills/web-automation/SKILL.md index 9a3f12e..8cc18a3 100644 --- a/skills/web-automation/SKILL.md +++ b/skills/web-automation/SKILL.md @@ -153,6 +153,7 @@ The discovery scripts are purpose-built for the common address-to-listing workfl - reject a mismatched unit when the requested address includes one - still work normally for single-family / no-unit addresses - return the direct listing URL as JSON +- support longer source-specific timeouts when a caller such as `property-assessor` imports them for slower exact-unit Zillow pages The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow: - open the listing page @@ -160,6 +161,7 @@ The photo scripts are purpose-built for the common `See all photos` / `Show all - wait for the resulting photo page or scroller view - extract direct image URLs from the rendered page - fail fast with a timeout instead of hanging indefinitely when the browser-backed extraction stalls +- support longer source-specific timeouts when a caller such as `property-assessor` imports them for slower exact-unit Zillow renders Output is JSON with: - `requestedUrl` diff --git a/skills/web-automation/scripts/har-discover.js b/skills/web-automation/scripts/har-discover.js index f5014cf..cf17a36 100644 --- a/skills/web-automation/scripts/har-discover.js +++ b/skills/web-automation/scripts/har-discover.js @@ -60,7 +60,7 @@ async function collectListingUrl(page) { }); } -export async function discoverHarListing(rawAddress) { +export async function discoverHarListing(rawAddress, options = {}) { const address = String(rawAddress || "").trim(); const identity = parseAddressIdentity(address); const searchUrl = buildSearchUrl(address); @@ -121,6 +121,7 @@ export async function discoverHarListing(rawAddress) { }; }, { + timeoutMs: Number(options.timeoutMs || 0) || undefined, onTimeout: closeContext } ); diff --git a/skills/web-automation/scripts/har-photos.js b/skills/web-automation/scripts/har-photos.js index 075e021..864e7e3 100644 --- a/skills/web-automation/scripts/har-photos.js +++ b/skills/web-automation/scripts/har-photos.js @@ -32,7 +32,7 @@ async function getAnnouncedPhotoCount(page) { }); } -export async function extractHarPhotos(rawUrl) { +export async function extractHarPhotos(rawUrl, options = {}) { const requestedUrl = parseTarget(rawUrl); const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); const closeContext = async () => { @@ -78,6 +78,7 @@ export async function extractHarPhotos(rawUrl) { }; }, { + timeoutMs: Number(options.timeoutMs || 0) || undefined, onTimeout: closeContext } ); diff --git a/skills/web-automation/scripts/zillow-discover.js b/skills/web-automation/scripts/zillow-discover.js index eb208d9..31d8733 100644 --- a/skills/web-automation/scripts/zillow-discover.js +++ b/skills/web-automation/scripts/zillow-discover.js @@ -64,7 +64,7 @@ async function collectListingUrl(page) { }); } -export async function discoverZillowListing(rawAddress) { +export async function discoverZillowListing(rawAddress, options = {}) { const address = String(rawAddress || "").trim(); const identity = parseAddressIdentity(address); const searchUrl = `https://www.zillow.com/homes/${encodeURIComponent(buildZillowAddressSlug(address))}_rb/`; @@ -125,6 +125,7 @@ export async function discoverZillowListing(rawAddress) { }; }, { + timeoutMs: Number(options.timeoutMs || 0) || undefined, onTimeout: closeContext } ); diff --git a/skills/web-automation/scripts/zillow-photos.js b/skills/web-automation/scripts/zillow-photos.js index 350987e..f3de385 100644 --- a/skills/web-automation/scripts/zillow-photos.js +++ b/skills/web-automation/scripts/zillow-photos.js @@ -105,7 +105,7 @@ async function collectZillowStructuredPhotoCandidates(page) { return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || ""); } -export async function extractZillowPhotos(rawUrl) { +export async function extractZillowPhotos(rawUrl, options = {}) { const requestedUrl = parseTarget(rawUrl); const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); const closeContext = async () => { @@ -177,6 +177,7 @@ export async function extractZillowPhotos(rawUrl) { }; }, { + timeoutMs: Number(options.timeoutMs || 0) || undefined, onTimeout: closeContext } );