From 54854edfc6cbc0f95e968c2aa74e12e4b6402eed Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sat, 28 Mar 2026 03:17:51 -0500 Subject: [PATCH] Harden assessor fallback after Zillow photo failure --- docs/property-assessor.md | 3 + skills/property-assessor/SKILL.md | 3 + .../scripts/real-estate-photo-common.js | 5 + .../scripts/real-estate-photo-common.test.mjs | 18 +- .../web-automation/scripts/zillow-photos.js | 160 ++++++++++-------- 5 files changed, 115 insertions(+), 74 deletions(-) diff --git a/docs/property-assessor.md b/docs/property-assessor.md index f32c8c4..15a10ad 100644 --- a/docs/property-assessor.md +++ b/docs/property-assessor.md @@ -76,6 +76,7 @@ Operational rule: - In WhatsApp or similar messaging runs, keep the core analysis on `web-automation` plus `web_fetch`. Treat `web_search` as a narrow fallback for alternate-URL discovery only, not the default path for Zillow/HAR/CAD work. - Do not start Zillow/HAR property discovery or photo review from Brave-backed `web_search` when `web-automation` can open the candidate listing source directly. - For CAD/public-record lookup, prefer official assessor/CAD pages via `web_fetch` first and `web-automation` second if the site needs rendered interaction. +- In Texas runs, do not use `https://www.texas.gov/propertytaxes/search/` as the CAD lookup path; use the address-first CAD/helper path or the discovered county CAD pages directly. - In those messaging runs, reserve subprocess use for a single final `render-report` attempt after the verdict and fair-value range are complete. - In those messaging runs, do not start Gmail/email-send skill discovery or delivery tooling until the report content is complete and the PDF is ready to render or already rendered. - Property-assessor delivery emails should be sent as Luke from Luke's Google Workspace account, while still delivering to the user-specified destination. @@ -83,6 +84,8 @@ Operational rule: - Do not route property-assessor delivery through generic `gog` or the Stefano helper `node ~/.openclaw/workspace/integrations/google-workspace/gw.js`. - If the agent needs to confirm Luke auth before sending, use `zsh ~/.openclaw/workspace/bin/gog-luke auth list --check --plain`. - Treat a silent helper as a failed helper in messaging runs. If a helper produces no useful output within a short bound, abandon it and continue with the chat-native path instead of repeatedly polling it. +- If Zillow photo extraction fails, immediately continue with HAR photo fallback or the next available rendered listing/photo source rather than stopping the assessment. +- After a Zillow/HAR photo miss, continue the comp and CAD/public-record work in the same run. A photo-source miss is a fallback event, not a terminal state. - If the original request already authorized sending the finished PDF to a stated email address, do not pause for a redundant send-confirmation prompt after rendering. - If final PDF render/send fails, return the completed decision-grade report in chat and report delivery failure separately rather than restarting the whole assessment. diff --git a/skills/property-assessor/SKILL.md b/skills/property-assessor/SKILL.md index 8d73629..962998c 100644 --- a/skills/property-assessor/SKILL.md +++ b/skills/property-assessor/SKILL.md @@ -60,6 +60,7 @@ Rules: - In WhatsApp or similar messaging runs, keep the core assessment on `web-automation` plus `web_fetch`. Treat `web_search` as a fallback discovery aid, not the primary property-analysis path. - For Zillow/HAR property discovery, photo extraction, and rendered listing review, do **not** start with Brave-backed `web_search` when `web-automation` can open the candidate source directly. - For CAD/public-record enrichment, prefer official assessor/CAD pages via `web_fetch` first. Use `web-automation` when the official site needs rendered interaction. Do **not** start CAD lookup from generic web-search snippets when the official site is already known or derivable from the address. +- In Texas runs, do **not** use `https://www.texas.gov/propertytaxes/search/` as the CAD lookup path. Use the address-first CAD/helper path or the discovered county CAD pages directly. - In those messaging runs, do **not** make `scripts/property-assessor assess`, `scripts/property-assessor locate-public-records`, `node zillow-discover.js`, `node har-discover.js`, `node zillow-photos.js`, `node har-photos.js`, `curl`, or `wget` the default core-analysis path. - From messaging runs, the only subprocess-style step you should attempt by default is the final `scripts/property-assessor render-report` call after the verdict, fair-value range, and report body are complete. - Do **not** inspect Gmail/email-send skills, mail tooling, or delivery integrations until the assessment is complete and the PDF is either already rendered or ready to render immediately. @@ -68,6 +69,8 @@ Rules: - Do **not** use generic `gog` or the Stefano helper `node ~/.openclaw/workspace/integrations/google-workspace/gw.js` for property-assessor report delivery. - If you need to confirm Luke auth before sending, use `zsh ~/.openclaw/workspace/bin/gog-luke auth list --check --plain`. - A silent helper is a failed helper in messaging runs. If a background helper produces no useful stdout/stderr and no result within a short bound, stop polling it, treat that path as failed, and continue on the chat-native assessment path instead of narrating that it is still chewing. +- If Zillow photo extraction fails, immediately continue with HAR photo fallback or the next available rendered listing/photo source. Do **not** stop the assessment because one Zillow photo subprocess failed. +- After a Zillow/HAR photo miss, continue the comp and CAD/public-record work in the same run. A photo-source miss is a fallback event, not a terminal state. - Do **not** leave the user parked behind background helper polling. If a helper has not produced a result quickly, give a concise status update and continue the assessment with the next available non-helper path. - If the user already instructed you to email/send the finished PDF to a specific target, do **not** ask for a second send confirmation after rendering. Render, send, and report the result. - If the final PDF render fails, return the complete decision-grade report in chat and say the render/send step failed. Do not restart the whole assessment. diff --git a/skills/web-automation/scripts/real-estate-photo-common.js b/skills/web-automation/scripts/real-estate-photo-common.js index b61916e..b34f212 100644 --- a/skills/web-automation/scripts/real-estate-photo-common.js +++ b/skills/web-automation/scripts/real-estate-photo-common.js @@ -39,6 +39,11 @@ export function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } +export function isPageClosedError(error) { + const message = error instanceof Error ? error.message : String(error || ""); + return /Target page, context or browser has been closed|Execution context was destroyed/i.test(message); +} + export async function runWithOperationTimeout( operationName, operation, diff --git a/skills/web-automation/scripts/real-estate-photo-common.test.mjs b/skills/web-automation/scripts/real-estate-photo-common.test.mjs index 033afc3..69ee135 100644 --- a/skills/web-automation/scripts/real-estate-photo-common.test.mjs +++ b/skills/web-automation/scripts/real-estate-photo-common.test.mjs @@ -1,7 +1,11 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { normalizeImageCandidates, runWithOperationTimeout } from "./real-estate-photo-common.js"; +import { + isPageClosedError, + normalizeImageCandidates, + runWithOperationTimeout, +} from "./real-estate-photo-common.js"; test("normalizeImageCandidates keeps distinct Zillow photo URLs and strips query strings", () => { const result = normalizeImageCandidates( @@ -85,3 +89,15 @@ test("runWithOperationTimeout rejects stalled work and runs timeout cleanup", as assert.equal(cleanedUp, true); }); + +test("isPageClosedError detects transient browser-session closure errors", () => { + assert.equal( + isPageClosedError(new Error("page.evaluate: Target page, context or browser has been closed")), + true + ); + assert.equal( + isPageClosedError(new Error("Execution context was destroyed, most likely because of a navigation")), + true + ); + assert.equal(isPageClosedError(new Error("No Zillow image URLs were found")), false); +}); diff --git a/skills/web-automation/scripts/zillow-photos.js b/skills/web-automation/scripts/zillow-photos.js index f3de385..a0d92e1 100644 --- a/skills/web-automation/scripts/zillow-photos.js +++ b/skills/web-automation/scripts/zillow-photos.js @@ -8,6 +8,7 @@ import { dismissCommonOverlays, fail, gotoListing, + isPageClosedError, normalizeImageCandidates, parseTarget, runWithOperationTimeout, @@ -107,85 +108,98 @@ async function collectZillowStructuredPhotoCandidates(page) { export async function extractZillowPhotos(rawUrl, options = {}) { const requestedUrl = parseTarget(rawUrl); - const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); - const closeContext = async () => { - await context.close().catch(() => {}); - }; + const maxAttempts = 2; + let lastError = null; - try { - return await runWithOperationTimeout( - "Zillow photo extraction", - async () => { - await gotoListing(page, requestedUrl); - await dismissCommonOverlays(page); + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); + const closeContext = async () => { + await context.close().catch(() => {}); + }; - const expectedPhotoCount = await getAnnouncedPhotoCount(page); - const beforeUrl = page.url(); - let clickedLabel = null; - let clickError = null; + try { + return await runWithOperationTimeout( + "Zillow photo extraction", + async () => { + await gotoListing(page, requestedUrl); + await dismissCommonOverlays(page); - try { - clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS); - await waitForPhotoExperience(page, beforeUrl); - await scrollUntilSettled(page); - await sleep(1200); - } catch (error) { - clickError = error instanceof Error ? error.message : String(error); + const expectedPhotoCount = await getAnnouncedPhotoCount(page); + const beforeUrl = page.url(); + let clickedLabel = null; + let clickError = null; + + try { + clickedLabel = await clickPhotoEntryPoint(page, ZILLOW_LABELS); + await waitForPhotoExperience(page, beforeUrl); + await scrollUntilSettled(page); + await sleep(1200); + } catch (error) { + clickError = error instanceof Error ? error.message : String(error); + } + + const [structuredCandidates, renderedCandidates] = await Promise.all([ + collectZillowStructuredPhotoCandidates(page), + collectZillowPhotoCandidates(page), + ]); + const candidates = [...structuredCandidates, ...renderedCandidates]; + const normalized = normalizeImageCandidates(candidates, { + hostIncludes: ["photos.zillowstatic.com"], + minWidth: 240, + minHeight: 180, + }); + const photos = collapseZillowPhotos(normalized); + + if (!photos.length) { + fail( + "Zillow photo extraction failed.", + clickError || "No Zillow image URLs were found on the rendered listing page." + ); + } + + const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true; + const notes = []; + if (clickedLabel) { + notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs."); + } else { + notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path."); + } + if (clickError) { + notes.push(`All-photos click path was not required: ${clickError}`); + } + if (attempt > 1) { + notes.push(`Recovered after retrying Zillow photo extraction once because the first browser session closed unexpectedly.`); + } + + return { + source: "zillow", + requestedUrl, + finalUrl: page.url(), + title: await page.title(), + clickedLabel, + expectedPhotoCount, + complete, + photoCount: photos.length, + imageUrls: photos.map((photo) => photo.url), + notes, + }; + }, + { + timeoutMs: Number(options.timeoutMs || 0) || undefined, + onTimeout: closeContext } - - const [structuredCandidates, renderedCandidates] = await Promise.all([ - collectZillowStructuredPhotoCandidates(page), - collectZillowPhotoCandidates(page), - ]); - const candidates = [...structuredCandidates, ...renderedCandidates]; - const normalized = normalizeImageCandidates(candidates, { - hostIncludes: ["photos.zillowstatic.com"], - minWidth: 240, - minHeight: 180, - }); - const photos = collapseZillowPhotos(normalized); - - if (!photos.length) { - fail( - "Zillow photo extraction failed.", - clickError || "No Zillow image URLs were found on the rendered listing page." - ); - } - - const complete = expectedPhotoCount ? photos.length >= expectedPhotoCount : true; - const notes = []; - if (clickedLabel) { - notes.push("Opened Zillow all-photos flow and extracted direct Zillow image URLs."); - } else { - notes.push("The rendered Zillow listing shell already exposed the Zillow photo stream, so extraction completed without relying on the all-photos click path."); - } - if (clickError) { - notes.push(`All-photos click path was not required: ${clickError}`); - } - - return { - source: "zillow", - requestedUrl, - finalUrl: page.url(), - title: await page.title(), - clickedLabel, - expectedPhotoCount, - complete, - photoCount: photos.length, - imageUrls: photos.map((photo) => photo.url), - notes, - }; - }, - { - timeoutMs: Number(options.timeoutMs || 0) || undefined, - onTimeout: closeContext + ); + } catch (error) { + lastError = error; + if (!(attempt < maxAttempts && isPageClosedError(error))) { + throw new Error(error instanceof Error ? error.message : String(error)); } - ); - } catch (error) { - throw new Error(error instanceof Error ? error.message : String(error)); - } finally { - await closeContext(); + } finally { + await closeContext(); + } } + + throw new Error(lastError instanceof Error ? lastError.message : String(lastError)); } async function main() {