Fix slower Zillow unit photo discovery path
This commit is contained in:
@@ -95,7 +95,9 @@ Current behavior:
|
||||
- keeps CAD-site selection address-driven and jurisdiction-specific; it does not hardcode one county's CAD as the global source
|
||||
- when a supported official CAD detail host is found, captures direct property facts such as property ID/account, owner, legal description, assessed value, exemptions, and the official property-detail URL
|
||||
- automatically tries to discover Zillow and HAR listing URLs from the address when no listing URL is provided
|
||||
- starts Zillow and HAR listing discovery in parallel so HAR can already be in flight if Zillow misses or stalls
|
||||
- runs Zillow photo extraction first, then falls back to HAR when a HAR listing URL is available
|
||||
- gives Zillow a longer source-specific discovery/photo window than the generic fallback path, because some exact-unit Zillow pages resolve more slowly than HAR or public-record lookups
|
||||
- reuses the OpenClaw web-automation logic in-process instead of spawning nested helper commands
|
||||
- fails fast when Zillow/HAR discovery or photo extraction stalls instead of hanging indefinitely
|
||||
- returns a structured preliminary report payload
|
||||
|
||||
@@ -155,6 +155,9 @@ What it does:
|
||||
- returns the discovered listing URL as JSON
|
||||
- fails fast with a timeout if the browser-backed discovery stalls
|
||||
|
||||
Operational note:
|
||||
- when imported by `property-assessor`, Zillow discovery is allowed a longer source-specific timeout than the generic helper default, because some exact-unit Zillow pages resolve more slowly than the basic search/listing flow
|
||||
|
||||
### HAR discovery
|
||||
|
||||
```bash
|
||||
@@ -183,6 +186,9 @@ What it does:
|
||||
- returns direct `photos.zillowstatic.com` image URLs as JSON
|
||||
- fails fast with a timeout if the browser-backed extraction stalls
|
||||
|
||||
Operational note:
|
||||
- when imported by `property-assessor`, Zillow photo extraction is allowed a longer source-specific timeout than the generic helper default, because some exact-unit Zillow listings expose the correct photo set only after a slower render path
|
||||
|
||||
Expected success shape:
|
||||
- `complete: true`
|
||||
- `expectedPhotoCount` matches `photoCount`
|
||||
|
||||
@@ -115,7 +115,9 @@ scripts/property-assessor render-report --input "<report-payload-json>" --output
|
||||
- resolve official public-record jurisdiction automatically from the address
|
||||
- keep CAD discovery jurisdiction-specific from the address; do not hardcode one county CAD for every property
|
||||
- try to discover Zillow and HAR listing URLs from the address when no listing URL is provided
|
||||
- start Zillow and HAR discovery in parallel, while still preferring Zillow first for the photo-review path
|
||||
- run the approval-safe Zillow/HAR photo extractor chain automatically
|
||||
- allow slower exact-unit Zillow pages a longer source-specific discovery/photo window before giving up and falling back
|
||||
- build a purpose-aware report payload
|
||||
- complete the analysis without requiring recipient email(s)
|
||||
- only stop and ask for recipient email(s) when the user is explicitly rendering or sending the PDF
|
||||
|
||||
@@ -10,6 +10,8 @@ export interface ListingDiscoveryResult {
|
||||
|
||||
/**
 * Optional overrides accepted by `discoverListingSources`.
 *
 * All timeouts are in milliseconds. A source-specific timeout wins over
 * `timeoutMs`; `timeoutMs`, when provided, wins over the built-in
 * per-source defaults.
 */
interface ListingDiscoveryDeps {
  /** Timeout applied to both sources unless a source-specific value is given. */
  timeoutMs?: number;
  /** Timeout for Zillow discovery only. */
  zillowTimeoutMs?: number;
  /** Timeout for HAR discovery only. */
  harTimeoutMs?: number;
  /** Replacement for the Zillow discovery helper (the tests inject stubs here). */
  discoverZillowListingFn?: typeof discoverZillowListing;
  /** Replacement for the HAR discovery helper (the tests inject stubs here). */
  discoverHarListingFn?: typeof discoverHarListing;
}
|
||||
@@ -17,61 +19,82 @@ interface ListingDiscoveryDeps {
|
||||
/**
 * Reads a millisecond timeout from the environment variable `name`.
 *
 * Returns `fallbackMs` when the variable is unset, blank, or does not parse
 * to a finite, non-negative number, so a malformed value such as "60s" can
 * never become a NaN timeout.
 */
function readDiscoveryTimeoutMs(name: string, fallbackMs: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallbackMs;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallbackMs;
}

/** Generic discovery timeout (ms); override with PROPERTY_ASSESSOR_DISCOVERY_TIMEOUT_MS. */
const DEFAULT_DISCOVERY_TIMEOUT_MS = readDiscoveryTimeoutMs(
  "PROPERTY_ASSESSOR_DISCOVERY_TIMEOUT_MS",
  20_000
);

/** Zillow discovery timeout (ms); longer than the generic default. */
const DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS = readDiscoveryTimeoutMs(
  "PROPERTY_ASSESSOR_ZILLOW_DISCOVERY_TIMEOUT_MS",
  60_000
);

/** HAR discovery timeout (ms); defaults to the generic discovery timeout. */
const DEFAULT_HAR_DISCOVERY_TIMEOUT_MS = readDiscoveryTimeoutMs(
  "PROPERTY_ASSESSOR_HAR_DISCOVERY_TIMEOUT_MS",
  DEFAULT_DISCOVERY_TIMEOUT_MS
);
|
||||
|
||||
/** Outcome of a single listing source's discovery attempt. */
interface SourceDiscoveryOutcome {
  /** Listing site this outcome belongs to. */
  source: "zillow" | "har";
  /** Discovered listing URL; `null` when none was found, or the lookup timed out or failed. */
  url: string | null;
  /** Human-readable notes describing what the lookup tried or why it failed. */
  attempts: string[];
}
|
||||
|
||||
/**
 * Discovers Zillow and HAR listing URLs for an address.
 *
 * Both lookups are started before either is awaited, so HAR is already in
 * flight while Zillow is still running. A timeout or failure in one source is
 * converted into a note in `attempts` rather than rejecting the whole call.
 *
 * Timeout resolution per source: the source-specific value in `deps`, then
 * `deps.timeoutMs`, then the source's built-in default.
 *
 * @param address Street address to search for.
 * @param deps Optional timeout overrides and replacement discovery helpers.
 * @returns The discovered URLs (or `null` per source) and the combined notes,
 *          Zillow's first, then HAR's.
 */
export async function discoverListingSources(
  address: string,
  deps: ListingDiscoveryDeps = {}
): Promise<ListingDiscoveryResult> {
  const timeoutMs = deps.timeoutMs ?? DEFAULT_DISCOVERY_TIMEOUT_MS;
  const zillowTimeoutMs =
    deps.zillowTimeoutMs ??
    (deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS);
  const harTimeoutMs =
    deps.harTimeoutMs ??
    (deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_DISCOVERY_TIMEOUT_MS);
  const discoverZillowListingFn = deps.discoverZillowListingFn || discoverZillowListing;
  const discoverHarListingFn = deps.discoverHarListingFn || discoverHarListing;

  // Runs one source under its own timeout and always resolves: errors are
  // reported through `attempts` so one source cannot fail the other.
  const runSource = async (
    source: "zillow" | "har",
    timeoutForSourceMs: number,
    operation: () => Promise<{ listingUrl: string | null; attempts: string[] }>
  ): Promise<SourceDiscoveryOutcome> => {
    const label = source === "zillow" ? "Zillow" : "HAR";
    try {
      const result = await withTimeout(operation, {
        operationName: `${label} discovery`,
        timeoutMs: timeoutForSourceMs
      });
      return {
        source,
        url: result.listingUrl,
        attempts: result.attempts
      };
    } catch (error) {
      if (error instanceof TimeoutError) {
        return {
          source,
          url: null,
          attempts: [`${label} discovery timed out after ${timeoutForSourceMs}ms.`]
        };
      }
      return {
        source,
        url: null,
        attempts: [
          `${label} discovery failed: ${error instanceof Error ? error.message : String(error)}`
        ]
      };
    }
  };

  // Create both promises before awaiting so the lookups overlap. The same
  // limit is also forwarded to each helper through its options argument.
  const zillowPromise = runSource("zillow", zillowTimeoutMs, () =>
    discoverZillowListingFn(address, { timeoutMs: zillowTimeoutMs })
  );
  const harPromise = runSource("har", harTimeoutMs, () =>
    discoverHarListingFn(address, { timeoutMs: harTimeoutMs })
  );

  const [zillowResult, harResult] = await Promise.all([zillowPromise, harPromise]);
  const attempts = [...zillowResult.attempts, ...harResult.attempts];

  return {
    attempts,
    zillowUrl: zillowResult.url,
    harUrl: harResult.url
  };
}
|
||||
|
||||
@@ -22,6 +22,8 @@ export interface PhotoReviewResolution {
|
||||
|
||||
/**
 * Optional overrides accepted by `extractPhotoData`.
 *
 * All timeouts are in milliseconds. A source-specific timeout wins over
 * `timeoutMs`; `timeoutMs`, when provided, wins over the built-in
 * per-source defaults.
 */
interface PhotoReviewDeps {
  /** Timeout applied to either source unless a source-specific value is given. */
  timeoutMs?: number;
  /** Timeout for Zillow photo extraction only. */
  zillowTimeoutMs?: number;
  /** Timeout for HAR photo extraction only. */
  harTimeoutMs?: number;
  /** Replacement for the Zillow photo extractor (the tests inject stubs here). */
  extractZillowPhotosFn?: typeof extractZillowPhotos;
  /** Replacement for the HAR photo extractor. */
  extractHarPhotosFn?: typeof extractHarPhotos;
}
|
||||
@@ -29,6 +31,12 @@ interface PhotoReviewDeps {
|
||||
/**
 * Reads a millisecond timeout from the environment variable `name`.
 *
 * Returns `fallbackMs` when the variable is unset, blank, or does not parse
 * to a finite, non-negative number, so a malformed value such as "60s" can
 * never become a NaN timeout.
 */
function readPhotoTimeoutMs(name: string, fallbackMs: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallbackMs;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallbackMs;
}

/** Generic photo-extraction timeout (ms); override with PROPERTY_ASSESSOR_PHOTO_TIMEOUT_MS. */
const DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS = readPhotoTimeoutMs(
  "PROPERTY_ASSESSOR_PHOTO_TIMEOUT_MS",
  25_000
);

/** Zillow photo-extraction timeout (ms); longer than the generic default. */
const DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS = readPhotoTimeoutMs(
  "PROPERTY_ASSESSOR_ZILLOW_PHOTO_TIMEOUT_MS",
  60_000
);

/** HAR photo-extraction timeout (ms); defaults to the generic photo timeout. */
const DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS = readPhotoTimeoutMs(
  "PROPERTY_ASSESSOR_HAR_PHOTO_TIMEOUT_MS",
  DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS
);
|
||||
|
||||
export async function extractPhotoData(
|
||||
source: PhotoSource,
|
||||
@@ -36,15 +44,21 @@ export async function extractPhotoData(
|
||||
deps: PhotoReviewDeps = {}
|
||||
): Promise<PhotoExtractionResult> {
|
||||
const timeoutMs = deps.timeoutMs ?? DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS;
|
||||
const zillowTimeoutMs =
|
||||
deps.zillowTimeoutMs ??
|
||||
(deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS);
|
||||
const harTimeoutMs =
|
||||
deps.harTimeoutMs ??
|
||||
(deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS);
|
||||
const extractZillowPhotosFn = deps.extractZillowPhotosFn || extractZillowPhotos;
|
||||
const extractHarPhotosFn = deps.extractHarPhotosFn || extractHarPhotos;
|
||||
|
||||
if (source === "zillow") {
|
||||
const payload = await withTimeout(
|
||||
() => extractZillowPhotosFn(url),
|
||||
() => extractZillowPhotosFn(url, { timeoutMs: zillowTimeoutMs }),
|
||||
{
|
||||
operationName: "Zillow photo extraction",
|
||||
timeoutMs
|
||||
timeoutMs: zillowTimeoutMs
|
||||
}
|
||||
);
|
||||
return {
|
||||
@@ -60,10 +74,10 @@ export async function extractPhotoData(
|
||||
}
|
||||
|
||||
const payload = await withTimeout(
|
||||
() => extractHarPhotosFn(url),
|
||||
() => extractHarPhotosFn(url, { timeoutMs: harTimeoutMs }),
|
||||
{
|
||||
operationName: "HAR photo extraction",
|
||||
timeoutMs
|
||||
timeoutMs: harTimeoutMs
|
||||
}
|
||||
);
|
||||
return {
|
||||
|
||||
@@ -20,6 +20,71 @@ test("discoverListingSources times out stalled Zillow and HAR discovery calls",
|
||||
assert.match(result.attempts.join(" "), /har discovery timed out/i);
|
||||
});
|
||||
|
||||
// Verifies that HAR discovery is started while the (slower) Zillow stub is
// still pending, i.e. the two lookups are not run one after the other.
test("discoverListingSources starts Zillow and HAR discovery in parallel", async () => {
  const address = "1011 Ennis Joslin Rd APT 235, Corpus Christi, TX 78412";
  let zillowStarted = false;
  let harStarted = false;

  const discoveryPromise = discoverListingSources(address, {
    timeoutMs: 100,
    discoverZillowListingFn: async () => {
      zillowStarted = true;
      // Keep Zillow pending for 50ms so the 10ms checkpoint below lands
      // while it is still in flight.
      await new Promise((resolve) => setTimeout(resolve, 50));
      return {
        source: "zillow",
        address,
        searchUrl: "https://www.zillow.com/example-search",
        finalUrl: "https://www.zillow.com/example-search",
        title: "Example Zillow Search",
        listingUrl: null,
        attempts: ["Zillow did not find a confident match."]
      };
    },
    discoverHarListingFn: async () => {
      harStarted = true;
      return {
        source: "har",
        address,
        searchUrl: "https://www.har.com/example-search",
        finalUrl: "https://www.har.com/example-search",
        title: "Example HAR Search",
        listingUrl: "https://www.har.com/homedetail/example/123",
        attempts: ["HAR found a matching listing quickly."]
      };
    }
  });

  // Checkpoint before the Zillow stub resolves: a sequential implementation
  // would not have started HAR yet.
  await new Promise((resolve) => setTimeout(resolve, 10));

  assert.equal(zillowStarted, true);
  assert.equal(harStarted, true);

  const result = await discoveryPromise;
  // The Zillow stub reports no listing, so only HAR yields a URL.
  assert.equal(result.zillowUrl, null);
  assert.equal(result.harUrl, "https://www.har.com/homedetail/example/123");
});
|
||||
|
||||
// The stub takes 40ms: longer than the generic 20ms timeout but shorter than
// the 80ms Zillow-specific one, so the call only succeeds if the Zillow
// override is the timeout that is actually applied.
test("extractPhotoData honors a longer Zillow timeout override", async () => {
  const listingUrl = "https://www.zillow.com/example";

  const result = await extractPhotoData("zillow", listingUrl, {
    timeoutMs: 20,
    zillowTimeoutMs: 80,
    extractZillowPhotosFn: async () => {
      await new Promise((resolve) => setTimeout(resolve, 40));
      return {
        source: "zillow",
        requestedUrl: listingUrl,
        finalUrl: listingUrl,
        expectedPhotoCount: 1,
        complete: true,
        photoCount: 1,
        imageUrls: ["https://photos.example/1.jpg"],
        notes: ["Zillow extractor succeeded after a slow page load."]
      };
    }
  });

  assert.equal(result.source, "zillow");
  assert.equal(result.photoCount, 1);
});
|
||||
|
||||
test("extractPhotoData times out a stalled photo extraction instead of hanging forever", async () => {
|
||||
await assert.rejects(
|
||||
async () =>
|
||||
|
||||
@@ -153,6 +153,7 @@ The discovery scripts are purpose-built for the common address-to-listing workfl
|
||||
- reject a mismatched unit when the requested address includes one
|
||||
- still work normally for single-family / no-unit addresses
|
||||
- return the direct listing URL as JSON
|
||||
- support longer source-specific timeouts when a caller such as `property-assessor` imports them for slower exact-unit Zillow pages
|
||||
|
||||
The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow:
|
||||
- open the listing page
|
||||
@@ -160,6 +161,7 @@ The photo scripts are purpose-built for the common `See all photos` / `Show all
|
||||
- wait for the resulting photo page or scroller view
|
||||
- extract direct image URLs from the rendered page
|
||||
- fail fast with a timeout instead of hanging indefinitely when the browser-backed extraction stalls
|
||||
- support longer source-specific timeouts when a caller such as `property-assessor` imports them for slower exact-unit Zillow renders
|
||||
|
||||
Output is JSON with:
|
||||
- `requestedUrl`
|
||||
|
||||
@@ -60,7 +60,7 @@ async function collectListingUrl(page) {
|
||||
});
|
||||
}
|
||||
|
||||
export async function discoverHarListing(rawAddress) {
|
||||
export async function discoverHarListing(rawAddress, options = {}) {
|
||||
const address = String(rawAddress || "").trim();
|
||||
const identity = parseAddressIdentity(address);
|
||||
const searchUrl = buildSearchUrl(address);
|
||||
@@ -121,6 +121,7 @@ export async function discoverHarListing(rawAddress) {
|
||||
};
|
||||
},
|
||||
{
|
||||
timeoutMs: Number(options.timeoutMs || 0) || undefined,
|
||||
onTimeout: closeContext
|
||||
}
|
||||
);
|
||||
|
||||
@@ -32,7 +32,7 @@ async function getAnnouncedPhotoCount(page) {
|
||||
});
|
||||
}
|
||||
|
||||
export async function extractHarPhotos(rawUrl) {
|
||||
export async function extractHarPhotos(rawUrl, options = {}) {
|
||||
const requestedUrl = parseTarget(rawUrl);
|
||||
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
|
||||
const closeContext = async () => {
|
||||
@@ -78,6 +78,7 @@ export async function extractHarPhotos(rawUrl) {
|
||||
};
|
||||
},
|
||||
{
|
||||
timeoutMs: Number(options.timeoutMs || 0) || undefined,
|
||||
onTimeout: closeContext
|
||||
}
|
||||
);
|
||||
|
||||
@@ -64,7 +64,7 @@ async function collectListingUrl(page) {
|
||||
});
|
||||
}
|
||||
|
||||
export async function discoverZillowListing(rawAddress) {
|
||||
export async function discoverZillowListing(rawAddress, options = {}) {
|
||||
const address = String(rawAddress || "").trim();
|
||||
const identity = parseAddressIdentity(address);
|
||||
const searchUrl = `https://www.zillow.com/homes/${encodeURIComponent(buildZillowAddressSlug(address))}_rb/`;
|
||||
@@ -125,6 +125,7 @@ export async function discoverZillowListing(rawAddress) {
|
||||
};
|
||||
},
|
||||
{
|
||||
timeoutMs: Number(options.timeoutMs || 0) || undefined,
|
||||
onTimeout: closeContext
|
||||
}
|
||||
);
|
||||
|
||||
@@ -105,7 +105,7 @@ async function collectZillowStructuredPhotoCandidates(page) {
|
||||
return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || "");
|
||||
}
|
||||
|
||||
export async function extractZillowPhotos(rawUrl) {
|
||||
export async function extractZillowPhotos(rawUrl, options = {}) {
|
||||
const requestedUrl = parseTarget(rawUrl);
|
||||
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
|
||||
const closeContext = async () => {
|
||||
@@ -177,6 +177,7 @@ export async function extractZillowPhotos(rawUrl) {
|
||||
};
|
||||
},
|
||||
{
|
||||
timeoutMs: Number(options.timeoutMs || 0) || undefined,
|
||||
onTimeout: closeContext
|
||||
}
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user