Fix slower Zillow unit photo discovery path

This commit is contained in:
2026-03-28 02:28:30 -05:00
parent 7690dc259b
commit 8fe451e8d0
11 changed files with 167 additions and 49 deletions

View File

@@ -115,7 +115,9 @@ scripts/property-assessor render-report --input "<report-payload-json>" --output
- resolve official public-record jurisdiction automatically from the address
- keep CAD discovery jurisdiction-specific from the address; do not hardcode one county CAD for every property
- try to discover Zillow and HAR listing URLs from the address when no listing URL is provided
- start Zillow and HAR discovery in parallel, while still preferring Zillow first for the photo-review path
- run the approval-safe Zillow/HAR photo extractor chain automatically
- give slower exact-unit Zillow pages a longer, Zillow-specific discovery and photo-extraction window (60 seconds by default) before giving up and falling back
- build a purpose-aware report payload
- complete the analysis without requiring recipient email(s)
- only stop and ask for recipient email(s) when the user is explicitly rendering or sending the PDF

View File

@@ -10,6 +10,8 @@ export interface ListingDiscoveryResult {
/** Optional overrides for listing discovery: timeouts plus injectable discovery functions. */
interface ListingDiscoveryDeps {
// Shared timeout in ms; when set, it also replaces the per-source default timeouts.
timeoutMs?: number;
// Zillow-only timeout in ms; takes precedence over timeoutMs.
zillowTimeoutMs?: number;
// HAR-only timeout in ms; takes precedence over timeoutMs.
harTimeoutMs?: number;
// Replacement for discoverZillowListing; used when provided.
discoverZillowListingFn?: typeof discoverZillowListing;
// Replacement for discoverHarListing; used when provided.
discoverHarListingFn?: typeof discoverHarListing;
}
@@ -17,61 +19,82 @@ interface ListingDiscoveryDeps {
/**
 * Parses a millisecond timeout read from an environment variable.
 *
 * @param raw - The raw environment value, or undefined when the variable is unset.
 * @param fallbackMs - Value to use when `raw` is not a usable timeout.
 * @returns `raw` as a number when it is finite and greater than zero, otherwise `fallbackMs`.
 */
function parseDiscoveryTimeoutMs(raw: string | undefined, fallbackMs: number): number {
  // Number(undefined) and Number("abc") are NaN, and Number("") is 0; none of these
  // is a usable timer delay, so they all fall back to the default.
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}

/** Timeout (ms) applied when no source-specific discovery timeout is configured. */
const DEFAULT_DISCOVERY_TIMEOUT_MS = parseDiscoveryTimeoutMs(
  process.env.PROPERTY_ASSESSOR_DISCOVERY_TIMEOUT_MS,
  20_000
);

/** Zillow discovery timeout (ms); defaults to a longer 60-second window. */
const DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS = parseDiscoveryTimeoutMs(
  process.env.PROPERTY_ASSESSOR_ZILLOW_DISCOVERY_TIMEOUT_MS,
  60_000
);

/** HAR discovery timeout (ms); defaults to the shared discovery timeout. */
const DEFAULT_HAR_DISCOVERY_TIMEOUT_MS = parseDiscoveryTimeoutMs(
  process.env.PROPERTY_ASSESSOR_HAR_DISCOVERY_TIMEOUT_MS,
  DEFAULT_DISCOVERY_TIMEOUT_MS
);
/** Normalized result of one source's discovery attempt, whether it succeeded, failed, or timed out. */
interface SourceDiscoveryOutcome {
// Which listing site this outcome belongs to.
source: "zillow" | "har";
// Discovered listing URL, or null when none was found or the attempt failed or timed out.
url: string | null;
// Human-readable notes describing what was tried or why the attempt failed.
attempts: string[];
}
/**
 * Looks up Zillow and HAR listing URLs for an address.
 *
 * NOTE(review): this span is a rendered diff, so removed (sequential) lines and
 * added (parallel) lines are interleaved; the comments describe the added code.
 *
 * @param address - Address passed to both discovery functions.
 * @param deps - Optional timeout overrides and replacement discovery functions.
 * @returns The combined attempt notes (Zillow first, then HAR) and each source's
 *   listing URL, or null for a source that found nothing, failed, or timed out.
 */
export async function discoverListingSources(
address: string,
deps: ListingDiscoveryDeps = {}
): Promise<ListingDiscoveryResult> {
const attempts: string[] = [];
let zillowUrl: string | null = null;
let harUrl: string | null = null;
const timeoutMs = deps.timeoutMs ?? DEFAULT_DISCOVERY_TIMEOUT_MS;
// Timeout precedence per source: the source-specific override, then an explicitly
// supplied deps.timeoutMs, then that source's own default.
const zillowTimeoutMs =
deps.zillowTimeoutMs ??
(deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_DISCOVERY_TIMEOUT_MS);
const harTimeoutMs =
deps.harTimeoutMs ??
(deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_DISCOVERY_TIMEOUT_MS);
const discoverZillowListingFn = deps.discoverZillowListingFn || discoverZillowListing;
const discoverHarListingFn = deps.discoverHarListingFn || discoverHarListing;
try {
const result = await withTimeout(
() => discoverZillowListingFn(address),
{
operationName: "Zillow discovery",
timeoutMs
// Runs one source's discovery under its own timeout and converts any timeout or
// error into an attempt note, so the returned promise resolves on every path.
const runSource = async (
source: "zillow" | "har",
timeoutForSourceMs: number,
operation: () => Promise<{ listingUrl: string | null; attempts: string[] }>
): Promise<SourceDiscoveryOutcome> => {
try {
const result = await withTimeout(operation, {
operationName: `${source === "zillow" ? "Zillow" : "HAR"} discovery`,
timeoutMs: timeoutForSourceMs
});
return {
source,
url: result.listingUrl,
attempts: result.attempts
};
} catch (error) {
if (error instanceof TimeoutError) {
return {
source,
url: null,
attempts: [
`${source === "zillow" ? "Zillow" : "HAR"} discovery timed out after ${timeoutForSourceMs}ms.`
]
};
}
);
zillowUrl = result.listingUrl;
attempts.push(...result.attempts);
} catch (error) {
if (error instanceof TimeoutError) {
attempts.push(`Zillow discovery timed out after ${timeoutMs}ms.`);
} else {
attempts.push(
`Zillow discovery failed: ${error instanceof Error ? error.message : String(error)}`
);
}
}
try {
const result = await withTimeout(
() => discoverHarListingFn(address),
{
operationName: "HAR discovery",
timeoutMs
}
);
harUrl = result.listingUrl;
attempts.push(...result.attempts);
} catch (error) {
if (error instanceof TimeoutError) {
attempts.push(`HAR discovery timed out after ${timeoutMs}ms.`);
} else {
attempts.push(
`HAR discovery failed: ${error instanceof Error ? error.message : String(error)}`
);
return {
source,
url: null,
attempts: [
`${source === "zillow" ? "Zillow" : "HAR"} discovery failed: ${error instanceof Error ? error.message : String(error)}`
]
};
}
}
};
// Both sources are started before either is awaited, so they run concurrently.
// Each discovery function also receives its timeout as an option.
const zillowPromise = runSource("zillow", zillowTimeoutMs, () =>
discoverZillowListingFn(address, { timeoutMs: zillowTimeoutMs })
);
const harPromise = runSource("har", harTimeoutMs, () =>
discoverHarListingFn(address, { timeoutMs: harTimeoutMs })
);
// Waits for both outcomes; a slow or failed source does not discard the other's result.
const [zillowResult, harResult] = await Promise.all([zillowPromise, harPromise]);
const attempts = [...zillowResult.attempts, ...harResult.attempts];
return {
attempts,
zillowUrl,
harUrl
zillowUrl: zillowResult.url,
harUrl: harResult.url
};
}

View File

@@ -22,6 +22,8 @@ export interface PhotoReviewResolution {
/** Optional overrides for photo extraction: timeouts plus injectable extractor functions. */
interface PhotoReviewDeps {
// Shared timeout in ms; when set, it also replaces the per-source default timeouts.
timeoutMs?: number;
// Zillow-only timeout in ms; takes precedence over timeoutMs.
zillowTimeoutMs?: number;
// HAR-only timeout in ms; takes precedence over timeoutMs.
harTimeoutMs?: number;
// Replacement for extractZillowPhotos; used when provided.
extractZillowPhotosFn?: typeof extractZillowPhotos;
// Replacement for extractHarPhotos; used when provided.
extractHarPhotosFn?: typeof extractHarPhotos;
}
@@ -29,6 +31,12 @@ interface PhotoReviewDeps {
/**
 * Parses a millisecond timeout read from an environment variable.
 *
 * @param raw - The raw environment value, or undefined when the variable is unset.
 * @param fallbackMs - Value to use when `raw` is not a usable timeout.
 * @returns `raw` as a number when it is finite and greater than zero, otherwise `fallbackMs`.
 */
function parsePhotoTimeoutMs(raw: string | undefined, fallbackMs: number): number {
  // Number(undefined) and Number("abc") are NaN, and Number("") is 0; none of these
  // is a usable timer delay, so they all fall back to the default.
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallbackMs;
}

/** Timeout (ms) applied when no source-specific photo timeout is configured. */
const DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS = parsePhotoTimeoutMs(
  process.env.PROPERTY_ASSESSOR_PHOTO_TIMEOUT_MS,
  25_000
);

/** Zillow photo extraction timeout (ms); defaults to a longer 60-second window. */
const DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS = parsePhotoTimeoutMs(
  process.env.PROPERTY_ASSESSOR_ZILLOW_PHOTO_TIMEOUT_MS,
  60_000
);

/** HAR photo extraction timeout (ms); defaults to the shared photo timeout. */
const DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS = parsePhotoTimeoutMs(
  process.env.PROPERTY_ASSESSOR_HAR_PHOTO_TIMEOUT_MS,
  DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS
);
export async function extractPhotoData(
source: PhotoSource,
@@ -36,15 +44,21 @@ export async function extractPhotoData(
deps: PhotoReviewDeps = {}
): Promise<PhotoExtractionResult> {
const timeoutMs = deps.timeoutMs ?? DEFAULT_PHOTO_EXTRACTION_TIMEOUT_MS;
const zillowTimeoutMs =
deps.zillowTimeoutMs ??
(deps.timeoutMs != null ? timeoutMs : DEFAULT_ZILLOW_PHOTO_EXTRACTION_TIMEOUT_MS);
const harTimeoutMs =
deps.harTimeoutMs ??
(deps.timeoutMs != null ? timeoutMs : DEFAULT_HAR_PHOTO_EXTRACTION_TIMEOUT_MS);
const extractZillowPhotosFn = deps.extractZillowPhotosFn || extractZillowPhotos;
const extractHarPhotosFn = deps.extractHarPhotosFn || extractHarPhotos;
if (source === "zillow") {
const payload = await withTimeout(
() => extractZillowPhotosFn(url),
() => extractZillowPhotosFn(url, { timeoutMs: zillowTimeoutMs }),
{
operationName: "Zillow photo extraction",
timeoutMs
timeoutMs: zillowTimeoutMs
}
);
return {
@@ -60,10 +74,10 @@ export async function extractPhotoData(
}
const payload = await withTimeout(
() => extractHarPhotosFn(url),
() => extractHarPhotosFn(url, { timeoutMs: harTimeoutMs }),
{
operationName: "HAR photo extraction",
timeoutMs
timeoutMs: harTimeoutMs
}
);
return {

View File

@@ -20,6 +20,71 @@ test("discoverListingSources times out stalled Zillow and HAR discovery calls",
assert.match(result.attempts.join(" "), /har discovery timed out/i);
});
// Verifies that HAR discovery is kicked off without waiting for a slower Zillow
// lookup: both stubs must have been entered shortly after the call is made.
test("discoverListingSources starts Zillow and HAR discovery in parallel", async () => {
  const address = "1011 Ennis Joslin Rd APT 235, Corpus Christi, TX 78412";
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  const started = { zillow: false, har: false };

  const pending = discoverListingSources(address, {
    timeoutMs: 100,
    // Slow stub: records that it ran, then takes 50ms to report no match.
    discoverZillowListingFn: async () => {
      started.zillow = true;
      await pause(50);
      return {
        source: "zillow",
        address,
        searchUrl: "https://www.zillow.com/example-search",
        finalUrl: "https://www.zillow.com/example-search",
        title: "Example Zillow Search",
        listingUrl: null,
        attempts: ["Zillow did not find a confident match."]
      };
    },
    // Fast stub: records that it ran and resolves with a listing right away.
    discoverHarListingFn: async () => {
      started.har = true;
      return {
        source: "har",
        address,
        searchUrl: "https://www.har.com/example-search",
        finalUrl: "https://www.har.com/example-search",
        title: "Example HAR Search",
        listingUrl: "https://www.har.com/homedetail/example/123",
        attempts: ["HAR found a matching listing quickly."]
      };
    }
  });

  // Check well before the 50ms Zillow stub can finish.
  await pause(10);
  assert.equal(started.zillow, true);
  assert.equal(started.har, true);

  const { harUrl } = await pending;
  assert.equal(harUrl, "https://www.har.com/homedetail/example/123");
});
// A 40ms extractor would exceed the shared 20ms timeout, so this only passes
// when the 80ms Zillow-specific override is the one actually applied.
test("extractPhotoData honors a longer Zillow timeout override", async () => {
  const listingUrl = "https://www.zillow.com/example";
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

  const { source, photoCount } = await extractPhotoData("zillow", listingUrl, {
    timeoutMs: 20,
    zillowTimeoutMs: 80,
    extractZillowPhotosFn: async () => {
      await pause(40);
      return {
        source: "zillow",
        requestedUrl: listingUrl,
        finalUrl: listingUrl,
        expectedPhotoCount: 1,
        complete: true,
        photoCount: 1,
        imageUrls: ["https://photos.example/1.jpg"],
        notes: ["Zillow extractor succeeded after a slow page load."]
      };
    }
  });

  assert.equal(source, "zillow");
  assert.equal(photoCount, 1);
});
test("extractPhotoData times out a stalled photo extraction instead of hanging forever", async () => {
await assert.rejects(
async () =>