Fix slower Zillow unit photo discovery path

This commit is contained in:
2026-03-28 02:28:30 -05:00
parent 7690dc259b
commit 8fe451e8d0
11 changed files with 167 additions and 49 deletions

View File

@@ -153,6 +153,7 @@ The discovery scripts are purpose-built for the common address-to-listing workfl
- reject a mismatched unit when the requested address includes one
- still work normally for single-family / no-unit addresses
- return the direct listing URL as JSON
- accept an optional `timeoutMs` option so that a caller importing the discovery functions, such as `property-assessor`, can allow a longer source-specific timeout for slower exact-unit Zillow pages
The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow:
- open the listing page
@@ -160,6 +161,7 @@ The photo scripts are purpose-built for the common `See all photos` / `Show all
- wait for the resulting photo page or scroller view
- extract direct image URLs from the rendered page
- fail fast with a timeout instead of hanging indefinitely when the browser-backed extraction stalls
- accept an optional `timeoutMs` option so that a caller importing the photo functions, such as `property-assessor`, can allow a longer source-specific timeout for slower exact-unit Zillow renders
Output is JSON with:
- `requestedUrl`

View File

@@ -60,7 +60,7 @@ async function collectListingUrl(page) {
});
}
export async function discoverHarListing(rawAddress) {
export async function discoverHarListing(rawAddress, options = {}) {
const address = String(rawAddress || "").trim();
const identity = parseAddressIdentity(address);
const searchUrl = buildSearchUrl(address);
@@ -121,6 +121,7 @@ export async function discoverHarListing(rawAddress) {
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
}
);

View File

@@ -32,7 +32,7 @@ async function getAnnouncedPhotoCount(page) {
});
}
export async function extractHarPhotos(rawUrl) {
export async function extractHarPhotos(rawUrl, options = {}) {
const requestedUrl = parseTarget(rawUrl);
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
const closeContext = async () => {
@@ -78,6 +78,7 @@ export async function extractHarPhotos(rawUrl) {
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
}
);

View File

@@ -64,7 +64,7 @@ async function collectListingUrl(page) {
});
}
export async function discoverZillowListing(rawAddress) {
export async function discoverZillowListing(rawAddress, options = {}) {
const address = String(rawAddress || "").trim();
const identity = parseAddressIdentity(address);
const searchUrl = `https://www.zillow.com/homes/${encodeURIComponent(buildZillowAddressSlug(address))}_rb/`;
@@ -125,6 +125,7 @@ export async function discoverZillowListing(rawAddress) {
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
}
);

View File

@@ -105,7 +105,7 @@ async function collectZillowStructuredPhotoCandidates(page) {
return extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptText || "");
}
export async function extractZillowPhotos(rawUrl) {
export async function extractZillowPhotos(rawUrl, options = {}) {
const requestedUrl = parseTarget(rawUrl);
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
const closeContext = async () => {
@@ -177,6 +177,7 @@ export async function extractZillowPhotos(rawUrl) {
};
},
{
timeoutMs: Number(options.timeoutMs || 0) || undefined,
onTimeout: closeContext
}
);