Use Zillow parcel hints for CAD lookup

This commit is contained in:
2026-03-28 03:55:56 -05:00
parent ece8fc548f
commit b77134ced5
11 changed files with 438 additions and 18 deletions

View File

@@ -144,6 +144,7 @@ Use the dedicated extractors before trying a free-form gallery flow.
- Zillow discovery: `node scripts/zillow-discover.js "<street-address>"`
- HAR discovery: `node scripts/har-discover.js "<street-address>"`
- Zillow: `node scripts/zillow-photos.js "<listing-url>"`
- Zillow identifiers: `node scripts/zillow-identifiers.js "<listing-url>"`
- HAR: `node scripts/har-photos.js "<listing-url>"`
The discovery scripts are purpose-built for the common address-to-listing workflow:
@@ -173,6 +174,12 @@ Output is JSON with:
- `imageUrls`
- `notes`
`zillow-identifiers.js` is a lighter helper for CAD/public-record workflows:
- open the Zillow listing shell
- inspect embedded `__NEXT_DATA__` plus visible listing text
- capture parcel/APN-style identifiers when Zillow exposes them
- return those hints so `property-assessor` can use them as stronger CAD lookup keys than listing geo IDs
For property-assessor style workflows, prefer these dedicated commands over generic natural-language gallery automation.
### Gallery/lightbox and all-photos workflows

View File

@@ -0,0 +1,81 @@
#!/usr/bin/env node
import { pathToFileURL } from "node:url";
import {
createPageSession,
dismissCommonOverlays,
fail,
gotoListing,
parseTarget,
runWithOperationTimeout,
} from "./real-estate-photo-common.js";
import {
extractZillowIdentifierHintsFromNextDataScript,
extractZillowIdentifierHintsFromText,
} from "./zillow-photo-data.js";
/**
 * Open a Zillow listing in a page session and collect parcel/APN identifier hints.
 *
 * Hints are read from two places on the loaded page: the text content of the
 * `#__NEXT_DATA__` element and the visible body text. When both yield a value
 * for the same field, the structured (`__NEXT_DATA__`) value is preferred.
 *
 * @param {string} rawUrl - Listing URL; passed through `parseTarget` before use.
 * @param {{ timeoutMs?: number }} [options] - Optional settings. A missing,
 *   zero, or non-numeric `timeoutMs` is forwarded as `undefined`.
 * @returns {Promise<{source: string, requestedUrl: *, finalUrl: string,
 *   parcelId: (string|null), apn: (string|null), notes: string[]}>}
 */
export async function extractZillowIdentifierHints(rawUrl, options = {}) {
  const requestedUrl = parseTarget(rawUrl);
  const headless = process.env.HEADLESS !== "false";
  const { context, page } = await createPageSession({ headless });

  // Best-effort teardown: close errors are swallowed because this runs both
  // as the timeout hook and again in `finally`.
  const releaseContext = async () => {
    await context.close().catch(() => {});
  };

  const collectHints = async () => {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);

    // Start both page reads before awaiting either; each falls back to an
    // empty-ish value instead of rejecting.
    const nextDataRead = page.locator("#__NEXT_DATA__").textContent().catch(() => null);
    const bodyTextRead = page.evaluate(() => document.body?.innerText || "").catch(() => "");
    const [scriptText, bodyText] = await Promise.all([nextDataRead, bodyTextRead]);

    const fromNextData = extractZillowIdentifierHintsFromNextDataScript(scriptText || "");
    const fromBodyText = extractZillowIdentifierHintsFromText(bodyText || "");

    const parcelId = fromNextData.parcelId || fromBodyText.parcelId || null;
    const apn = fromNextData.apn || fromBodyText.apn || null;

    // Exactly one note is emitted; the parcel ID takes precedence over the APN.
    let note;
    if (parcelId) {
      note = `Zillow listing exposed parcel/APN identifier ${parcelId}.`;
    } else if (apn) {
      note = `Zillow listing exposed APN/tax identifier ${apn}.`;
    } else {
      note = "Zillow listing did not expose a parcel/APN identifier that could be extracted reliably.";
    }

    return {
      source: "zillow",
      requestedUrl,
      finalUrl: page.url(),
      parcelId,
      apn,
      notes: [note],
    };
  };

  try {
    const timeoutMs = Number(options.timeoutMs || 0) || undefined;
    return await runWithOperationTimeout("Zillow identifier extraction", collectHints, {
      timeoutMs,
      onTimeout: releaseContext,
    });
  } finally {
    await releaseContext();
  }
}
/**
 * CLI entry point: extract identifier hints for the listing URL given as the
 * first command-line argument and print them to stdout as indented JSON.
 * Any failure is handed to `fail` together with the error's message.
 */
async function main() {
  const listingUrl = process.argv[2];
  try {
    const hints = await extractZillowIdentifierHints(listingUrl);
    const serialized = JSON.stringify(hints, null, 2);
    process.stdout.write(`${serialized}\n`);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    fail("Zillow identifier extraction failed.", detail);
  }
}
// Run the CLI only when this file is the script Node was started with;
// importing the module elsewhere must not trigger an extraction.
if (process.argv[1]) {
  const entryHref = pathToFileURL(process.argv[1]).href;
  if (import.meta.url === entryHref) {
    main();
  }
}

View File

@@ -58,6 +58,92 @@ export function extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptT
return out;
}
/**
 * Normalize a raw identifier value to a single-spaced, trimmed string.
 * Falsy inputs (null, undefined, 0, "") become the empty string.
 *
 * @param {*} value - Raw value to normalize.
 * @returns {string} The normalized text.
 */
function collapseIdentifier(value) {
  const text = value ? String(value) : "";
  // Trimming first guarantees the split yields no leading/trailing empties.
  return text.trim().split(/\s+/).join(" ");
}
/**
 * Report whether a value, once normalized, looks like an identifier token:
 * 4 to 40 characters drawn from letters, digits, and hyphens.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when the normalized value matches that shape.
 */
function isLikelyIdentifier(value) {
  const candidate = collapseIdentifier(value);
  const identifierShape = /^[A-Z0-9-]{4,40}$/i;
  return identifierShape.test(candidate);
}
/**
 * Walk a parsed JSON value depth-first and record the first plausible parcel
 * and APN identifiers found under well-known key names.
 *
 * Keys are compared case-insensitively. `parcelid` / `parcelnumber` feed
 * `hints.parcelId`; `apn` / `apnnumber` / `taxparcelid` / `taxid` feed
 * `hints.apn`. A field that is already set is never overwritten.
 *
 * @param {*} node - Current value in the tree; non-objects are ignored.
 * @param {{parcelId?: string, apn?: string}} hints - Accumulator, mutated in place.
 */
function visitForIdentifierHints(node, hints) {
  if (!node || typeof node !== "object") return;

  if (Array.isArray(node)) {
    for (let index = 0; index < node.length; index += 1) {
      visitForIdentifierHints(node[index], hints);
    }
    return;
  }

  for (const [key, value] of Object.entries(node)) {
    // Map the key to the hint field it can populate, if any.
    let field = null;
    switch (key.toLowerCase()) {
      case "parcelid":
      case "parcelnumber":
        field = "parcelId";
        break;
      case "apn":
      case "apnnumber":
      case "taxparcelid":
      case "taxid":
        field = "apn";
        break;
      default:
        break;
    }

    const isScalar = typeof value === "string" || typeof value === "number";
    if (field !== null && hints[field] == null && isScalar) {
      const candidate = collapseIdentifier(value);
      if (isLikelyIdentifier(candidate)) {
        hints[field] = candidate;
      }
    }

    // Descend into nested objects and arrays regardless of the key name.
    if (value && typeof value === "object") {
      visitForIdentifierHints(value, hints);
    }
  }
}
/**
 * Extract parcel/APN hints from the JSON text of a `__NEXT_DATA__` script.
 *
 * The parsed document is searched first. Then, if
 * `props.pageProps.componentProps.gdpClientCache` is a non-blank string, it is
 * parsed as JSON and searched too. Values found in the outer document win
 * because fields already set are not overwritten.
 *
 * @param {string} scriptText - Raw script text.
 * @returns {{parcelId?: string, apn?: string}} Hints found; an empty object
 *   when the input is not a non-blank string or is not valid JSON.
 */
export function extractZillowIdentifierHintsFromNextDataScript(scriptText) {
  const hints = {};
  if (typeof scriptText !== "string" || scriptText.trim() === "") {
    return hints;
  }

  let nextData;
  try {
    nextData = JSON.parse(scriptText);
  } catch {
    return hints;
  }
  visitForIdentifierHints(nextData, hints);

  const cacheText = nextData?.props?.pageProps?.componentProps?.gdpClientCache;
  const hasCacheText = typeof cacheText === "string" && cacheText.trim() !== "";
  if (!hasCacheText) {
    return hints;
  }

  try {
    const cache = JSON.parse(cacheText);
    visitForIdentifierHints(cache, hints);
  } catch {
    // A broken cache payload is not fatal: keep whatever the outer document gave us.
  }
  return hints;
}
/**
 * Scan free-form listing text for labelled parcel and APN/tax identifiers.
 *
 * A label ("parcel", "parcel number", "parcel #", "tax parcel" for parcel IDs;
 * "apn", "apn #", "apn no.", "tax id" for APNs) may be followed by an optional
 * "number" / "#" / "no." and an optional ":" or "#" separator, then the
 * identifier token: 4-40 letters, digits, or hyphens.
 *
 * The token must contain at least one digit. Without that requirement the
 * case-insensitive character class accepts ordinary words, so text such as
 * "Parcel number: N/A" or "Parcel details" produced bogus identifiers
 * ("number", "details"), and because only the first match is used, a bogus
 * hit also hid a genuine identifier appearing later in the text.
 *
 * @param {string} text - Visible page text; non-strings are treated as empty.
 * @returns {{parcelId?: string, apn?: string}} Only the fields that matched.
 */
export function extractZillowIdentifierHintsFromText(text) {
  const source = typeof text === "string" ? text : "";
  const hints = {};

  // `(?=[A-Z0-9-]{0,39}\d)` is the digit requirement described above.
  const parcelMatch = source.match(
    /\b(?:parcel|parcel number|parcel #|tax parcel)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*(?=[A-Z0-9-]{0,39}\d)([A-Z0-9-]{4,40})\b/i
  );
  if (parcelMatch) {
    // The capture class admits no whitespace, so no further normalization is needed.
    hints.parcelId = parcelMatch[1];
  }

  const apnMatch = source.match(
    /\b(?:apn|apn #|apn no\.?|tax id)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*(?=[A-Z0-9-]{0,39}\d)([A-Z0-9-]{4,40})\b/i
  );
  if (apnMatch) {
    hints.apn = apnMatch[1];
  }

  return hints;
}
const DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT = 12;
export function shouldUseStructuredZillowPhotos(candidates, options = {}) {

View File

@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
import {
extractZillowStructuredPhotoCandidatesFromNextDataScript,
extractZillowIdentifierHintsFromNextDataScript,
shouldUseStructuredZillowPhotos,
} from "./zillow-photo-data.js";
@@ -119,3 +120,27 @@ test("shouldUseStructuredZillowPhotos returns false for a tiny structured set wh
false
);
});
test("extractZillowIdentifierHintsFromNextDataScript finds parcel and APN style identifiers", () => {
  const expected = {
    parcelId: "1234567890",
    apn: "APN-77-55",
  };

  // The identifiers sit inside `gdpClientCache`, which is itself a JSON-encoded
  // string, so they are only reachable by parsing that inner payload.
  const gdpClientCache = JSON.stringify({
    SomeQuery: {
      property: { ...expected },
    },
  });
  const scriptText = JSON.stringify({
    props: {
      pageProps: {
        componentProps: { gdpClientCache },
      },
    },
  });

  const actual = extractZillowIdentifierHintsFromNextDataScript(scriptText);
  assert.deepEqual(actual, expected);
});