Use Zillow parcel hints for CAD lookup
This commit is contained in:
81
skills/web-automation/scripts/zillow-identifiers.js
Normal file
81
skills/web-automation/scripts/zillow-identifiers.js
Normal file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { pathToFileURL } from "node:url";
|
||||
|
||||
import {
|
||||
createPageSession,
|
||||
dismissCommonOverlays,
|
||||
fail,
|
||||
gotoListing,
|
||||
parseTarget,
|
||||
runWithOperationTimeout,
|
||||
} from "./real-estate-photo-common.js";
|
||||
import {
|
||||
extractZillowIdentifierHintsFromNextDataScript,
|
||||
extractZillowIdentifierHintsFromText,
|
||||
} from "./zillow-photo-data.js";
|
||||
|
||||
/**
 * Opens a Zillow listing in a page session and collects parcel / APN hints.
 *
 * Hints are read from two places on the loaded page: the text content of the
 * `#__NEXT_DATA__` element and the visible body text. A value found in the
 * structured data wins over one found in the body text.
 *
 * @param {string} rawUrl - Listing URL; passed through `parseTarget` first.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` is forwarded to
 *   `runWithOperationTimeout`; falsy or non-numeric values become `undefined`.
 * @returns {Promise<{source: string, requestedUrl: *, finalUrl: string,
 *   parcelId: (string|null), apn: (string|null), notes: string[]}>}
 */
export async function extractZillowIdentifierHints(rawUrl, options = {}) {
  const requestedUrl = parseTarget(rawUrl);
  const headless = process.env.HEADLESS !== "false";
  const { context, page } = await createPageSession({ headless });

  // Teardown is best-effort: it runs on timeout and again in `finally`,
  // so a failure to close must never surface to the caller.
  const closeContext = async () => {
    await context.close().catch(() => {});
  };

  const collectHints = async () => {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);

    // Both reads are independent and individually tolerant of failure.
    const nextDataRead = page.locator("#__NEXT_DATA__").textContent().catch(() => null);
    const bodyTextRead = page.evaluate(() => document.body?.innerText || "").catch(() => "");
    const [scriptText, bodyText] = await Promise.all([nextDataRead, bodyTextRead]);

    const structuredHints = extractZillowIdentifierHintsFromNextDataScript(scriptText || "");
    const textHints = extractZillowIdentifierHintsFromText(bodyText || "");

    const parcelId = structuredHints.parcelId || textHints.parcelId || null;
    const apn = structuredHints.apn || textHints.apn || null;

    // Exactly one note is produced; the parcel id takes precedence over the APN.
    let note;
    if (parcelId) {
      note = `Zillow listing exposed parcel/APN identifier ${parcelId}.`;
    } else if (apn) {
      note = `Zillow listing exposed APN/tax identifier ${apn}.`;
    } else {
      note = "Zillow listing did not expose a parcel/APN identifier that could be extracted reliably.";
    }

    return {
      source: "zillow",
      requestedUrl,
      finalUrl: page.url(),
      parcelId,
      apn,
      notes: [note],
    };
  };

  try {
    return await runWithOperationTimeout("Zillow identifier extraction", collectHints, {
      timeoutMs: Number(options.timeoutMs || 0) || undefined,
      onTimeout: closeContext,
    });
  } finally {
    await closeContext();
  }
}
|
||||
|
||||
/**
 * CLI entry point: extracts identifier hints for the URL given as the first
 * command-line argument and prints the result as pretty-printed JSON.
 * Any failure is reported through `fail` with the error's message.
 */
async function main() {
  const [, , listingUrl] = process.argv;

  try {
    const hints = await extractZillowIdentifierHints(listingUrl);
    const serialized = JSON.stringify(hints, null, 2);
    process.stdout.write(`${serialized}\n`);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    fail("Zillow identifier extraction failed.", detail);
  }
}
|
||||
|
||||
// Run the CLI only when this module is the script Node was started with,
// not when it is imported by another module.
const invokedScriptPath = process.argv[1];
if (invokedScriptPath && import.meta.url === pathToFileURL(invokedScriptPath).href) {
  main();
}
|
||||
@@ -58,6 +58,92 @@ export function extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptT
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Normalises a raw identifier value to a trimmed string in which every run
 * of whitespace is reduced to a single space. Falsy input yields "".
 *
 * @param {*} value - Raw value (string, number, null, ...).
 * @returns {string} The whitespace-collapsed text.
 */
function collapseIdentifier(value) {
  const rawText = value ? String(value) : "";
  const singleSpaced = rawText.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
||||
|
||||
/**
 * Reports whether a value, once whitespace-collapsed, consists solely of
 * 4 to 40 letters, digits, or hyphens (case-insensitive).
 *
 * @param {*} value - Candidate identifier.
 * @returns {boolean} True when the collapsed value has the expected shape.
 */
function isLikelyIdentifier(value) {
  const candidate = collapseIdentifier(value);
  const identifierShape = /^[A-Z0-9-]{4,40}$/i;
  return identifierShape.test(candidate);
}
|
||||
|
||||
/**
 * Recursively walks a parsed JSON value and records the first parcel id and
 * the first APN-style identifier it encounters.
 *
 * Keys are compared case-insensitively. `parcelid` / `parcelnumber` feed
 * `hints.parcelId`; `apn` / `apnnumber` / `taxparcelid` / `taxid` feed
 * `hints.apn`. A slot that is already filled is never overwritten, and only
 * string or number values that pass `isLikelyIdentifier` are accepted.
 *
 * @param {*} node - Current value in the tree; non-objects are ignored.
 * @param {{parcelId?: string, apn?: string}} hints - Mutated in place.
 * @returns {void}
 */
function visitForIdentifierHints(node, hints) {
  if (typeof node !== "object" || node === null) {
    return;
  }

  if (Array.isArray(node)) {
    node.forEach((element) => visitForIdentifierHints(element, hints));
    return;
  }

  for (const [propertyName, propertyValue] of Object.entries(node)) {
    // Map the property name onto the hint slot it can populate, if any.
    let slot = null;
    switch (propertyName.toLowerCase()) {
      case "parcelid":
      case "parcelnumber":
        slot = "parcelId";
        break;
      case "apn":
      case "apnnumber":
      case "taxparcelid":
      case "taxid":
        slot = "apn";
        break;
      default:
        break;
    }

    const isScalar = typeof propertyValue === "string" || typeof propertyValue === "number";
    if (slot !== null && hints[slot] == null && isScalar) {
      const candidate = collapseIdentifier(propertyValue);
      if (isLikelyIdentifier(candidate)) {
        hints[slot] = candidate;
      }
    }

    // Keep descending regardless of whether this key matched.
    if (propertyValue !== null && typeof propertyValue === "object") {
      visitForIdentifierHints(propertyValue, hints);
    }
  }
}
|
||||
|
||||
/**
 * Extracts parcel / APN hints from the JSON text of a `__NEXT_DATA__` script.
 *
 * The parsed document is searched first. If
 * `props.pageProps.componentProps.gdpClientCache` holds a non-blank string,
 * that string is parsed as JSON and searched as well, filling only slots that
 * are still empty.
 *
 * @param {*} scriptText - Raw script text; non-strings and blank strings
 *   produce an empty result.
 * @returns {{parcelId?: string, apn?: string}} Hints found, or `{}` when the
 *   input is missing, blank, or not valid JSON.
 */
export function extractZillowIdentifierHintsFromNextDataScript(scriptText) {
  const hints = {};

  if (typeof scriptText !== "string" || scriptText.trim() === "") {
    return hints;
  }

  // JSON.parse never yields `undefined`, so it is a safe failure sentinel.
  const parseOrUndefined = (jsonText) => {
    try {
      return JSON.parse(jsonText);
    } catch {
      return undefined;
    }
  };

  const nextData = parseOrUndefined(scriptText);
  if (nextData === undefined) {
    return hints;
  }

  visitForIdentifierHints(nextData, hints);

  const cacheText = nextData?.props?.pageProps?.componentProps?.gdpClientCache;
  const hasCacheText = typeof cacheText === "string" && cacheText.trim() !== "";
  if (hasCacheText) {
    // A cache that fails to parse is ignored; the outer document already parsed.
    const cache = parseOrUndefined(cacheText);
    if (cache !== undefined) {
      visitForIdentifierHints(cache, hints);
    }
  }

  return hints;
}
|
||||
|
||||
/**
 * Extracts parcel / APN hints from free-form page text by looking for a
 * label ("parcel", "parcel number", "tax parcel", "apn", "tax id", ...)
 * followed by an identifier-shaped token of 4 to 40 letters, digits, or
 * hyphens.
 *
 * The token must contain at least one digit. Without that requirement the
 * case-insensitive character class accepts ordinary words, so
 * "Parcel number: N/A" produced the identifier "number" and
 * "a large parcel with views" produced "with".
 *
 * @param {*} text - Visible page text; non-strings are treated as "".
 * @returns {{parcelId?: string, apn?: string}} Only the hints that were found.
 */
export function extractZillowIdentifierHintsFromText(text) {
  const source = typeof text === "string" ? text : "";
  const hints = {};

  // `(?=[A-Z0-9-]{0,39}\d)` rejects candidate tokens that carry no digit, so
  // the search moves on to the next label occurrence instead of returning a word.
  const parcelMatch = source.match(
    /\b(?:parcel|parcel number|parcel #|tax parcel)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*(?=[A-Z0-9-]{0,39}\d)([A-Z0-9-]{4,40})\b/i
  );
  if (parcelMatch) {
    // The capture class excludes whitespace, so no further normalisation is needed.
    hints.parcelId = parcelMatch[1];
  }

  const apnMatch = source.match(
    /\b(?:apn|apn #|apn no\.?|tax id)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*(?=[A-Z0-9-]{0,39}\d)([A-Z0-9-]{4,40})\b/i
  );
  if (apnMatch) {
    hints.apn = apnMatch[1];
  }

  return hints;
}
|
||||
|
||||
const DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT = 12;
|
||||
|
||||
export function shouldUseStructuredZillowPhotos(candidates, options = {}) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
extractZillowStructuredPhotoCandidatesFromNextDataScript,
|
||||
extractZillowIdentifierHintsFromNextDataScript,
|
||||
shouldUseStructuredZillowPhotos,
|
||||
} from "./zillow-photo-data.js";
|
||||
|
||||
@@ -119,3 +120,27 @@ test("shouldUseStructuredZillowPhotos returns false for a tiny structured set wh
|
||||
false
|
||||
);
|
||||
});
|
||||
|
||||
// Identifiers that live only inside the stringified `gdpClientCache` payload
// must still be discovered by the next-data extractor.
test("extractZillowIdentifierHintsFromNextDataScript finds parcel and APN style identifiers", () => {
  const expectedHints = {
    parcelId: "1234567890",
    apn: "APN-77-55",
  };

  const gdpClientCache = JSON.stringify({
    SomeQuery: {
      property: { ...expectedHints },
    },
  });
  const componentProps = { gdpClientCache };
  const scriptText = JSON.stringify({ props: { pageProps: { componentProps } } });

  const actualHints = extractZillowIdentifierHintsFromNextDataScript(scriptText);
  assert.deepEqual(actualHints, expectedHints);
});
|
||||
|
||||
Reference in New Issue
Block a user