82 lines
2.5 KiB
JavaScript
82 lines
2.5 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import { pathToFileURL } from "node:url";
|
|
|
|
import {
|
|
createPageSession,
|
|
dismissCommonOverlays,
|
|
fail,
|
|
gotoListing,
|
|
parseTarget,
|
|
runWithOperationTimeout,
|
|
} from "./real-estate-photo-common.js";
|
|
import {
|
|
extractZillowIdentifierHintsFromNextDataScript,
|
|
extractZillowIdentifierHintsFromText,
|
|
} from "./zillow-photo-data.js";
|
|
|
|
/**
 * Load a Zillow listing in a browser page session and collect any parcel/APN
 * identifier hints the page exposes.
 *
 * Two sources are read: the text content of the `#__NEXT_DATA__` element and
 * the rendered body text. For each field, the value derived from
 * `#__NEXT_DATA__` wins and the body-text value is the fallback.
 *
 * @param {string} rawUrl - Listing URL; normalized via `parseTarget` before navigation.
 * @param {object|null} [options] - Optional settings. `null` is treated like `{}`.
 * @param {number|string} [options.timeoutMs] - Overall operation timeout handed to
 *   `runWithOperationTimeout`. Values that coerce to 0 or NaN are forwarded as
 *   `undefined` (presumably selecting the helper's default; confirm against the helper).
 * @returns {Promise<{source: string, requestedUrl: *, finalUrl: *, parcelId: *, apn: *, notes: string[]}>}
 *   Extraction summary. `parcelId` and `apn` are `null` when no hint was found.
 */
export async function extractZillowIdentifierHints(rawUrl, options = {}) {
  const requestedUrl = parseTarget(rawUrl);
  // Headless unless the HEADLESS environment variable is exactly "false".
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  // Best-effort close: invoked both on timeout and from `finally`, so a failure
  // (for example from closing twice) is deliberately ignored.
  const closeContext = async () => {
    await context.close().catch(() => {});
  };

  try {
    return await runWithOperationTimeout(
      "Zillow identifier extraction",
      async () => {
        await gotoListing(page, requestedUrl);
        await dismissCommonOverlays(page);

        // Either read may fail (missing element, navigation race); fall back to
        // an empty value instead of aborting the whole extraction.
        const [scriptText, bodyText] = await Promise.all([
          page.locator("#__NEXT_DATA__").textContent().catch(() => null),
          page.evaluate(() => document.body?.innerText || "").catch(() => ""),
        ]);

        const structuredHints = extractZillowIdentifierHintsFromNextDataScript(scriptText || "");
        const textHints = extractZillowIdentifierHintsFromText(bodyText || "");
        const parcelId = structuredHints.parcelId || textHints.parcelId || null;
        const apn = structuredHints.apn || textHints.apn || null;
        const notes = [];

        if (parcelId) {
          notes.push(`Zillow listing exposed parcel/APN identifier ${parcelId}.`);
        } else if (apn) {
          notes.push(`Zillow listing exposed APN/tax identifier ${apn}.`);
        } else {
          notes.push("Zillow listing did not expose a parcel/APN identifier that could be extracted reliably.");
        }

        return {
          source: "zillow",
          requestedUrl,
          finalUrl: page.url(),
          parcelId,
          apn,
          notes,
        };
      },
      {
        // The default parameter only covers `undefined`; optional chaining keeps
        // an explicit `null` options argument from throwing here.
        timeoutMs: Number(options?.timeoutMs || 0) || undefined,
        onTimeout: closeContext,
      }
    );
  } finally {
    await closeContext();
  }
}
|
|
|
|
/**
 * CLI entry point: extracts identifier hints for the listing URL given as the
 * first command-line argument and prints the result to stdout as
 * pretty-printed JSON followed by a newline. Any thrown error is reported
 * through `fail` together with its message text.
 */
async function main() {
  const [, , listingUrl] = process.argv;

  try {
    const hints = await extractZillowIdentifierHints(listingUrl);
    process.stdout.write(`${JSON.stringify(hints, null, 2)}\n`);
  } catch (err) {
    // Non-Error throwables are stringified so `fail` always receives text.
    const detail = err instanceof Error ? err.message : String(err);
    fail("Zillow identifier extraction failed.", detail);
  }
}
|
|
|
|
// Run the CLI only when this module is the script Node was launched with,
// i.e. its own URL equals the file URL of argv[1]. Importing the module from
// elsewhere does not trigger main().
if (process.argv[1] && pathToFileURL(process.argv[1]).href === import.meta.url) {
  // main() wraps its work in try/catch, so the promise is intentionally not awaited.
  void main();
}
|