Add purpose-aware property assessor intake
This commit is contained in:
139
skills/web-automation/scripts/har-discover.js
Normal file
139
skills/web-automation/scripts/har-discover.js
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import {
|
||||
createPageSession,
|
||||
dismissCommonOverlays,
|
||||
fail,
|
||||
gotoListing,
|
||||
sleep,
|
||||
} from "./real-estate-photo-common.js";
|
||||
|
||||
/**
 * Coerces the raw CLI argument to a trimmed address string.
 *
 * Falsy input is treated as an empty string. When nothing is left after
 * trimming, `fail("Missing address.")` is invoked.
 * NOTE(review): `fail` is imported from real-estate-photo-common.js and
 * presumably terminates the process; confirm. If it returns, the empty
 * string is returned to the caller.
 *
 * @param {unknown} rawAddress - Address as supplied by the caller.
 * @returns {string} The trimmed address.
 */
function parseAddress(rawAddress) {
  const text = rawAddress ? String(rawAddress) : "";
  const trimmed = text.trim();

  if (trimmed === "") {
    fail("Missing address.");
  }

  return trimmed;
}
|
||||
|
||||
/**
 * Builds the har.com search URL for an address.
 *
 * The address is percent-encoded with `encodeURIComponent`, so spaces are
 * emitted as `%20` rather than `+`.
 *
 * @param {string} address - Address to search for.
 * @returns {string} Absolute search URL with the address in the `q` parameter.
 */
function buildSearchUrl(address) {
  const encodedQuery = encodeURIComponent(address);
  return `https://www.har.com/search/?q=${encodedQuery}`;
}
|
||||
|
||||
/**
 * Tokens dropped from an address before matching: the "tx" state
 * abbreviation and common street-suffix abbreviations. Built once at module
 * load instead of once per token inside the filter callback.
 */
const ADDRESS_STOP_TOKENS = Object.freeze(
  new Set(["tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir"])
);

/**
 * Splits an address into lowercase alphanumeric tokens used for matching.
 *
 * Every character other than a-z, 0-9 and whitespace is turned into a
 * separator, so "O'Neil" yields the tokens "o" and "neil". Stop tokens
 * (see ADDRESS_STOP_TOKENS) are removed. Order and duplicates are preserved.
 *
 * @param {string} address - Address text; null/undefined is treated as empty.
 * @returns {string[]} Matching tokens, possibly empty.
 */
function buildAddressTokens(address) {
  return String(address ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((token) => token !== "" && !ADDRESS_STOP_TOKENS.has(token));
}
|
||||
|
||||
/**
 * Canonicalizes a listing URL by removing its query string and fragment.
 *
 * @param {string} url - Absolute URL to normalize.
 * @returns {string|null} The serialized URL without search/hash, or null
 *   when the input cannot be parsed as an absolute URL.
 */
function normalizeListingUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Unparseable input is reported as "no URL" rather than thrown.
    return null;
  }

  parsed.hash = "";
  parsed.search = "";
  return parsed.href;
}
|
||||
|
||||
/**
 * Collects the distinct "/homedetail/" links present on the current page.
 *
 * The callback is handed to `page.evaluate`, so it reads `document` and
 * `location` from wherever that callback executes and must stay
 * self-contained (no references to module-level helpers).
 * NOTE(review): `page` is presumably a Playwright/Puppeteer page; confirm
 * against createPageSession.
 *
 * Despite the singular name, the result is a list. Each entry carries the
 * absolute link URL plus the whitespace-collapsed text of the anchor and of
 * its parent element. Only the first anchor seen for a given absolute URL is
 * kept; anchors without an href or with an unresolvable href are skipped.
 *
 * @param {{ evaluate: Function }} page - Page-like object exposing `evaluate`.
 * @returns {Promise<Array<{url: string, text: string, parentText: string}>>}
 */
async function collectListingUrl(page) {
  return page.evaluate(() => {
    const collapseWhitespace = (value) => (value || "").replace(/\s+/g, " ").trim();

    // Track seen URLs in a Set so de-duplication stays linear in the number
    // of anchors instead of rescanning the result list for each candidate.
    const seenUrls = new Set();
    const unique = [];

    for (const anchor of document.querySelectorAll('a[href*="/homedetail/"]')) {
      const href = anchor.getAttribute("href");
      if (!href) continue;

      let absolute;
      try {
        absolute = new URL(href, location.href).toString();
      } catch {
        continue;
      }

      if (seenUrls.has(absolute)) continue;
      seenUrls.add(absolute);

      unique.push({
        url: absolute,
        text: collapseWhitespace(anchor.textContent),
        parentText: collapseWhitespace(anchor.parentElement?.textContent),
      });
    }

    return unique;
  });
}
|
||||
|
||||
/**
 * Picks the candidate whose URL/text/parent text contains the most address
 * tokens, provided it clears the confidence threshold.
 *
 * The threshold is min(3, number of tokens). With no tokens at all nothing
 * can be matched, so no candidate is accepted (otherwise a threshold of 0
 * would accept the first unrelated listing). Ties keep the earliest candidate.
 *
 * @param {Array<{url: string, text: string, parentText: string}>} candidates
 * @param {string[]} addressTokens - Lowercase tokens to look for.
 * @returns {{url: string, text: string, parentText: string, score: number}|null}
 */
function selectConfidentMatch(candidates, addressTokens) {
  if (addressTokens.length === 0) return null;

  const requiredScore = Math.min(3, addressTokens.length);
  let best = null;

  for (const candidate of candidates) {
    const haystack = `${candidate.url} ${candidate.text} ${candidate.parentText}`.toLowerCase();
    // Substring containment per token; repeated tokens count once each time they appear in the list.
    const score = addressTokens.filter((token) => haystack.includes(token)).length;
    if (best === null || score > best.score) {
      best = { ...candidate, score };
    }
  }

  return best !== null && best.score >= requiredScore ? best : null;
}

/**
 * CLI entry point: looks up the address given in `process.argv[2]` on HAR and
 * prints a JSON report (source, address, searchUrl, finalUrl, title,
 * listingUrl, attempts) to stdout.
 *
 * `listingUrl` is the normalized page URL when the search lands directly on a
 * "/homedetail/" page, the best confident match from the result links
 * otherwise, or null when no confident match exists.
 *
 * Any error raised while creating the session or driving the page is
 * reported via `fail("HAR discovery failed.", detail)` after the browser
 * context has been closed. Close errors are ignored so they can neither mask
 * the primary failure nor turn a completed discovery into a reported failure.
 * NOTE(review): `fail` presumably exits the process; confirm. The code also
 * behaves sensibly if it returns.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const address = parseAddress(process.argv[2]);
  const searchUrl = buildSearchUrl(address);

  let context = null;
  let report = null;
  let failed = false;
  let failureDetail = "";

  try {
    // Session creation lives inside the try so a launch failure is reported
    // through fail() instead of surfacing as an unhandled rejection.
    const session = await createPageSession({ headless: process.env.HEADLESS !== "false" });
    context = session.context;
    const { page } = session;

    const attempts = [`Opened HAR search URL: ${searchUrl}`];
    // NOTE(review): 2500 and 1500 are presumably settle delays in ms; confirm against the helpers.
    await gotoListing(page, searchUrl, 2500);
    await dismissCommonOverlays(page);
    await sleep(1500);

    let listingUrl = null;
    if (page.url().includes("/homedetail/")) {
      listingUrl = normalizeListingUrl(page.url());
      attempts.push("HAR search URL resolved directly to a property page.");
    } else {
      const discovered = await collectListingUrl(page);
      const match = selectConfidentMatch(discovered, buildAddressTokens(address));
      if (match) {
        listingUrl = normalizeListingUrl(match.url);
        attempts.push(`HAR search results exposed a matching homedetail link with score ${match.score}.`);
      } else {
        attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
      }
    }

    report = {
      source: "har",
      address,
      searchUrl,
      finalUrl: page.url(),
      title: await page.title(),
      listingUrl,
      attempts,
    };
  } catch (error) {
    failed = true;
    failureDetail = error instanceof Error ? error.message : String(error);
  } finally {
    // Close before reporting: fail() may end the process, and the context
    // must not be left open in that case.
    try {
      await context?.close();
    } catch {
      // Ignore close errors; the discovery outcome is already decided.
    }
  }

  if (failed) {
    fail("HAR discovery failed.", failureDetail);
    return;
  }

  process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
}
|
||||
|
||||
// Run the CLI. A rejection that escapes main() is reported through fail()
// so the promise is never left floating without a handler.
main().catch((error) => {
  fail("HAR discovery failed.", error instanceof Error ? error.message : String(error));
});
|
||||
Reference in New Issue
Block a user