Files
stef-openclaw-skills/skills/web-automation/scripts/har-discover.js

140 lines
3.8 KiB
JavaScript

#!/usr/bin/env node
import {
createPageSession,
dismissCommonOverlays,
fail,
gotoListing,
sleep,
} from "./real-estate-photo-common.js";
/**
 * Coerces the raw CLI argument to a trimmed address string.
 *
 * Nullish/falsy input is treated as an empty string. When nothing is left
 * after trimming, `fail("Missing address.")` is invoked.
 *
 * @param {unknown} rawAddress - Value taken from the command line.
 * @returns {string} The trimmed address.
 */
function parseAddress(rawAddress) {
  const trimmed = String(rawAddress || "").trim();
  if (trimmed.length === 0) {
    fail("Missing address.");
  }
  return trimmed;
}
/**
 * Builds the HAR search URL for an address.
 *
 * @param {string} address - Free-form address text; it is percent-encoded
 *   with `encodeURIComponent` before being placed in the `q` parameter.
 * @returns {string} The search URL.
 */
function buildSearchUrl(address) {
  const encodedQuery = encodeURIComponent(address);
  return `https://www.har.com/search/?q=${encodedQuery}`;
}
/**
 * Splits an address into lowercase alphanumeric tokens used for matching.
 *
 * Every character other than `a-z`, `0-9` and whitespace is replaced by a
 * space, the result is split on whitespace, and tokens from a fixed ignore
 * list ("tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir") are dropped.
 *
 * @param {string} address - Address text.
 * @returns {string[]} Remaining tokens in their original order (duplicates kept).
 */
function buildAddressTokens(address) {
  // Built once per call; the previous version allocated this Set for every token.
  const ignoredTokens = new Set(["tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir"]);
  return address
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((token) => token.length > 0 && !ignoredTokens.has(token));
}
/**
 * Strips the query string and fragment from a listing URL.
 *
 * @param {string} url - Absolute URL to normalize.
 * @returns {string|null} The serialized URL without search/hash, or `null`
 *   when `url` cannot be parsed by the `URL` constructor.
 */
function normalizeListingUrl(url) {
  let target;
  try {
    target = new URL(url);
  } catch {
    return null;
  }
  target.hash = "";
  target.search = "";
  return target.href;
}
/**
 * Collects the distinct "/homedetail/" links present on the current page.
 *
 * The callback handed to `page.evaluate` reads `document` and `location`, so
 * it must run where those globals exist and must not reference anything from
 * this module's scope.
 * NOTE(review): presumably `page.evaluate` serializes the callback into the
 * browser page — confirm against `createPageSession`.
 *
 * @param {{ evaluate: Function }} page - Page handle exposing `evaluate`.
 * @returns {Promise<Array<{url: string, text: string, parentText: string}>>}
 *   One entry per distinct absolute URL, in document order. `text` and
 *   `parentText` are the whitespace-collapsed text of the anchor and of its
 *   parent element.
 */
async function collectListingUrl(page) {
  return page.evaluate(() => {
    const collapse = (value) => (value || "").replace(/\s+/g, " ").trim();

    // Keyed by absolute URL: a Map keeps insertion order and makes the
    // duplicate check O(1) instead of rescanning the collected list.
    const byUrl = new Map();
    for (const anchor of document.querySelectorAll('a[href*="/homedetail/"]')) {
      const href = anchor.getAttribute("href");
      if (!href) continue;

      let absolute;
      try {
        absolute = new URL(href, location.href).toString();
      } catch {
        // Unresolvable href: skip this anchor.
        continue;
      }
      // The first anchor seen for a URL wins; later duplicates are ignored.
      if (byUrl.has(absolute)) continue;

      byUrl.set(absolute, {
        url: absolute,
        text: collapse(anchor.textContent),
        parentText: collapse(anchor.parentElement?.textContent),
      });
    }
    return [...byUrl.values()];
  });
}
/** Returns the candidate whose URL/text mentions the most address tokens (first one wins ties), or null. */
function selectBestCandidate(candidates, addressTokens) {
  let best = null;
  for (const candidate of candidates) {
    const haystack = `${candidate.url} ${candidate.text} ${candidate.parentText}`.toLowerCase();
    const score = addressTokens.filter((token) => haystack.includes(token)).length;
    if (best === null || score > best.score) {
      best = { ...candidate, score };
    }
  }
  return best;
}

/**
 * CLI entry point: searches HAR for the address given as the first argument
 * and prints a JSON report to stdout.
 *
 * The report contains `source`, `address`, `searchUrl`, `finalUrl`, `title`,
 * `listingUrl` (or `null` when no confident match was found) and `attempts`
 * (human-readable notes on what was tried).
 *
 * Any error raised while creating the session, navigating, or scraping is
 * reported through `fail("HAR discovery failed.", message)` after a
 * best-effort close of the browser context.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const address = parseAddress(process.argv[2]);
  const searchUrl = buildSearchUrl(address);

  // Stays null until the session exists, so the catch block knows whether
  // there is anything to close.
  let context = null;
  try {
    // Created inside the try so a session failure is reported via fail()
    // rather than escaping as an unhandled rejection.
    const session = await createPageSession({ headless: process.env.HEADLESS !== "false" });
    context = session.context;
    const { page } = session;

    const attempts = [`Opened HAR search URL: ${searchUrl}`];
    await gotoListing(page, searchUrl, 2500);
    await dismissCommonOverlays(page);
    // NOTE(review): presumably a settle delay in milliseconds — confirm against sleep().
    await sleep(1500);

    let listingUrl = null;
    const addressTokens = buildAddressTokens(address);
    if (page.url().includes("/homedetail/")) {
      listingUrl = normalizeListingUrl(page.url());
      attempts.push("HAR search URL resolved directly to a property page.");
    } else {
      const best = selectBestCandidate(await collectListingUrl(page), addressTokens);
      const requiredScore = Math.min(3, addressTokens.length);
      // With no usable tokens the threshold would be 0 and any link would
      // "match" with score 0, so a non-zero threshold is required.
      if (best && requiredScore > 0 && best.score >= requiredScore) {
        listingUrl = normalizeListingUrl(best.url);
        attempts.push(`HAR search results exposed a matching homedetail link with score ${best.score}.`);
      } else {
        attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
      }
    }

    const report = {
      source: "har",
      address,
      searchUrl,
      finalUrl: page.url(),
      title: await page.title(),
      listingUrl,
      attempts,
    };
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    await context.close();
  } catch (error) {
    if (context) {
      try {
        await context.close();
      } catch {
        // Ignore close errors after the primary failure.
      }
    }
    fail("HAR discovery failed.", error instanceof Error ? error.message : String(error));
  }
}
// Script entry point. The returned promise is intentionally not awaited.
void main();