147 lines
4.4 KiB
JavaScript
147 lines
4.4 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import { pathToFileURL } from "node:url";
|
|
|
|
import {
|
|
createPageSession,
|
|
dismissCommonOverlays,
|
|
fail,
|
|
gotoListing,
|
|
runWithOperationTimeout,
|
|
sleep,
|
|
} from "./real-estate-photo-common.js";
|
|
import { parseAddressIdentity, scoreAddressCandidate } from "./real-estate-address.js";
|
|
|
|
/**
 * Build the HAR.com search URL for a free-form address.
 *
 * @param {string} address - Address text to search for; it is percent-encoded into the `q` parameter.
 * @returns {string} Absolute HAR.com search URL.
 */
function buildSearchUrl(address) {
  // A missing address must become an empty query, not the literal text "undefined" or "null".
  const query = encodeURIComponent(String(address ?? ""));
  return `https://www.har.com/search/?q=${query}`;
}
|
|
|
|
/**
 * Reduce a listing URL to its canonical form by dropping the query string and fragment.
 *
 * @param {string} url - Absolute URL to normalize.
 * @returns {string|null} The URL without `?query` and `#hash`, or `null` when `url` cannot be parsed.
 */
function normalizeListingUrl(url) {
  let listing;
  try {
    listing = new URL(url);
  } catch {
    // Anything the URL parser rejects is reported as "no usable listing URL".
    return null;
  }

  listing.search = "";
  listing.hash = "";
  return listing.href;
}
|
|
|
|
/**
 * Collect every distinct `/homedetail/` link on the current page.
 *
 * The callback is handed to `page.evaluate`, so it is written to be self-contained: it only
 * touches `document` and `location` and does not close over anything from this module.
 *
 * @param {object} page - Page handle exposing `evaluate(fn)`.
 * @returns {Promise<Array<{url: string, text: string, parentText: string}>>} One entry per unique
 *   absolute URL, in first-seen order, with whitespace-collapsed text of the anchor and of its
 *   parent element.
 */
async function collectListingUrl(page) {
  return page.evaluate(() => {
    const collapse = (value) => (value || "").replace(/\s+/g, " ").trim();

    // Keyed by absolute URL so duplicates are rejected in O(1) instead of rescanning an array;
    // a Map iterates in insertion order, which keeps the first occurrence of each link first.
    const byUrl = new Map();

    for (const anchor of document.querySelectorAll('a[href*="/homedetail/"]')) {
      const href = anchor.getAttribute("href");
      if (!href) continue;

      let absolute;
      try {
        absolute = new URL(href, location.href).toString();
      } catch {
        // Skip hrefs that cannot be resolved against the page URL.
        continue;
      }
      if (byUrl.has(absolute)) continue;

      byUrl.set(absolute, {
        url: absolute,
        text: collapse(anchor.textContent),
        parentText: collapse(anchor.parentElement?.textContent),
      });
    }

    return [...byUrl.values()];
  });
}
|
|
|
|
/**
 * Search HAR.com for an address and try to resolve the matching `/homedetail/` listing page.
 *
 * Opens the HAR search URL in a page session. If the browser ends up on a `/homedetail/` page,
 * that page is accepted only when `scoreAddressCandidate` flags its URL and title as matched.
 * Otherwise every `/homedetail/` link found on the page is scored and the highest-scoring one is
 * kept, again only when it is flagged as matched.
 *
 * @param {string} rawAddress - Address to look up; coerced to a string and trimmed.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` is forwarded to
 *   `runWithOperationTimeout`; a missing, zero or non-numeric value is forwarded as `undefined`.
 * @returns {Promise<{source: string, address: string, searchUrl: string, finalUrl: string,
 *   title: string, listingUrl: (string|null), attempts: string[]}>} Discovery report;
 *   `listingUrl` is `null` when no confident match was found.
 * @throws {Error} `HAR discovery failed: …` for any failure once the page session exists; the
 *   original error is attached as `cause`.
 */
export async function discoverHarListing(rawAddress, options = {}) {
  const address = String(rawAddress || "").trim();
  const identity = parseAddressIdentity(address);
  const searchUrl = buildSearchUrl(address);
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  // Used both as the timeout hook and in `finally`, so a second close must never throw.
  const closeContext = async () => {
    await context.close().catch(() => {});
  };

  try {
    return await runWithOperationTimeout(
      "HAR discovery",
      async () => {
        const attempts = [`Opened HAR search URL: ${searchUrl}`];
        await gotoListing(page, searchUrl, 2500);
        await dismissCommonOverlays(page);
        // NOTE(review): fixed pause, presumably to let results or a redirect settle — confirm.
        await sleep(1500);

        let listingUrl = null;
        if (page.url().includes("/homedetail/")) {
          // The search landed directly on a property page; verify it is the requested address.
          const directScore = scoreAddressCandidate(
            identity,
            `${page.url()} ${(await page.title()) || ""}`
          );
          if (directScore.matched) {
            listingUrl = normalizeListingUrl(page.url());
            attempts.push("HAR search URL resolved directly to a matching property page.");
          } else {
            attempts.push("HAR redirected to a property page, but it did not match the requested address closely enough.");
          }
        } else {
          // Results page: score each homedetail link on its URL, link text and parent text.
          const discovered = await collectListingUrl(page);
          const scored = discovered
            .map((candidate) => {
              const match = scoreAddressCandidate(
                identity,
                `${candidate.url} ${candidate.text} ${candidate.parentText}`
              );
              return { ...candidate, match };
            })
            .sort((a, b) => b.match.score - a.match.score);

          if (scored[0]?.match.matched) {
            listingUrl = normalizeListingUrl(scored[0].url);
            attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].match.score}.`);
          } else {
            attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
          }
        }

        return {
          source: "har",
          address,
          searchUrl,
          finalUrl: page.url(),
          title: await page.title(),
          listingUrl,
          attempts,
        };
      },
      {
        // `?.` keeps an explicit `null` options argument from throwing before the search starts.
        timeoutMs: Number(options?.timeoutMs || 0) || undefined,
        onTimeout: closeContext,
      }
    );
  } catch (error) {
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(
      `HAR discovery failed: ${error instanceof Error ? error.message : String(error)}`,
      { cause: error }
    );
  } finally {
    await closeContext();
  }
}
|
|
|
|
/**
 * CLI entry point: look up the address given as the first command-line argument and print the
 * discovery report to stdout as pretty-printed JSON. Any error is handed to `fail` together with
 * its message.
 */
async function main() {
  const [, , addressArg] = process.argv;

  try {
    const listing = await discoverHarListing(addressArg);
    const report = JSON.stringify(listing, null, 2);
    process.stdout.write(`${report}\n`);
  } catch (problem) {
    const detail = problem instanceof Error ? problem.message : String(problem);
    fail("HAR discovery failed.", detail);
  }
}
|
|
|
|
// Run the CLI only when this file is the script Node was started with, not when it is imported.
if (process.argv[1]) {
  const invokedAs = pathToFileURL(process.argv[1]).href;
  if (invokedAs === import.meta.url) {
    main();
  }
}
|