Make listing discovery unit-aware

This commit is contained in:
2026-03-27 23:11:10 -05:00
parent 301986fb25
commit f8c998d579
9 changed files with 259 additions and 59 deletions

View File

@@ -7,28 +7,12 @@ import {
gotoListing,
sleep,
} from "./real-estate-photo-common.js";
/**
 * Normalize the raw address argument into a trimmed string.
 *
 * Falsy input (undefined, null, "", 0, ...) is treated as an empty address.
 * When the trimmed result is empty, `fail("Missing address.")` is invoked.
 * NOTE(review): `fail` is defined outside this block; presumably it throws or
 * exits, otherwise the empty string is returned to the caller — confirm.
 *
 * @param {*} rawAddress - Value to interpret as an address (e.g. a CLI argument).
 * @returns {string} The address with surrounding whitespace removed.
 */
function parseAddress(rawAddress) {
  const text = rawAddress ? String(rawAddress) : "";
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    fail("Missing address.");
  }
  return trimmed;
}
import { parseAddressIdentity, scoreAddressCandidate } from "./real-estate-address.js";
/**
 * Build the HAR search URL for an address.
 *
 * The address is percent-encoded with `encodeURIComponent` and placed in the
 * `q` query parameter.
 *
 * @param {string} address - Address text to search for.
 * @returns {string} The search URL.
 */
function buildSearchUrl(address) {
  const encodedQuery = encodeURIComponent(address);
  return `https://www.har.com/search/?q=${encodedQuery}`;
}
/**
 * Split an address into lowercase alphanumeric tokens for matching.
 *
 * The address is lowercased, every character other than a-z, 0-9 or
 * whitespace is replaced with a space, and the result is split on whitespace.
 * Empty tokens and the tokens in the ignore list below are dropped.
 *
 * @param {string} address - Address text to tokenize.
 * @returns {string[]} Remaining tokens, in their original order.
 */
function buildAddressTokens(address) {
  // Built once per call; the previous version rebuilt this Set for every
  // token inside the filter callback.
  const ignoredTokens = new Set(["tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir"]);
  return address
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((token) => token !== "" && !ignoredTokens.has(token));
}
function normalizeListingUrl(url) {
try {
const parsed = new URL(url);
@@ -74,7 +58,8 @@ async function collectListingUrl(page) {
}
async function main() {
const address = parseAddress(process.argv[2]);
const address = String(process.argv[2] || "").trim();
const identity = parseAddressIdentity(address);
const searchUrl = buildSearchUrl(address);
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
@@ -85,26 +70,32 @@ async function main() {
await sleep(1500);
let listingUrl = null;
const addressTokens = buildAddressTokens(address);
if (page.url().includes("/homedetail/")) {
listingUrl = normalizeListingUrl(page.url());
attempts.push("HAR search URL resolved directly to a property page.");
const directScore = scoreAddressCandidate(
identity,
`${page.url()} ${(await page.title()) || ""}`
);
if (directScore.matched) {
listingUrl = normalizeListingUrl(page.url());
attempts.push("HAR search URL resolved directly to a matching property page.");
} else {
attempts.push("HAR redirected to a property page, but it did not match the requested address closely enough.");
}
} else {
const discovered = await collectListingUrl(page);
const scored = discovered
.map((candidate) => {
const haystack = `${candidate.url} ${candidate.text} ${candidate.parentText}`.toLowerCase();
const score = addressTokens.reduce(
(total, token) => total + (haystack.includes(token) ? 1 : 0),
0
const match = scoreAddressCandidate(
identity,
`${candidate.url} ${candidate.text} ${candidate.parentText}`
);
return { ...candidate, score };
return { ...candidate, match };
})
.sort((a, b) => b.score - a.score);
.sort((a, b) => b.match.score - a.match.score);
if (scored[0] && scored[0].score >= Math.min(3, addressTokens.length)) {
if (scored[0]?.match.matched) {
listingUrl = normalizeListingUrl(scored[0].url);
attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].score}.`);
attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].match.score}.`);
} else {
attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
}