#!/usr/bin/env node
/**
 * Discovers the HAR.com property page ("homedetail" URL) for a street address.
 *
 * When executed directly, the first CLI argument is used as the address and
 * the discovery result is written to stdout as pretty-printed JSON.
 */

import { pathToFileURL } from "node:url";

import {
  createPageSession,
  dismissCommonOverlays,
  fail,
  gotoListing,
  runWithOperationTimeout,
  sleep,
} from "./real-estate-photo-common.js";
import { parseAddressIdentity, scoreAddressCandidate } from "./real-estate-address.js";

/**
 * Builds the HAR search URL for an address.
 *
 * @param {string} address - Free-form address text; it is URI-encoded here.
 * @returns {string} The search URL with the address in the `q` parameter.
 */
function buildSearchUrl(address) {
  return `https://www.har.com/search/?q=${encodeURIComponent(address)}`;
}

/**
 * Strips the query string and fragment from a listing URL.
 *
 * @param {string} url - Absolute URL to normalize.
 * @returns {string|null} The normalized URL, or `null` when `url` cannot be parsed.
 */
function normalizeListingUrl(url) {
  try {
    const parsed = new URL(url);
    parsed.search = "";
    parsed.hash = "";
    return parsed.toString();
  } catch {
    // An unparsable URL is an expected outcome here, reported as "no URL".
    return null;
  }
}

/**
 * Collects every distinct "/homedetail/" link on the current page.
 *
 * The callback is handed to `page.evaluate`, so it is written to be fully
 * self-contained: it only touches DOM globals (`document`, `location`) and
 * helpers declared inside the callback itself.
 *
 * @param {object} page - Page object returned by `createPageSession`.
 * @returns {Promise<Array<{url: string, text: string, parentText: string}>>}
 *   One entry per distinct absolute URL, in document order (first occurrence
 *   wins), with whitespace-collapsed text of the anchor and of its parent.
 */
async function collectListingUrl(page) {
  return page.evaluate(() => {
    const toAbsolute = (href) => {
      try {
        return new URL(href, location.href).toString();
      } catch {
        return null;
      }
    };
    const collapseWhitespace = (value) => (value || "").replace(/\s+/g, " ").trim();

    // A Set keeps de-duplication linear instead of rescanning the result list
    // for every anchor.
    const seenUrls = new Set();
    const candidates = [];
    for (const anchor of document.querySelectorAll('a[href*="/homedetail/"]')) {
      const href = anchor.getAttribute("href");
      if (!href) continue;

      const url = toAbsolute(href);
      if (!url || seenUrls.has(url)) continue;
      seenUrls.add(url);

      candidates.push({
        url,
        text: collapseWhitespace(anchor.textContent),
        parentText: collapseWhitespace(anchor.parentElement?.textContent),
      });
    }
    return candidates;
  });
}

/**
 * Searches HAR for an address and tries to resolve it to a single listing URL.
 *
 * Two outcomes of the search navigation are handled:
 *  - the page URL already contains "/homedetail/": the URL plus page title
 *    are scored against the requested address;
 *  - otherwise: every homedetail link on the page is scored and the
 *    highest-scoring one is accepted only if it is flagged as matched.
 *
 * @param {string} rawAddress - Address to look up; coerced to a trimmed string.
 * @param {object} [options]
 * @param {number} [options.timeoutMs] - Overall time budget handed to
 *   `runWithOperationTimeout`; falsy or non-numeric values are passed on as
 *   `undefined`.
 * @returns {Promise<{source: string, address: string, searchUrl: string,
 *   finalUrl: string, title: string, listingUrl: (string|null),
 *   attempts: string[]}>} `listingUrl` is `null` when no confident match was
 *   found; `attempts` is a human-readable log of the steps taken.
 * @throws {Error} With a message prefixed "HAR discovery failed: ". When the
 *   failure wraps another error, that error is available as `cause`.
 */
export async function discoverHarListing(rawAddress, options = {}) {
  const address = String(rawAddress || "").trim();
  if (!address) {
    // Fail before any page session is created: an empty query cannot
    // identify a property.
    throw new Error("HAR discovery failed: an address is required.");
  }

  const identity = parseAddressIdentity(address);
  const searchUrl = buildSearchUrl(address);
  const { context, page } = await createPageSession({
    headless: process.env.HEADLESS !== "false",
  });

  // Best-effort close: it is invoked both from the timeout hook and from
  // `finally`, so a failure to close (e.g. already closed) is ignored.
  const closeContext = async () => {
    await context.close().catch(() => {});
  };

  try {
    return await runWithOperationTimeout(
      "HAR discovery",
      async () => {
        const attempts = [`Opened HAR search URL: ${searchUrl}`];
        // NOTE(review): the meaning of 2500 is defined by gotoListing
        // (presumably a settle delay in ms) — confirm against the helper.
        await gotoListing(page, searchUrl, 2500);
        await dismissCommonOverlays(page);
        await sleep(1500);

        let listingUrl = null;
        if (page.url().includes("/homedetail/")) {
          // The search navigation ended on a property page; accept it only if
          // its URL and title match the requested address.
          const directScore = scoreAddressCandidate(
            identity,
            `${page.url()} ${(await page.title()) || ""}`
          );
          if (directScore.matched) {
            listingUrl = normalizeListingUrl(page.url());
            attempts.push("HAR search URL resolved directly to a matching property page.");
          } else {
            attempts.push(
              "HAR redirected to a property page, but it did not match the requested address closely enough."
            );
          }
        } else {
          // Score every homedetail link and consider only the best one.
          const discovered = await collectListingUrl(page);
          const scored = discovered
            .map((candidate) => {
              const match = scoreAddressCandidate(
                identity,
                `${candidate.url} ${candidate.text} ${candidate.parentText}`
              );
              return { ...candidate, match };
            })
            .sort((a, b) => b.match.score - a.match.score);

          if (scored[0]?.match.matched) {
            listingUrl = normalizeListingUrl(scored[0].url);
            attempts.push(
              `HAR search results exposed a matching homedetail link with score ${scored[0].match.score}.`
            );
          } else {
            attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
          }
        }

        return {
          source: "har",
          address,
          searchUrl,
          finalUrl: page.url(),
          title: await page.title(),
          listingUrl,
          attempts,
        };
      },
      { timeoutMs: Number(options.timeoutMs || 0) || undefined, onTimeout: closeContext }
    );
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`HAR discovery failed: ${reason}`, { cause: error });
  } finally {
    await closeContext();
  }
}

/**
 * CLI entry point: runs discovery for `process.argv[2]` and prints the result
 * as JSON. Any error is handed to the shared `fail` helper.
 */
async function main() {
  try {
    const result = await discoverHarListing(process.argv[2]);
    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
  } catch (error) {
    fail("HAR discovery failed.", error instanceof Error ? error.message : String(error));
  }
}

// Run only when this file is the script being executed, not when imported.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}