diff --git a/docs/property-assessor.md b/docs/property-assessor.md index 11dadbf..a9390ad 100644 --- a/docs/property-assessor.md +++ b/docs/property-assessor.md @@ -6,6 +6,8 @@ Decision-grade residential property assessment skill for OpenClaw, with official `property-assessor` is for evaluating a condo, townhouse, house, or similar residential property from an address or listing URL and ending with a practical recommendation such as `buy`, `pass`, or `only below X`. +If the subject property has an apartment / unit / suite number, include it. Discovery is now unit-aware for Zillow and HAR when unit data is present, while still supporting plain single-family addresses that have no unit. + The skill is intended to: - normalize the property across listing sources diff --git a/docs/web-automation.md b/docs/web-automation.md index e672701..fd825e8 100644 --- a/docs/web-automation.md +++ b/docs/web-automation.md @@ -134,6 +134,7 @@ npx tsx flow.ts --instruction 'go to https://search.fiorinis.com then type "pipp ## Real-estate listing discovery and photo extraction Use the dedicated Zillow and HAR discovery/photo commands before trying a free-form gallery flow. +Discovery is unit-aware when the address includes an apartment / unit / suite identifier, and still supports plain no-unit addresses for single-family homes. ### Zillow discovery diff --git a/skills/property-assessor/SKILL.md b/skills/property-assessor/SKILL.md index 8ce4829..3d4c9cc 100644 --- a/skills/property-assessor/SKILL.md +++ b/skills/property-assessor/SKILL.md @@ -17,6 +17,9 @@ Accept any of: The assessment purpose is required for a decision-grade result. If the user does not say why they want the property assessed, stop and ask before finalizing the analysis. +If the property has a unit / apartment / suite number, include it. +Do not drop the unit when discovering listing sources. Unit-qualified condo/townhome addresses must be matched as the exact unit, while single-family addresses with no unit should still work normally. + ## Core workflow 1. Normalize the address and property type. diff --git a/skills/web-automation/SKILL.md b/skills/web-automation/SKILL.md index 8298713..3b09557 100644 --- a/skills/web-automation/SKILL.md +++ b/skills/web-automation/SKILL.md @@ -143,7 +143,10 @@ Use the dedicated extractors before trying a free-form gallery flow. The discovery scripts are purpose-built for the common address-to-listing workflow: - open the site search or address URL +- keep apartment / unit identifiers when the address includes them - resolve or identify a matching listing page when possible +- reject a mismatched unit when the requested address includes one +- still work normally for single-family / no-unit addresses - return the direct listing URL as JSON The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow: diff --git a/skills/web-automation/scripts/har-discover.js b/skills/web-automation/scripts/har-discover.js index ad19bc2..eb90cbc 100644 --- a/skills/web-automation/scripts/har-discover.js +++ b/skills/web-automation/scripts/har-discover.js @@ -7,28 +7,12 @@ import { gotoListing, sleep, } from "./real-estate-photo-common.js"; - -function parseAddress(rawAddress) { - const address = String(rawAddress || "").trim(); - if (!address) { - fail("Missing address."); - } - return address; -} +import { parseAddressIdentity, scoreAddressCandidate } from "./real-estate-address.js"; function buildSearchUrl(address) { return `https://www.har.com/search/?q=${encodeURIComponent(address)}`; } -function buildAddressTokens(address) { - return address - .toLowerCase() - .replace(/[^a-z0-9\s]/g, " ") - .split(/\s+/) - .filter(Boolean) - .filter((token) => !new Set(["tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir"]).has(token)); -} - function normalizeListingUrl(url) { try { const parsed = new URL(url); @@ -74,7 +58,8 @@ async function collectListingUrl(page) { } async function main() { - const address = parseAddress(process.argv[2]); + const address = String(process.argv[2] || "").trim(); + const identity = parseAddressIdentity(address); const searchUrl = buildSearchUrl(address); const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); @@ -85,26 +70,32 @@ async function main() { await sleep(1500); let listingUrl = null; - const addressTokens = buildAddressTokens(address); if (page.url().includes("/homedetail/")) { - listingUrl = normalizeListingUrl(page.url()); - attempts.push("HAR search URL resolved directly to a property page."); + const directScore = scoreAddressCandidate( + identity, + `${page.url()} ${(await page.title()) || ""}` + ); + if (directScore.matched) { + listingUrl = normalizeListingUrl(page.url()); + attempts.push("HAR search URL resolved directly to a matching property page."); + } else { + attempts.push("HAR redirected to a property page, but it did not match the requested address closely enough."); + } } else { const discovered = await collectListingUrl(page); const scored = discovered .map((candidate) => { - const haystack = `${candidate.url} ${candidate.text} ${candidate.parentText}`.toLowerCase(); - const score = addressTokens.reduce( - (total, token) => total + (haystack.includes(token) ? 1 : 0), - 0 + const match = scoreAddressCandidate( + identity, + `${candidate.url} ${candidate.text} ${candidate.parentText}` ); - return { ...candidate, score }; + return { ...candidate, match }; }) - .sort((a, b) => b.score - a.score); + .sort((a, b) => b.match.score - a.match.score); - if (scored[0] && scored[0].score >= Math.min(3, addressTokens.length)) { + if (scored[0]?.match.matched) { listingUrl = normalizeListingUrl(scored[0].url); - attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].score}.`); + attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].match.score}.`); } else { attempts.push("HAR discovery did not expose a confident homedetail match for this address."); } diff --git a/skills/web-automation/scripts/package.json b/skills/web-automation/scripts/package.json index 3cfd15a..6552809 100644 --- a/skills/web-automation/scripts/package.json +++ b/skills/web-automation/scripts/package.json @@ -10,7 +10,7 @@ "har-photos": "node har-photos.js", "browse": "tsx browse.ts", "scrape": "tsx scrape.ts", - "test:photos": "node --test real-estate-photo-common.test.mjs zillow-photo-data.test.mjs", + "test:photos": "node --test real-estate-address.test.mjs real-estate-photo-common.test.mjs zillow-photo-data.test.mjs", "zillow-discover": "node zillow-discover.js", "zillow-photos": "node zillow-photos.js", "fetch-browser": "npx cloakbrowser install" diff --git a/skills/web-automation/scripts/real-estate-address.js b/skills/web-automation/scripts/real-estate-address.js new file mode 100644 index 0000000..f3dce77 --- /dev/null +++ b/skills/web-automation/scripts/real-estate-address.js @@ -0,0 +1,125 @@ +#!/usr/bin/env node + +const STREET_STOP_WORDS = new Set([ + "st", + "street", + "rd", + "road", + "dr", + "drive", + "ave", + "avenue", + "blvd", + "boulevard", + "ln", + "lane", + "ct", + "court", + "cir", + "circle", + "way", + "trl", + "trail", + "pkwy", + "parkway", + "tx" +]); + +const UNIT_LABEL_PATTERN = "(?:apt|apartment|unit|suite|ste|#)"; + +function tokenize(value) { + return String(value || "") + .toLowerCase() + .replace(/[^a-z0-9#\s-]/g, " ") + .split(/[\s-]+/) + .map((token) => token.trim()) + .filter(Boolean); +} + +function collapseWhitespace(value) { + return String(value || "").replace(/\s+/g, " ").trim(); +} + +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +export function parseAddressIdentity(rawAddress) { + const address = collapseWhitespace(rawAddress); + if (!address) { + throw new Error("Missing address."); + } + + const [streetPartRaw, ...restParts] = address.split(","); + const streetPart = collapseWhitespace(streetPartRaw); + const locality = collapseWhitespace(restParts.join(" ")); + + const unitMatch = streetPart.match( + new RegExp(`^(.*?)(?:\\s+(?:${UNIT_LABEL_PATTERN})\\s*([a-z0-9-]+))$`, "i") + ); + + const streetWithoutUnit = collapseWhitespace(unitMatch ? unitMatch[1] : streetPart); + const unitValue = unitMatch ? unitMatch[2].toLowerCase() : null; + + const streetTokens = tokenize(streetWithoutUnit).filter( + (token) => !STREET_STOP_WORDS.has(token) + ); + const localityTokens = tokenize(locality).filter((token) => !STREET_STOP_WORDS.has(token)); + + return { + raw: address, + streetPart, + streetWithoutUnit, + locality, + streetTokens, + localityTokens, + streetNumber: streetTokens[0] || null, + unitValue, + hasUnit: Boolean(unitValue) + }; +} + +export function buildZillowAddressSlug(rawAddress) { + const address = collapseWhitespace(rawAddress); + return address + .replace(/,/g, "") + .replace(/#/g, "") + .trim() + .split(/\s+/) + .join("-"); +} + +export function scoreAddressCandidate(identity, candidateText) { + const normalized = collapseWhitespace(candidateText).toLowerCase(); + const normalizedPadded = ` ${normalized.replace(/[^a-z0-9#]+/g, " ")} `; + const allTokens = [...identity.streetTokens, ...identity.localityTokens]; + const uniqueTokens = Array.from(new Set(allTokens)); + const matchedTokens = uniqueTokens.filter((token) => + normalizedPadded.includes(` ${token} `) + ); + + let unitMatched = true; + if (identity.hasUnit && identity.unitValue) { + const unitRegex = new RegExp( + `(?:^|\\s)(?:${UNIT_LABEL_PATTERN})\\s*${escapeRegex(identity.unitValue)}(?:\\s|$)`, + "i" + ); + const hashRegex = new RegExp(`#\\s*${escapeRegex(identity.unitValue)}(?:\\b|$)`, "i"); + const looseTokenRegex = new RegExp(`(?:^|\\s)${escapeRegex(identity.unitValue)}(?:\\s|$)`, "i"); + unitMatched = + unitRegex.test(normalized) || + hashRegex.test(candidateText) || + looseTokenRegex.test(normalizedPadded); + } + + const minimumCoreMatches = identity.hasUnit ? 3 : 2; + const matched = + matchedTokens.length >= Math.min(minimumCoreMatches, uniqueTokens.length) && unitMatched; + + return { + matched, + score: matchedTokens.length + (unitMatched && identity.hasUnit ? 2 : 0), + matchedTokens, + unitMatched + }; +} diff --git a/skills/web-automation/scripts/real-estate-address.test.mjs b/skills/web-automation/scripts/real-estate-address.test.mjs new file mode 100644 index 0000000..923f00c --- /dev/null +++ b/skills/web-automation/scripts/real-estate-address.test.mjs @@ -0,0 +1,59 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { + buildZillowAddressSlug, + parseAddressIdentity, + scoreAddressCandidate, +} from "./real-estate-address.js"; + +test("parseAddressIdentity detects unit numbers when present", () => { + const identity = parseAddressIdentity("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418"); + + assert.equal(identity.streetWithoutUnit, "4141 Whiteley Dr"); + assert.equal(identity.unitValue, "204"); + assert.equal(identity.hasUnit, true); + assert.deepEqual(identity.streetTokens, ["4141", "whiteley"]); +}); + +test("parseAddressIdentity supports plain single-family style addresses with no unit", () => { + const identity = parseAddressIdentity("1201 E Iberian Ct, Granbury, TX 76048"); + + assert.equal(identity.unitValue, null); + assert.equal(identity.hasUnit, false); + assert.deepEqual(identity.streetTokens, ["1201", "e", "iberian"]); +}); + +test("buildZillowAddressSlug keeps unit identifiers in the slug", () => { + assert.equal( + buildZillowAddressSlug("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418"), + "4141-Whiteley-Dr-Apt-204-Corpus-Christi-TX-78418" + ); +}); + +test("scoreAddressCandidate rejects a wrong unit on the same street", () => { + const identity = parseAddressIdentity("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418"); + const good = scoreAddressCandidate( + identity, + "https://www.zillow.com/homedetails/4141-Whiteley-Dr-Apt-204-Corpus-Christi-TX-78418/123_zpid/" + ); + const bad = scoreAddressCandidate( + identity, + "https://www.zillow.com/homedetails/4141-Whiteley-Dr-Apt-305-Corpus-Christi-TX-78418/456_zpid/" + ); + + assert.equal(good.matched, true); + assert.equal(good.unitMatched, true); + assert.equal(bad.matched, false); + assert.equal(bad.unitMatched, false); +}); + +test("scoreAddressCandidate still matches plain addresses with no unit", () => { + const identity = parseAddressIdentity("1201 E Iberian Ct, Granbury, TX 76048"); + const result = scoreAddressCandidate( + identity, + "1201 E Iberian Ct Granbury, TX 76048 For Sale, Residential" + ); + + assert.equal(result.matched, true); +}); diff --git a/skills/web-automation/scripts/zillow-discover.js b/skills/web-automation/scripts/zillow-discover.js index b9097be..0472e65 100644 --- a/skills/web-automation/scripts/zillow-discover.js +++ b/skills/web-automation/scripts/zillow-discover.js @@ -7,24 +7,11 @@ import { gotoListing, sleep, } from "./real-estate-photo-common.js"; - -function parseAddress(rawAddress) { - const address = String(rawAddress || "").trim(); - if (!address) { - fail("Missing address."); - } - return address; -} - -function buildSearchUrl(address) { - const slug = address - .replace(/,/g, "") - .replace(/#/g, "") - .trim() - .split(/\s+/) - .join("-"); - return `https://www.zillow.com/homes/${encodeURIComponent(slug)}_rb/`; -} +import { + buildZillowAddressSlug, + parseAddressIdentity, + scoreAddressCandidate, +} from "./real-estate-address.js"; function normalizeListingUrl(url) { try { @@ -53,20 +40,31 @@ async function collectListingUrl(page) { if (!href) continue; const absolute = toAbsolute(href); if (!absolute) continue; - candidates.push(absolute); + const text = (anchor.textContent || "").replace(/\s+/g, " ").trim(); + const aria = anchor.getAttribute("aria-label") || ""; + const title = anchor.getAttribute("title") || ""; + const parentText = (anchor.parentElement?.textContent || "").replace(/\s+/g, " ").trim(); + candidates.push({ + url: absolute, + text, + aria, + title, + parentText, + }); } const unique = []; for (const candidate of candidates) { - if (!unique.includes(candidate)) unique.push(candidate); + if (!unique.some((item) => item.url === candidate.url)) unique.push(candidate); } - return unique[0] || null; + return unique; }); } async function main() { - const address = parseAddress(process.argv[2]); - const searchUrl = buildSearchUrl(address); + const address = String(process.argv[2] || "").trim(); + const identity = parseAddressIdentity(address); + const searchUrl = `https://www.zillow.com/homes/${encodeURIComponent(buildZillowAddressSlug(address))}_rb/`; const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" }); try { @@ -77,15 +75,33 @@ async function main() { let listingUrl = null; if (page.url().includes("/homedetails/")) { - listingUrl = normalizeListingUrl(page.url()); - attempts.push("Zillow search URL resolved directly to a property page."); + const directScore = scoreAddressCandidate( + identity, + `${page.url()} ${(await page.title()) || ""}` + ); + if (directScore.matched) { + listingUrl = normalizeListingUrl(page.url()); + attempts.push("Zillow search URL resolved directly to a matching property page."); + } else { + attempts.push("Zillow redirected to a property page, but it did not match the requested address closely enough."); + } } else { const discovered = await collectListingUrl(page); - if (discovered) { - listingUrl = normalizeListingUrl(discovered); - attempts.push("Zillow search results exposed a homedetails link."); + const scored = discovered + .map((candidate) => ({ + ...candidate, + match: scoreAddressCandidate( + identity, + `${candidate.url} ${candidate.text} ${candidate.aria} ${candidate.title} ${candidate.parentText}` + ) + })) + .sort((a, b) => b.match.score - a.match.score); + + if (scored[0]?.match.matched) { + listingUrl = normalizeListingUrl(scored[0].url); + attempts.push(`Zillow search results exposed a matching homedetails link with score ${scored[0].match.score}.`); } else { - attempts.push("Zillow discovery did not expose a homedetails link for this address."); + attempts.push("Zillow discovery did not expose a confident homedetails match for this address."); } }