Make listing discovery unit-aware
This commit is contained in:
@@ -6,6 +6,8 @@ Decision-grade residential property assessment skill for OpenClaw, with official
|
||||
|
||||
`property-assessor` is for evaluating a condo, townhouse, house, or similar residential property from an address or listing URL and ending with a practical recommendation such as `buy`, `pass`, or `only below X`.
|
||||
|
||||
If the subject property has an apartment / unit / suite number, include it. Discovery is unit-aware for Zillow and HAR whenever the address carries a unit identifier, and it still supports plain single-family addresses that have no unit.
|
||||
|
||||
The skill is intended to:
|
||||
|
||||
- normalize the property across listing sources
|
||||
|
||||
@@ -134,6 +134,7 @@ npx tsx flow.ts --instruction 'go to https://search.fiorinis.com then type "pipp
|
||||
## Real-estate listing discovery and photo extraction
|
||||
|
||||
Use the dedicated Zillow and HAR discovery/photo commands before trying a free-form gallery flow.
|
||||
Discovery is unit-aware when the address includes an apartment / unit / suite identifier, and still supports plain no-unit addresses for single-family homes.
|
||||
|
||||
### Zillow discovery
|
||||
|
||||
|
||||
@@ -17,6 +17,9 @@ Accept any of:
|
||||
The assessment purpose is required for a decision-grade result.
|
||||
If the user does not say why they want the property assessed, stop and ask before finalizing the analysis.
|
||||
|
||||
If the property has a unit / apartment / suite number, include it.
|
||||
Do not drop the unit when discovering listing sources. A unit-qualified condo/townhome address must resolve to that exact unit; a listing for a different unit at the same street address is not a match. Single-family addresses with no unit should still work normally.
|
||||
|
||||
## Core workflow
|
||||
|
||||
1. Normalize the address and property type.
|
||||
|
||||
@@ -143,7 +143,10 @@ Use the dedicated extractors before trying a free-form gallery flow.
|
||||
|
||||
The discovery scripts are purpose-built for the common address-to-listing workflow:
|
||||
- open the site search or address URL
|
||||
- keep apartment / unit identifiers when the address includes them
|
||||
- resolve or identify a matching listing page when possible
|
||||
- reject a mismatched unit when the requested address includes one
|
||||
- still work normally for single-family / no-unit addresses
|
||||
- return the direct listing URL as JSON
|
||||
|
||||
The photo scripts are purpose-built for the common `See all photos` / `Show all photos` workflow:
|
||||
|
||||
@@ -7,28 +7,12 @@ import {
|
||||
gotoListing,
|
||||
sleep,
|
||||
} from "./real-estate-photo-common.js";
|
||||
|
||||
/**
 * Normalize the address argument supplied on the command line.
 *
 * The raw value is coerced to a string (falsy values become "") and trimmed.
 * An empty result is reported through `fail`, which is defined outside this block.
 *
 * @param {unknown} rawAddress - Raw address value, typically process.argv[2].
 * @returns {string} The trimmed address text.
 */
function parseAddress(rawAddress) {
  const trimmed = String(rawAddress || "").trim();
  if (trimmed === "") {
    fail("Missing address.");
  }
  return trimmed;
}
|
||||
import { parseAddressIdentity, scoreAddressCandidate } from "./real-estate-address.js";
|
||||
|
||||
/**
 * Build the HAR site-search URL for an address.
 *
 * @param {string} address - Free-form address text; it is percent-encoded into the `q` parameter.
 * @returns {string} The HAR search URL.
 */
function buildSearchUrl(address) {
  const query = encodeURIComponent(address);
  return `https://www.har.com/search/?q=${query}`;
}
|
||||
|
||||
// Street-suffix and state abbreviations that carry no identifying signal.
// Built once at module load instead of once per token inside the filter callback.
const ADDRESS_TOKEN_STOP_WORDS = new Set(["tx", "dr", "st", "rd", "ave", "blvd", "ct", "ln", "cir"]);

/**
 * Split an address into lowercase alphanumeric tokens, dropping common
 * street-suffix / state abbreviations.
 *
 * @param {string} address - Free-form address text. Null/undefined is treated as "".
 * @returns {string[]} Tokens in their original order, stop words removed.
 */
function buildAddressTokens(address) {
  return String(address ?? "")
    .toLowerCase()
    // Anything that is not a letter, digit or whitespace becomes a separator.
    .replace(/[^a-z0-9\s]/g, " ")
    .split(/\s+/)
    .filter((token) => token !== "" && !ADDRESS_TOKEN_STOP_WORDS.has(token));
}
|
||||
|
||||
function normalizeListingUrl(url) {
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
@@ -74,7 +58,8 @@ async function collectListingUrl(page) {
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const address = parseAddress(process.argv[2]);
|
||||
const address = String(process.argv[2] || "").trim();
|
||||
const identity = parseAddressIdentity(address);
|
||||
const searchUrl = buildSearchUrl(address);
|
||||
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
|
||||
|
||||
@@ -85,26 +70,32 @@ async function main() {
|
||||
await sleep(1500);
|
||||
|
||||
let listingUrl = null;
|
||||
const addressTokens = buildAddressTokens(address);
|
||||
if (page.url().includes("/homedetail/")) {
|
||||
listingUrl = normalizeListingUrl(page.url());
|
||||
attempts.push("HAR search URL resolved directly to a property page.");
|
||||
const directScore = scoreAddressCandidate(
|
||||
identity,
|
||||
`${page.url()} ${(await page.title()) || ""}`
|
||||
);
|
||||
if (directScore.matched) {
|
||||
listingUrl = normalizeListingUrl(page.url());
|
||||
attempts.push("HAR search URL resolved directly to a matching property page.");
|
||||
} else {
|
||||
attempts.push("HAR redirected to a property page, but it did not match the requested address closely enough.");
|
||||
}
|
||||
} else {
|
||||
const discovered = await collectListingUrl(page);
|
||||
const scored = discovered
|
||||
.map((candidate) => {
|
||||
const haystack = `${candidate.url} ${candidate.text} ${candidate.parentText}`.toLowerCase();
|
||||
const score = addressTokens.reduce(
|
||||
(total, token) => total + (haystack.includes(token) ? 1 : 0),
|
||||
0
|
||||
const match = scoreAddressCandidate(
|
||||
identity,
|
||||
`${candidate.url} ${candidate.text} ${candidate.parentText}`
|
||||
);
|
||||
return { ...candidate, score };
|
||||
return { ...candidate, match };
|
||||
})
|
||||
.sort((a, b) => b.score - a.score);
|
||||
.sort((a, b) => b.match.score - a.match.score);
|
||||
|
||||
if (scored[0] && scored[0].score >= Math.min(3, addressTokens.length)) {
|
||||
if (scored[0]?.match.matched) {
|
||||
listingUrl = normalizeListingUrl(scored[0].url);
|
||||
attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].score}.`);
|
||||
attempts.push(`HAR search results exposed a matching homedetail link with score ${scored[0].match.score}.`);
|
||||
} else {
|
||||
attempts.push("HAR discovery did not expose a confident homedetail match for this address.");
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"har-photos": "node har-photos.js",
|
||||
"browse": "tsx browse.ts",
|
||||
"scrape": "tsx scrape.ts",
|
||||
"test:photos": "node --test real-estate-photo-common.test.mjs zillow-photo-data.test.mjs",
|
||||
"test:photos": "node --test real-estate-address.test.mjs real-estate-photo-common.test.mjs zillow-photo-data.test.mjs",
|
||||
"zillow-discover": "node zillow-discover.js",
|
||||
"zillow-photos": "node zillow-photos.js",
|
||||
"fetch-browser": "npx cloakbrowser install"
|
||||
|
||||
125
skills/web-automation/scripts/real-estate-address.js
Normal file
125
skills/web-automation/scripts/real-estate-address.js
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Street-type words (plus the "tx" state code) that are dropped from address
// tokens because they do not help tell one property from another.
const STREET_STOP_WORDS = new Set([
  "st",
  "street",
  "rd",
  "road",
  "dr",
  "drive",
  "ave",
  "avenue",
  "blvd",
  "boulevard",
  "ln",
  "lane",
  "ct",
  "court",
  "cir",
  "circle",
  "way",
  "trl",
  "trail",
  "pkwy",
  "parkway",
  "tx"
]);

// Loose label alternation used when looking for a unit inside candidate text.
const UNIT_LABEL_PATTERN = "(?:apt|apartment|unit|suite|ste|#)";

// Stricter designator used when parsing the *requested* address. A word label
// must be followed by whitespace (optionally after a period, "Apt. 204") or sit
// directly against a digit ("Apt204"); "#" may touch the value. Without this, a
// street such as "Via Stella" would be split into label "ste" + unit "lla".
const UNIT_DESIGNATOR_PATTERN =
  "(?:#\\s*|(?:apartment|apt|unit|suite|ste)(?:\\.?\\s+|(?=\\d)))";

// "<street> <designator><value>" at the end of the street segment.
const UNIT_SUFFIX_REGEX = new RegExp(
  `^(.*?)\\s+${UNIT_DESIGNATOR_PATTERN}([a-z0-9-]+)$`,
  "i"
);

// A whole comma-separated segment that is nothing but "<designator><value>".
const UNIT_SEGMENT_REGEX = new RegExp(`^${UNIT_DESIGNATOR_PATTERN}([a-z0-9-]+)$`, "i");

/**
 * Lowercase a value and split it into tokens on whitespace and hyphens.
 * Characters other than letters, digits, "#", whitespace and "-" act as separators.
 *
 * @param {unknown} value
 * @returns {string[]}
 */
function tokenize(value) {
  return String(value || "")
    .toLowerCase()
    .replace(/[^a-z0-9#\s-]/g, " ")
    .split(/[\s-]+/)
    .map((token) => token.trim())
    .filter(Boolean);
}

/**
 * Collapse every run of whitespace to a single space and trim the ends.
 *
 * @param {unknown} value - Falsy values are treated as "".
 * @returns {string}
 */
function collapseWhitespace(value) {
  return String(value || "").replace(/\s+/g, " ").trim();
}

/**
 * Escape regular-expression metacharacters so the value can be embedded in a
 * dynamically built RegExp as a literal.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegex(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Read a unit value from a comma-separated segment that consists solely of a
 * unit designator, e.g. "Apt 204" or "#4B".
 *
 * Values without a digit that are longer than two characters are rejected so a
 * locality such as "Ste Genevieve" is not mistaken for a suite number.
 *
 * @param {string} segment
 * @returns {string|null} The unit value as written, or null when the segment is not a unit.
 */
function parseUnitSegment(segment) {
  const match = collapseWhitespace(segment).match(UNIT_SEGMENT_REGEX);
  if (!match) {
    return null;
  }
  const value = match[1];
  return /\d/.test(value) || value.length <= 2 ? value : null;
}

/**
 * Break a free-form address into the pieces used for listing matching.
 *
 * The unit is recognised either at the end of the street segment
 * ("4141 Whiteley Dr Apt 204, ...") or as its own comma-separated segment
 * right after the street ("4141 Whiteley Dr, Apt 204, ..."). Addresses with no
 * unit yield `unitValue: null` and `hasUnit: false`.
 *
 * @param {unknown} rawAddress - Address text.
 * @returns {{
 *   raw: string,
 *   streetPart: string,
 *   streetWithoutUnit: string,
 *   locality: string,
 *   streetTokens: string[],
 *   localityTokens: string[],
 *   streetNumber: (string|null),
 *   unitValue: (string|null),
 *   hasUnit: boolean
 * }} Parsed identity; `unitValue` is lowercased, `streetNumber` is the first street token.
 * @throws {Error} "Missing address." when the input is empty after trimming.
 */
export function parseAddressIdentity(rawAddress) {
  const address = collapseWhitespace(rawAddress);
  if (!address) {
    throw new Error("Missing address.");
  }

  const [streetSegmentRaw, ...restSegments] = address.split(",");
  const streetSegment = collapseWhitespace(streetSegmentRaw);

  let streetPart = streetSegment;
  let streetWithoutUnit = streetSegment;
  let unitValue = null;
  let localitySegments = restSegments;

  const suffixMatch = streetSegment.match(UNIT_SUFFIX_REGEX);
  if (suffixMatch) {
    streetWithoutUnit = collapseWhitespace(suffixMatch[1]);
    unitValue = suffixMatch[2].toLowerCase();
  } else if (restSegments.length > 0) {
    // The unit may have been written as its own comma-separated segment.
    const segmentUnit = parseUnitSegment(restSegments[0]);
    if (segmentUnit) {
      unitValue = segmentUnit.toLowerCase();
      streetPart = `${streetSegment} ${collapseWhitespace(restSegments[0])}`;
      // Keep the unit text out of the locality tokens.
      localitySegments = restSegments.slice(1);
    }
  }

  const locality = collapseWhitespace(localitySegments.join(" "));

  const streetTokens = tokenize(streetWithoutUnit).filter(
    (token) => !STREET_STOP_WORDS.has(token)
  );
  const localityTokens = tokenize(locality).filter((token) => !STREET_STOP_WORDS.has(token));

  return {
    raw: address,
    streetPart,
    streetWithoutUnit,
    locality,
    streetTokens,
    localityTokens,
    streetNumber: streetTokens[0] || null,
    unitValue,
    hasUnit: Boolean(unitValue)
  };
}

/**
 * Build the hyphen-joined slug used in Zillow address search URLs.
 * Commas and "#" are removed; every other word (including unit labels) is kept.
 *
 * @param {unknown} rawAddress
 * @returns {string}
 */
export function buildZillowAddressSlug(rawAddress) {
  const address = collapseWhitespace(rawAddress);
  return address
    .replace(/,/g, "")
    .replace(/#/g, "")
    .trim()
    .split(/\s+/)
    .join("-");
}

/**
 * Score how well a piece of candidate text (URL, link text, page title, ...)
 * matches a parsed address identity.
 *
 * A candidate is `matched` only when all of the following hold:
 *   - enough distinct street/locality tokens appear as whole words
 *     (3 when the identity has a unit, otherwise 2, capped at the token count);
 *   - the identity's street number, when it starts with a digit, appears as a
 *     whole word, so another house on the same street is not accepted;
 *   - the identity's unit, when present, is found in the candidate.
 *
 * @param {{streetTokens: string[], localityTokens: string[], streetNumber?: (string|null), unitValue?: (string|null), hasUnit?: boolean}} identity
 *   Result of `parseAddressIdentity`.
 * @param {unknown} candidateText - Text to inspect.
 * @returns {{matched: boolean, score: number, matchedTokens: string[], unitMatched: boolean}}
 *   `score` is the number of matched tokens plus 2 when a requested unit was found.
 */
export function scoreAddressCandidate(identity, candidateText) {
  const normalized = collapseWhitespace(candidateText).toLowerCase();
  // Punctuation (including URL hyphens) becomes spaces so tokens can be
  // compared as whole words via " token " lookups.
  const normalizedPadded = ` ${normalized.replace(/[^a-z0-9#]+/g, " ")} `;
  const uniqueTokens = Array.from(
    new Set([...identity.streetTokens, ...identity.localityTokens])
  );
  const matchedTokens = uniqueTokens.filter((token) =>
    normalizedPadded.includes(` ${token} `)
  );

  let unitMatched = true;
  if (identity.hasUnit && identity.unitValue) {
    const unitLiteral = escapeRegex(identity.unitValue);
    const unitRegex = new RegExp(
      `(?:^|\\s)(?:${UNIT_LABEL_PATTERN})\\s*${unitLiteral}(?:\\s|$)`,
      "i"
    );
    const hashRegex = new RegExp(`#\\s*${unitLiteral}(?:\\b|$)`, "i");
    // Fallback for URL slugs, where the unit shows up as a bare token.
    const looseTokenRegex = new RegExp(`(?:^|\\s)${unitLiteral}(?:\\s|$)`, "i");
    unitMatched =
      unitRegex.test(normalized) ||
      hashRegex.test(candidateText) ||
      looseTokenRegex.test(normalizedPadded);
  }

  // Only enforce the street number when it really is a number; identities
  // built without one (or with a named building) keep the token-count rule.
  const requiresStreetNumber =
    typeof identity.streetNumber === "string" && /^\d/.test(identity.streetNumber);
  const streetNumberMatched =
    !requiresStreetNumber || normalizedPadded.includes(` ${identity.streetNumber} `);

  const minimumCoreMatches = identity.hasUnit ? 3 : 2;
  const matched =
    // An identity with no tokens carries no evidence and must not match everything.
    uniqueTokens.length > 0 &&
    matchedTokens.length >= Math.min(minimumCoreMatches, uniqueTokens.length) &&
    streetNumberMatched &&
    unitMatched;

  return {
    matched,
    score: matchedTokens.length + (unitMatched && identity.hasUnit ? 2 : 0),
    matchedTokens,
    unitMatched
  };
}
|
||||
59
skills/web-automation/scripts/real-estate-address.test.mjs
Normal file
59
skills/web-automation/scripts/real-estate-address.test.mjs
Normal file
@@ -0,0 +1,59 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
buildZillowAddressSlug,
|
||||
parseAddressIdentity,
|
||||
scoreAddressCandidate,
|
||||
} from "./real-estate-address.js";
|
||||
|
||||
test("parseAddressIdentity detects unit numbers when present", () => {
|
||||
const identity = parseAddressIdentity("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418");
|
||||
|
||||
assert.equal(identity.streetWithoutUnit, "4141 Whiteley Dr");
|
||||
assert.equal(identity.unitValue, "204");
|
||||
assert.equal(identity.hasUnit, true);
|
||||
assert.deepEqual(identity.streetTokens, ["4141", "whiteley"]);
|
||||
});
|
||||
|
||||
test("parseAddressIdentity supports plain single-family style addresses with no unit", () => {
|
||||
const identity = parseAddressIdentity("1201 E Iberian Ct, Granbury, TX 76048");
|
||||
|
||||
assert.equal(identity.unitValue, null);
|
||||
assert.equal(identity.hasUnit, false);
|
||||
assert.deepEqual(identity.streetTokens, ["1201", "e", "iberian"]);
|
||||
});
|
||||
|
||||
test("buildZillowAddressSlug keeps unit identifiers in the slug", () => {
|
||||
assert.equal(
|
||||
buildZillowAddressSlug("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418"),
|
||||
"4141-Whiteley-Dr-Apt-204-Corpus-Christi-TX-78418"
|
||||
);
|
||||
});
|
||||
|
||||
test("scoreAddressCandidate rejects a wrong unit on the same street", () => {
|
||||
const identity = parseAddressIdentity("4141 Whiteley Dr Apt 204, Corpus Christi, TX 78418");
|
||||
const good = scoreAddressCandidate(
|
||||
identity,
|
||||
"https://www.zillow.com/homedetails/4141-Whiteley-Dr-Apt-204-Corpus-Christi-TX-78418/123_zpid/"
|
||||
);
|
||||
const bad = scoreAddressCandidate(
|
||||
identity,
|
||||
"https://www.zillow.com/homedetails/4141-Whiteley-Dr-Apt-305-Corpus-Christi-TX-78418/456_zpid/"
|
||||
);
|
||||
|
||||
assert.equal(good.matched, true);
|
||||
assert.equal(good.unitMatched, true);
|
||||
assert.equal(bad.matched, false);
|
||||
assert.equal(bad.unitMatched, false);
|
||||
});
|
||||
|
||||
test("scoreAddressCandidate still matches plain addresses with no unit", () => {
|
||||
const identity = parseAddressIdentity("1201 E Iberian Ct, Granbury, TX 76048");
|
||||
const result = scoreAddressCandidate(
|
||||
identity,
|
||||
"1201 E Iberian Ct Granbury, TX 76048 For Sale, Residential"
|
||||
);
|
||||
|
||||
assert.equal(result.matched, true);
|
||||
});
|
||||
@@ -7,24 +7,11 @@ import {
|
||||
gotoListing,
|
||||
sleep,
|
||||
} from "./real-estate-photo-common.js";
|
||||
|
||||
/**
 * Turn the raw CLI argument into a trimmed address string.
 *
 * Falsy input is treated as an empty string; when nothing is left after
 * trimming, `fail` (defined outside this block) is invoked with "Missing address.".
 *
 * @param {unknown} rawAddress - Raw address value.
 * @returns {string} The trimmed address.
 */
function parseAddress(rawAddress) {
  const text = String(rawAddress || "");
  const cleaned = text.trim();
  if (cleaned.length === 0) {
    fail("Missing address.");
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Build the Zillow address-search URL for an address.
 *
 * Commas and "#" are stripped, the remaining words are joined with hyphens,
 * and the resulting slug is percent-encoded into the `/homes/<slug>_rb/` path.
 *
 * @param {string} address - Free-form address text.
 * @returns {string} The Zillow search URL.
 */
function buildSearchUrl(address) {
  const words = address.replace(/[,#]/g, "").trim().split(/\s+/);
  const encodedSlug = encodeURIComponent(words.join("-"));
  return `https://www.zillow.com/homes/${encodedSlug}_rb/`;
}
|
||||
import {
|
||||
buildZillowAddressSlug,
|
||||
parseAddressIdentity,
|
||||
scoreAddressCandidate,
|
||||
} from "./real-estate-address.js";
|
||||
|
||||
function normalizeListingUrl(url) {
|
||||
try {
|
||||
@@ -53,20 +40,31 @@ async function collectListingUrl(page) {
|
||||
if (!href) continue;
|
||||
const absolute = toAbsolute(href);
|
||||
if (!absolute) continue;
|
||||
candidates.push(absolute);
|
||||
const text = (anchor.textContent || "").replace(/\s+/g, " ").trim();
|
||||
const aria = anchor.getAttribute("aria-label") || "";
|
||||
const title = anchor.getAttribute("title") || "";
|
||||
const parentText = (anchor.parentElement?.textContent || "").replace(/\s+/g, " ").trim();
|
||||
candidates.push({
|
||||
url: absolute,
|
||||
text,
|
||||
aria,
|
||||
title,
|
||||
parentText,
|
||||
});
|
||||
}
|
||||
|
||||
const unique = [];
|
||||
for (const candidate of candidates) {
|
||||
if (!unique.includes(candidate)) unique.push(candidate);
|
||||
if (!unique.some((item) => item.url === candidate.url)) unique.push(candidate);
|
||||
}
|
||||
return unique[0] || null;
|
||||
return unique;
|
||||
});
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const address = parseAddress(process.argv[2]);
|
||||
const searchUrl = buildSearchUrl(address);
|
||||
const address = String(process.argv[2] || "").trim();
|
||||
const identity = parseAddressIdentity(address);
|
||||
const searchUrl = `https://www.zillow.com/homes/${encodeURIComponent(buildZillowAddressSlug(address))}_rb/`;
|
||||
const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });
|
||||
|
||||
try {
|
||||
@@ -77,15 +75,33 @@ async function main() {
|
||||
|
||||
let listingUrl = null;
|
||||
if (page.url().includes("/homedetails/")) {
|
||||
listingUrl = normalizeListingUrl(page.url());
|
||||
attempts.push("Zillow search URL resolved directly to a property page.");
|
||||
const directScore = scoreAddressCandidate(
|
||||
identity,
|
||||
`${page.url()} ${(await page.title()) || ""}`
|
||||
);
|
||||
if (directScore.matched) {
|
||||
listingUrl = normalizeListingUrl(page.url());
|
||||
attempts.push("Zillow search URL resolved directly to a matching property page.");
|
||||
} else {
|
||||
attempts.push("Zillow redirected to a property page, but it did not match the requested address closely enough.");
|
||||
}
|
||||
} else {
|
||||
const discovered = await collectListingUrl(page);
|
||||
if (discovered) {
|
||||
listingUrl = normalizeListingUrl(discovered);
|
||||
attempts.push("Zillow search results exposed a homedetails link.");
|
||||
const scored = discovered
|
||||
.map((candidate) => ({
|
||||
...candidate,
|
||||
match: scoreAddressCandidate(
|
||||
identity,
|
||||
`${candidate.url} ${candidate.text} ${candidate.aria} ${candidate.title} ${candidate.parentText}`
|
||||
)
|
||||
}))
|
||||
.sort((a, b) => b.match.score - a.match.score);
|
||||
|
||||
if (scored[0]?.match.matched) {
|
||||
listingUrl = normalizeListingUrl(scored[0].url);
|
||||
attempts.push(`Zillow search results exposed a matching homedetails link with score ${scored[0].match.score}.`);
|
||||
} else {
|
||||
attempts.push("Zillow discovery did not expose a homedetails link for this address.");
|
||||
attempts.push("Zillow discovery did not expose a confident homedetails match for this address.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user