Use Zillow parcel hints for CAD lookup
This commit is contained in:
@@ -176,6 +176,7 @@ Important rules:
|
|||||||
|
|
||||||
- Zillow/Redfin/HAR geo IDs are hints only
|
- Zillow/Redfin/HAR geo IDs are hints only
|
||||||
- parcel/APN/account IDs are stronger search keys than listing geo IDs
|
- parcel/APN/account IDs are stronger search keys than listing geo IDs
|
||||||
|
- if Zillow exposes a parcel/APN/account number on the listing, capture it and use that identifier in CAD lookup before falling back to address-only matching
|
||||||
- official jurisdiction pages should be linked in the final report
|
- official jurisdiction pages should be linked in the final report
|
||||||
- if a direct property detail page is accessible, its data should be labeled as official public-record evidence
|
- if a direct property detail page is accessible, its data should be labeled as official public-record evidence
|
||||||
|
|
||||||
@@ -195,9 +196,14 @@ That output should be used by the skill to:
|
|||||||
|
|
||||||
- identify the correct CAD
|
- identify the correct CAD
|
||||||
- attempt address / parcel / account lookup on the discovered CAD site for that county
|
- attempt address / parcel / account lookup on the discovered CAD site for that county
|
||||||
|
- prefer Zillow-exposed parcel/APN/account identifiers over address-only search when the listing provides them
|
||||||
- capture official owner / legal / assessed-value evidence when a public detail page is available
|
- capture official owner / legal / assessed-value evidence when a public detail page is available
|
||||||
- treat county-specific CAD detail retrieval as an adapter layer on top of generic county/jurisdiction resolution
|
- treat county-specific CAD detail retrieval as an adapter layer on top of generic county/jurisdiction resolution
|
||||||
|
|
||||||
|
Nueces-specific note:
|
||||||
|
|
||||||
|
- when using Nueces CAD `By ID` / `Geographic ID`, insert a dash after the first 4 digits and again after the first 8 digits, for example `123456789012` -> `1234-5678-9012`
|
||||||
|
|
||||||
Recommended fields to capture from official records when accessible:
|
Recommended fields to capture from official records when accessible:
|
||||||
|
|
||||||
- account number
|
- account number
|
||||||
|
|||||||
@@ -195,6 +195,19 @@ Expected success shape:
|
|||||||
- `expectedPhotoCount` matches `photoCount`
|
- `expectedPhotoCount` matches `photoCount`
|
||||||
- `imageUrls` contains the listing photo set
|
- `imageUrls` contains the listing photo set
|
||||||
|
|
||||||
|
### Zillow identifiers
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/.openclaw/workspace/skills/web-automation/scripts
|
||||||
|
node zillow-identifiers.js "https://www.zillow.com/homedetails/6702-Everhart-Rd-APT-T106-Corpus-Christi-TX-78413/2067445642_zpid/"
|
||||||
|
```
|
||||||
|
|
||||||
|
What it does:
|
||||||
|
- opens the Zillow listing shell without forcing the photo workflow
|
||||||
|
- inspects embedded `__NEXT_DATA__` plus visible listing text
|
||||||
|
- extracts parcel/APN-style identifiers when Zillow exposes them
|
||||||
|
- returns those identifiers so `property-assessor` can use them as stronger CAD lookup keys than listing geo IDs
|
||||||
|
|
||||||
### HAR
|
### HAR
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -197,6 +197,7 @@ This command currently:
|
|||||||
Important rules:
|
Important rules:
|
||||||
- listing-site geo IDs are hints only; do **not** treat them as assessor record keys
|
- listing-site geo IDs are hints only; do **not** treat them as assessor record keys
|
||||||
- parcel/APN/account identifiers from Zillow/HAR/Redfin are much stronger keys than listing geo IDs
|
- parcel/APN/account identifiers from Zillow/HAR/Redfin are much stronger keys than listing geo IDs
|
||||||
|
- if Zillow exposes a parcel/APN/account number on the listing, capture it and feed that identifier into CAD lookup before relying on address-only matching
|
||||||
- if a direct public-record property page is available, use its data in the assessment and link it explicitly
|
- if a direct public-record property page is available, use its data in the assessment and link it explicitly
|
||||||
- when the helper exposes official CAD owner, legal-description, property-ID/account, value, or exemption data, treat those as primary-source facts in the model's assessment
|
- when the helper exposes official CAD owner, legal-description, property-ID/account, value, or exemption data, treat those as primary-source facts in the model's assessment
|
||||||
- if the jurisdiction can be identified but the property detail page is not directly retrievable, still link the official jurisdiction page and say what could not be confirmed
|
- if the jurisdiction can be identified but the property detail page is not directly retrievable, still link the official jurisdiction page and say what could not be confirmed
|
||||||
@@ -210,6 +211,7 @@ Process:
|
|||||||
1. run `locate-public-records` from the subject address
|
1. run `locate-public-records` from the subject address
|
||||||
2. use the returned Texas Comptroller county directory page as the official jurisdiction reference
|
2. use the returned Texas Comptroller county directory page as the official jurisdiction reference
|
||||||
3. use the returned CAD website for address / account / parcel lookup
|
3. use the returned CAD website for address / account / parcel lookup
|
||||||
|
- when Zillow exposes the parcel/APN/account number, prefer that over address-only search
|
||||||
4. when accessible, capture:
|
4. when accessible, capture:
|
||||||
- account number
|
- account number
|
||||||
- owner name
|
- owner name
|
||||||
@@ -221,6 +223,9 @@ Process:
|
|||||||
|
|
||||||
In the final assessment, explicitly label official public-record facts as such.
|
In the final assessment, explicitly label official public-record facts as such.
|
||||||
|
|
||||||
|
Nueces-specific note:
|
||||||
|
- when searching Nueces CAD by parcel / Geographic ID, format the identifier with a dash after the first 4 digits and after the first 8 digits, for example `123456789012` -> `1234-5678-9012`
|
||||||
|
|
||||||
## Minimum data to capture
|
## Minimum data to capture
|
||||||
|
|
||||||
For the target property, capture when available:
|
For the target property, capture when available:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
|
||||||
|
import { extractZillowIdentifierHints } from "../../web-automation/scripts/zillow-identifiers.js";
|
||||||
import { discoverListingSources, type ListingDiscoveryResult } from "./listing-discovery.js";
|
import { discoverListingSources, type ListingDiscoveryResult } from "./listing-discovery.js";
|
||||||
import { extractPhotoData, type PhotoExtractionResult, type PhotoSource } from "./photo-review.js";
|
import { extractPhotoData, type PhotoExtractionResult, type PhotoSource } from "./photo-review.js";
|
||||||
import { resolvePublicRecords, type PublicRecordsResolution } from "./public-records.js";
|
import { resolvePublicRecords, type PublicRecordsResolution } from "./public-records.js";
|
||||||
@@ -36,6 +37,7 @@ interface AssessPropertyDeps {
|
|||||||
renderReportPdfFn?: typeof renderReportPdf;
|
renderReportPdfFn?: typeof renderReportPdf;
|
||||||
discoverListingSourcesFn?: typeof discoverListingSources;
|
discoverListingSourcesFn?: typeof discoverListingSources;
|
||||||
extractPhotoDataFn?: typeof extractPhotoData;
|
extractPhotoDataFn?: typeof extractPhotoData;
|
||||||
|
extractZillowIdentifierHintsFn?: typeof extractZillowIdentifierHints;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface PurposeGuidance {
|
interface PurposeGuidance {
|
||||||
@@ -195,14 +197,17 @@ function inferSourceFromUrl(rawUrl: string): PhotoSource | null {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function resolvePhotoReview(
|
interface ResolvedListingCandidates {
|
||||||
options: AssessPropertyOptions,
|
attempts: string[];
|
||||||
discoverListingSourcesFn: typeof discoverListingSources,
|
|
||||||
extractPhotoDataFn: typeof extractPhotoData
|
|
||||||
): Promise<{
|
|
||||||
listingUrls: Array<{ label: string; url: string }>;
|
listingUrls: Array<{ label: string; url: string }>;
|
||||||
photoReview: Record<string, unknown>;
|
zillowUrl: string | null;
|
||||||
}> {
|
harUrl: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function resolveListingCandidates(
|
||||||
|
options: AssessPropertyOptions,
|
||||||
|
discoverListingSourcesFn: typeof discoverListingSources
|
||||||
|
): Promise<ResolvedListingCandidates> {
|
||||||
const attempts: string[] = [];
|
const attempts: string[] = [];
|
||||||
const listingUrls: Array<{ label: string; url: string }> = [];
|
const listingUrls: Array<{ label: string; url: string }> = [];
|
||||||
|
|
||||||
@@ -241,9 +246,27 @@ async function resolvePhotoReview(
|
|||||||
addListingUrl("Discovered HAR Listing", harUrl);
|
addListingUrl("Discovered HAR Listing", harUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
attempts,
|
||||||
|
listingUrls,
|
||||||
|
zillowUrl,
|
||||||
|
harUrl,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function resolvePhotoReview(
|
||||||
|
listingCandidates: ResolvedListingCandidates,
|
||||||
|
extractPhotoDataFn: typeof extractPhotoData,
|
||||||
|
additionalAttempts: string[] = []
|
||||||
|
): Promise<{
|
||||||
|
listingUrls: Array<{ label: string; url: string }>;
|
||||||
|
photoReview: Record<string, unknown>;
|
||||||
|
}> {
|
||||||
|
const attempts: string[] = [...listingCandidates.attempts, ...additionalAttempts];
|
||||||
|
const listingUrls = [...listingCandidates.listingUrls];
|
||||||
const candidates: Array<{ source: PhotoSource; url: string }> = [];
|
const candidates: Array<{ source: PhotoSource; url: string }> = [];
|
||||||
if (zillowUrl) candidates.push({ source: "zillow", url: zillowUrl });
|
if (listingCandidates.zillowUrl) candidates.push({ source: "zillow", url: listingCandidates.zillowUrl });
|
||||||
if (harUrl) candidates.push({ source: "har", url: harUrl });
|
if (listingCandidates.harUrl) candidates.push({ source: "har", url: listingCandidates.harUrl });
|
||||||
|
|
||||||
let extracted: PhotoExtractionResult | null = null;
|
let extracted: PhotoExtractionResult | null = null;
|
||||||
for (const candidate of candidates) {
|
for (const candidate of candidates) {
|
||||||
@@ -411,21 +434,52 @@ export async function assessProperty(
|
|||||||
const renderReportPdfFn = deps.renderReportPdfFn || renderReportPdf;
|
const renderReportPdfFn = deps.renderReportPdfFn || renderReportPdf;
|
||||||
const discoverListingSourcesFn = deps.discoverListingSourcesFn || discoverListingSources;
|
const discoverListingSourcesFn = deps.discoverListingSourcesFn || discoverListingSources;
|
||||||
const extractPhotoDataFn = deps.extractPhotoDataFn || extractPhotoData;
|
const extractPhotoDataFn = deps.extractPhotoDataFn || extractPhotoData;
|
||||||
|
const extractZillowIdentifierHintsFn =
|
||||||
|
deps.extractZillowIdentifierHintsFn || extractZillowIdentifierHints;
|
||||||
|
|
||||||
|
const listingCandidates = await resolveListingCandidates(
|
||||||
|
{ ...options, assessmentPurpose: purpose },
|
||||||
|
discoverListingSourcesFn
|
||||||
|
);
|
||||||
|
|
||||||
|
const identifierAttempts: string[] = [];
|
||||||
|
let effectiveParcelId = options.parcelId;
|
||||||
|
if (!effectiveParcelId && listingCandidates.zillowUrl) {
|
||||||
|
try {
|
||||||
|
const hints = await extractZillowIdentifierHintsFn(listingCandidates.zillowUrl);
|
||||||
|
effectiveParcelId = hints.parcelId || hints.apn || effectiveParcelId;
|
||||||
|
if (Array.isArray(hints.notes) && hints.notes.length) {
|
||||||
|
identifierAttempts.push(...hints.notes);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
identifierAttempts.push(
|
||||||
|
`Zillow parcel/APN extraction failed: ${error instanceof Error ? error.message : String(error)}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const effectiveListingSourceUrl =
|
||||||
|
options.listingSourceUrl || listingCandidates.zillowUrl || listingCandidates.harUrl || undefined;
|
||||||
|
|
||||||
const publicRecords = await resolvePublicRecordsFn(options.address, {
|
const publicRecords = await resolvePublicRecordsFn(options.address, {
|
||||||
parcelId: options.parcelId,
|
parcelId: effectiveParcelId,
|
||||||
listingGeoId: options.listingGeoId,
|
listingGeoId: options.listingGeoId,
|
||||||
listingSourceUrl: options.listingSourceUrl
|
listingSourceUrl: effectiveListingSourceUrl
|
||||||
});
|
});
|
||||||
|
|
||||||
const photoResolution = await resolvePhotoReview(
|
const photoResolution = await resolvePhotoReview(
|
||||||
{ ...options, assessmentPurpose: purpose },
|
listingCandidates,
|
||||||
discoverListingSourcesFn,
|
extractPhotoDataFn,
|
||||||
extractPhotoDataFn
|
identifierAttempts
|
||||||
);
|
);
|
||||||
|
|
||||||
const reportPayload = buildAssessmentReportPayload(
|
const reportPayload = buildAssessmentReportPayload(
|
||||||
{ ...options, assessmentPurpose: purpose },
|
{
|
||||||
|
...options,
|
||||||
|
assessmentPurpose: purpose,
|
||||||
|
parcelId: effectiveParcelId,
|
||||||
|
listingSourceUrl: effectiveListingSourceUrl
|
||||||
|
},
|
||||||
publicRecords,
|
publicRecords,
|
||||||
photoResolution.listingUrls,
|
photoResolution.listingUrls,
|
||||||
photoResolution.photoReview
|
photoResolution.photoReview
|
||||||
|
|||||||
@@ -175,6 +175,16 @@ function buildCadSearchKeywords(address: string, year: number): string {
|
|||||||
return `${collapseWhitespace(address)} Year:${year}`.trim();
|
return `${collapseWhitespace(address)} Year:${year}`.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Render a parcel identifier in the dashed form used for Nueces CAD
 * Geographic ID searches: a dash after the first 4 digits and another after
 * the first 8, e.g. `123456789012` -> `1234-5678-9012`.
 *
 * Every non-digit character is discarded before grouping, so input that is
 * already dashed comes back unchanged.
 * NOTE(review): letters are discarded too — confirm Nueces Geographic IDs are
 * always purely numeric.
 *
 * @param parcelId Raw parcel / Geographic ID; may be null or undefined.
 * @returns The digit groups joined with dashes, or null when no digits remain.
 */
function formatNuecesGeographicId(parcelId: string | null | undefined): string | null {
  const digits = collapseWhitespace(parcelId).replace(/[^0-9]/g, "");
  if (!digits) return null;

  // Groups of 4 / 4 / remainder; shorter inputs simply yield fewer groups.
  const groups = [digits.slice(0, 4), digits.slice(4, 8), digits.slice(8)];
  return groups.filter((group) => group.length > 0).join("-");
}
|
||||||
|
|
||||||
function parseAddressForCadSearch(address: string): {
|
function parseAddressForCadSearch(address: string): {
|
||||||
rawAddress: string;
|
rawAddress: string;
|
||||||
streetNumber: string | null;
|
streetNumber: string | null;
|
||||||
@@ -327,6 +337,7 @@ function pickBestCadResult(
|
|||||||
async function enrichNuecesCadPropertyDetails(
|
async function enrichNuecesCadPropertyDetails(
|
||||||
address: string,
|
address: string,
|
||||||
appraisalDistrictWebsite: string,
|
appraisalDistrictWebsite: string,
|
||||||
|
parcelId: string | null | undefined,
|
||||||
fetchText: FetchLike
|
fetchText: FetchLike
|
||||||
): Promise<PropertyDetailsResolution | null> {
|
): Promise<PropertyDetailsResolution | null> {
|
||||||
const parsedAddress = parseAddressForCadSearch(address);
|
const parsedAddress = parseAddressForCadSearch(address);
|
||||||
@@ -341,10 +352,12 @@ async function enrichNuecesCadPropertyDetails(
|
|||||||
if (!searchToken) return null;
|
if (!searchToken) return null;
|
||||||
|
|
||||||
const searchYear = parseCurrentYearFromSearchHome(searchHomeHtml);
|
const searchYear = parseCurrentYearFromSearchHome(searchHomeHtml);
|
||||||
|
const formattedGeographicId = formatNuecesGeographicId(parcelId);
|
||||||
const searchKeywords =
|
const searchKeywords =
|
||||||
parsedAddress.streetNumber && parsedAddress.streetName
|
formattedGeographicId ||
|
||||||
|
(parsedAddress.streetNumber && parsedAddress.streetName
|
||||||
? `StreetNumber:${parsedAddress.streetNumber} StreetName:"${parsedAddress.streetName}"`
|
? `StreetNumber:${parsedAddress.streetNumber} StreetName:"${parsedAddress.streetName}"`
|
||||||
: buildCadSearchKeywords(address, searchYear);
|
: buildCadSearchKeywords(address, searchYear));
|
||||||
|
|
||||||
const fetchSearchPage = async (page: number): Promise<any> => {
|
const fetchSearchPage = async (page: number): Promise<any> => {
|
||||||
const searchResultsUrl = `${normalizedPropertySearchUrl}/search/SearchResults?keywords=${encodeURIComponent(searchKeywords)}`;
|
const searchResultsUrl = `${normalizedPropertySearchUrl}/search/SearchResults?keywords=${encodeURIComponent(searchKeywords)}`;
|
||||||
@@ -478,6 +491,7 @@ async function enrichNuecesCadPropertyDetails(
|
|||||||
|
|
||||||
async function tryEnrichPropertyDetails(
|
async function tryEnrichPropertyDetails(
|
||||||
address: string,
|
address: string,
|
||||||
|
parcelId: string | null | undefined,
|
||||||
appraisalDistrictWebsite: string | null,
|
appraisalDistrictWebsite: string | null,
|
||||||
fetchText: FetchLike
|
fetchText: FetchLike
|
||||||
): Promise<PropertyDetailsResolution | null> {
|
): Promise<PropertyDetailsResolution | null> {
|
||||||
@@ -487,7 +501,7 @@ async function tryEnrichPropertyDetails(
|
|||||||
const normalizedWebsite = normalizeUrl(website).toLowerCase();
|
const normalizedWebsite = normalizeUrl(website).toLowerCase();
|
||||||
try {
|
try {
|
||||||
if (normalizedWebsite.includes("nuecescad.net") || normalizedWebsite.includes("ncadistrict.com")) {
|
if (normalizedWebsite.includes("nuecescad.net") || normalizedWebsite.includes("ncadistrict.com")) {
|
||||||
return await enrichNuecesCadPropertyDetails(address, website, fetchText);
|
return await enrichNuecesCadPropertyDetails(address, website, parcelId, fetchText);
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
return null;
|
return null;
|
||||||
@@ -729,6 +743,7 @@ export async function resolvePublicRecords(
|
|||||||
taxAssessorCollector = offices.taxAssessorCollector;
|
taxAssessorCollector = offices.taxAssessorCollector;
|
||||||
propertyDetails = await tryEnrichPropertyDetails(
|
propertyDetails = await tryEnrichPropertyDetails(
|
||||||
address,
|
address,
|
||||||
|
options.parcelId,
|
||||||
typeof offices.appraisalDistrict?.Website === "string"
|
typeof offices.appraisalDistrict?.Website === "string"
|
||||||
? offices.appraisalDistrict.Website
|
? offices.appraisalDistrict.Website
|
||||||
: null,
|
: null,
|
||||||
|
|||||||
@@ -146,6 +146,47 @@ test("assessProperty auto-discovers listing sources, runs Zillow photos first, a
|
|||||||
assert.deepEqual(result.reportPayload?.recipientEmails, []);
|
assert.deepEqual(result.reportPayload?.recipientEmails, []);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("assessProperty uses parcel/APN hints extracted from Zillow before CAD lookup when parcel ID was not provided", async () => {
|
||||||
|
const seenPublicRecordOptions: Array<Record<string, unknown>> = [];
|
||||||
|
|
||||||
|
const result = await assessProperty(
|
||||||
|
{
|
||||||
|
address: "6702 Everhart Rd APT T106, Corpus Christi, TX 78413",
|
||||||
|
assessmentPurpose: "college housing for daughter attending TAMU-CC"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
resolvePublicRecordsFn: async (_address, options) => {
|
||||||
|
seenPublicRecordOptions.push({ ...options });
|
||||||
|
return samplePublicRecords;
|
||||||
|
},
|
||||||
|
discoverListingSourcesFn: async () => ({
|
||||||
|
attempts: ["Zillow discovery located a property page from the address."],
|
||||||
|
zillowUrl:
|
||||||
|
"https://www.zillow.com/homedetails/6702-Everhart-Rd-APT-T106-Corpus-Christi-TX-78413/2067445642_zpid/",
|
||||||
|
harUrl: null
|
||||||
|
}),
|
||||||
|
extractZillowIdentifierHintsFn: async () => ({
|
||||||
|
parcelId: "1234567890",
|
||||||
|
notes: ["Zillow listing exposed parcel/APN number 1234567890."]
|
||||||
|
}),
|
||||||
|
extractPhotoDataFn: async (source, url) => ({
|
||||||
|
source,
|
||||||
|
requestedUrl: url,
|
||||||
|
finalUrl: url,
|
||||||
|
expectedPhotoCount: 29,
|
||||||
|
complete: true,
|
||||||
|
photoCount: 29,
|
||||||
|
imageUrls: ["https://photos.example/1.jpg"],
|
||||||
|
notes: [`${source} extractor succeeded.`]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.equal(result.ok, true);
|
||||||
|
assert.equal(seenPublicRecordOptions.length, 1);
|
||||||
|
assert.equal(seenPublicRecordOptions[0]?.parcelId, "1234567890");
|
||||||
|
});
|
||||||
|
|
||||||
test("assessProperty asks for recipient email only when PDF render is explicitly requested", async () => {
|
test("assessProperty asks for recipient email only when PDF render is explicitly requested", async () => {
|
||||||
const result = await assessProperty(
|
const result = await assessProperty(
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -291,3 +291,90 @@ test("resolvePublicRecords enriches official CAD property facts when a supported
|
|||||||
fetchedUrls.some((url) => url.includes("esearch.nuecescad.net/property/view/14069438"))
|
fetchedUrls.some((url) => url.includes("esearch.nuecescad.net/property/view/14069438"))
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("resolvePublicRecords uses formatted Nueces Geographic ID search when a parcel ID is available", async () => {
|
||||||
|
const fetchedUrls: string[] = [];
|
||||||
|
const enrichedFetchText = async (url: string): Promise<string> => {
|
||||||
|
fetchedUrls.push(url);
|
||||||
|
|
||||||
|
if (url.includes("geocoding.geo.census.gov")) {
|
||||||
|
return JSON.stringify(geocoderPayload);
|
||||||
|
}
|
||||||
|
if (url.endsWith("/county-directory/")) {
|
||||||
|
return countyIndexHtml;
|
||||||
|
}
|
||||||
|
if (url.endsWith("/county-directory/nueces.php")) {
|
||||||
|
return countyPageHtml.replace(
|
||||||
|
"http://www.ncadistrict.com/",
|
||||||
|
"https://nuecescad.net/"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (url === "https://nuecescad.net/") {
|
||||||
|
return `
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<a href="https://esearch.nuecescad.net/">Property Search</a>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
if (url === "https://esearch.nuecescad.net/") {
|
||||||
|
return `
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta name="search-token" content="token-value|2026-03-28T00:00:00Z" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Property Search
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
if (url.includes("/search/SearchResults?")) {
|
||||||
|
assert.match(url, /keywords=1234-5678-9012/);
|
||||||
|
return JSON.stringify({
|
||||||
|
success: true,
|
||||||
|
resultsList: [
|
||||||
|
{
|
||||||
|
propertyId: "200016970",
|
||||||
|
ownerName: "NGUYEN TRANG THUY",
|
||||||
|
ownerId: "677681",
|
||||||
|
address: "6702 Everhart Rd Apt T106, Corpus Christi, TX 78413",
|
||||||
|
legalDescription: "UNIT T106 EXAMPLE CONDO",
|
||||||
|
appraisedValueDisplay: "$128,876",
|
||||||
|
detailUrl: "/property/view/200016970?year=2026"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (url === "https://esearch.nuecescad.net/property/view/200016970?year=2026") {
|
||||||
|
return `
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<div class="property-summary">
|
||||||
|
<div>Owner Name</div><div>NGUYEN TRANG THUY</div>
|
||||||
|
<div>Account Number</div><div>200016970</div>
|
||||||
|
<div>Situs Address</div><div>6702 Everhart Rd Apt T106, Corpus Christi, TX 78413</div>
|
||||||
|
<div>Legal Description</div><div>UNIT T106 EXAMPLE CONDO</div>
|
||||||
|
<div>Land Value</div><div>$20,000</div>
|
||||||
|
<div>Improvement Value</div><div>$108,876</div>
|
||||||
|
<div>Market Value</div><div>$128,876</div>
|
||||||
|
<div>Assessed Value</div><div>$128,876</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
throw new Error(`Unexpected URL: ${url}`);
|
||||||
|
};
|
||||||
|
|
||||||
|
const payload = await resolvePublicRecords("6702 Everhart Rd APT T106, Corpus Christi, TX 78413", {
|
||||||
|
parcelId: "123456789012",
|
||||||
|
fetchText: enrichedFetchText
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(payload.propertyDetails?.propertyId, "200016970");
|
||||||
|
assert.ok(
|
||||||
|
fetchedUrls.some((url) => url.includes("keywords=1234-5678-9012"))
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|||||||
@@ -144,6 +144,7 @@ Use the dedicated extractors before trying a free-form gallery flow.
|
|||||||
- Zillow discovery: `node scripts/zillow-discover.js "<street-address>"`
|
- Zillow discovery: `node scripts/zillow-discover.js "<street-address>"`
|
||||||
- HAR discovery: `node scripts/har-discover.js "<street-address>"`
|
- HAR discovery: `node scripts/har-discover.js "<street-address>"`
|
||||||
- Zillow: `node scripts/zillow-photos.js "<listing-url>"`
|
- Zillow: `node scripts/zillow-photos.js "<listing-url>"`
|
||||||
|
- Zillow identifiers: `node scripts/zillow-identifiers.js "<listing-url>"`
|
||||||
- HAR: `node scripts/har-photos.js "<listing-url>"`
|
- HAR: `node scripts/har-photos.js "<listing-url>"`
|
||||||
|
|
||||||
The discovery scripts are purpose-built for the common address-to-listing workflow:
|
The discovery scripts are purpose-built for the common address-to-listing workflow:
|
||||||
@@ -173,6 +174,12 @@ Output is JSON with:
|
|||||||
- `imageUrls`
|
- `imageUrls`
|
||||||
- `notes`
|
- `notes`
|
||||||
|
|
||||||
|
`zillow-identifiers.js` is a lighter helper for CAD/public-record workflows:
|
||||||
|
- open the Zillow listing shell
|
||||||
|
- inspect embedded `__NEXT_DATA__` plus visible listing text
|
||||||
|
- capture parcel/APN-style identifiers when Zillow exposes them
|
||||||
|
- return those hints so `property-assessor` can use them as stronger CAD lookup keys than listing geo IDs
|
||||||
|
|
||||||
For property-assessor style workflows, prefer these dedicated commands over generic natural-language gallery automation.
|
For property-assessor style workflows, prefer these dedicated commands over generic natural-language gallery automation.
|
||||||
|
|
||||||
### Gallery/lightbox and all-photos workflows
|
### Gallery/lightbox and all-photos workflows
|
||||||
|
|||||||
81
skills/web-automation/scripts/zillow-identifiers.js
Normal file
81
skills/web-automation/scripts/zillow-identifiers.js
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
import { pathToFileURL } from "node:url";
|
||||||
|
|
||||||
|
import {
|
||||||
|
createPageSession,
|
||||||
|
dismissCommonOverlays,
|
||||||
|
fail,
|
||||||
|
gotoListing,
|
||||||
|
parseTarget,
|
||||||
|
runWithOperationTimeout,
|
||||||
|
} from "./real-estate-photo-common.js";
|
||||||
|
import {
|
||||||
|
extractZillowIdentifierHintsFromNextDataScript,
|
||||||
|
extractZillowIdentifierHintsFromText,
|
||||||
|
} from "./zillow-photo-data.js";
|
||||||
|
|
||||||
|
/**
 * Open a Zillow listing in a page session and collect parcel/APN-style
 * identifier hints from it.
 *
 * Two sources are read from the loaded page: the text of the `#__NEXT_DATA__`
 * element and the visible body text. For each field the structured
 * (`__NEXT_DATA__`) hint wins over the one found in visible text.
 *
 * @param {string} rawUrl - Listing URL; validated/normalized by `parseTarget`.
 * @param {{ timeoutMs?: number }} [options] - Optional overall timeout; a
 *   missing, zero or non-numeric value is passed on as `undefined`.
 * @returns {Promise<{source: string, requestedUrl: *, finalUrl: string,
 *   parcelId: (string|null), apn: (string|null), notes: string[]}>}
 */
export async function extractZillowIdentifierHints(rawUrl, options = {}) {
  const requestedUrl = parseTarget(rawUrl);
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  // Used both as the timeout hook and in `finally`; failures while closing
  // are deliberately ignored so cleanup never masks the real outcome.
  async function closeContext() {
    await context.close().catch(() => {});
  }

  // Loads the listing and derives the hint payload from the page content.
  async function collectHints() {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);

    // Either read may fail independently; each falls back to an empty value.
    const nextDataRead = page.locator("#__NEXT_DATA__").textContent().catch(() => null);
    const bodyTextRead = page.evaluate(() => document.body?.innerText || "").catch(() => "");
    const [scriptText, bodyText] = await Promise.all([nextDataRead, bodyTextRead]);

    const fromNextData = extractZillowIdentifierHintsFromNextDataScript(scriptText || "");
    const fromVisibleText = extractZillowIdentifierHintsFromText(bodyText || "");
    const parcelId = fromNextData.parcelId || fromVisibleText.parcelId || null;
    const apn = fromNextData.apn || fromVisibleText.apn || null;

    let outcomeNote = "Zillow listing did not expose a parcel/APN identifier that could be extracted reliably.";
    if (parcelId) {
      outcomeNote = `Zillow listing exposed parcel/APN identifier ${parcelId}.`;
    } else if (apn) {
      outcomeNote = `Zillow listing exposed APN/tax identifier ${apn}.`;
    }

    return {
      source: "zillow",
      requestedUrl,
      finalUrl: page.url(),
      parcelId,
      apn,
      notes: [outcomeNote],
    };
  }

  try {
    return await runWithOperationTimeout("Zillow identifier extraction", collectHints, {
      timeoutMs: Number(options.timeoutMs || 0) || undefined,
      onTimeout: closeContext,
    });
  } finally {
    await closeContext();
  }
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point: extract identifier hints for the listing URL given as the
 * first command-line argument and print them to stdout as indented JSON.
 * Any failure is reported through the shared `fail` helper.
 */
async function main() {
  const [, , listingUrl] = process.argv;

  try {
    const hints = await extractZillowIdentifierHints(listingUrl);
    process.stdout.write(`${JSON.stringify(hints, null, 2)}\n`);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    fail("Zillow identifier extraction failed.", detail);
  }
}

// Run only when this file is the script Node was started with, so importing
// the module for `extractZillowIdentifierHints` has no side effects.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main();
}
|
||||||
@@ -58,6 +58,92 @@ export function extractZillowStructuredPhotoCandidatesFromNextDataScript(scriptT
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lower-cased JSON keys whose values are treated as parcel identifiers.
const ZILLOW_PARCEL_ID_KEYS = new Set(["parcelid", "parcelnumber"]);

// Lower-cased JSON keys whose values are treated as APN / tax identifiers.
const ZILLOW_APN_KEYS = new Set(["apn", "apnnumber", "taxparcelid", "taxid"]);

/**
 * Normalize a raw identifier value to a trimmed string with runs of
 * whitespace collapsed to a single space. Falsy input yields "".
 *
 * @param {*} value
 * @returns {string}
 */
function collapseIdentifier(value) {
  return String(value || "").replace(/\s+/g, " ").trim();
}

/**
 * Decide whether a value looks like a parcel/APN identifier: 4-40 characters
 * drawn from letters, digits and dashes, with at least one digit.
 *
 * The digit requirement rejects ordinary words ("size", "None", "details")
 * that otherwise satisfy the character-class check.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isLikelyIdentifier(value) {
  const candidate = collapseIdentifier(value);
  return /^[A-Z0-9-]{4,40}$/i.test(candidate) && /\d/.test(candidate);
}

/**
 * Recursively walk a parsed JSON value and record the first plausible parcel
 * and APN identifiers into `hints` (mutated in place).
 *
 * Keys are compared case-insensitively. A field that is already set is never
 * overwritten, so the first match in traversal order wins.
 * NOTE(review): if the payload also carries identifiers for other properties
 * (e.g. nearby homes), the first match may not be the subject listing —
 * confirm against real Zillow payloads.
 *
 * @param {*} node - Any parsed JSON value; non-objects are ignored.
 * @param {{ parcelId?: string, apn?: string }} hints - Accumulator.
 * @returns {void}
 */
function visitForIdentifierHints(node, hints) {
  if (!node || typeof node !== "object") return;

  if (Array.isArray(node)) {
    for (const item of node) {
      visitForIdentifierHints(item, hints);
    }
    return;
  }

  for (const [key, value] of Object.entries(node)) {
    if (typeof value === "string" || typeof value === "number") {
      const normalizedKey = key.toLowerCase();
      const candidate = collapseIdentifier(value);
      if (!isLikelyIdentifier(candidate)) continue;

      if (hints.parcelId == null && ZILLOW_PARCEL_ID_KEYS.has(normalizedKey)) {
        hints.parcelId = candidate;
      }
      if (hints.apn == null && ZILLOW_APN_KEYS.has(normalizedKey)) {
        hints.apn = candidate;
      }
    } else if (value && typeof value === "object") {
      visitForIdentifierHints(value, hints);
    }
  }
}

/**
 * Extract parcel/APN hints from the text of Zillow's `__NEXT_DATA__` script.
 *
 * The parsed document is searched first. Then, if
 * `props.pageProps.componentProps.gdpClientCache` is a non-empty string, it
 * is parsed as JSON and searched as well; values found in the outer document
 * take precedence.
 *
 * @param {string} scriptText - Raw JSON text of the script element.
 * @returns {{ parcelId?: string, apn?: string }} Empty object when the input
 *   is not a non-blank string or is not valid JSON.
 */
export function extractZillowIdentifierHintsFromNextDataScript(scriptText) {
  if (typeof scriptText !== "string" || !scriptText.trim()) {
    return {};
  }

  let nextData;
  try {
    nextData = JSON.parse(scriptText);
  } catch {
    return {};
  }

  const hints = {};
  visitForIdentifierHints(nextData, hints);

  const cacheText = nextData?.props?.pageProps?.componentProps?.gdpClientCache;
  if (typeof cacheText === "string" && cacheText.trim()) {
    try {
      visitForIdentifierHints(JSON.parse(cacheText), hints);
    } catch {
      // Ignore cache parse failures; base next-data parse already succeeded.
    }
  }

  return hints;
}

/**
 * Extract parcel/APN hints from visible listing text by looking for labelled
 * values such as "Parcel number: 1234-5678" or "Tax ID: 77-55-123".
 *
 * The captured token must contain a digit. The `(?=[A-Z0-9-]*\d)` lookahead
 * makes the regex skip labels followed by a plain word ("Parcel size",
 * "APN details") and continue to a later, real identifier instead of
 * stopping at the first label it sees.
 *
 * @param {string} text - Visible page text; non-strings are treated as "".
 * @returns {{ parcelId?: string, apn?: string }}
 */
export function extractZillowIdentifierHintsFromText(text) {
  const source = typeof text === "string" ? text : "";
  const hints = {};

  const parcelMatch = source.match(
    /\b(?:parcel|parcel number|parcel #|tax parcel)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*((?=[A-Z0-9-]*\d)[A-Z0-9-]{4,40})\b/i
  );
  if (parcelMatch) {
    const candidate = collapseIdentifier(parcelMatch[1]);
    // Re-validate so text and structured extraction share one acceptance rule.
    if (isLikelyIdentifier(candidate)) {
      hints.parcelId = candidate;
    }
  }

  const apnMatch = source.match(
    /\b(?:apn|apn #|apn no\.?|tax id)(?:\s*(?:number|#|no\.?))?\s*[:#]?\s*((?=[A-Z0-9-]*\d)[A-Z0-9-]{4,40})\b/i
  );
  if (apnMatch) {
    const candidate = collapseIdentifier(apnMatch[1]);
    if (isLikelyIdentifier(candidate)) {
      hints.apn = candidate;
    }
  }

  return hints;
}
|
||||||
|
|
||||||
const DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT = 12;
|
const DEFAULT_MINIMUM_TRUSTED_STRUCTURED_PHOTO_COUNT = 12;
|
||||||
|
|
||||||
export function shouldUseStructuredZillowPhotos(candidates, options = {}) {
|
export function shouldUseStructuredZillowPhotos(candidates, options = {}) {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import assert from "node:assert/strict";
|
|||||||
|
|
||||||
import {
|
import {
|
||||||
extractZillowStructuredPhotoCandidatesFromNextDataScript,
|
extractZillowStructuredPhotoCandidatesFromNextDataScript,
|
||||||
|
extractZillowIdentifierHintsFromNextDataScript,
|
||||||
shouldUseStructuredZillowPhotos,
|
shouldUseStructuredZillowPhotos,
|
||||||
} from "./zillow-photo-data.js";
|
} from "./zillow-photo-data.js";
|
||||||
|
|
||||||
@@ -119,3 +120,27 @@ test("shouldUseStructuredZillowPhotos returns false for a tiny structured set wh
|
|||||||
false
|
false
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("extractZillowIdentifierHintsFromNextDataScript finds parcel and APN style identifiers", () => {
|
||||||
|
const scriptText = JSON.stringify({
|
||||||
|
props: {
|
||||||
|
pageProps: {
|
||||||
|
componentProps: {
|
||||||
|
gdpClientCache: JSON.stringify({
|
||||||
|
SomeQuery: {
|
||||||
|
property: {
|
||||||
|
parcelId: "1234567890",
|
||||||
|
apn: "APN-77-55",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.deepEqual(extractZillowIdentifierHintsFromNextDataScript(scriptText), {
|
||||||
|
parcelId: "1234567890",
|
||||||
|
apn: "APN-77-55",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user