Enrich property assessor with CAD detail data

This commit is contained in:
2026-03-28 02:07:18 -05:00
parent b1722a04fa
commit 7690dc259b
6 changed files with 561 additions and 13 deletions

View File

@@ -9,6 +9,21 @@ export const TEXAS_PROPERTY_TAX_PORTAL = "https://texas.gov/PropertyTaxes";
/**
 * Error type thrown by the public-records lookup code, e.g. when an HTTP
 * request comes back with a non-OK status.
 *
 * `name` is set explicitly so that logs, `toString()` output, and serialized
 * errors identify this class instead of the generic "Error" inherited from
 * the base class. Constructor arguments are forwarded unchanged to `Error`.
 */
export class PublicRecordsLookupError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    this.name = "PublicRecordsLookupError";
  }
}
/**
 * Property-level detail record attached to a public-records resolution.
 *
 * Every nullable field is `null` when the corresponding value could not be
 * obtained from the source.
 */
export interface PropertyDetailsResolution {
/** Identifier of the backend that produced the record (the producer visible in this file sets "nueces-esearch"). */
source: string;
/** URL of the page the details were read from. */
sourceUrl: string;
/** Property identifier as reported by the source, or null. */
propertyId: string | null;
/** Owner name as reported by the source, or null. */
ownerName: string | null;
/** Situs (physical location) address of the property, or null. */
situsAddress: string | null;
/** Legal description text, or null. */
legalDescription: string | null;
/** Parsed numeric land value, or null. */
landValue: number | null;
/** Parsed numeric improvement value, or null. */
improvementValue: number | null;
/** Parsed numeric market value, or null. */
marketValue: number | null;
/** Parsed numeric assessed total value, or null. */
assessedTotalValue: number | null;
/** Exemption labels found for the property; empty when none were found. */
exemptions: string[];
/** Free-form notes about how the record was obtained. */
notes: string[];
}
export interface PublicRecordsResolution {
requestedAddress: string;
matchedAddress: string;
@@ -34,17 +49,27 @@ export interface PublicRecordsResolution {
taxAssessorCollector: Record<string, unknown> | null;
lookupRecommendations: string[];
sourceIdentifierHints: Record<string, string>;
propertyDetails: PropertyDetailsResolution | null;
}
/**
 * Optional request settings accepted by a text fetcher.
 * All fields are optional; a fetcher called without them performs a plain request.
 */
interface FetchTextInit {
/** Request body, already serialized to a string. */
body?: string;
/** Extra request headers to send. */
headers?: Record<string, string>;
/** HTTP method name (for example "POST"). */
method?: string;
}
interface FetchLike {
(url: string): Promise<string>;
(url: string, init?: FetchTextInit): Promise<string>;
}
const defaultFetchText: FetchLike = async (url) => {
const defaultFetchText: FetchLike = async (url, init = {}) => {
const response = await fetch(url, {
body: init.body,
headers: {
"user-agent": "property-assessor/1.0"
}
"user-agent": "property-assessor/1.0",
...(init.headers || {})
},
method: init.method || "GET"
});
if (!response.ok) {
throw new PublicRecordsLookupError(`Request failed for ${url}: ${response.status}`);
@@ -72,6 +97,8 @@ function stripHtml(value: string): string {
.replace(/&amp;/gi, "&")
.replace(/&quot;/gi, '"')
.replace(/&#39;/g, "'")
.replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16)))
.replace(/&#(\d+);/g, (_, number) => String.fromCodePoint(Number.parseInt(number, 10)))
.replace(/&lt;/gi, "<")
.replace(/&gt;/gi, ">");
output = collapseWhitespace(output.replace(/\n/g, ", "));
@@ -109,6 +136,366 @@ function extractAnchorHref(fragment: string): string | null {
return href;
}
/**
 * Turns a loosely written website value into an absolute URL string.
 *
 * The input is whitespace-collapsed first. An empty result, or one that
 * already starts with `http://` or `https://`, is returned as-is. Anything
 * else has its leading slashes removed and gets an `https://` prefix.
 *
 * @param rawUrl Website text as found in the source data.
 * @returns The cleaned value, prefixed with `https://` when it had no scheme.
 */
function normalizeUrl(rawUrl: string): string {
  const cleaned = collapseWhitespace(rawUrl);
  const needsScheme = Boolean(cleaned) && !/^https?:\/\//i.test(cleaned);
  return needsScheme ? "https://" + cleaned.replace(/^\/+/, "") : cleaned;
}
/**
 * Decodes the HTML entities this module handles: numeric references
 * (`&#NN;`, `&#xHH;`) and the named entities `&amp;`, `&quot;`, `&lt;`, `&gt;`.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Numeric references
 * outside the Unicode range are left untouched instead of letting
 * `String.fromCodePoint` throw a RangeError on malformed markup.
 *
 * @param value Text that may contain HTML entities.
 * @returns The text with the supported entities replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  const namedEntities: Record<string, string> = {
    amp: "&",
    quot: '"',
    lt: "<",
    gt: ">"
  };
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(amp|quot|lt|gt));/gi,
    (entity: string, hex?: string, decimal?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name.toLowerCase()] ?? entity;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(decimal ?? "", 10);
      // 0x10FFFF is the highest valid code point; anything above would throw.
      return Number.isInteger(codePoint) && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    }
  );
}
/**
 * Converts a displayed currency string into a number.
 *
 * The input is whitespace-collapsed, then every character other than digits,
 * `.` and `-` is dropped before numeric conversion.
 *
 * @param value Text such as a formatted dollar amount; may be null/undefined.
 * @returns The parsed number, or null when the input is empty, contains no
 *          numeric characters, or does not convert to a finite number.
 */
function parseCurrencyValue(value: string | null | undefined): number | null {
  const cleaned = collapseWhitespace(value);
  const digits = cleaned ? cleaned.replace(/[^0-9.-]/g, "") : "";
  if (!digits) {
    return null;
  }
  const amount = Number(digits);
  if (!Number.isFinite(amount)) {
    return null;
  }
  return amount;
}
/**
 * Reads the search year from the search home page markup.
 *
 * Looks for a `"DefaultYear": NNNN` entry (case-insensitive key, four-digit
 * unquoted value). When none is present, the current calendar year is used.
 *
 * @param searchHomeHtml Markup of the property search landing page.
 * @returns The configured default year, or the current year as a fallback.
 */
function parseCurrentYearFromSearchHome(searchHomeHtml: string): number {
  const yearText = /"DefaultYear"\s*:\s*(\d{4})/i.exec(searchHomeHtml)?.[1];
  return yearText === undefined ? new Date().getFullYear() : Number(yearText);
}
/**
 * Builds the free-text keyword string for a CAD search: the
 * whitespace-collapsed address followed by a `Year:NNNN` qualifier.
 *
 * @param address Address text to search for.
 * @param year Year to scope the search to.
 * @returns The trimmed keyword string.
 */
function buildCadSearchKeywords(address: string, year: number): string {
  const cleanedAddress = collapseWhitespace(address);
  const keywords = cleanedAddress + " Year:" + year;
  return keywords.trim();
}
/** Street-type words dropped from the end of a street name when building a CAD search. */
const CAD_STREET_SUFFIXES: ReadonlySet<string> = new Set([
  "rd",
  "road",
  "dr",
  "drive",
  "st",
  "street",
  "ave",
  "avenue",
  "blvd",
  "boulevard",
  "ct",
  "court",
  "cir",
  "circle",
  "ln",
  "lane",
  "trl",
  "trail",
  "way",
  "pkwy",
  "parkway",
  "pl",
  "place",
  "ter",
  "terrace",
  "loop",
  "hwy",
  "highway"
]);

/**
 * Regex source for a unit designator followed by its value (capture group 1).
 *
 * - A keyword (`apt`, `apartment`, `unit`, `suite`, `ste`) must start at a word
 *   boundary and be followed by whitespace, `.`, `#`, or a digit, so street
 *   names such as "Stewart" or "Unity" are not mistaken for units.
 * - A bare `#` is accepted anywhere. It is deliberately NOT preceded by `\b`,
 *   because `\b` cannot match between a space and `#`.
 */
const CAD_UNIT_DESIGNATOR_SOURCE =
  "(?:\\b(?:apartment|apt|unit|suite|ste)(?=[\\s.#\\d])\\.?\\s*#?\\s*|#\\s*)([a-z0-9-]+)";

/**
 * Splits a postal address into the pieces used for a structured CAD search.
 *
 * Only the text before the first comma is examined. The unit designator (if
 * any) is extracted and removed, the leading house number is taken, and
 * trailing street-type words ("St", "Dr.", ...) are dropped from the street
 * name. At least one street token is always kept, so a street whose name is
 * itself a suffix word ("Court St") still yields a usable name.
 *
 * @param address Free-form address text.
 * @returns `rawAddress` (whitespace-collapsed input), `streetNumber`,
 *          `streetName`, and upper-cased `unit`; each of the last three is
 *          null when it could not be determined.
 */
function parseAddressForCadSearch(address: string): {
  rawAddress: string;
  streetNumber: string | null;
  streetName: string | null;
  unit: string | null;
} {
  const rawAddress = collapseWhitespace(address);
  const streetPart = collapseWhitespace(rawAddress.split(",")[0] || rawAddress);

  const unitMatch = streetPart.match(new RegExp(CAD_UNIT_DESIGNATOR_SOURCE, "i"));
  const unit = unitMatch?.[1]?.toUpperCase() ?? null;
  const withoutUnit = collapseWhitespace(
    streetPart.replace(new RegExp(CAD_UNIT_DESIGNATOR_SOURCE, "gi"), "")
  );

  // The trailing \b keeps ordinal street names ("5th Street") from being
  // split into a bogus house number "5t" plus "h Street".
  const numberMatch = withoutUnit.match(/^(\d+[a-z]?)\b/i);
  const streetNumber = numberMatch?.[1] ?? null;

  const streetTokens = withoutUnit
    .replace(/^\d+[a-z]?\b\s*/i, "")
    .split(/\s+/)
    .filter(Boolean);
  const isSuffix = (token: string): boolean =>
    CAD_STREET_SUFFIXES.has(token.toLowerCase().replace(/\.$/, ""));
  // Stop at one remaining token so the street name is never emptied out.
  while (streetTokens.length > 1 && isSuffix(streetTokens[streetTokens.length - 1] ?? "")) {
    streetTokens.pop();
  }

  return {
    rawAddress,
    streetNumber,
    streetName: streetTokens.length ? streetTokens.join(" ") : null,
    unit
  };
}
/**
 * Pulls the search token out of a page's `<meta name="search-token">` tag.
 *
 * @param searchHomeHtml Markup of a search page.
 * @returns The entity-decoded `content` attribute value, or null when the
 *          meta tag is not found.
 */
function extractSearchToken(searchHomeHtml: string): string | null {
  const tokenPattern = /meta name="search-token" content="([^"]+)"/i;
  const found = tokenPattern.exec(searchHomeHtml);
  if (found === null) {
    return null;
  }
  return decodeHtmlEntities(found[1]);
}
/**
 * Finds the property-search link on an appraisal district homepage.
 *
 * Two patterns are tried in order and the first hit wins:
 *  1. any double-quoted `href` that is an `https://` URL containing "esearch";
 *  2. the `href` of a tag whose text is "SEARCH NOW" or "Property Search".
 * Matching is case-insensitive.
 *
 * @param homepageHtml Markup of the homepage.
 * @returns The raw `href` value, or null when neither pattern matches.
 */
function extractPropertySearchUrl(homepageHtml: string): string | null {
  const candidates = [
    /href="(https:\/\/[^"]*esearch[^"]*)"/i,
    /href="([^"]+)"[^>]*>\s*(?:SEARCH NOW|Property Search)\s*</i
  ];
  for (const pattern of candidates) {
    const hit = pattern.exec(homepageHtml);
    if (hit) {
      return hit[1];
    }
  }
  return null;
}
/**
 * Extracts the value shown next to a label on a property detail page.
 *
 * The label is regex-escaped and tried against three markup layouts, in order:
 *  1. a label `<div>` immediately followed by a value `<div>`;
 *  2. a `<strong>` label followed by inline text up to `<br>`, `</p>` or `</div>`;
 *  3. the label closing any tag, followed by any element holding the value.
 * Patterns are case-insensitive and let `.` span newlines.
 *
 * @param detailHtml Markup of the detail page.
 * @param label Visible label text to look for.
 * @returns The captured value passed through `stripHtml`, or null when no
 *          layout matches.
 */
function extractDetailField(detailHtml: string, label: string): string | null {
  const safeLabel = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const layoutSources = [
    `<div[^>]*>\\s*${safeLabel}\\s*<\\/div>\\s*<div[^>]*>(.*?)<\\/div>`,
    `<strong>\\s*${safeLabel}\\s*:?\\s*<\\/strong>\\s*(.*?)(?:<br\\s*\\/?>|<\\/p>|<\\/div>)`,
    `${safeLabel}\\s*:?\\s*<\\/[^>]+>\\s*<[^>]+>(.*?)<\\/[^>]+>`
  ];
  for (const source of layoutSources) {
    const found = new RegExp(source, "is").exec(detailHtml);
    if (found !== null) {
      return stripHtml(found[1]);
    }
  }
  return null;
}
/**
 * Reads the "Exemptions" field from a detail page and splits it into
 * individual entries.
 *
 * Entries are separated on `,`, `;` or `|`, whitespace-collapsed, and empty
 * entries are discarded.
 *
 * @param detailHtml Markup of the detail page.
 * @returns The exemption entries, or an empty array when the field is
 *          missing or empty.
 */
function extractExemptions(detailHtml: string): string[] {
  const exemptionText = extractDetailField(detailHtml, "Exemptions");
  const exemptions: string[] = [];
  if (exemptionText) {
    for (const piece of exemptionText.split(/[,;|]/)) {
      const cleaned = collapseWhitespace(piece);
      if (cleaned) {
        exemptions.push(cleaned);
      }
    }
  }
  return exemptions;
}
/**
 * Scores how well a candidate text matches a requested address.
 *
 * Both inputs are whitespace-collapsed and lower-cased. Each whitespace- or
 * comma-separated token of the address that occurs in the candidate adds
 * 2 points (token longer than 3 characters) or 1 point (shorter). If the
 * address names a unit, the candidate gains 4 points when it contains the
 * unit value and loses 4 when it does not.
 *
 * Unit detection requires a designator keyword (`apt`, `apartment`, `unit`,
 * `suite`, `ste`) at a word boundary followed by whitespace, `.`, `#` or a
 * digit, or a bare `#`. This keeps street names like "Stewart" from being
 * read as unit "wart", and lets "#4" after a space be recognised (a `\b`
 * in front of `#` can never match there).
 *
 * @param needle Requested address.
 * @param haystack Candidate text to compare against.
 * @returns The score; 0 when either input is empty after normalisation.
 */
function scoreAddressMatch(needle: string, haystack: string): number {
  const normalizedNeedle = collapseWhitespace(needle).toLowerCase();
  const normalizedHaystack = collapseWhitespace(haystack).toLowerCase();
  if (!normalizedNeedle || !normalizedHaystack) return 0;

  let score = 0;
  const tokens = normalizedNeedle.split(/[\s,]+/).filter(Boolean);
  for (const token of tokens) {
    if (normalizedHaystack.includes(token)) {
      score += token.length > 3 ? 2 : 1;
    }
  }

  const unitMatch = normalizedNeedle.match(
    /(?:\b(?:apartment|apt|unit|suite|ste)(?=[\s.#\d])\.?\s*#?\s*|#\s*)([a-z0-9-]+)/i
  );
  const unitValue = unitMatch?.[1];
  if (unitValue !== undefined) {
    // NOTE(review): this is a substring test, so unit "4" also matches "14";
    // left as-is because the ranking was presumably tuned with it — confirm.
    score += normalizedHaystack.includes(unitValue.toLowerCase()) ? 4 : -4;
  }
  return score;
}
/**
 * Chooses the search result that best matches the requested address.
 *
 * For every result, the `address`, `legalDescription`, `ownerName`, `condo`,
 * `geoId` and `propertyId` fields are stringified, whitespace-collapsed and
 * joined with spaces, then scored with `scoreAddressMatch`. The highest
 * scoring result wins; on a tie the earliest result in the list is kept.
 *
 * @param address Requested address.
 * @param results Raw result records from the CAD search.
 * @returns The best result, or null when the list is empty or no result
 *          scores above zero.
 */
function pickBestCadResult(
  address: string,
  results: Array<Record<string, unknown>>
): Record<string, unknown> | null {
  let winner: Record<string, unknown> | null = null;
  let winnerScore = Number.NEGATIVE_INFINITY;
  for (const candidate of results) {
    const fields = [
      candidate.address,
      candidate.legalDescription,
      candidate.ownerName,
      candidate.condo,
      candidate.geoId,
      candidate.propertyId
    ];
    const candidateText = fields
      .map((field) => collapseWhitespace(String(field || "")))
      .join(" ");
    const candidateScore = scoreAddressMatch(address, candidateText);
    // Strict comparison keeps the first of several equally scored results.
    if (candidateScore > winnerScore) {
      winner = candidate;
      winnerScore = candidateScore;
    }
  }
  return winner !== null && winnerScore > 0 ? winner : null;
}
/**
 * Looks up a property on the appraisal district's e-search site and returns
 * the details shown on its property page.
 *
 * Steps, in order:
 *  1. fetch the district homepage and locate the property-search link;
 *  2. fetch the search landing page for its search token and default year;
 *  3. query the search-results endpoint page by page (at most 8 pages) until a
 *     result matches the address (and the unit, when the address has one);
 *  4. fetch the matching property's detail page and extract the fields.
 *
 * @param address Requested property address.
 * @param appraisalDistrictWebsite Website of the appraisal district.
 * @param fetchText Text fetcher used for page and JSON requests.
 * @returns The extracted details, or null when the search link, the search
 *          token, or a matching result cannot be found.
 *          Network and JSON parsing errors are not caught here.
 */
async function enrichNuecesCadPropertyDetails(
address: string,
appraisalDistrictWebsite: string,
fetchText: FetchLike
): Promise<PropertyDetailsResolution | null> {
const parsedAddress = parseAddressForCadSearch(address);
const homepageUrl = normalizeUrl(appraisalDistrictWebsite);
const homepageHtml = await fetchText(homepageUrl);
const propertySearchUrl = extractPropertySearchUrl(homepageHtml);
if (!propertySearchUrl) return null;
// Trailing slashes are stripped so the path segments below can be appended uniformly.
const normalizedPropertySearchUrl = normalizeUrl(propertySearchUrl).replace(/\/+$/, "");
const searchHomeHtml = await fetchText(`${normalizedPropertySearchUrl}/`);
const searchToken = extractSearchToken(searchHomeHtml);
if (!searchToken) return null;
const searchYear = parseCurrentYearFromSearchHome(searchHomeHtml);
// Prefer a structured street-number/street-name query; fall back to the
// free-text address plus year when the address could not be split.
const searchKeywords =
parsedAddress.streetNumber && parsedAddress.streetName
? `StreetNumber:${parsedAddress.streetNumber} StreetName:"${parsedAddress.streetName}"`
: buildCadSearchKeywords(address, searchYear);
// Fetches one page of search results as parsed JSON.
const fetchSearchPage = async (page: number): Promise<any> => {
const searchResultsUrl = `${normalizedPropertySearchUrl}/search/SearchResults?keywords=${encodeURIComponent(searchKeywords)}`;
// With the default fetcher a three-request handshake is performed using the
// global fetch directly: session token -> result page (cookies + fresh
// search token) -> JSON POST. Any other (injected) fetcher takes the single
// POST further down.
// NOTE(review): none of the three responses is checked for `ok` before its
// body is read — confirm whether that is intended.
if (fetchText === defaultFetchText) {
const sessionTokenResponse = await fetch(
`${normalizedPropertySearchUrl}/search/requestSessionToken`,
{
headers: {
"user-agent": "property-assessor/1.0"
}
}
);
const sessionTokenPayload = await sessionTokenResponse.json();
const searchSessionToken = sessionTokenPayload?.searchSessionToken;
const resultUrl = `${normalizedPropertySearchUrl}/search/result?keywords=${encodeURIComponent(searchKeywords)}&searchSessionToken=${encodeURIComponent(String(searchSessionToken || ""))}`;
const resultResponse = await fetch(resultUrl, {
headers: {
"user-agent": "property-assessor/1.0"
}
});
// Keep only the name=value part of each Set-Cookie header and replay them
// on the JSON request.
const cookieHeader = (resultResponse.headers.getSetCookie?.() || [])
.map((item) => item.split(";", 1)[0])
.join("; ");
const resultPageHtml = await resultResponse.text();
// Prefer the token embedded in the result page; fall back to the one from
// the search landing page.
const liveSearchToken = extractSearchToken(resultPageHtml) || searchToken;
const jsonResponse = await fetch(searchResultsUrl, {
body: JSON.stringify({
page,
pageSize: 25,
isArb: false,
recaptchaToken: "",
searchToken: liveSearchToken
}),
headers: {
"content-type": "application/json",
cookie: cookieHeader,
referer: resultUrl,
"user-agent": "property-assessor/1.0"
},
method: "POST"
});
return await jsonResponse.json();
}
const searchResultsRaw = await fetchText(searchResultsUrl, {
body: JSON.stringify({
page,
pageSize: 25,
isArb: false,
recaptchaToken: "",
searchToken
}),
headers: {
"content-type": "application/json"
},
method: "POST"
});
return JSON.parse(searchResultsRaw);
};
const firstPage = await fetchSearchPage(1);
// Never walk more than 8 result pages.
const totalPages = Math.min(Number(firstPage?.totalPages || 1), 8);
const collectedResults: Array<Record<string, unknown>> = Array.isArray(firstPage?.resultsList)
? [...firstPage.resultsList]
: [];
let bestResult = pickBestCadResult(address, collectedResults);
// When the address names a unit, the best result is accepted only if its
// legal description contains "UNIT <unit>"; otherwise keep paging.
// NOTE(review): this is a substring test, so unit "1" also accepts "UNIT 10".
if (parsedAddress.unit && !String(bestResult?.legalDescription || "").toUpperCase().includes(`UNIT ${parsedAddress.unit}`)) {
bestResult = null;
}
// Pull further pages only while no acceptable result has been found; the
// best result is re-picked over everything collected so far.
for (let page = 2; !bestResult && page <= totalPages; page += 1) {
const nextPage = await fetchSearchPage(page);
if (Array.isArray(nextPage?.resultsList)) {
collectedResults.push(...nextPage.resultsList);
bestResult = pickBestCadResult(address, collectedResults);
if (parsedAddress.unit && !String(bestResult?.legalDescription || "").toUpperCase().includes(`UNIT ${parsedAddress.unit}`)) {
bestResult = null;
}
}
}
if (!bestResult) return null;
// Use the result's own detailUrl unless it is empty or carries an `Id=`
// query parameter; in those cases build the /property/view/ URL from the
// property id, year and owner id.
const detailPath = collapseWhitespace(String(bestResult.detailUrl || ""));
const canUseDetailPath = Boolean(detailPath) && !/[?&]Id=/i.test(detailPath);
const detailUrl = canUseDetailPath
? new URL(detailPath, `${normalizedPropertySearchUrl}/`).toString()
: new URL(
`/property/view/${encodeURIComponent(String(bestResult.propertyId || ""))}?year=${encodeURIComponent(String(bestResult.year || searchYear))}&ownerId=${encodeURIComponent(String(bestResult.ownerId || ""))}`,
`${normalizedPropertySearchUrl}/`
).toString();
const detailHtml = await fetchText(detailUrl);
// Each field prefers the value from the detail page and falls back to the
// search-result record.
// NOTE(review): the `||` chains treat a parsed value of 0 as missing, and
// Number(null) is 0 (finite), so a null appraisedValue would be reported as
// 0 — confirm both are acceptable.
return {
source: "nueces-esearch",
sourceUrl: detailUrl,
propertyId: collapseWhitespace(String(bestResult.propertyId || "")) || null,
ownerName:
extractDetailField(detailHtml, "Owner Name") ||
collapseWhitespace(String(bestResult.ownerName || "")) ||
null,
situsAddress:
extractDetailField(detailHtml, "Situs Address") ||
extractDetailField(detailHtml, "Address") ||
collapseWhitespace(String(bestResult.address || "")) ||
null,
legalDescription:
extractDetailField(detailHtml, "Legal Description") ||
collapseWhitespace(String(bestResult.legalDescription || "")) ||
null,
landValue: parseCurrencyValue(extractDetailField(detailHtml, "Land Value")),
improvementValue: parseCurrencyValue(extractDetailField(detailHtml, "Improvement Value")),
marketValue:
parseCurrencyValue(extractDetailField(detailHtml, "Market Value")) ||
(Number.isFinite(Number(bestResult.appraisedValue))
? Number(bestResult.appraisedValue)
: parseCurrencyValue(String(bestResult.appraisedValueDisplay || ""))),
assessedTotalValue:
parseCurrencyValue(extractDetailField(detailHtml, "Assessed Value")) ||
parseCurrencyValue(extractDetailField(detailHtml, "Appraised Value")) ||
(Number.isFinite(Number(bestResult.appraisedValue))
? Number(bestResult.appraisedValue)
: parseCurrencyValue(String(bestResult.appraisedValueDisplay || ""))),
exemptions: extractExemptions(detailHtml),
notes: [
"Official CAD property detail page exposed owner, value, and exemption data."
]
};
}
/**
 * Best-effort wrapper around the CAD detail lookup.
 *
 * The lookup runs only when the district website, after normalisation and
 * lower-casing, contains "nuecescad.net" or "ncadistrict.com". Any error
 * raised by the lookup is swallowed and reported as "no details".
 *
 * @param address Requested property address.
 * @param appraisalDistrictWebsite District website, or null when unknown.
 * @param fetchText Text fetcher passed through to the lookup.
 * @returns The property details, or null when the website is missing, is
 *          not a supported district, or the lookup fails.
 */
async function tryEnrichPropertyDetails(
  address: string,
  appraisalDistrictWebsite: string | null,
  fetchText: FetchLike
): Promise<PropertyDetailsResolution | null> {
  const site = collapseWhitespace(appraisalDistrictWebsite);
  if (!site) {
    return null;
  }
  const loweredSite = normalizeUrl(site).toLowerCase();
  const supportedHosts = ["nuecescad.net", "ncadistrict.com"];
  const isSupported = supportedHosts.some((host) => loweredSite.includes(host));
  if (!isSupported) {
    return null;
  }
  try {
    return await enrichNuecesCadPropertyDetails(address, site, fetchText);
  } catch {
    // Enrichment is optional: a failed lookup must not fail the caller.
    return null;
  }
}
async function geocodeAddress(address: string, fetchText: FetchLike): Promise<{
match: any;
censusGeocoderUrl: string;
@@ -321,6 +708,7 @@ export async function resolvePublicRecords(
let texasPropertyTaxPortal: string | null = null;
let appraisalDistrict: Record<string, unknown> | null = null;
let taxAssessorCollector: Record<string, unknown> | null = null;
let propertyDetails: PropertyDetailsResolution | null = null;
const lookupRecommendations = [
"Start from the official public-record jurisdiction instead of a listing-site geo ID.",
@@ -339,10 +727,23 @@ export async function resolvePublicRecords(
texasPropertyTaxPortal = TEXAS_PROPERTY_TAX_PORTAL;
appraisalDistrict = offices.appraisalDistrict;
taxAssessorCollector = offices.taxAssessorCollector;
propertyDetails = await tryEnrichPropertyDetails(
address,
typeof offices.appraisalDistrict?.Website === "string"
? offices.appraisalDistrict.Website
: null,
fetchText
);
lookupRecommendations.push(
"Use the Texas Comptroller county directory page as the official jurisdiction link in the final report.",
"Attempt to retrieve assessed value, land value, improvement value, exemptions, and account number from the CAD website when a direct property page is publicly accessible."
);
if (propertyDetails) {
lookupRecommendations.push(
...propertyDetails.notes,
"Use the official CAD property-detail values in the final assessment instead of relying only on listing-site value hints."
);
}
}
const sourceIdentifierHints: Record<string, string> = {};
@@ -381,6 +782,7 @@ export async function resolvePublicRecords(
appraisalDistrict,
taxAssessorCollector,
lookupRecommendations,
sourceIdentifierHints
sourceIdentifierHints,
propertyDetails
};
}