Port property assessor helpers to TypeScript

This commit is contained in:
2026-03-27 22:23:58 -05:00
parent 954374ce48
commit e6d987d725
14 changed files with 2155 additions and 202 deletions

View File

@@ -0,0 +1,292 @@
/**
 * U.S. Census geocoder endpoint queried with a one-line address; the
 * "geographies" variant is used so the response carries state, county and
 * census-block records alongside the matched address.
 */
export const CENSUS_GEOCODER_URL =
"https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress";
/**
 * Texas Comptroller county directory index page. It is fetched to locate a
 * county's own directory page and also serves as the base for relative
 * county page links found in it.
 */
export const TEXAS_COUNTY_DIRECTORY_URL =
"https://comptroller.texas.gov/taxes/property-tax/county-directory/";
/** Property tax portal link attached to results whose matched state is Texas. */
export const TEXAS_PROPERTY_TAX_PORTAL = "https://texas.gov/PropertyTaxes";
/**
 * Error raised when a public-records lookup step cannot produce a result,
 * e.g. a failed HTTP request, no geocoder match, or a directory page that
 * cannot be parsed.
 *
 * `name` is set explicitly so that logs, stack traces and `String(err)` show
 * "PublicRecordsLookupError" instead of the generic "Error". The constructor
 * is inherited unchanged from `Error`.
 */
export class PublicRecordsLookupError extends Error {
  override name = "PublicRecordsLookupError";
}
/**
 * Result of resolving a street address to its public-record jurisdiction:
 * the geocoder match, state/county identifiers, official links, and (for
 * Texas matches) the county office details parsed from the county directory.
 */
export interface PublicRecordsResolution {
/** Address exactly as supplied by the caller. */
requestedAddress: string;
/** Address reported by the geocoder match; falls back to the requested address. */
matchedAddress: string;
/** Geocoder `coordinates.y`, or null when the match has no coordinates. */
latitude: number | null;
/** Geocoder `coordinates.x`, or null when the match has no coordinates. */
longitude: number | null;
/** GEOID of the first "2020 Census Blocks" record, or null when absent. */
geoid: string | null;
/** Values from the first "States" geography record (NAME, STUSAB, STATE). */
state: {
name: string | null;
code: string | null;
fips: string | null;
};
/** Values from the first "Counties" geography record (NAME, COUNTY, GEOID). */
county: {
name: string | null;
fips: string | null;
geoid: string | null;
};
officialLinks: {
/** Full geocoder request URL used for this lookup. */
censusGeocoder: string;
/** County directory page URL; populated only for Texas matches. */
texasCountyDirectory: string | null;
/** Texas property tax portal URL; populated only for Texas matches. */
texasPropertyTaxPortal: string | null;
};
/** Appraisal District fields parsed from the county directory page; null outside Texas. */
appraisalDistrict: Record<string, unknown> | null;
/** Tax Assessor/Collector fields parsed from the same page; null outside Texas or when that section is not found. */
taxAssessorCollector: Record<string, unknown> | null;
/** Human-readable guidance strings on how to continue the records search. */
lookupRecommendations: string[];
/** Caller-supplied identifiers (parcelId, listingGeoId, listingSourceUrl) echoed back when provided. */
sourceIdentifierHints: Record<string, string>;
}
/**
 * Minimal text fetcher: takes a URL and resolves to the response body as a
 * string. `resolvePublicRecords` accepts one through `options.fetchText`,
 * which replaces the default network-backed implementation.
 */
interface FetchLike {
(url: string): Promise<string>;
}
/**
 * Default {@link FetchLike}: requests `url` with the global `fetch`, sending
 * a fixed user-agent header, and resolves to the response body text.
 *
 * @throws PublicRecordsLookupError when the response status is not OK.
 */
const defaultFetchText: FetchLike = async (url) => {
  const requestHeaders = { "user-agent": "property-assessor/1.0" };
  const res = await fetch(url, { headers: requestHeaders });
  if (res.ok) {
    const body = await res.text();
    return body;
  }
  throw new PublicRecordsLookupError(`Request failed for ${url}: ${res.status}`);
};
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param value Text to normalize; `null`, `undefined` and `""` all yield `""`.
 * @returns The normalized text.
 */
function collapseWhitespace(value: string | null | undefined): string {
  if (!value) {
    return "";
  }
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Builds a comparison key for a county name: whitespace-collapsed,
 * lower-cased, with the first " county" word removed and every character
 * other than a-z / 0-9 dropped.
 *
 * @param value County name in any casing or spacing.
 * @returns The comparison key.
 */
function normalizeCountyName(value: string): string {
  const lowered = collapseWhitespace(value).toLowerCase();
  const withoutSuffix = lowered.replace(/ county\b/, "");
  return withoutSuffix.replace(/[^a-z0-9]+/g, "");
}
/**
 * Converts an HTML fragment to a single line of plain text.
 *
 * `<br>` tags become ", " separators, all other tags are removed, a small
 * set of character references is decoded, whitespace is collapsed, and
 * duplicate / leading / trailing commas are cleaned up.
 *
 * Entities are decoded in ONE pass. Decoding them with sequential replaces
 * (with `&amp;` first) double-decodes escaped text: `&amp;lt;` would become
 * `&lt;` and then `<`, instead of the literal text `&lt;`.
 *
 * @param value HTML fragment.
 * @returns Plain text with line breaks rendered as ", ".
 */
function stripHtml(value: string): string {
  const entityText: Record<string, string> = {
    "&nbsp;": " ",
    "&amp;": "&",
    "&quot;": '"',
    "&#39;": "'",
    "&lt;": "<",
    "&gt;": ">"
  };

  const withoutTags = value
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<[^>]+>/g, "");

  // Single pass: text produced by one replacement is never rescanned.
  const decoded = withoutTags.replace(
    /&(?:nbsp|amp|quot|#39|lt|gt);/gi,
    (entity) => entityText[entity.toLowerCase()] ?? entity
  );

  const singleLine = collapseWhitespace(decoded.replace(/\n/g, ", "));
  const tidied = singleLine
    .replace(/\s*,\s*/g, ", ")
    .replace(/(,\s*){2,}/g, ", ");
  // Drop a separator left dangling at either end by leading/trailing <br>.
  return tidied.replace(/^,\s*|\s*,\s*$/g, "");
}
/**
 * Returns the `href` of the first `<a>` tag in an HTML fragment.
 *
 * - Accepts double- or single-quoted attribute values.
 * - Requires whitespace before `href`, so attributes such as `data-href`
 *   are not mistaken for the link target.
 * - Decodes `&amp;` in the attribute value, so query strings come back as
 *   usable URLs.
 * - Upgrades protocol-relative URLs (`//host/path`) to `https:`.
 *
 * @param fragment HTML fragment that may contain an anchor.
 * @returns The trimmed href, or `null` when no anchor with a non-empty
 *   quoted href is present.
 */
function extractAnchorHref(fragment: string): string | null {
  const match = fragment.match(
    /<a\s(?:[^>]*?\s)?href\s*=\s*(?:"([^"]+)"|'([^']+)')/i
  );
  // Group 1 holds a double-quoted value, group 2 a single-quoted one.
  const rawHref = match?.[1] ?? match?.[2];
  if (rawHref === undefined) return null;

  const href = rawHref.replace(/&amp;/gi, "&").trim();
  return href.startsWith("//") ? `https:${href}` : href;
}
/**
 * Looks up a one-line address with the Census geocoder and returns the
 * first address match together with the exact request URL used.
 *
 * The request uses the "Public_AR_Current" benchmark and "Current_Current"
 * vintage and asks for JSON output.
 *
 * @param address One-line street address.
 * @param fetchText Function used to download the response body.
 * @returns The first entry of `result.addressMatches` and the request URL.
 * @throws PublicRecordsLookupError when the response is not valid JSON or
 *   contains no address match. (An invalid body previously escaped as a raw
 *   `SyntaxError`; it now uses the module's own error type.)
 */
async function geocodeAddress(address: string, fetchText: FetchLike): Promise<{
  match: any;
  censusGeocoderUrl: string;
}> {
  const query = new URLSearchParams({
    address,
    benchmark: "Public_AR_Current",
    vintage: "Current_Current",
    format: "json"
  });
  const url = `${CENSUS_GEOCODER_URL}?${query.toString()}`;
  const body = await fetchText(url);

  let matches: unknown;
  try {
    matches = JSON.parse(body)?.result?.addressMatches;
  } catch {
    throw new PublicRecordsLookupError(
      `Census geocoder returned an invalid JSON response for address: ${address}`
    );
  }

  // A missing, empty, or non-array match list all mean "no usable match".
  if (!Array.isArray(matches) || matches.length === 0) {
    throw new PublicRecordsLookupError(`No Census geocoder match found for address: ${address}`);
  }
  return { match: matches[0], censusGeocoderUrl: url };
}
/**
 * Finds the directory page URL for a county by scanning the Texas county
 * directory index for a link whose label matches the county name.
 *
 * Index links are expected in the form `<a href="x.php">NNN County Name</a>`;
 * labels and the requested name are compared via `normalizeCountyName`.
 *
 * The href is resolved against the index URL with the standard `URL` API, so
 * document-relative (`travis.php`), root-relative (`/taxes/.../travis.php`),
 * protocol-relative and absolute links all produce a correct absolute URL.
 * Stripping leading slashes and appending to the index URL mis-resolved
 * root-relative and protocol-relative links.
 *
 * @param countyName County name, with or without a "County" suffix.
 * @param fetchText Function used to download the index page.
 * @returns Absolute URL of the county's directory page.
 * @throws PublicRecordsLookupError when no link matches the county.
 */
async function findTexasCountyHref(countyName: string, fetchText: FetchLike): Promise<string> {
  const html = await fetchText(TEXAS_COUNTY_DIRECTORY_URL);
  const countyNorm = normalizeCountyName(countyName);
  const links = html.matchAll(/<a href="([^"]+\.php)">\s*\d+\s+([^<]+)\s*<\/a>/gi);
  for (const [, href, label] of links) {
    // Both groups are mandatory in the pattern; the guard only satisfies strict index checks.
    if (href === undefined || label === undefined) continue;
    if (normalizeCountyName(label) !== countyNorm) continue;
    return new URL(href, TEXAS_COUNTY_DIRECTORY_URL).toString();
  }
  throw new PublicRecordsLookupError(
    `Could not find Texas county directory page for county: ${countyName}`
  );
}
/**
 * Extracts labelled details from the HTML of one office section of a county
 * directory page.
 *
 * Keys are added in this order, each only when its pattern matches:
 * - `lastUpdated` from the "Last Updated:" file-info paragraph;
 * - the first `<h4>Label: value</h4>` heading, stored under its own label;
 * - every `<strong>Label:</strong> value` pair in the first paragraph that
 *   follows an `<h4>` ("Website" keeps the link target, "Email" keeps the
 *   address from a `mailto:` link, everything else is plain text);
 * - `mailingAddress`, `streetAddress` and `collectingUnit` from the
 *   paragraphs under the headings of the same names.
 *
 * @param sectionHtml HTML of a single section.
 * @returns The extracted fields keyed by label.
 */
function parseTexasSection(sectionHtml: string): Record<string, unknown> {
  const fields: Record<string, unknown> = {};

  const updatedMatch = /<p class="file-info">\s*Last Updated:\s*([^<]+)<\/p>/i.exec(sectionHtml);
  if (updatedMatch !== null) {
    fields.lastUpdated = collapseWhitespace(updatedMatch[1]);
  }

  const leadMatch = /<h4>\s*([^:<]+):\s*([^<]+)<\/h4>/i.exec(sectionHtml);
  if (leadMatch !== null) {
    fields[leadMatch[1].trim()] = collapseWhitespace(leadMatch[2]);
  }

  const infoMatch = /<h4>\s*[^<]+<\/h4>\s*<p>(.*?)<\/p>/is.exec(sectionHtml);
  if (infoMatch !== null) {
    const pairs = infoMatch[1].matchAll(
      /<strong>\s*([^:<]+):\s*<\/strong>\s*(.*?)(?:<br\s*\/?>|$)/gis
    );
    for (const [, rawLabel, rawValue] of pairs) {
      const label = collapseWhitespace(rawLabel);
      const linkTarget = extractAnchorHref(rawValue);
      const labelLower = label.toLowerCase();

      let fieldValue: string;
      if (labelLower === "website" && linkTarget) {
        fieldValue = linkTarget;
      } else if (labelLower === "email" && linkTarget && linkTarget.startsWith("mailto:")) {
        fieldValue = linkTarget.replace(/^mailto:/i, "");
      } else {
        fieldValue = stripHtml(rawValue);
      }
      fields[label] = fieldValue;
    }
  }

  // Heading text on the page -> key used in the result.
  const headingKeys: Record<string, string> = {
    "Mailing Address": "mailingAddress",
    "Street Address": "streetAddress",
    "Collecting Unit": "collectingUnit"
  };
  for (const [heading, key] of Object.entries(headingKeys)) {
    const paragraph = new RegExp(
      `<h4>\\s*${heading}\\s*<\\/h4>\\s*<p>(.*?)<\\/p>`,
      "is"
    ).exec(sectionHtml);
    if (paragraph !== null) {
      fields[key] = stripHtml(paragraph[1]);
    }
  }

  return fields;
}
/**
 * Downloads a county's directory page and parses its "Appraisal District"
 * and "Tax Assessor/Collector" sections.
 *
 * The Appraisal District section runs from its `<h3>` heading up to the
 * Tax Assessor/Collector heading, or to the end of the page when that
 * heading is absent. Requiring the second heading rejected such pages
 * outright, even though a missing Tax Assessor/Collector section is a
 * supported outcome (`taxAssessorCollector: null`).
 *
 * @param countyName County name used to locate the directory page.
 * @param fetchText Function used to download pages.
 * @returns The page URL plus the parsed sections; each parsed section also
 *   carries the page URL under `directoryPage`.
 * @throws PublicRecordsLookupError when the county page cannot be found or
 *   has no Appraisal District section.
 */
async function fetchTexasCountyOffices(
  countyName: string,
  fetchText: FetchLike
): Promise<{
  directoryPage: string;
  appraisalDistrict: Record<string, unknown>;
  taxAssessorCollector: Record<string, unknown> | null;
}> {
  const pageUrl = await findTexasCountyHref(countyName, fetchText);
  const html = await fetchText(pageUrl);

  // Lazy capture stops at the first Tax Assessor/Collector heading; `$`
  // (end of input, no `m` flag) is the fallback when that heading is missing.
  const appraisalMatch = html.match(
    /<h3>\s*Appraisal District\s*<\/h3>(.*?)(?=<h3>\s*Tax Assessor\/Collector\s*<\/h3>|$)/is
  );
  if (!appraisalMatch) {
    throw new PublicRecordsLookupError(
      `Could not parse Appraisal District section for county: ${countyName}`
    );
  }
  const taxMatch = html.match(/<h3>\s*Tax Assessor\/Collector\s*<\/h3>(.*)$/is);

  const appraisalDistrict = parseTexasSection(appraisalMatch[1]);
  appraisalDistrict.directoryPage = pageUrl;

  const taxAssessorCollector = taxMatch ? parseTexasSection(taxMatch[1]) : null;
  if (taxAssessorCollector) {
    taxAssessorCollector.directoryPage = pageUrl;
  }

  return {
    directoryPage: pageUrl,
    appraisalDistrict,
    taxAssessorCollector
  };
}
/** Fields this module reads from a Census geocoder geography record. */
interface CensusGeographyFields {
  NAME?: string;
  STUSAB?: string;
  STATE?: string;
  COUNTY?: string;
  GEOID?: string;
}

/**
 * Returns the first record of a geocoder geography layer, or an empty record
 * when the layer is missing, not an array, or empty.
 */
function firstGeography(layer: unknown): CensusGeographyFields {
  if (!Array.isArray(layer)) return {};
  const first: unknown = layer[0];
  return typeof first === "object" && first !== null
    ? (first as CensusGeographyFields)
    : {};
}

/**
 * Resolves a street address to its public-record jurisdiction.
 *
 * Steps:
 * 1. Geocode the address with the Census geocoder and read the first state,
 *    county and "2020 Census Blocks" records from the match.
 * 2. For matches in Texas (`STUSAB === "TX"`) with a county name, fetch the
 *    county's directory page and parse its Appraisal District and
 *    Tax Assessor/Collector sections.
 * 3. Assemble lookup recommendations and echo back any caller-supplied
 *    identifiers.
 *
 * Geography layers are read defensively: an empty layer array (which is
 * truthy, so an `|| [{}]` fallback does not catch it) yields null fields
 * instead of throwing a TypeError.
 *
 * @param address One-line street address to resolve.
 * @param options Optional inputs:
 *   - `parcelId`, `listingGeoId`, `listingSourceUrl`: identifiers copied into
 *     `sourceIdentifierHints` when provided;
 *   - `fetchText`: replacement downloader; defaults to the built-in fetcher.
 * @returns The assembled resolution record.
 * @throws PublicRecordsLookupError when geocoding finds no match or a
 *   required Texas directory page or section cannot be found.
 */
export async function resolvePublicRecords(
  address: string,
  options: {
    parcelId?: string;
    listingGeoId?: string;
    listingSourceUrl?: string;
    fetchText?: FetchLike;
  } = {}
): Promise<PublicRecordsResolution> {
  const fetchText = options.fetchText || defaultFetchText;
  const { match, censusGeocoderUrl } = await geocodeAddress(address, fetchText);

  const geographies = match.geographies || {};
  const state = firstGeography(geographies.States);
  const county = firstGeography(geographies.Counties);
  const block = firstGeography(geographies["2020 Census Blocks"]);
  const coordinates = match.coordinates || {};

  let texasCountyDirectory: string | null = null;
  let texasPropertyTaxPortal: string | null = null;
  let appraisalDistrict: Record<string, unknown> | null = null;
  let taxAssessorCollector: Record<string, unknown> | null = null;

  const lookupRecommendations = [
    "Start from the official public-record jurisdiction instead of a listing-site geo ID.",
    "Try official address search first on the appraisal district site.",
    "If the listing exposes parcel/APN/account identifiers, use them as stronger search keys than ZPID or listing geo IDs."
  ];

  // County office details are only available for Texas matches.
  if (state.STUSAB === "TX" && county.NAME) {
    const offices = await fetchTexasCountyOffices(county.NAME, fetchText);
    texasCountyDirectory = offices.directoryPage;
    texasPropertyTaxPortal = TEXAS_PROPERTY_TAX_PORTAL;
    appraisalDistrict = offices.appraisalDistrict;
    taxAssessorCollector = offices.taxAssessorCollector;
    lookupRecommendations.push(
      "Use the Texas Comptroller county directory page as the official jurisdiction link in the final report.",
      "Attempt to retrieve assessed value, land value, improvement value, exemptions, and account number from the CAD website when a direct property page is publicly accessible."
    );
  }

  const sourceIdentifierHints: Record<string, string> = {};
  if (options.parcelId) sourceIdentifierHints.parcelId = options.parcelId;
  if (options.listingGeoId) {
    sourceIdentifierHints.listingGeoId = options.listingGeoId;
    lookupRecommendations.push(
      "Treat listing geo IDs as regional hints only; do not use them as assessor record keys."
    );
  }
  if (options.listingSourceUrl) {
    sourceIdentifierHints.listingSourceUrl = options.listingSourceUrl;
  }

  return {
    requestedAddress: address,
    matchedAddress: match.matchedAddress || address,
    latitude: coordinates.y ?? null,
    longitude: coordinates.x ?? null,
    geoid: block.GEOID || null,
    state: {
      name: state.NAME || null,
      code: state.STUSAB || null,
      fips: state.STATE || null
    },
    county: {
      name: county.NAME || null,
      fips: county.COUNTY || null,
      geoid: county.GEOID || null
    },
    officialLinks: {
      censusGeocoder: censusGeocoderUrl,
      texasCountyDirectory,
      texasPropertyTaxPortal
    },
    appraisalDistrict,
    taxAssessorCollector,
    lookupRecommendations,
    sourceIdentifierHints
  };
}