Port property assessor helpers to TypeScript
This commit is contained in:
292
skills/property-assessor/src/public-records.ts
Normal file
292
skills/property-assessor/src/public-records.ts
Normal file
@@ -0,0 +1,292 @@
|
||||
/** Census geocoder endpoint used for one-line address lookups that return geographies. */
export const CENSUS_GEOCODER_URL = "https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress";

/** Texas Comptroller county directory index; per-county pages are linked from this page. */
export const TEXAS_COUNTY_DIRECTORY_URL = "https://comptroller.texas.gov/taxes/property-tax/county-directory/";

/** Texas property tax portal link attached to resolutions for Texas addresses. */
export const TEXAS_PROPERTY_TAX_PORTAL = "https://texas.gov/PropertyTaxes";
|
||||
|
||||
/**
 * Error thrown when a public-records lookup step cannot be completed
 * (failed HTTP request, no geocoder match, unparseable directory page).
 *
 * Sets `name` so logs and `String(err)` identify the failure type instead of
 * showing a generic `Error`, and pins the prototype so `instanceof` keeps
 * working when the class is down-levelled to ES5.
 */
export class PublicRecordsLookupError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    this.name = "PublicRecordsLookupError";
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
||||
|
||||
/**
 * Result of resolving a street address to its public-record jurisdiction.
 *
 * Geography fields come from the first Census geocoder match; the Texas-specific
 * fields stay `null` unless the match is in Texas and names a county.
 */
export interface PublicRecordsResolution {
  /** Address exactly as supplied by the caller. */
  requestedAddress: string;
  /** Geocoder's matched address, or the requested address when the match has none. */
  matchedAddress: string;
  /** `y` coordinate of the match, or `null` when absent. */
  latitude: number | null;
  /** `x` coordinate of the match, or `null` when absent. */
  longitude: number | null;
  /** GEOID of the first "2020 Census Blocks" entry, or `null`. */
  geoid: string | null;
  /** State fields read from the first `States` geography (`NAME`, `STUSAB`, `STATE`). */
  state: {
    name: string | null;
    code: string | null;
    fips: string | null;
  };
  /** County fields read from the first `Counties` geography (`NAME`, `COUNTY`, `GEOID`). */
  county: {
    name: string | null;
    fips: string | null;
    geoid: string | null;
  };
  /** Official source links backing this resolution. */
  officialLinks: {
    /** Full geocoder request URL used for the lookup. */
    censusGeocoder: string;
    /** County page in the Texas Comptroller directory; `null` outside Texas. */
    texasCountyDirectory: string | null;
    /** Texas property tax portal; `null` outside Texas. */
    texasPropertyTaxPortal: string | null;
  };
  /** Fields parsed from the county page's Appraisal District section; `null` outside Texas. */
  appraisalDistrict: Record<string, unknown> | null;
  /** Fields parsed from the Tax Assessor/Collector section; `null` when unavailable. */
  taxAssessorCollector: Record<string, unknown> | null;
  /** Human-readable guidance on how to continue the records search. */
  lookupRecommendations: string[];
  /** Caller-supplied identifiers (`parcelId`, `listingGeoId`, `listingSourceUrl`) echoed back. */
  sourceIdentifierHints: Record<string, string>;
}
|
||||
|
||||
/** Text fetcher contract: takes a URL and resolves with the response body as a string. */
type FetchLike = (url: string) => Promise<string>;
|
||||
|
||||
/**
 * Default {@link FetchLike} implementation built on the global `fetch`.
 *
 * Sends a fixed `user-agent` header and resolves with the response body text.
 *
 * @throws PublicRecordsLookupError when the response status is not OK.
 */
const defaultFetchText: FetchLike = async (url) => {
  const requestInit = {
    headers: {
      "user-agent": "property-assessor/1.0"
    }
  };
  const response = await fetch(url, requestInit);

  if (response.ok) {
    const body = await response.text();
    return body;
  }

  throw new PublicRecordsLookupError(`Request failed for ${url}: ${response.status}`);
};
|
||||
|
||||
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param value Text to normalize; `null`, `undefined` and `""` all yield `""`.
 * @returns The normalized single-spaced string.
 */
function collapseWhitespace(value: string | null | undefined): string {
  const text = value ? value : "";
  // Splitting on whitespace runs leaves empty edge tokens; dropping them is the trim.
  const words = text.split(/\s+/).filter((word) => word !== "");
  return words.join(" ");
}
|
||||
|
||||
/**
 * Reduces a county label to a comparison key: whitespace-collapsed, lower-cased,
 * first " county" word removed, and everything except `a-z0-9` stripped.
 *
 * @param value County label, e.g. as reported by the geocoder or the directory page.
 * @returns The normalized key used for equality comparison.
 */
function normalizeCountyName(value: string): string {
  const lowered = collapseWhitespace(value).toLowerCase();
  const withoutSuffix = lowered.replace(/ county\b/, "");
  return withoutSuffix.replace(/[^a-z0-9]+/g, "");
}
|
||||
|
||||
/**
 * Replacement text for the character references decoded by `stripHtml`,
 * keyed by the text between the leading `&` and the trailing `;`.
 */
const HTML_ENTITY_REPLACEMENTS: Record<string, string> = {
  nbsp: " ",
  amp: "&",
  quot: '"',
  "#39": "'",
  lt: "<",
  gt: ">"
};

/**
 * Converts an HTML fragment to a single-line, comma-separated plain-text value.
 *
 * Steps: `<br>` becomes a line break, remaining tags are removed, the nbsp/amp/
 * quot/#39/lt/gt character references are decoded, line breaks become ", ",
 * whitespace is collapsed, and duplicate or leading/trailing commas are removed.
 *
 * Entities are decoded in a single pass so that an escaped ampersand followed by
 * `lt;` yields the literal reference text rather than being decoded twice.
 *
 * @param value Raw HTML fragment.
 * @returns Plain text with line breaks rendered as ", ".
 */
function stripHtml(value: string): string {
  const withoutTags = value.replace(/<br\s*\/?>/gi, "\n").replace(/<[^>]+>/g, "");

  const decoded = withoutTags.replace(
    /&(nbsp|amp|quot|#39|lt|gt);/gi,
    (entity: string, name: string) => HTML_ENTITY_REPLACEMENTS[name.toLowerCase()] ?? entity
  );

  // Line breaks become list separators, then whitespace runs collapse to one space.
  const singleLine = decoded.replace(/\n/g, ", ").replace(/\s+/g, " ").trim();

  const tidied = singleLine.replace(/\s*,\s*/g, ", ").replace(/(,\s*){2,}/g, ", ");
  return tidied.replace(/^,\s*|\s*,\s*$/g, "");
}
|
||||
|
||||
/**
 * Pulls the double-quoted `href` of the first matching `<a ...>` tag out of an HTML fragment.
 *
 * @param fragment HTML that may contain an anchor tag.
 * @returns The trimmed href, with protocol-relative (`//host/...`) values
 *          prefixed by `https:`; `null` when no anchor with an href is found.
 */
function extractAnchorHref(fragment: string): string | null {
  const found = /<a[^>]+href="([^"]+)"/i.exec(fragment);
  if (found === null) {
    return null;
  }

  const target = found[1].trim();
  return target.startsWith("//") ? `https:${target}` : target;
}
|
||||
|
||||
/**
 * Looks up an address with the Census one-line geocoder and returns the first match.
 *
 * @param address Free-form street address to geocode.
 * @param fetchText Fetcher used to retrieve the geocoder response body.
 * @returns The first entry of `result.addressMatches` plus the exact request URL used.
 * @throws PublicRecordsLookupError when the response is not valid JSON or contains
 *         no address matches.
 */
async function geocodeAddress(address: string, fetchText: FetchLike): Promise<{
  match: any;
  censusGeocoderUrl: string;
}> {
  const query = new URLSearchParams({
    address,
    benchmark: "Public_AR_Current",
    vintage: "Current_Current",
    format: "json"
  });
  const url = `${CENSUS_GEOCODER_URL}?${query.toString()}`;
  const body = await fetchText(url);

  // Surface malformed responses as a lookup failure instead of a raw SyntaxError,
  // so callers only need to handle one error type for this step.
  let payload: { result?: { addressMatches?: unknown } } | null;
  try {
    payload = JSON.parse(body);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new PublicRecordsLookupError(
      `Census geocoder returned invalid JSON for address: ${address} (${reason})`
    );
  }

  const matches = payload?.result?.addressMatches;
  if (!Array.isArray(matches) || matches.length === 0) {
    throw new PublicRecordsLookupError(`No Census geocoder match found for address: ${address}`);
  }
  return { match: matches[0], censusGeocoderUrl: url };
}
|
||||
|
||||
/**
 * Finds the URL of a county's page in the Texas Comptroller county directory.
 *
 * Fetches the directory index, scans its `<a href="*.php">NNN County</a>` links and
 * compares each label to `countyName` after normalization.
 *
 * @param countyName County name to look for (a trailing "County" is ignored).
 * @param fetchText Fetcher used to retrieve the directory index.
 * @returns Absolute URL of the matching county page.
 * @throws PublicRecordsLookupError when no directory link matches the county.
 */
async function findTexasCountyHref(countyName: string, fetchText: FetchLike): Promise<string> {
  const html = await fetchText(TEXAS_COUNTY_DIRECTORY_URL);
  const countyNorm = normalizeCountyName(countyName);
  const matches = html.matchAll(/<a href="([^"]+\.php)">\s*\d+\s+([^<]+)\s*<\/a>/gi);

  for (const match of matches) {
    const href = match[1];
    const label = match[2];
    if (normalizeCountyName(label) !== countyNorm) {
      continue;
    }
    if (href.startsWith("http://") || href.startsWith("https://")) {
      return href;
    }
    // Resolve against the directory URL with standard URL semantics so that
    // root-relative ("/a/b.php"), protocol-relative ("//host/b.php") and "../"
    // hrefs produce the right address; plain "name.php" resolves as before.
    return new URL(href, TEXAS_COUNTY_DIRECTORY_URL).toString();
  }

  throw new PublicRecordsLookupError(
    `Could not find Texas county directory page for county: ${countyName}`
  );
}
|
||||
|
||||
/**
 * Extracts labelled fields from one office section of a Texas county directory page.
 *
 * Collected, in order:
 *  - `lastUpdated` from the `file-info` paragraph;
 *  - the first `<h4>Label: value</h4>` heading, stored under its own label;
 *  - each `<strong>Label:</strong> value` pair in the first paragraph that follows an
 *    `<h4>`, where "Website" stores the link href and "Email" stores the `mailto:`
 *    target when present, and every other value is stored as stripped text;
 *  - the Mailing Address / Street Address / Collecting Unit paragraphs.
 *
 * @param sectionHtml HTML of a single section.
 * @returns Map of field label to extracted value; empty when nothing matches.
 */
function parseTexasSection(sectionHtml: string): Record<string, unknown> {
  const parsed: Record<string, unknown> = {};

  const updatedMatch = /<p class="file-info">\s*Last Updated:\s*([^<]+)<\/p>/i.exec(sectionHtml);
  if (updatedMatch !== null) {
    parsed.lastUpdated = collapseWhitespace(updatedMatch[1]);
  }

  const leadMatch = /<h4>\s*([^:<]+):\s*([^<]+)<\/h4>/i.exec(sectionHtml);
  if (leadMatch !== null) {
    const leadKey = leadMatch[1].trim();
    parsed[leadKey] = collapseWhitespace(leadMatch[2]);
  }

  const detailsMatch = /<h4>\s*[^<]+<\/h4>\s*<p>(.*?)<\/p>/is.exec(sectionHtml);
  if (detailsMatch !== null) {
    const fieldPattern = /<strong>\s*([^:<]+):\s*<\/strong>\s*(.*?)(?:<br\s*\/?>|$)/gis;
    for (const [, rawKey, rawValue] of detailsMatch[1].matchAll(fieldPattern)) {
      const key = collapseWhitespace(rawKey);
      const keyLower = key.toLowerCase();
      const link = extractAnchorHref(rawValue);

      let value: string;
      if (keyLower === "website" && link) {
        value = link;
      } else if (keyLower === "email" && link && link.startsWith("mailto:")) {
        value = link.replace(/^mailto:/i, "");
      } else {
        value = stripHtml(rawValue);
      }
      parsed[key] = value;
    }
  }

  const addressBlocks = [
    { heading: "Mailing Address", key: "mailingAddress" },
    { heading: "Street Address", key: "streetAddress" },
    { heading: "Collecting Unit", key: "collectingUnit" }
  ];
  for (const { heading, key } of addressBlocks) {
    const pattern = new RegExp(`<h4>\\s*${heading}\\s*<\\/h4>\\s*<p>(.*?)<\\/p>`, "is");
    const blockMatch = pattern.exec(sectionHtml);
    if (blockMatch !== null) {
      parsed[key] = stripHtml(blockMatch[1]);
    }
  }

  return parsed;
}
|
||||
|
||||
/**
 * Fetches a county's Texas Comptroller directory page and parses its office sections.
 *
 * The Appraisal District section runs from its `<h3>` heading to the
 * Tax Assessor/Collector heading, or to the end of the page when that heading is
 * missing. Previously a missing Tax Assessor/Collector heading made the appraisal
 * match fail too, so the `null` tax result declared in the return type was unreachable.
 *
 * @param countyName County whose directory page should be read.
 * @param fetchText Fetcher used for the directory index and the county page.
 * @returns The county page URL, the parsed Appraisal District fields, and the parsed
 *          Tax Assessor/Collector fields (`null` when the page has no such section).
 *          Each parsed record also carries `directoryPage`.
 * @throws PublicRecordsLookupError when the county page cannot be located or has no
 *         Appraisal District section.
 */
async function fetchTexasCountyOffices(
  countyName: string,
  fetchText: FetchLike
): Promise<{
  directoryPage: string;
  appraisalDistrict: Record<string, unknown>;
  taxAssessorCollector: Record<string, unknown> | null;
}> {
  const pageUrl = await findTexasCountyHref(countyName, fetchText);
  const html = await fetchText(pageUrl);

  // The lazy capture stops at the first Tax Assessor/Collector heading when present;
  // the `$` alternative only applies when that heading is absent.
  const appraisalMatch = html.match(
    /<h3>\s*Appraisal District\s*<\/h3>(.*?)(?=<h3>\s*Tax Assessor\/Collector\s*<\/h3>|$)/is
  );
  if (!appraisalMatch) {
    throw new PublicRecordsLookupError(
      `Could not parse Appraisal District section for county: ${countyName}`
    );
  }
  const taxMatch = html.match(/<h3>\s*Tax Assessor\/Collector\s*<\/h3>(.*)$/is);

  const appraisalDistrict = parseTexasSection(appraisalMatch[1]);
  appraisalDistrict.directoryPage = pageUrl;

  const taxAssessorCollector = taxMatch ? parseTexasSection(taxMatch[1]) : null;
  if (taxAssessorCollector) {
    taxAssessorCollector.directoryPage = pageUrl;
  }

  return {
    directoryPage: pageUrl,
    appraisalDistrict,
    taxAssessorCollector
  };
}
|
||||
|
||||
/**
 * Returns the first entry of a geocoder geography list, or `{}` when the list is
 * missing, empty, or its first entry is not an object.
 */
// The geocoder payload is untyped JSON, so field access stays loosely typed here.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function firstGeography(entries: unknown): Record<string, any> {
  if (Array.isArray(entries) && entries[0] != null && typeof entries[0] === "object") {
    return entries[0];
  }
  return {};
}

/**
 * Resolves a street address to its public-record jurisdiction.
 *
 * Geocodes the address with the Census geocoder, reads state/county/block
 * geographies from the first match, and, when the match is in Texas and names a
 * county, adds the county's appraisal district and tax assessor-collector details
 * from the Texas Comptroller county directory.
 *
 * @param address Street address to resolve.
 * @param options Optional hints and overrides:
 *   - `parcelId`, `listingGeoId`, `listingSourceUrl` are echoed into
 *     `sourceIdentifierHints` (a listing geo ID also adds a cautionary recommendation);
 *   - `fetchText` replaces the default HTTP text fetcher.
 * @returns The assembled resolution, with `null` for any value the geocoder did not supply.
 * @throws PublicRecordsLookupError when geocoding finds no match or the Texas
 *         directory lookup fails.
 */
export async function resolvePublicRecords(
  address: string,
  options: {
    parcelId?: string;
    listingGeoId?: string;
    listingSourceUrl?: string;
    fetchText?: FetchLike;
  } = {}
): Promise<PublicRecordsResolution> {
  const fetchText = options.fetchText ?? defaultFetchText;
  const { match, censusGeocoderUrl } = await geocodeAddress(address, fetchText);
  const geographies = match?.geographies || {};
  // An empty geography array (e.g. `States: []`) must degrade to "unknown"
  // rather than crash on property access of `undefined`.
  const state = firstGeography(geographies.States);
  const county = firstGeography(geographies.Counties);
  const block = firstGeography(geographies["2020 Census Blocks"]);
  const coordinates = match?.coordinates || {};

  let texasCountyDirectory: string | null = null;
  let texasPropertyTaxPortal: string | null = null;
  let appraisalDistrict: Record<string, unknown> | null = null;
  let taxAssessorCollector: Record<string, unknown> | null = null;

  const lookupRecommendations = [
    "Start from the official public-record jurisdiction instead of a listing-site geo ID.",
    "Try official address search first on the appraisal district site.",
    "If the listing exposes parcel/APN/account identifiers, use them as stronger search keys than ZPID or listing geo IDs."
  ];

  if (state.STUSAB === "TX" && county.NAME) {
    const offices = await fetchTexasCountyOffices(county.NAME, fetchText);
    texasCountyDirectory = offices.directoryPage;
    texasPropertyTaxPortal = TEXAS_PROPERTY_TAX_PORTAL;
    appraisalDistrict = offices.appraisalDistrict;
    taxAssessorCollector = offices.taxAssessorCollector;
    lookupRecommendations.push(
      "Use the Texas Comptroller county directory page as the official jurisdiction link in the final report.",
      "Attempt to retrieve assessed value, land value, improvement value, exemptions, and account number from the CAD website when a direct property page is publicly accessible."
    );
  }

  const sourceIdentifierHints: Record<string, string> = {};
  if (options.parcelId) sourceIdentifierHints.parcelId = options.parcelId;
  if (options.listingGeoId) {
    sourceIdentifierHints.listingGeoId = options.listingGeoId;
    lookupRecommendations.push(
      "Treat listing geo IDs as regional hints only; do not use them as assessor record keys."
    );
  }
  if (options.listingSourceUrl) {
    sourceIdentifierHints.listingSourceUrl = options.listingSourceUrl;
  }

  return {
    requestedAddress: address,
    matchedAddress: match?.matchedAddress || address,
    latitude: coordinates.y ?? null,
    longitude: coordinates.x ?? null,
    geoid: block.GEOID || null,
    state: {
      name: state.NAME || null,
      code: state.STUSAB || null,
      fips: state.STATE || null
    },
    county: {
      name: county.NAME || null,
      fips: county.COUNTY || null,
      geoid: county.GEOID || null
    },
    officialLinks: {
      censusGeocoder: censusGeocoderUrl,
      texasCountyDirectory,
      texasPropertyTaxPortal
    },
    appraisalDistrict,
    taxAssessorCollector,
    lookupRecommendations,
    sourceIdentifierHints
  };
}
|
||||
Reference in New Issue
Block a user