// Source listing metadata: 804 lines, 26 KiB, TypeScript
/** Census Geocoder endpoint queried with a one-line street address. */
export const CENSUS_GEOCODER_URL =
  "https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress";

/** Census Geocoder endpoint queried with x/y (longitude/latitude) coordinates. */
export const CENSUS_COORDINATES_URL =
  "https://geocoding.geo.census.gov/geocoder/geographies/coordinates";

/** Nominatim search endpoint, used as the fallback geocoder when the Census address lookup has no match. */
export const NOMINATIM_SEARCH_URL = "https://nominatim.openstreetmap.org/search";

/** Texas Comptroller county directory index; per-county pages are resolved relative to it. */
export const TEXAS_COUNTY_DIRECTORY_URL =
  "https://comptroller.texas.gov/taxes/property-tax/county-directory/";

/** Statewide Texas property-tax portal link reported for Texas addresses. */
export const TEXAS_PROPERTY_TAX_PORTAL = "https://texas.gov/PropertyTaxes";
|
|
|
|
/**
 * Error thrown by this module when a public-records lookup step fails
 * (non-OK HTTP response, no geocoder match, unparseable directory page, ...).
 *
 * `name` is set explicitly so logs and `String(error)` identify the error type
 * instead of printing a generic "Error".
 */
export class PublicRecordsLookupError extends Error {
  override name = "PublicRecordsLookupError";
}
|
|
|
|
/**
 * Property-level details scraped from an appraisal district (CAD) site.
 * Fields are `null` when neither the detail page nor the search result row
 * supplied a value.
 */
export interface PropertyDetailsResolution {
  /** Identifier of the scraper that produced the record (e.g. "nueces-esearch"). */
  source: string;
  /** URL of the property detail page the values were read from. */
  sourceUrl: string;
  /** Property identifier taken from the CAD search result. */
  propertyId: string | null;
  /** Owner name from the detail page, falling back to the search result. */
  ownerName: string | null;
  /** Situs (site) address from the detail page, falling back to the search result. */
  situsAddress: string | null;
  /** Legal description from the detail page, falling back to the search result. */
  legalDescription: string | null;
  /** Parsed "Land Value" field. */
  landValue: number | null;
  /** Parsed "Improvement Value" field. */
  improvementValue: number | null;
  /** Parsed "Market Value" field, falling back to the search result's appraised value. */
  marketValue: number | null;
  /** Parsed "Assessed Value" / "Appraised Value" field, falling back to the search result. */
  assessedTotalValue: number | null;
  /** Exemption entries split out of the "Exemptions" field. */
  exemptions: string[];
  /** Free-form notes about how the details were obtained. */
  notes: string[];
}
|
|
|
|
/**
 * Result of `resolvePublicRecords`: the geocoded jurisdiction for an address
 * plus the official offices, links and (when available) CAD property details.
 */
export interface PublicRecordsResolution {
  /** Address exactly as supplied by the caller. */
  requestedAddress: string;
  /** Address reported by the geocoder, or the requested address when none was reported. */
  matchedAddress: string;
  /** Geocoder `y` coordinate, or `null` when absent. */
  latitude: number | null;
  /** Geocoder `x` coordinate, or `null` when absent. */
  longitude: number | null;
  /** GEOID of the first "2020 Census Blocks" geography, if any. */
  geoid: string | null;
  /** First entry of the Census "States" geography layer. */
  state: {
    name: string | null;
    /** Postal abbreviation (Census `STUSAB`). */
    code: string | null;
    fips: string | null;
  };
  /** First entry of the Census "Counties" geography layer. */
  county: {
    name: string | null;
    fips: string | null;
    geoid: string | null;
  };
  officialLinks: {
    /** Census geocoder request URL that produced the geographies. */
    censusGeocoder: string;
    /** Texas Comptroller county directory page; `null` outside Texas. */
    texasCountyDirectory: string | null;
    /** Texas property-tax portal; `null` outside Texas. */
    texasPropertyTaxPortal: string | null;
  };
  /** Fields parsed from the "Appraisal District" section of the county directory page. */
  appraisalDistrict: Record<string, unknown> | null;
  /** Fields parsed from the "Tax Assessor/Collector" section of the county directory page. */
  taxAssessorCollector: Record<string, unknown> | null;
  /** Human-readable guidance accumulated during resolution. */
  lookupRecommendations: string[];
  /** Caller-supplied identifiers (parcel ID, listing geo ID, listing URL) echoed back. */
  sourceIdentifierHints: Record<string, string>;
  /** CAD property details when a supported appraisal district site could be scraped. */
  propertyDetails: PropertyDetailsResolution | null;
}
|
|
|
|
/** Subset of request options understood by a {@link FetchLike} implementation. */
interface FetchTextInit {
  /** Request body, already serialized to a string. */
  body?: string;
  /** Extra request headers. */
  headers?: Record<string, string>;
  /** HTTP method; the default fetcher uses "GET" when omitted. */
  method?: string;
}
|
|
|
|
/**
 * Minimal HTTP abstraction used throughout this module: fetch `url` and
 * resolve with the response body as text. Injectable so callers can supply
 * their own transport.
 */
interface FetchLike {
  (url: string, init?: FetchTextInit): Promise<string>;
}
|
|
|
|
/**
 * Default {@link FetchLike} built on the global `fetch`.
 *
 * Sends a "property-assessor/1.0" user agent (caller headers are spread after
 * it and may override it), defaults the method to GET, and resolves with the
 * response body text.
 *
 * @throws PublicRecordsLookupError when the response status is not OK.
 */
const defaultFetchText: FetchLike = async (url, init = {}) => {
  const response = await fetch(url, {
    body: init.body,
    headers: {
      "user-agent": "property-assessor/1.0",
      ...(init.headers || {})
    },
    method: init.method || "GET"
  });
  if (!response.ok) {
    throw new PublicRecordsLookupError(`Request failed for ${url}: ${response.status}`);
  }
  return await response.text();
};
|
|
|
|
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param value Text to normalize; `null`, `undefined` and "" all yield "".
 * @returns The normalized text.
 */
function collapseWhitespace(value: string | null | undefined): string {
  return (value || "").replace(/\s+/g, " ").trim();
}
|
|
|
|
/**
 * Produces a comparison key for a county name: lower-cased, with the first
 * " county" word removed and every non-alphanumeric character dropped
 * (e.g. "Nueces County" -> "nueces").
 */
function normalizeCountyName(value: string): string {
  return collapseWhitespace(value)
    .toLowerCase()
    .replace(/ county\b/, "")
    .replace(/[^a-z0-9]+/g, "");
}
|
|
|
|
/**
 * Converts an HTML fragment to a single line of plain text.
 *
 * Steps: `<br>` becomes a line break, all other tags are removed, character
 * references are decoded, line breaks become ", ", whitespace is collapsed and
 * stray or duplicate commas are tidied up.
 *
 * Character references are decoded in ONE pass so that an escaped reference
 * (an ampersand entity followed by "lt;") is decoded once, not twice. Numeric
 * references outside the Unicode range are left untouched instead of letting
 * `String.fromCodePoint` throw.
 *
 * @param value HTML fragment.
 * @returns Plain text with comma-separated former lines.
 */
function stripHtml(value: string): string {
  const namedEntities: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">"
  };

  const withoutTags = value
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<[^>]+>/g, "");

  const decoded = withoutTags.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(nbsp|amp|quot|apos|lt|gt));/gi,
    (entity: string, hex?: string, decimal?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name.toLowerCase()] ?? entity;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(decimal ?? "", 10);
      // Guard: fromCodePoint throws RangeError above U+10FFFF.
      return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    }
  );

  let output = collapseWhitespace(decoded.replace(/\n/g, ", "));
  output = output.replace(/\s*,\s*/g, ", ").replace(/(,\s*){2,}/g, ", ");
  return output.replace(/^,\s*|\s*,\s*$/g, "");
}
|
|
|
|
/**
 * Builds the list of address strings to try against the fallback geocoder.
 *
 * The first candidate is always the whitespace-normalized address. When the
 * street part (text before the first comma) ends with a unit designator
 * ("Apt 4B", "Ste. 200", "Unit #5", "#12"), a second candidate without the
 * unit is appended.
 *
 * A unit keyword only counts when it is followed by whitespace, ".", "#", "-"
 * or a digit, so street names that merely start with a keyword ("Stevens",
 * "Unity") are not truncated.
 *
 * @param address Free-form address.
 * @returns Zero candidates for a blank address, otherwise one or two.
 */
function buildFallbackAddressCandidates(address: string): string[] {
  const normalized = collapseWhitespace(address);
  if (!normalized) return [];

  const candidates = [normalized];
  const [streetPartRaw, ...restParts] = normalized.split(",");
  const streetPart = collapseWhitespace(streetPartRaw);
  const locality = restParts
    .map((part) => collapseWhitespace(part))
    .filter(Boolean)
    .join(", ");

  const trailingUnit =
    /\s+(?:(?:apartment|apt|unit|suite|ste)(?=[\s.#\d-])\.?[\s#-]*|#\s*)[a-z0-9][a-z0-9-]*$/i;
  const strippedStreet = collapseWhitespace(streetPart.replace(trailingUnit, ""));

  if (strippedStreet && strippedStreet !== streetPart) {
    candidates.push(locality ? `${strippedStreet}, ${locality}` : strippedStreet);
  }

  return candidates;
}
|
|
|
|
/**
 * Returns the `href` of the first `<a>` element in an HTML fragment.
 *
 * Accepts double- or single-quoted attribute values. Protocol-relative URLs
 * ("//host/path") are given an "https:" scheme.
 *
 * @param fragment HTML fragment to scan.
 * @returns The trimmed href, or `null` when there is no anchor or the href is blank.
 */
function extractAnchorHref(fragment: string): string | null {
  // `<a\b` avoids matching other tags that merely start with "a" (e.g. <abbr>).
  const match = fragment.match(/<a\b[^>]*?\shref\s*=\s*(?:"([^"]+)"|'([^']+)')/i);
  if (!match) return null;

  const href = (match[1] ?? match[2] ?? "").trim();
  if (!href) return null;
  if (href.startsWith("//")) return `https:${href}`;
  return href;
}
|
|
|
|
/**
 * Ensures a URL-ish string carries an http(s) scheme.
 *
 * Blank input is returned as "". Values already starting with "http://" or
 * "https://" are returned unchanged (after whitespace normalization); anything
 * else has leading slashes removed and "https://" prepended.
 */
function normalizeUrl(rawUrl: string): string {
  const value = collapseWhitespace(rawUrl);
  if (!value) return value;
  if (/^https?:\/\//i.test(value)) return value;
  return `https://${value.replace(/^\/+/, "")}`;
}
|
|
|
|
/**
 * Decodes the HTML character references this module expects in attribute
 * values: hexadecimal and decimal numeric references plus the named
 * references amp, quot, apos, lt and gt (case-insensitive).
 *
 * Decoding happens in ONE pass, so an escaped reference (an ampersand entity
 * followed by "lt;") yields the literal text of the inner reference rather
 * than being decoded twice. Numeric references outside the Unicode range are
 * left untouched instead of letting `String.fromCodePoint` throw.
 *
 * @param value Text that may contain character references.
 * @returns The decoded text.
 */
function decodeHtmlEntities(value: string): string {
  const namedEntities: Record<string, string> = {
    amp: "&",
    quot: '"',
    apos: "'",
    lt: "<",
    gt: ">"
  };

  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(amp|quot|apos|lt|gt));/gi,
    (entity: string, hex?: string, decimal?: string, name?: string): string => {
      if (name !== undefined) {
        return namedEntities[name.toLowerCase()] ?? entity;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(decimal ?? "", 10);
      // Guard: fromCodePoint throws RangeError above U+10FFFF.
      return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    }
  );
}
|
|
|
|
/**
 * Parses a displayed currency amount such as "$123,456.78" into a number.
 *
 * Every character except digits, "." and "-" is discarded before conversion.
 *
 * @param value Displayed amount; may be blank, `null` or `undefined`.
 * @returns The numeric value, or `null` when nothing numeric remains or the
 *          remainder is not a finite number.
 */
function parseCurrencyValue(value: string | null | undefined): number | null {
  const normalized = collapseWhitespace(value);
  if (!normalized) return null;

  const numeric = normalized.replace(/[^0-9.-]/g, "");
  if (!numeric) return null;

  const parsed = Number(numeric);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
|
|
/**
 * Determines which tax year to search.
 *
 * Reads the four-digit `"DefaultYear": NNNN` setting embedded in the search
 * home page; when it is absent, falls back to the current calendar year.
 *
 * @param searchHomeHtml HTML of the property-search landing page.
 */
function parseCurrentYearFromSearchHome(searchHomeHtml: string): number {
  const configuredYear = searchHomeHtml.match(/"DefaultYear"\s*:\s*(\d{4})/i);
  if (configuredYear) {
    return Number(configuredYear[1]);
  }
  return new Date().getFullYear();
}
|
|
|
|
/**
 * Builds the free-text keyword query "<address> Year:<year>" used when the
 * address cannot be split into a street number and street name.
 */
function buildCadSearchKeywords(address: string, year: number): string {
  return `${collapseWhitespace(address)} Year:${year}`.trim();
}
|
|
|
|
/**
 * Formats a parcel identifier as a dash-separated geographic ID.
 *
 * Non-digits are removed, then the digits are grouped as 4-4-rest
 * (e.g. "123456789012" -> "1234-5678-9012"). Up to four digits are returned
 * as-is and five to eight digits are split into two groups.
 *
 * @returns The formatted ID, or `null` when the input contains no digits.
 */
function formatNuecesGeographicId(parcelId: string | null | undefined): string | null {
  const normalized = collapseWhitespace(parcelId).replace(/[^0-9]/g, "");
  if (!normalized) return null;
  if (normalized.length <= 4) return normalized;
  if (normalized.length <= 8) {
    return `${normalized.slice(0, 4)}-${normalized.slice(4)}`;
  }
  return `${normalized.slice(0, 4)}-${normalized.slice(4, 8)}-${normalized.slice(8)}`;
}
|
|
|
|
/** Trailing street-type words removed from the street name before a CAD search. */
const CAD_STREET_SUFFIXES: ReadonlySet<string> = new Set([
  "rd",
  "road",
  "dr",
  "drive",
  "st",
  "street",
  "ave",
  "avenue",
  "blvd",
  "boulevard",
  "ct",
  "court",
  "cir",
  "circle",
  "ln",
  "lane",
  "trl",
  "trail",
  "way",
  "pkwy",
  "parkway",
  "pl",
  "place",
  "ter",
  "terrace",
  "loop",
  "hwy",
  "highway"
]);

/**
 * Splits a free-form address into the pieces used for a CAD search.
 *
 * Only the street part (text before the first comma) is analysed:
 * - `unit`: identifier following a unit designator ("Apt 4B", "Ste. 200",
 *   "Unit #5", "#12"), upper-cased. A keyword only counts when followed by
 *   whitespace, ".", "#", "-" or a digit, so street names such as "Stevens"
 *   or "Unity" are not mistaken for units.
 * - `streetNumber`: leading digits with an optional single letter.
 * - `streetName`: remaining words with trailing street-type suffixes removed.
 *   At least one word is always kept, so "500 Court St" yields "Court".
 *
 * @param address Free-form address.
 * @returns The normalized address plus the extracted parts (`null` when absent).
 */
function parseAddressForCadSearch(address: string): {
  rawAddress: string;
  streetNumber: string | null;
  streetName: string | null;
  unit: string | null;
} {
  const rawAddress = collapseWhitespace(address);
  const streetPart = collapseWhitespace(rawAddress.split(",")[0] || rawAddress);

  // Fresh regex per call: it is global and therefore stateful.
  const unitDesignator =
    /(?:\b(?:apartment|apt|unit|suite|ste)(?=[\s.#\d-])\.?[\s#-]*|#\s*)([a-z0-9][a-z0-9-]*)/gi;
  const unitMatch = unitDesignator.exec(streetPart);
  const unit = unitMatch?.[1]?.toUpperCase() ?? null;

  // String.prototype.replace resets lastIndex for global patterns.
  const withoutUnit = collapseWhitespace(streetPart.replace(unitDesignator, " "));

  const numberMatch = withoutUnit.match(/^(\d+[a-z]?)/i);
  const streetNumber = numberMatch?.[1] ?? null;

  const streetTokens = withoutUnit
    .replace(/^(\d+[a-z]?)\s*/i, "")
    .split(/\s+/)
    .filter(Boolean);
  while (
    streetTokens.length > 1 &&
    CAD_STREET_SUFFIXES.has((streetTokens[streetTokens.length - 1] ?? "").toLowerCase())
  ) {
    streetTokens.pop();
  }

  return {
    rawAddress,
    streetNumber,
    streetName: streetTokens.length ? streetTokens.join(" ") : null,
    unit
  };
}
|
|
|
|
/**
 * Reads the `<meta name="search-token" content="...">` value from a page and
 * decodes any HTML character references in it.
 *
 * @returns The token, or `null` when the meta tag is not present.
 */
function extractSearchToken(searchHomeHtml: string): string | null {
  const match = searchHomeHtml.match(/meta name="search-token" content="([^"]+)"/i);
  return match ? decodeHtmlEntities(match[1]) : null;
}
|
|
|
|
/**
 * Finds the property-search link on an appraisal district homepage.
 *
 * Preference order:
 * 1. the first absolute https link whose URL contains "esearch";
 * 2. the first link whose text is "SEARCH NOW" or "Property Search".
 *
 * @returns The raw href (possibly relative for case 2), or `null` if neither is found.
 */
function extractPropertySearchUrl(homepageHtml: string): string | null {
  const preferred = homepageHtml.match(/href="(https:\/\/[^"]*esearch[^"]*)"/i);
  if (preferred) {
    return preferred[1];
  }
  const generic = homepageHtml.match(
    /href="([^"]+)"[^>]*>\s*(?:SEARCH NOW|Property Search)\s*</i
  );
  return generic ? generic[1] : null;
}
|
|
|
|
/**
 * Extracts the value displayed next to `label` on a property detail page.
 *
 * Three markup layouts are tried in order and the first match wins:
 * 1. `<div>label</div><div>value</div>`
 * 2. `<strong>label:</strong> value` terminated by `<br>`, `</p>` or `</div>`
 * 3. `label</tag><tag>value</tag>` (any element names)
 *
 * @param detailHtml HTML of the detail page.
 * @param label Field label, matched literally (regex metacharacters are escaped)
 *              and case-insensitively.
 * @returns The value as plain text, or `null` when no layout matches.
 */
function extractDetailField(detailHtml: string, label: string): string | null {
  const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const patterns = [
    new RegExp(`<div[^>]*>\\s*${escaped}\\s*<\\/div>\\s*<div[^>]*>(.*?)<\\/div>`, "is"),
    new RegExp(
      `<strong>\\s*${escaped}\\s*:?\\s*<\\/strong>\\s*(.*?)(?:<br\\s*\\/?>|<\\/p>|<\\/div>)`,
      "is"
    ),
    new RegExp(`${escaped}\\s*:?\\s*<\\/[^>]+>\\s*<[^>]+>(.*?)<\\/[^>]+>`, "is")
  ];

  for (const pattern of patterns) {
    const match = detailHtml.match(pattern);
    if (match) {
      return stripHtml(match[1]);
    }
  }
  return null;
}
|
|
|
|
/**
 * Returns the entries of the "Exemptions" field on a property detail page.
 *
 * The field text is split on ",", ";" and "|"; blank entries are dropped.
 *
 * @returns The exemption entries, or an empty array when the field is missing or empty.
 */
function extractExemptions(detailHtml: string): string[] {
  const raw = extractDetailField(detailHtml, "Exemptions");
  if (!raw) return [];
  return raw
    .split(/[,;|]/)
    .map((item) => collapseWhitespace(item))
    .filter(Boolean);
}
|
|
|
|
/**
 * Scores how well a candidate record's text matches a requested address.
 *
 * Both strings are whitespace-normalized and lower-cased. Each address token
 * (split on whitespace/commas) found anywhere in the candidate text scores
 * 2 points when longer than three characters, otherwise 1.
 *
 * When the address names a unit ("Apt 4B", "Ste. 200", "Unit #5", "#12") the
 * score is adjusted by +4 if the candidate contains that unit as a whole
 * alphanumeric token and by -4 if it does not. A unit keyword only counts
 * when followed by whitespace, ".", "#", "-" or a digit, so names such as
 * "Stevens" are not treated as units.
 *
 * @param needle Requested address.
 * @param haystack Candidate text to compare against.
 * @returns The score; 0 when either input is blank.
 */
function scoreAddressMatch(needle: string, haystack: string): number {
  const normalizedNeedle = collapseWhitespace(needle).toLowerCase();
  const normalizedHaystack = collapseWhitespace(haystack).toLowerCase();
  if (!normalizedNeedle || !normalizedHaystack) return 0;

  let score = 0;
  for (const token of normalizedNeedle.split(/[\s,]+/).filter(Boolean)) {
    if (normalizedHaystack.includes(token)) {
      score += token.length > 3 ? 2 : 1;
    }
  }

  const unitMatch = normalizedNeedle.match(
    /(?:\b(?:apartment|apt|unit|suite|ste)(?=[\s.#\d-])\.?[\s#-]*|#\s*)([a-z0-9][a-z0-9-]*)/
  );
  const unit = unitMatch?.[1];
  if (unit) {
    // `unit` only contains [a-z0-9-], so it is safe to embed in a pattern.
    // Whole-token match: unit "4" must not be satisfied by "1400".
    const unitAsToken = new RegExp(`(?:^|[^a-z0-9])${unit}(?![a-z0-9])`);
    score += unitAsToken.test(normalizedHaystack) ? 4 : -4;
  }

  return score;
}
|
|
|
|
/**
 * Picks the CAD search result that best matches the requested address.
 *
 * Each result is scored by comparing the address against the concatenation of
 * its `address`, `legalDescription`, `ownerName`, `condo`, `geoId` and
 * `propertyId` fields. The first result with the highest score wins.
 *
 * @param address Requested address.
 * @param results Search result rows.
 * @returns The best result, or `null` when there are no results or none
 *          scores above zero.
 */
function pickBestCadResult(
  address: string,
  results: Array<Record<string, unknown>>
): Record<string, unknown> | null {
  let best: Record<string, unknown> | null = null;
  let bestScore = 0;

  for (const result of results) {
    const candidateText = [
      result.address,
      result.legalDescription,
      result.ownerName,
      result.condo,
      result.geoId,
      result.propertyId
    ]
      .map((item) => collapseWhitespace(String(item || "")))
      .join(" ");

    const score = scoreAddressMatch(address, candidateText);
    // Strict ">" keeps the earliest result among equal scores.
    if (score > bestScore) {
      best = result;
      bestScore = score;
    }
  }

  return best;
}
|
|
|
|
/**
 * Looks a property up on the Nueces CAD "esearch" site and scrapes its detail page.
 *
 * Flow:
 * 1. Fetch the appraisal district homepage and locate the property-search link
 *    (resolved against the homepage URL so relative links work).
 * 2. Fetch the search home page for the search token and default year.
 * 3. Search by formatted geographic ID when a parcel ID is given, otherwise by
 *    street number/name, otherwise by free-text keywords.
 * 4. Page through the results (at most 8 pages) until a match is found. When
 *    the address names a unit, only rows whose legal description contains
 *    "UNIT <unit>" as a whole token are eligible.
 * 5. Fetch the detail page and extract owner, address, values and exemptions,
 *    falling back to the search-result row for missing fields.
 *
 * With the module's default fetcher the site's session-token/cookie handshake
 * is performed with the global `fetch`; with an injected `fetchText` a single
 * POST is issued through it.
 *
 * @param address Requested address.
 * @param appraisalDistrictWebsite Appraisal district homepage URL.
 * @param parcelId Optional parcel identifier used as the preferred search key.
 * @param fetchText Transport used for page fetches.
 * @returns The property details, or `null` when the search link, search token
 *          or a matching result cannot be found.
 * @throws Whatever the transport or JSON parsing throws.
 */
async function enrichNuecesCadPropertyDetails(
  address: string,
  appraisalDistrictWebsite: string,
  parcelId: string | null | undefined,
  fetchText: FetchLike
): Promise<PropertyDetailsResolution | null> {
  type CadSearchPage = { totalPages?: unknown; resultsList?: unknown } | null | undefined;

  const parsedAddress = parseAddressForCadSearch(address);
  const homepageUrl = normalizeUrl(appraisalDistrictWebsite);
  const homepageHtml = await fetchText(homepageUrl);
  const propertySearchUrl = extractPropertySearchUrl(homepageHtml);
  if (!propertySearchUrl) return null;

  // Resolve against the homepage so relative and protocol-relative hrefs work.
  const normalizedPropertySearchUrl = new URL(propertySearchUrl, homepageUrl)
    .toString()
    .replace(/\/+$/, "");
  const searchHomeHtml = await fetchText(`${normalizedPropertySearchUrl}/`);
  const searchToken = extractSearchToken(searchHomeHtml);
  if (!searchToken) return null;

  const searchYear = parseCurrentYearFromSearchHome(searchHomeHtml);
  const formattedGeographicId = formatNuecesGeographicId(parcelId);
  const searchKeywords =
    formattedGeographicId ||
    (parsedAddress.streetNumber && parsedAddress.streetName
      ? `StreetNumber:${parsedAddress.streetNumber} StreetName:"${parsedAddress.streetName}"`
      : buildCadSearchKeywords(address, searchYear));

  const userAgentHeaders = { "user-agent": "property-assessor/1.0" };

  const fetchSearchPage = async (page: number): Promise<CadSearchPage> => {
    const searchResultsUrl = `${normalizedPropertySearchUrl}/search/SearchResults?keywords=${encodeURIComponent(searchKeywords)}`;

    if (fetchText === defaultFetchText) {
      // Live site: obtain a session token, load the result page for its
      // cookies and fresh search token, then POST for the JSON results.
      const sessionTokenUrl = `${normalizedPropertySearchUrl}/search/requestSessionToken`;
      const sessionTokenResponse = await fetch(sessionTokenUrl, { headers: userAgentHeaders });
      if (!sessionTokenResponse.ok) {
        throw new PublicRecordsLookupError(
          `Request failed for ${sessionTokenUrl}: ${sessionTokenResponse.status}`
        );
      }
      const sessionTokenPayload = await sessionTokenResponse.json();
      const searchSessionToken = sessionTokenPayload?.searchSessionToken;

      const resultUrl = `${normalizedPropertySearchUrl}/search/result?keywords=${encodeURIComponent(searchKeywords)}&searchSessionToken=${encodeURIComponent(String(searchSessionToken || ""))}`;
      const resultResponse = await fetch(resultUrl, { headers: userAgentHeaders });
      const cookieHeader = (resultResponse.headers.getSetCookie?.() || [])
        .map((item) => item.split(";", 1)[0])
        .join("; ");
      const resultPageHtml = await resultResponse.text();
      const liveSearchToken = extractSearchToken(resultPageHtml) || searchToken;

      const jsonResponse = await fetch(searchResultsUrl, {
        body: JSON.stringify({
          page,
          pageSize: 25,
          isArb: false,
          recaptchaToken: "",
          searchToken: liveSearchToken
        }),
        headers: {
          "content-type": "application/json",
          cookie: cookieHeader,
          referer: resultUrl,
          ...userAgentHeaders
        },
        method: "POST"
      });
      if (!jsonResponse.ok) {
        throw new PublicRecordsLookupError(
          `Request failed for ${searchResultsUrl}: ${jsonResponse.status}`
        );
      }
      return await jsonResponse.json();
    }

    const searchResultsRaw = await fetchText(searchResultsUrl, {
      body: JSON.stringify({
        page,
        pageSize: 25,
        isArb: false,
        recaptchaToken: "",
        searchToken
      }),
      headers: {
        "content-type": "application/json"
      },
      method: "POST"
    });
    return JSON.parse(searchResultsRaw);
  };

  const resultsOf = (page: CadSearchPage): Array<Record<string, unknown>> => {
    const rows: unknown = page?.resultsList;
    return Array.isArray(rows) ? rows : [];
  };

  // Whole-token unit match: "UNIT 1" must not accept "UNIT 12".
  const unitPattern = parsedAddress.unit
    ? new RegExp(
        `\\bUNIT\\s+${parsedAddress.unit.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}(?![A-Z0-9])`
      )
    : null;

  const collectedResults: Array<Record<string, unknown>> = [];
  const pickEligibleResult = (): Record<string, unknown> | null => {
    const eligible = unitPattern
      ? collectedResults.filter((row) =>
          unitPattern.test(String(row?.legalDescription || "").toUpperCase())
        )
      : collectedResults;
    return pickBestCadResult(address, eligible);
  };

  const firstPage = await fetchSearchPage(1);
  const totalPages = Math.min(Number(firstPage?.totalPages || 1), 8);
  collectedResults.push(...resultsOf(firstPage));
  let bestResult = pickEligibleResult();

  for (let page = 2; !bestResult && page <= totalPages; page += 1) {
    const nextRows = resultsOf(await fetchSearchPage(page));
    if (nextRows.length) {
      collectedResults.push(...nextRows);
      bestResult = pickEligibleResult();
    }
  }

  if (!bestResult) return null;
  const chosen = bestResult;

  const detailPath = collapseWhitespace(String(chosen.detailUrl || ""));
  const canUseDetailPath = Boolean(detailPath) && !/[?&]Id=/i.test(detailPath);
  const detailUrl = canUseDetailPath
    ? new URL(detailPath, `${normalizedPropertySearchUrl}/`).toString()
    : new URL(
        `/property/view/${encodeURIComponent(String(chosen.propertyId || ""))}?year=${encodeURIComponent(String(chosen.year || searchYear))}&ownerId=${encodeURIComponent(String(chosen.ownerId || ""))}`,
        `${normalizedPropertySearchUrl}/`
      ).toString();

  const detailHtml = await fetchText(detailUrl);

  // Appraised value from the search row. null/"" must not be read as 0
  // (Number(null) === 0), so only real numbers / numeric strings are accepted
  // before falling back to the displayed value.
  const listedAppraisedValue = (): number | null => {
    const raw = chosen.appraisedValue;
    if (typeof raw === "number" && Number.isFinite(raw)) return raw;
    if (typeof raw === "string" && raw.trim() !== "") {
      const numeric = Number(raw);
      if (Number.isFinite(numeric)) return numeric;
    }
    return parseCurrencyValue(String(chosen.appraisedValueDisplay || ""));
  };

  return {
    source: "nueces-esearch",
    sourceUrl: detailUrl,
    propertyId: collapseWhitespace(String(chosen.propertyId || "")) || null,
    ownerName:
      extractDetailField(detailHtml, "Owner Name") ||
      collapseWhitespace(String(chosen.ownerName || "")) ||
      null,
    situsAddress:
      extractDetailField(detailHtml, "Situs Address") ||
      extractDetailField(detailHtml, "Address") ||
      collapseWhitespace(String(chosen.address || "")) ||
      null,
    legalDescription:
      extractDetailField(detailHtml, "Legal Description") ||
      collapseWhitespace(String(chosen.legalDescription || "")) ||
      null,
    landValue: parseCurrencyValue(extractDetailField(detailHtml, "Land Value")),
    improvementValue: parseCurrencyValue(extractDetailField(detailHtml, "Improvement Value")),
    // `||` on purpose: a zero/missing page value falls through to the next source.
    marketValue:
      parseCurrencyValue(extractDetailField(detailHtml, "Market Value")) ||
      listedAppraisedValue(),
    assessedTotalValue:
      parseCurrencyValue(extractDetailField(detailHtml, "Assessed Value")) ||
      parseCurrencyValue(extractDetailField(detailHtml, "Appraised Value")) ||
      listedAppraisedValue(),
    exemptions: extractExemptions(detailHtml),
    notes: [
      "Official CAD property detail page exposed owner, value, and exemption data."
    ]
  };
}
|
|
|
|
/**
 * Best-effort enrichment with CAD property details.
 *
 * Only appraisal district websites whose URL contains "nuecescad.net" or
 * "ncadistrict.com" are supported; any other site yields `null`.
 *
 * @param address Requested address.
 * @param parcelId Optional parcel identifier.
 * @param appraisalDistrictWebsite Website from the county directory, if known.
 * @param fetchText Transport used for page fetches.
 * @returns The scraped details, or `null` when the site is unknown,
 *          unsupported, or scraping fails for any reason.
 */
async function tryEnrichPropertyDetails(
  address: string,
  parcelId: string | null | undefined,
  appraisalDistrictWebsite: string | null,
  fetchText: FetchLike
): Promise<PropertyDetailsResolution | null> {
  const website = collapseWhitespace(appraisalDistrictWebsite);
  if (!website) return null;

  const normalizedWebsite = normalizeUrl(website).toLowerCase();
  try {
    if (
      normalizedWebsite.includes("nuecescad.net") ||
      normalizedWebsite.includes("ncadistrict.com")
    ) {
      return await enrichNuecesCadPropertyDetails(address, website, parcelId, fetchText);
    }
  } catch {
    // Deliberate: enrichment is optional, so a scraping failure must not fail
    // the overall public-records resolution.
    return null;
  }

  return null;
}
|
|
|
|
/**
 * Geocodes an address to Census geographies.
 *
 * The Census one-line address geocoder is tried first. When it returns no
 * match, each fallback candidate address is sent to Nominatim until one
 * yields a hit; its coordinates are then resolved to Census geographies via
 * the Census coordinates endpoint.
 *
 * @param address Requested address.
 * @param fetchText Transport used for all requests.
 * @returns The match (Census match object, or a synthesized object with
 *          `matchedAddress`, `coordinates` and `geographies`), the Census URL
 *          that produced the geographies, and whether the fallback was used.
 * @throws PublicRecordsLookupError when no geocoder finds the address, the
 *         fallback coordinates are invalid, or the coordinate lookup returns
 *         no geographies. Transport and JSON parse errors propagate unchanged.
 */
async function geocodeAddress(address: string, fetchText: FetchLike): Promise<{
  match: any;
  censusGeocoderUrl: string;
  usedFallbackGeocoder: boolean;
}> {
  const query = new URLSearchParams({
    address,
    benchmark: "Public_AR_Current",
    vintage: "Current_Current",
    format: "json"
  });
  const url = `${CENSUS_GEOCODER_URL}?${query.toString()}`;
  const payload = JSON.parse(await fetchText(url));
  const matches = payload?.result?.addressMatches || [];
  if (matches.length) {
    return {
      match: matches[0],
      censusGeocoderUrl: url,
      usedFallbackGeocoder: false
    };
  }

  let fallbackMatch: any = null;
  for (const candidateAddress of buildFallbackAddressCandidates(address)) {
    const fallbackQuery = new URLSearchParams({
      q: candidateAddress,
      format: "jsonv2",
      limit: "1",
      countrycodes: "us",
      addressdetails: "1"
    });
    const fallbackUrl = `${NOMINATIM_SEARCH_URL}?${fallbackQuery.toString()}`;
    const fallbackPayload = JSON.parse(await fetchText(fallbackUrl));
    fallbackMatch = Array.isArray(fallbackPayload) ? fallbackPayload[0] : null;
    if (fallbackMatch) {
      break;
    }
  }
  if (!fallbackMatch) {
    throw new PublicRecordsLookupError(`No Census geocoder match found for address: ${address}`);
  }

  // Number(null) and Number("") are 0, which would pass the finite check as
  // coordinate 0; only real numbers and non-blank strings are converted.
  const toCoordinate = (value: unknown): number => {
    if (typeof value === "number") return value;
    if (typeof value === "string" && value.trim() !== "") return Number(value);
    return Number.NaN;
  };
  const latitude = toCoordinate(fallbackMatch.lat);
  const longitude = toCoordinate(fallbackMatch.lon);
  if (!Number.isFinite(latitude) || !Number.isFinite(longitude)) {
    throw new PublicRecordsLookupError(
      `Fallback geocoder returned invalid coordinates for address: ${address}`
    );
  }

  const coordinateQuery = new URLSearchParams({
    x: String(longitude),
    y: String(latitude),
    benchmark: "Public_AR_Current",
    vintage: "Current_Current",
    format: "json"
  });
  const coordinateUrl = `${CENSUS_COORDINATES_URL}?${coordinateQuery.toString()}`;
  const coordinatePayload = JSON.parse(await fetchText(coordinateUrl));
  const geographies = coordinatePayload?.result?.geographies;
  if (!geographies) {
    throw new PublicRecordsLookupError(
      `Census coordinate geographies lookup failed for address: ${address}`
    );
  }

  return {
    match: {
      matchedAddress: collapseWhitespace(fallbackMatch.display_name || address),
      coordinates: {
        x: longitude,
        y: latitude
      },
      geographies
    },
    censusGeocoderUrl: coordinateUrl,
    usedFallbackGeocoder: true
  };
}
|
|
|
|
/**
 * Finds the Texas Comptroller directory page for a county.
 *
 * Scans the directory index for links of the form
 * `<a href="....php">NNN County Name</a>` and compares normalized county names.
 *
 * @param countyName County name (with or without the word "County").
 * @param fetchText Transport used to fetch the directory index.
 * @returns Absolute URL of the county page. Absolute hrefs are returned
 *          unchanged; relative, root-relative and protocol-relative hrefs are
 *          resolved against the directory index URL.
 * @throws PublicRecordsLookupError when no link matches the county.
 */
async function findTexasCountyHref(countyName: string, fetchText: FetchLike): Promise<string> {
  const html = await fetchText(TEXAS_COUNTY_DIRECTORY_URL);
  const countyNorm = normalizeCountyName(countyName);
  const matches = html.matchAll(/<a href="([^"]+\.php)">\s*\d+\s+([^<]+)\s*<\/a>/gi);
  for (const match of matches) {
    const href = match[1];
    const label = match[2];
    if (normalizeCountyName(label) === countyNorm) {
      if (href.startsWith("http://") || href.startsWith("https://")) {
        return href;
      }
      // Standard URL resolution; plain concatenation would duplicate the
      // directory path for root-relative hrefs such as "/taxes/.../x.php".
      return new URL(href, TEXAS_COUNTY_DIRECTORY_URL).toString();
    }
  }
  throw new PublicRecordsLookupError(
    `Could not find Texas county directory page for county: ${countyName}`
  );
}
|
|
|
|
/**
 * Parses one office section of a Texas county directory page into a flat record.
 *
 * Extracted keys:
 * - `lastUpdated` from `<p class="file-info">Last Updated: ...</p>`;
 * - the lead heading `<h4>Key: Value</h4>`, stored under its own key text;
 * - every `<strong>Key:</strong> value` pair in the first `<p>` that follows
 *   an `<h4>`; "Website" stores the link href and "Email" stores the mailto
 *   address when such links are present, other values are stored as plain text;
 * - `mailingAddress`, `streetAddress` and `collectingUnit` from the paragraphs
 *   under the headings of the same name.
 *
 * @param sectionHtml HTML of a single office section.
 * @returns The parsed fields; keys that were not found are absent.
 */
function parseTexasSection(sectionHtml: string): Record<string, unknown> {
  const result: Record<string, unknown> = {};

  const lastUpdated = sectionHtml.match(
    /<p class="file-info">\s*Last Updated:\s*([^<]+)<\/p>/i
  );
  if (lastUpdated) {
    result.lastUpdated = collapseWhitespace(lastUpdated[1]);
  }

  const lead = sectionHtml.match(/<h4>\s*([^:<]+):\s*([^<]+)<\/h4>/i);
  if (lead) {
    result[lead[1].trim()] = collapseWhitespace(lead[2]);
  }

  const infoBlock = sectionHtml.match(/<h4>\s*[^<]+<\/h4>\s*<p>(.*?)<\/p>/is);
  if (infoBlock) {
    for (const match of infoBlock[1].matchAll(
      /<strong>\s*([^:<]+):\s*<\/strong>\s*(.*?)(?:<br\s*\/?>|$)/gis
    )) {
      const key = collapseWhitespace(match[1]);
      const rawValue = match[2];
      const hrefValue = extractAnchorHref(rawValue);
      if (key.toLowerCase() === "website" && hrefValue) {
        result[key] = hrefValue;
      } else if (
        key.toLowerCase() === "email" &&
        hrefValue &&
        hrefValue.startsWith("mailto:")
      ) {
        result[key] = hrefValue.replace(/^mailto:/i, "");
      } else {
        result[key] = stripHtml(rawValue);
      }
    }
  }

  // [heading text on the page, key in the returned record]
  const headings: Array<[string, string]> = [
    ["Mailing Address", "mailingAddress"],
    ["Street Address", "streetAddress"],
    ["Collecting Unit", "collectingUnit"]
  ];

  for (const [heading, key] of headings) {
    const match = sectionHtml.match(
      new RegExp(`<h4>\\s*${heading}\\s*<\\/h4>\\s*<p>(.*?)<\\/p>`, "is")
    );
    if (match) {
      result[key] = stripHtml(match[1]);
    }
  }

  return result;
}
|
|
|
|
/**
 * Fetches a county's Texas Comptroller directory page and parses its
 * "Appraisal District" and "Tax Assessor/Collector" sections.
 *
 * The appraisal section is the markup between the two `<h3>` headings; the
 * tax section runs from its heading to the end of the page. Both parsed
 * records are tagged with `directoryPage`.
 *
 * @param countyName County name.
 * @param fetchText Transport used for the directory requests.
 * @returns The directory page URL and the parsed sections
 *          (`taxAssessorCollector` is `null` when that heading is missing).
 * @throws PublicRecordsLookupError when the county page cannot be found or
 *         the Appraisal District section cannot be located.
 */
async function fetchTexasCountyOffices(
  countyName: string,
  fetchText: FetchLike
): Promise<{
  directoryPage: string;
  appraisalDistrict: Record<string, unknown>;
  taxAssessorCollector: Record<string, unknown> | null;
}> {
  const pageUrl = await findTexasCountyHref(countyName, fetchText);
  const html = await fetchText(pageUrl);
  const appraisalMatch = html.match(
    /<h3>\s*Appraisal District\s*<\/h3>(.*?)(?=<h3>\s*Tax Assessor\/Collector\s*<\/h3>)/is
  );
  const taxMatch = html.match(/<h3>\s*Tax Assessor\/Collector\s*<\/h3>(.*)$/is);
  if (!appraisalMatch) {
    throw new PublicRecordsLookupError(
      `Could not parse Appraisal District section for county: ${countyName}`
    );
  }

  const appraisalDistrict = parseTexasSection(appraisalMatch[1]);
  appraisalDistrict.directoryPage = pageUrl;

  const taxAssessorCollector = taxMatch ? parseTexasSection(taxMatch[1]) : null;
  if (taxAssessorCollector) {
    taxAssessorCollector.directoryPage = pageUrl;
  }

  return {
    directoryPage: pageUrl,
    appraisalDistrict,
    taxAssessorCollector
  };
}
|
|
|
|
/**
 * Resolves the official public-record jurisdiction for an address.
 *
 * The address is geocoded to Census geographies (state, county, 2020 block).
 * For Texas addresses with a known county, the Comptroller county directory
 * is consulted for the appraisal district and tax assessor-collector, and a
 * best-effort attempt is made to scrape property details from the appraisal
 * district site.
 *
 * @param address Address to resolve.
 * @param options Optional hints and overrides:
 *   - `parcelId`: parcel identifier, preferred as the CAD search key;
 *   - `listingGeoId` / `listingSourceUrl`: echoed back in `sourceIdentifierHints`;
 *   - `fetchText`: custom transport (defaults to the built-in fetch wrapper).
 * @returns The resolved jurisdiction, links, offices, recommendations and details.
 * @throws PublicRecordsLookupError when geocoding fails or, for Texas
 *         addresses, the county directory page cannot be found or parsed.
 */
export async function resolvePublicRecords(
  address: string,
  options: {
    parcelId?: string;
    listingGeoId?: string;
    listingSourceUrl?: string;
    fetchText?: FetchLike;
  } = {}
): Promise<PublicRecordsResolution> {
  const fetchText = options.fetchText || defaultFetchText;
  const { match, censusGeocoderUrl, usedFallbackGeocoder } = await geocodeAddress(
    address,
    fetchText
  );
  const geographies = match.geographies || {};
  // First entry of a geography layer, or {} when the layer is missing OR
  // empty (an empty array would otherwise yield undefined and crash below).
  const firstGeography = (layer: string): any => {
    const rows = geographies[layer];
    return (Array.isArray(rows) && rows[0]) || {};
  };
  const state = firstGeography("States");
  const county = firstGeography("Counties");
  const block = firstGeography("2020 Census Blocks");
  const coordinates = match.coordinates || {};

  let texasCountyDirectory: string | null = null;
  let texasPropertyTaxPortal: string | null = null;
  let appraisalDistrict: Record<string, unknown> | null = null;
  let taxAssessorCollector: Record<string, unknown> | null = null;
  let propertyDetails: PropertyDetailsResolution | null = null;

  const lookupRecommendations = [
    "Start from the official public-record jurisdiction instead of a listing-site geo ID.",
    "Try official address search first on the appraisal district site.",
    "If the listing exposes parcel/APN/account identifiers, use them as stronger search keys than ZPID or listing geo IDs."
  ];
  if (usedFallbackGeocoder) {
    lookupRecommendations.push(
      "The Census address lookup missed this address, so a fallback geocoder was used to obtain coordinates before resolving official Census geographies."
    );
  }

  if (state.STUSAB === "TX" && county.NAME) {
    const offices = await fetchTexasCountyOffices(county.NAME, fetchText);
    texasCountyDirectory = offices.directoryPage;
    texasPropertyTaxPortal = TEXAS_PROPERTY_TAX_PORTAL;
    appraisalDistrict = offices.appraisalDistrict;
    taxAssessorCollector = offices.taxAssessorCollector;
    propertyDetails = await tryEnrichPropertyDetails(
      address,
      options.parcelId,
      typeof offices.appraisalDistrict?.Website === "string"
        ? offices.appraisalDistrict.Website
        : null,
      fetchText
    );
    lookupRecommendations.push(
      "Use the Texas Comptroller county directory page as the official jurisdiction link in the final report.",
      "Attempt to retrieve assessed value, land value, improvement value, exemptions, and account number from the CAD website when a direct property page is publicly accessible."
    );
    if (propertyDetails) {
      lookupRecommendations.push(
        ...propertyDetails.notes,
        "Use the official CAD property-detail values in the final assessment instead of relying only on listing-site value hints."
      );
    }
  }

  const sourceIdentifierHints: Record<string, string> = {};
  if (options.parcelId) sourceIdentifierHints.parcelId = options.parcelId;
  if (options.listingGeoId) {
    sourceIdentifierHints.listingGeoId = options.listingGeoId;
    lookupRecommendations.push(
      "Treat listing geo IDs as regional hints only; do not use them as assessor record keys."
    );
  }
  if (options.listingSourceUrl) {
    sourceIdentifierHints.listingSourceUrl = options.listingSourceUrl;
  }

  return {
    requestedAddress: address,
    matchedAddress: match.matchedAddress || address,
    latitude: coordinates.y ?? null,
    longitude: coordinates.x ?? null,
    geoid: block.GEOID || null,
    state: {
      name: state.NAME || null,
      code: state.STUSAB || null,
      fips: state.STATE || null
    },
    county: {
      name: county.NAME || null,
      fips: county.COUNTY || null,
      geoid: county.GEOID || null
    },
    officialLinks: {
      censusGeocoder: censusGeocoderUrl,
      texasCountyDirectory,
      texasPropertyTaxPortal
    },
    appraisalDistrict,
    taxAssessorCollector,
    lookupRecommendations,
    sourceIdentifierHints,
    propertyDetails
  };
}
|