/**
 * Address identity helpers.
 *
 * Parses a free-form postal address into comparable tokens, builds a
 * hyphen-joined slug from it, and scores arbitrary candidate text against a
 * parsed identity.
 */

/**
 * Tokens dropped from both the street and the locality token lists:
 * street-type suffixes plus the "tx" state code.
 */
const STREET_STOP_WORDS = new Set([
  "st",
  "street",
  "rd",
  "road",
  "dr",
  "drive",
  "ave",
  "avenue",
  "blvd",
  "boulevard",
  "ln",
  "lane",
  "ct",
  "court",
  "cir",
  "circle",
  "way",
  "trl",
  "trail",
  "pkwy",
  "parkway",
  "tx"
]);

/** Spelled-out unit designators (regex source, non-capturing). */
const UNIT_WORD_LABEL_PATTERN = "(?:apartment|apt|suite|ste|unit)";

/** Any unit designator, including the bare "#" marker (regex source). */
const UNIT_LABEL_PATTERN = `(?:${UNIT_WORD_LABEL_PATTERN}|#)`;

/**
 * Splits a street line into [1] the street and [2] a trailing unit value.
 *
 * A spelled-out label must be followed by a separator (whitespace, ".", "#",
 * "-") or sit directly against a digit. Without that rule a street name whose
 * last word merely starts with a label ("100 Steele", "500 Unity") would be
 * cut in two and reported as having a unit.
 */
const UNIT_SUFFIX_REGEX = new RegExp(
  `^(.*?)\\s+(?:${UNIT_WORD_LABEL_PATTERN}(?:[.\\s#-]+|(?=\\d))|#\\s*)([a-z0-9-]+)$`,
  "i"
);

/**
 * Lower-cases the value, turns every character outside [a-z0-9#], whitespace
 * and "-" into a space, then splits on runs of whitespace and/or hyphens.
 *
 * @param {*} value - Text to tokenize; falsy values yield no tokens.
 * @returns {string[]} Non-empty lower-case tokens.
 */
function tokenize(value) {
  return String(value || "")
    .toLowerCase()
    .replace(/[^a-z0-9#\s-]/g, " ")
    .split(/[\s-]+/)
    .filter(Boolean);
}

/**
 * Collapses every whitespace run to one space and trims both ends.
 *
 * @param {*} value - Text to normalize; falsy values become "".
 * @returns {string} The normalized text.
 */
function collapseWhitespace(value) {
  return String(value || "").replace(/\s+/g, " ").trim();
}

/**
 * Escapes regular-expression metacharacters so the value matches literally.
 *
 * @param {*} value - Text to embed in a RegExp source (coerced to string).
 * @returns {string} The escaped text.
 */
function escapeRegex(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Tokenizes text and removes the stop words.
 *
 * @param {string} text - Street or locality text.
 * @returns {string[]} Tokens that are not in STREET_STOP_WORDS.
 */
function contentTokens(text) {
  return tokenize(text).filter((token) => !STREET_STOP_WORDS.has(token));
}

/**
 * Parses a raw address into the pieces used for matching.
 *
 * Everything before the first comma is the street line; everything after it
 * (commas replaced by spaces) is the locality. A trailing unit designator on
 * the street line ("Apt 4B", "Apt. 4B", "Apt #4B", "Unit12", "#4B") is split
 * off into `unitValue`.
 *
 * @param {*} rawAddress - Address text, e.g. "123 Main St Apt 4B, Austin, TX".
 * @returns {{
 *   raw: string,
 *   streetPart: string,
 *   streetWithoutUnit: string,
 *   locality: string,
 *   streetTokens: string[],
 *   localityTokens: string[],
 *   streetNumber: (string|null),
 *   unitValue: (string|null),
 *   hasUnit: boolean
 * }} The parsed identity. `streetNumber` is simply the first street token
 *   (or null when there is none); `unitValue` is lower-cased.
 * @throws {Error} "Missing address." when the input is empty or blank.
 */
export function parseAddressIdentity(rawAddress) {
  const address = collapseWhitespace(rawAddress);
  if (!address) {
    throw new Error("Missing address.");
  }

  const [streetPartRaw, ...restParts] = address.split(",");
  const streetPart = collapseWhitespace(streetPartRaw);
  const locality = collapseWhitespace(restParts.join(" "));

  const unitMatch = streetPart.match(UNIT_SUFFIX_REGEX);
  const streetWithoutUnit = collapseWhitespace(unitMatch ? unitMatch[1] : streetPart);
  const unitValue = unitMatch ? unitMatch[2].toLowerCase() : null;

  const streetTokens = contentTokens(streetWithoutUnit);
  const localityTokens = contentTokens(locality);

  return {
    raw: address,
    streetPart,
    streetWithoutUnit,
    locality,
    streetTokens,
    localityTokens,
    streetNumber: streetTokens[0] || null,
    unitValue,
    hasUnit: Boolean(unitValue)
  };
}

/**
 * Builds a hyphen-joined slug from an address: commas and "#" are removed and
 * the remaining whitespace-separated words are joined with "-". Letter case
 * is preserved.
 *
 * @param {*} rawAddress - Address text.
 * @returns {string} The slug ("" for empty input).
 */
export function buildZillowAddressSlug(rawAddress) {
  return collapseWhitespace(rawAddress)
    .replace(/[,#]/g, "")
    .trim()
    .split(/\s+/)
    .join("-");
}

/**
 * Reports whether candidate text mentions the unit value: after a unit label,
 * after a "#", or as a stand-alone token.
 *
 * @param {string} unitValue - Unit value from the identity.
 * @param {string} normalized - Lower-cased, whitespace-collapsed candidate.
 * @param {string} normalizedPadded - Candidate with punctuation replaced by
 *   spaces and one space of padding on each side.
 * @returns {boolean} True when any of the three forms is present.
 */
function candidateMentionsUnit(unitValue, normalized, normalizedPadded) {
  const unit = escapeRegex(unitValue);
  const labelled = new RegExp(`(?:^|\\s)${UNIT_LABEL_PATTERN}\\s*${unit}(?:\\s|$)`, "i");
  const hashed = new RegExp(`#\\s*${unit}(?:\\b|$)`, "i");
  const bare = new RegExp(`(?:^|\\s)${unit}(?:\\s|$)`, "i");

  return labelled.test(normalized) || hashed.test(normalized) || bare.test(normalizedPadded);
}

/**
 * Scores how well free-form candidate text matches a parsed address identity.
 *
 * Each distinct street/locality token found in the candidate as a whole token
 * scores one point; a matching unit adds two. The candidate counts as matched
 * when enough tokens are found (3 when the identity has a unit, otherwise 2,
 * capped at the number of distinct tokens) and the unit, if any, is present.
 * An identity without any tokens never matches, because there is nothing to
 * compare against.
 *
 * @param {{streetTokens: string[], localityTokens: string[],
 *   hasUnit: boolean, unitValue: (string|null)}} identity - Result of
 *   parseAddressIdentity.
 * @param {*} candidateText - Text to test, e.g. a listing title.
 * @returns {{matched: boolean, score: number, matchedTokens: string[],
 *   unitMatched: boolean}} The match verdict and its supporting details.
 */
export function scoreAddressCandidate(identity, candidateText) {
  const normalized = collapseWhitespace(candidateText).toLowerCase();
  // Punctuation becomes spaces and both ends are padded so that a plain
  // substring search for " token " only ever hits whole tokens.
  const normalizedPadded = ` ${normalized.replace(/[^a-z0-9#]+/g, " ")} `;

  const uniqueTokens = Array.from(
    new Set([...identity.streetTokens, ...identity.localityTokens])
  );
  const matchedTokens = uniqueTokens.filter((token) =>
    normalizedPadded.includes(` ${token} `)
  );

  const needsUnit = Boolean(identity.hasUnit && identity.unitValue);
  const unitMatched = needsUnit
    ? candidateMentionsUnit(identity.unitValue, normalized, normalizedPadded)
    : true;

  const minimumCoreMatches = identity.hasUnit ? 3 : 2;
  const requiredMatches = Math.min(minimumCoreMatches, uniqueTokens.length);
  const matched =
    uniqueTokens.length > 0 && matchedTokens.length >= requiredMatches && unitMatched;

  return {
    matched,
    score: matchedTokens.length + (unitMatched && identity.hasUnit ? 2 : 0),
    matchedTokens,
    unitMatched
  };
}