fix(amazon-shopping): verify prime and delivery filters

This commit is contained in:
2026-04-15 20:28:16 -05:00
parent a81a055ec6
commit fda0602ac9
20 changed files with 605 additions and 36 deletions
+21 -1
View File
@@ -31,6 +31,9 @@ Options:
--min-reviews N Minimum review count
--max-price N Maximum displayed product price
--max-unit-price N Maximum price per unit
--min-width N Minimum product width in inches
--require-prime Require Prime delivery verification
--delivery-by VALUE Require delivery timing, e.g. today, tomorrow, overnight
--max-search-pages N Search result pages to scan, 1-5 (default: 2)
--skip-details Do not open product detail pages
--dry-run Parse and print the planned request without Amazon network access
@@ -66,7 +69,7 @@ export function buildSearchUrl(query: string): string {
export function parseCliRequest(argv: string[]): SearchProductsRequest {
const args = minimist(argv, {
boolean: ["help", "json", "markdown", "allow-large-limit", "dry-run", "skip-details"],
boolean: ["help", "json", "markdown", "allow-large-limit", "dry-run", "skip-details", "require-prime"],
string: [
"query",
"limit",
@@ -75,6 +78,9 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest {
"min-reviews",
"max-price",
"max-unit-price",
"min-width",
"delivery-by",
"sort-by",
"max-search-pages"
],
alias: { h: "help", max: "limit" }
@@ -101,10 +107,24 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest {
const minReviews = parsePositiveInteger(args["min-reviews"], "min-reviews");
const maxPrice = parseNumber(args["max-price"], "max-price");
const maxUnitPrice = parseNumber(args["max-unit-price"], "max-unit-price");
const minWidth = parseNumber(args["min-width"], "min-width");
if (minRating !== undefined) filters.minRating = minRating;
if (minReviews !== undefined) filters.minReviews = minReviews;
if (maxPrice !== undefined) filters.maxPrice = maxPrice;
if (maxUnitPrice !== undefined) filters.maxUnitPrice = maxUnitPrice;
if (minWidth !== undefined) {
filters.minWidthInches = minWidth;
filters.widthComparison = "gte";
}
if (args["require-prime"]) filters.requirePrime = true;
if (args["delivery-by"]) filters.deliveryBy = String(args["delivery-by"]);
if (args["sort-by"]) {
const sortBy = String(args["sort-by"]);
if (sortBy !== "price" && sortBy !== "relevance") {
throw new Error("sort-by must be either price or relevance");
}
filters.sortBy = sortBy;
}
const json = Boolean(args.json);
const markdown = Boolean(args.markdown);
+31 -4
View File
@@ -76,15 +76,42 @@ function extractHistogramText(root: HTMLElement): string {
return parts.join(" ");
}
function deliveryFromText(text: string): DeliverySummary | undefined {
function deliveryFromText(text: string, primeSignal = false): DeliverySummary | undefined {
const display = text.replace(/\s+/g, " ").trim();
if (!display) {
return undefined;
return primeSignal ? { display: "Prime delivery available", prime: true } : undefined;
}
return {
display,
free: /\bfree\b/i.test(display),
prime: /\bprime\b/i.test(display)
prime: primeSignal || /\bprime\b/i.test(display)
};
}
/**
 * Reports whether any element under `root` carries a Prime marker in its
 * `id`, `class`, `aria-label`, or `alt` attribute.
 *
 * The attribute values of every matched element are concatenated into one
 * space-separated string, which is then tested case-insensitively.
 *
 * NOTE(review): the pattern includes a bare `\bprime\b`, so any descendant whose
 * id/class contains "prime" as a hyphen- or space-delimited token matches.
 * Confirm `root` is scoped tightly enough that unrelated page chrome cannot
 * trigger a positive.
 *
 * @param root Parsed element whose descendants are scanned.
 * @returns `true` when the combined attribute text matches the Prime pattern.
 */
function hasPrimeSignal(root: HTMLElement): boolean {
  const candidates = root.querySelectorAll("[id], [class], [aria-label], img[alt]");
  const perNodeText = candidates.map((node) => {
    const values = [
      attrOf(node, "id"),
      attrOf(node, "class"),
      attrOf(node, "aria-label"),
      attrOf(node, "alt")
    ];
    return values.join(" ");
  });
  const haystack = perNodeText.join(" ");
  const primePattern = /a-icon-prime|prime-logo|primeExclusive|primePopover|amazon\s+prime|\bprime\b/i;
  return primePattern.test(haystack);
}
/**
 * Combines two delivery summaries, preferring `detail` over `base`.
 *
 * When only one side is present it is returned as-is. When both are present:
 * `display` falls back to `base` if the detail text is empty, `free` and
 * `prime` are true if either side is truthy, and `fastestDate` falls back to
 * `base` only when the detail value is null or undefined.
 *
 * @param detail Preferred summary, if any.
 * @param base Fallback summary, if any.
 * @returns The merged summary, or `undefined` when neither input is present.
 */
function mergeDelivery(detail: DeliverySummary | undefined, base: DeliverySummary | undefined): DeliverySummary | undefined {
  if (detail && base) {
    const free = Boolean(detail.free || base.free);
    const prime = Boolean(detail.prime || base.prime);
    return {
      display: detail.display || base.display,
      free,
      prime,
      fastestDate: detail.fastestDate ?? base.fastestDate
    };
  }
  // At most one side is present here; hand back whichever exists.
  return detail ?? base;
}
@@ -113,7 +140,7 @@ export function extractDetailPage(html: string, base: ProductSearchResult): Prod
price: parseMoney(priceText) ?? base.price,
rating: parseRating(ratingText) ?? base.rating,
reviewCount: parseReviewCount(reviewText) ?? base.reviewCount,
delivery: deliveryFromText(deliveryText) ?? base.delivery,
delivery: mergeDelivery(deliveryFromText(deliveryText, hasPrimeSignal(root)), base.delivery),
availability: availability || base.availability,
seller: seller || base.seller,
bullets: extractBullets(root),
+40 -3
View File
@@ -1,4 +1,5 @@
import type { FilteredProducts, ProductFilters, ProductSearchResult } from "./types.js";
import { extractWidthInches } from "./product-metrics.js";
function passesMin(value: number | undefined, threshold: number, comparison: "gt" | "gte" | undefined): boolean {
if (value === undefined) {
@@ -29,16 +30,48 @@ function filterReasons(product: ProductSearchResult, filters: ProductFilters): s
reasons.push(`unit price ${product.unitPrice.display} above filter`);
}
}
if (filters.minWidthInches !== undefined) {
const width = extractWidthInches(product);
if (width === undefined) {
reasons.push("width unknown");
} else if (!passesMin(width, filters.minWidthInches, filters.widthComparison)) {
reasons.push(`width ${width} inches below filter`);
}
}
if (filters.requirePrime && !product.delivery?.prime) {
reasons.push("Prime delivery not verified");
}
if (filters.requireFreeDelivery && !product.delivery?.free) {
reasons.push("free delivery not verified");
}
if (filters.deliveryBy && !deliveryMatches(product.delivery?.display, filters.deliveryBy)) {
reasons.push(`${filters.deliveryBy} delivery not verified`);
}
return reasons;
}
function rankProducts(a: ProductSearchResult, b: ProductSearchResult): number {
/**
 * Decides whether a delivery display string satisfies a requested timing.
 *
 * The requested value is trimmed and lower-cased before comparison, so
 * "Today", " TOMORROW " and "overnight" all select the synonym-aware rules:
 * - "today" accepts "today" or "same-day"/"same day";
 * - "tomorrow" and "overnight" accept "tomorrow", "overnight",
 *   "next-day"/"next day" or "one-day"/"one day".
 * Any other value is matched as a case-insensitive substring of the display
 * text (a blank value therefore imposes no constraint).
 *
 * @param display Delivery text shown for the product, if any.
 * @param deliveryBy Requested delivery timing.
 * @returns `true` when the display text satisfies the request; always `false`
 *   when there is no display text.
 */
function deliveryMatches(display: string | undefined, deliveryBy: string): boolean {
  if (!display) {
    return false;
  }
  const normalized = display.toLowerCase();
  // Normalize the request too; previously only the substring fallback was
  // case-insensitive, so "Tomorrow" silently lost the synonym matching.
  const wanted = deliveryBy.trim().toLowerCase();
  if (wanted === "today") {
    return /\btoday\b|same[- ]day/.test(normalized);
  }
  if (wanted === "tomorrow" || wanted === "overnight") {
    return /\btomorrow\b|overnight|next[- ]day|one[- ]day/.test(normalized);
  }
  return normalized.includes(wanted);
}
/**
 * Maps a comparison mode to its display symbol.
 *
 * @param comparison `"gt"` for strictly greater; anything else (including
 *   `undefined`) is treated as greater-or-equal.
 * @returns `">"` for `"gt"`, otherwise `">="`.
 */
function comparisonSymbol(comparison: "gt" | "gte" | undefined): string {
  if (comparison === "gt") {
    return ">";
  }
  return ">=";
}
function rankProducts(a: ProductSearchResult, b: ProductSearchResult, filters: ProductFilters): number {
if (filters.sortBy === "price") {
return (a.price?.amount ?? Number.POSITIVE_INFINITY) - (b.price?.amount ?? Number.POSITIVE_INFINITY);
}
const ratingDiff = (b.rating ?? -1) - (a.rating ?? -1);
if (ratingDiff !== 0) return ratingDiff;
const reviewDiff = (b.reviewCount ?? -1) - (a.reviewCount ?? -1);
@@ -73,13 +106,17 @@ export function applyFiltersAndLimit(
...(filters.minRating !== undefined ? [`rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}`] : []),
...(filters.minReviews !== undefined ? [`reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}`] : []),
...(filters.maxPrice !== undefined ? [`price <= ${filters.maxPrice}`] : []),
...(filters.maxUnitPrice !== undefined ? [`unit price <= ${filters.maxUnitPrice}`] : [])
...(filters.maxUnitPrice !== undefined ? [`unit price <= ${filters.maxUnitPrice}`] : []),
...(filters.minWidthInches !== undefined ? [`width ${comparisonSymbol(filters.widthComparison)} ${filters.minWidthInches} inches`] : []),
...(filters.requirePrime ? ["Prime delivery"] : []),
...(filters.requireFreeDelivery ? ["free delivery"] : []),
...(filters.deliveryBy ? [`delivery by ${filters.deliveryBy}`] : [])
]
});
}
return {
results: passing.sort(rankProducts).slice(0, limit),
results: passing.sort((a, b) => rankProducts(a, b, filters)).slice(0, limit),
filteredOutCount: uniqueProducts.size - passing.length,
filteredOutReasons
};
@@ -0,0 +1,62 @@
import type { ProductSearchResult, ProductSpec } from "./types.js";
/** Returns the first decimal number found in `text`, or `undefined` when there is none. */
function parseDimensionNumber(text: string): number | undefined {
  const match = text.match(/([0-9]+(?:\.[0-9]+)?)/);
  return match ? Number(match[1]) : undefined;
}

/**
 * Tells whether a spec row plausibly describes the product's overall width.
 *
 * Rows whose name mentions a sub-part or packaging (seat, arm, door, package,
 * box, back, cushion) are rejected; otherwise the name must mention "width"
 * or "dimension(s)".
 */
function isOverallWidthSpec(spec: ProductSpec): boolean {
  const name = spec.name.toLowerCase();
  if (/seat|arm|door|package|box|back|cushion/.test(name)) {
    return false;
  }
  return /width|dimensions?/.test(name);
}

/**
 * Extracts a width from a single spec row, trying in order:
 * 1. a number explicitly labeled with `W` in the value (e.g. `85"W`, `85 inch W`);
 * 2. the first number in the value when the spec name mentions "width";
 * 3. the number at the `W` position when the spec name declares an axis order
 *    such as "D x W x H" or "LxWxH".
 *
 * Numbers are returned as written; no unit conversion is performed.
 *
 * @returns The width, or `undefined` when the row is not an overall-width spec
 *   or no width could be located.
 */
function widthFromSpec(spec: ProductSpec): number | undefined {
  if (!isOverallWidthSpec(spec)) {
    return undefined;
  }
  const name = spec.name.toLowerCase();
  const value = spec.value;

  // Unit is optional; spelled the same way as in the title pattern so that
  // "in", "inch" and "inches" are all accepted ("inch" was previously missed).
  const labeledWidth = value.match(/([0-9]+(?:\.[0-9]+)?)\s*(?:["”]|in(?:ch(?:es)?)?)?\s*W\b/i);
  if (labeledWidth) {
    return Number(labeledWidth[1]);
  }

  if (/width/.test(name)) {
    return parseDimensionNumber(value);
  }

  // Axis order declared in the spec name; `l` is accepted so that the common
  // "LxWxH" naming resolves to the second number.
  const orderMatch = name.match(/\b([dwhl])\s*x\s*([dwhl])(?:\s*x\s*([dwhl]))?\b/i);
  if (orderMatch) {
    const order = orderMatch.slice(1).filter(Boolean).map((part) => part.toLowerCase());
    const widthIndex = order.indexOf("w");
    const values = value.match(/[0-9]+(?:\.[0-9]+)?/g)?.map(Number) ?? [];
    if (widthIndex >= 0 && values[widthIndex] !== undefined) {
      return values[widthIndex];
    }
  }
  return undefined;
}

/**
 * Best-effort width lookup for a product.
 *
 * Spec rows are consulted first, in order; the first row that yields a width
 * wins. If none does, the first number in the title followed by an inch marker
 * (`"`, `”`, `in`, `inch`, `inches`) is used.
 *
 * @param product Product whose `specs` and `title` are inspected.
 * @returns The width as a number, or `undefined` when nothing matched.
 */
export function extractWidthInches(product: ProductSearchResult): number | undefined {
  for (const spec of product.specs) {
    const width = widthFromSpec(spec);
    if (width !== undefined) {
      return width;
    }
  }
  // The word boundary applies only to the spelled-out unit: a `\b` after a
  // quote mark fails whenever the quote is followed by a space or end of text,
  // which made titles like `58" Wide` unmatchable.
  const titleMatch = product.title.match(/\b([0-9]+(?:\.[0-9]+)?)\s*(?:["”]|in(?:ch(?:es)?)?\b)/i);
  return titleMatch ? Number(titleMatch[1]) : undefined;
}
/**
 * Renders a width for display with a trailing inch mark.
 *
 * @param width Width value, or `undefined` when it could not be determined.
 * @returns `"unknown"` for `undefined`; otherwise the number followed by `"`,
 *   with no decimals for integers and one decimal place for everything else.
 */
export function formatWidthInches(width: number | undefined): string {
  if (width === undefined) {
    return "unknown";
  }
  const fractionDigits = Number.isInteger(width) ? 0 : 1;
  return `${width.toFixed(fractionDigits)}"`;
}
+42 -2
View File
@@ -2,7 +2,12 @@ import type { ParsedNaturalLanguageRequest, ProductFilters } from "./types.js";
function cleanQuery(text: string): string {
return text
.replace(/\breview score of\b/gi, " ")
.replace(/\brating of\b/gi, " ")
.replace(/\bof\s+in\s+width\b/gi, " ")
.replace(/\bin\s+width\b/gi, " ")
.replace(/\b(?:that|and|with|have)\b/gi, " ")
.replace(/[,\s]+/g, " ")
.replace(/\s+/g, " ")
.replace(/\s+(and|or|a)$/i, "")
.trim();
@@ -29,6 +34,38 @@ export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguag
remaining = removeMatched(remaining, limitMatch);
}
const sortByPriceMatch = remaining.match(/\b(?:by price|sort(?:ed)? by price|lowest price|cheapest|least expensive)\b/i);
if (sortByPriceMatch) {
filters.sortBy = "price";
remaining = removeMatched(remaining, sortByPriceMatch);
}
const deliveryTomorrowMatch = remaining.match(/\b(?:delivery|delivered|arrives?|shipping|ships?)\s+(?:by\s+)?tomorrow\b/i);
const deliveryTodayMatch = remaining.match(/\b(?:delivery|delivered|arrives?|shipping|ships?)\s+(?:by\s+)?today\b/i)
?? remaining.match(/\bsame[- ]day\s+(?:delivery|shipping)\b/i);
const overnightMatch = remaining.match(/\bovernight\s+(?:delivery|shipping)\b/i)
?? remaining.match(/\bnext[- ]day\s+(?:delivery|shipping)\b/i);
const deliveryMatch = overnightMatch ?? deliveryTomorrowMatch ?? deliveryTodayMatch;
if (deliveryMatch) {
filters.deliveryBy = overnightMatch ? "overnight" : deliveryTomorrowMatch ? "tomorrow" : "today";
remaining = removeMatched(remaining, deliveryMatch);
}
const primeMatch = remaining.match(/\b(?:(?:shipped|ships|shipping|delivery|delivered)\s+(?:with|by|from)\s+)?prime\b/i);
if (primeMatch) {
filters.requirePrime = true;
remaining = removeMatched(remaining, primeMatch);
}
const widthMatch = remaining.match(/\b(?:width\s*(?:of\s*)?)?([0-9]+(?:\.[0-9]+)?)\s*(?:inches|inch|in\.?|")\s*(?:or\s+)?(?:wider|wide|larger|longer)\b/i)
?? remaining.match(/\b([0-9]+(?:\.[0-9]+)?)\s*(?:inches|inch|in\.?|")\s*(?:or\s+)?(?:wider|wide|larger|longer)\s+(?:in\s+)?width\b/i)
?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-9]+(?:\.[0-9]+)?)\s*(?:inches|inch|in\.?|")\s+(?:wide|width)\b/i);
if (widthMatch) {
filters.minWidthInches = Number(widthMatch[1]);
filters.widthComparison = "gte";
remaining = removeMatched(remaining, widthMatch);
}
const unitPriceMatch = remaining.match(/\b(?:cost\s+)?(?:less than|under|below)\s+\$([0-9]+(?:\.[0-9]{1,2})?)\s*(?:each|per\b|\/\s*(?:count|unit|item))\b/i);
if (unitPriceMatch) {
filters.maxUnitPrice = Number(unitPriceMatch[1]);
@@ -42,7 +79,8 @@ export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguag
}
const exclusiveReviews = remaining.match(/\b(?:over|more than|above)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i);
const inclusiveReviews = remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i);
const inclusiveReviews = remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i)
?? remaining.match(/\b([0-9][0-9,]*)\s*\+\s*(?:reviews?|ratings?)\b/i);
const reviewMatch = exclusiveReviews ?? inclusiveReviews;
if (reviewMatch) {
filters.minReviews = Number(reviewMatch[1].replace(/,/g, ""));
@@ -51,7 +89,9 @@ export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguag
}
const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:(?:review score|rating)\s+of\s+|rating\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i);
const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i)
const inclusiveRating = remaining.match(/\b(?:review score|rating)\s+of\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\s+(?:or|and)\s+(?:higher|better)\b/i)
?? remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i)
?? remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+(?:or|and)\s+(?:higher|better)\b/i)
?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i);
const ratingMatch = exclusiveRating ?? inclusiveRating;
if (ratingMatch) {
+84 -16
View File
@@ -1,4 +1,5 @@
import type { ProductFilters, ProductSearchResult, SearchProductsResponse } from "./types.js";
import { extractWidthInches, formatWidthInches } from "./product-metrics.js";
export interface ResponseInput {
query: string;
@@ -33,25 +34,87 @@ function formatFilters(filters: ProductFilters): string {
filters.minRating !== undefined ? `rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}` : "",
filters.minReviews !== undefined ? `reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}` : "",
filters.maxPrice !== undefined ? `price <= $${filters.maxPrice}` : "",
filters.maxUnitPrice !== undefined ? `unit price <= $${filters.maxUnitPrice}` : ""
filters.maxUnitPrice !== undefined ? `unit price <= $${filters.maxUnitPrice}` : "",
filters.minWidthInches !== undefined ? `width ${filters.widthComparison ?? "gte"} ${filters.minWidthInches} inches` : "",
filters.requirePrime ? "Prime delivery" : "",
filters.requireFreeDelivery ? "free delivery" : "",
filters.deliveryBy ? `delivery by ${filters.deliveryBy}` : "",
filters.sortBy === "price" ? "sort by price" : ""
].filter(Boolean);
return parts.length > 0 ? parts.join(", ") : "none";
}
function formatProduct(product: ProductSearchResult, index: number): string {
const specs = product.specs.slice(0, 3).map((spec) => `${spec.name}: ${spec.value}`).join("; ");
const lines = [
`${index}. ${product.title}`,
` Link: ${product.url}`,
` Price: ${product.price?.display ?? "unknown"}${product.unitPrice ? ` (${product.unitPrice.display})` : ""}`,
` Rating: ${product.rating ?? "unknown"} stars; reviews: ${product.reviewCount ?? "unknown"}`,
` Delivery: ${product.delivery?.display ?? "unknown"}`,
specs ? ` Specs: ${specs}` : "",
product.bullets[0] ? ` Notes: ${product.bullets.slice(0, 2).join(" ")}` : "",
product.missingFields.length > 0 ? ` Missing: ${product.missingFields.join(", ")}` : "",
product.isSponsored ? " Sponsored: yes" : ""
].filter(Boolean);
return lines.join("\n");
/**
 * Makes a string safe to place inside a Markdown table cell.
 *
 * Pipe characters are backslash-escaped, every run of whitespace (including
 * newlines) collapses to a single space, and the result is trimmed.
 */
function escapeCell(value: string): string {
  const pipesEscaped = value.split("|").join("\\|");
  const singleSpaced = pipesEscaped.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Builds the pass/fail suffix appended to a table cell.
 *
 * @param passes Whether the product satisfies the filter; falsy values
 *   (including `undefined`) count as a failure.
 * @param enabled Whether the corresponding filter is active.
 * @returns `""` when the filter is inactive, otherwise `" OK"` or `" NO"`.
 */
function marker(passes: boolean | undefined, enabled: boolean): string {
  if (enabled) {
    return passes ? " OK" : " NO";
  }
  return "";
}
/**
 * Renders the Width column: the formatted width plus an OK/NO marker when a
 * minimum-width filter is active.
 *
 * An unknown width never passes. A missing threshold is treated as 0 for the
 * comparison, but the marker is only shown when `minWidthInches` is set.
 */
function widthCell(product: ProductSearchResult, filters: ProductFilters): string {
  const width = extractWidthInches(product);
  const threshold = filters.minWidthInches ?? 0;
  let passes = false;
  if (width !== undefined) {
    passes = filters.widthComparison === "gt" ? width > threshold : width >= threshold;
  }
  const label = formatWidthInches(width);
  const suffix = marker(passes, filters.minWidthInches !== undefined);
  return `${label}${suffix}`;
}
/**
 * Renders the Prime column: "Prime" when the product's delivery summary is
 * flagged as Prime, otherwise "not verified". An OK/NO marker is appended only
 * when the Prime filter is requested.
 */
function primeCell(product: ProductSearchResult, filters: ProductFilters): string {
  const filterActive = Boolean(filters.requirePrime);
  const verified = Boolean(product.delivery?.prime);
  return verified
    ? `Prime${marker(true, filterActive)}`
    : `not verified${marker(false, filterActive)}`;
}
/**
 * Renders the Delivery column: the delivery display text (or "unknown"), with
 * an OK/NO marker appended when a delivery-timing filter is active.
 *
 * The requested timing is trimmed and lower-cased before comparison, so
 * "Today" or " TOMORROW " select the synonym-aware rules:
 * - "today" accepts "today" or "same-day"/"same day";
 * - "tomorrow" and "overnight" accept "tomorrow", "overnight",
 *   "next-day"/"next day" or "one-day"/"one day".
 * Any other value is matched as a case-insensitive substring. A product with
 * no delivery text never passes, so the "unknown" placeholder itself cannot
 * satisfy a request.
 */
function deliveryCell(product: ProductSearchResult, filters: ProductFilters): string {
  const rawDisplay = product.delivery?.display;
  const display = rawDisplay ?? "unknown";
  if (!filters.deliveryBy) {
    return display;
  }
  const normalized = display.toLowerCase();
  // Normalize the request as well; previously only the substring fallback was
  // case-insensitive, so "Tomorrow" silently lost the synonym matching.
  const wanted = filters.deliveryBy.trim().toLowerCase();
  let passes = false;
  if (rawDisplay) {
    if (wanted === "today") {
      passes = /\btoday\b|same[- ]day/.test(normalized);
    } else if (wanted === "tomorrow" || wanted === "overnight") {
      passes = /\btomorrow\b|overnight|next[- ]day|one[- ]day/.test(normalized);
    } else {
      passes = normalized.includes(wanted);
    }
  }
  return `${display}${marker(passes, true)}`;
}
/**
 * Builds the Markdown result table as an array of lines: a header row, an
 * alignment row, then one row per product numbered from 1.
 *
 * Title and delivery cells are passed through `escapeCell`; the remaining
 * cells are inserted as-is. The header and alignment rows are emitted even
 * when `products` is empty.
 */
function resultTable(products: ProductSearchResult[], filters: ProductFilters): string[] {
  const header = "| # | Product | Price | Rating | Reviews | Width | Prime | Delivery | Link |";
  const alignment = "|---|---|---:|---:|---:|---:|---|---|---|";
  const body = products.map((product, index) => {
    const cells = [
      String(index + 1),
      escapeCell(product.title),
      product.price?.display ?? "unknown",
      `${product.rating ?? "unknown"} stars`,
      product.reviewCount?.toLocaleString("en-US") ?? "unknown",
      widthCell(product, filters),
      primeCell(product, filters),
      escapeCell(deliveryCell(product, filters)),
      `[Amazon](${product.url})`
    ];
    return `| ${cells.join(" | ")} |`;
  });
  return [header, alignment, ...body];
}
/**
 * Produces one bullet line per product that has something worth flagging:
 * missing fields, sponsored status, or extraction notes. Products with none of
 * these are omitted, and input order is preserved.
 *
 * @returns Lines of the form `- <title>: <note>; <note>…`.
 */
function metadataLines(products: ProductSearchResult[]): string[] {
  return products.reduce<string[]>((lines, product) => {
    const notes: string[] = [];
    if (product.missingFields.length > 0) {
      notes.push(`missing ${product.missingFields.join(", ")}`);
    }
    if (product.isSponsored) {
      notes.push("sponsored");
    }
    // Skip the notes entry when it joins to an empty string.
    const extraction = product.extractionNotes.join("; ");
    if (extraction) {
      notes.push(extraction);
    }
    if (notes.length > 0) {
      lines.push(`- ${product.title}: ${notes.join("; ")}`);
    }
    return lines;
  }, []);
}
export function createMarkdownReport(response: SearchProductsResponse): string {
@@ -63,7 +126,12 @@ export function createMarkdownReport(response: SearchProductsResponse): string {
`Results returned: ${response.results.length} (filtered out: ${response.filteredOutCount})`,
response.warnings.length > 0 ? `Warnings: ${response.warnings.join("; ")}` : "",
"",
...response.results.map((product, index) => formatProduct(product, index + 1))
"## Best Matches",
"",
response.results.length > 0 ? "" : "No products matched all requested filters.",
...resultTable(response.results, response.filters),
"",
...metadataLines(response.results)
].filter((line) => line !== "");
return `${lines.join("\n")}\n`;
}
+16 -5
View File
@@ -50,20 +50,31 @@ function detectChallenge(html: string): boolean {
return /robot check|enter the characters you see|captcha|automated access|access denied/i.test(html);
}
function deliveryFromText(text: string): DeliverySummary | undefined {
function deliveryFromText(text: string, primeSignal = false): DeliverySummary | undefined {
const compact = text.replace(/\s+/g, " ").trim();
const deliveryMatch = compact.match(/((?:FREE\s+)?delivery[^.]*?(?:Tomorrow|Today|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)?)/i);
const deliveryMatch = compact.match(/((?:FREE\s+)?delivery\b[^.]+)/i);
if (!deliveryMatch) {
return undefined;
return primeSignal ? { display: "Prime delivery available", prime: true } : undefined;
}
const display = deliveryMatch[1].trim();
return {
display,
free: /\bfree\b/i.test(display),
prime: /\bprime\b/i.test(compact)
prime: primeSignal || /\bprime\b/i.test(display)
};
}
/**
 * Reports whether a search-result card carries a Prime marker in the `class`,
 * `aria-label`, or `alt` attribute of any descendant element.
 *
 * Missing attributes contribute an empty string; all values are concatenated
 * into one space-separated string and tested case-insensitively.
 *
 * @param card Parsed element for a single search result.
 * @returns `true` when the combined attribute text matches the Prime pattern.
 */
function hasPrimeSignal(card: HTMLElement): boolean {
  const candidates = card.querySelectorAll("[class], [aria-label], img[alt]");
  const perNodeText = candidates.map((node) => {
    const classText = attrOf(node, "class") ?? "";
    const labelText = attrOf(node, "aria-label") ?? "";
    const altText = attrOf(node, "alt") ?? "";
    return [classText, labelText, altText].join(" ");
  });
  const haystack = perNodeText.join(" ");
  const primePattern = /a-icon-prime|prime-logo|amazon\s+prime|\bprime\b/i;
  return primePattern.test(haystack);
}
function firstText(card: HTMLElement, selectors: string[]): string {
for (const selector of selectors) {
const value = textOf(card.querySelector(selector));
@@ -111,7 +122,7 @@ export function extractSearchPage(html: string, currentUrl: string): SearchPageE
const ariaText = card.querySelectorAll("[aria-label]")
.map((node) => attrOf(node, "aria-label") ?? "")
.join(" ");
const delivery = deliveryFromText(allText);
const delivery = deliveryFromText(allText, hasPrimeSignal(card));
const product: ProductSearchResult = {
asin,
title,
+3
View File
@@ -15,11 +15,14 @@ export interface ProductFilters {
reviewCountComparison?: "gt" | "gte";
maxPrice?: number;
maxUnitPrice?: number;
minWidthInches?: number;
widthComparison?: "gt" | "gte";
includeKeywords: string[];
excludeKeywords: string[];
requirePrime?: boolean;
requireFreeDelivery?: boolean;
deliveryBy?: string;
sortBy?: "relevance" | "price";
}
export interface ProductSearchResult {