feat(amazon-shopping): parse filters and extract search candidates

This commit is contained in:
2026-04-15 18:31:44 -05:00
parent 8ad532545d
commit ef326896f4
15 changed files with 822 additions and 10 deletions
+134 -1
View File
@@ -8,7 +8,8 @@
"name": "amazon-shopping-scripts",
"version": "1.0.0",
"dependencies": {
"minimist": "^1.2.8"
"minimist": "^1.2.8",
"node-html-parser": "^7.1.0"
},
"devDependencies": {
"@types/minimist": "^1.2.5",
@@ -476,6 +477,107 @@
"undici-types": "~7.16.0"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
"license": "ISC"
},
"node_modules/css-select": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz",
"integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz",
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "BSD-2-Clause"
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"license": "BSD-2-Clause",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz",
"integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==",
"license": "BSD-2-Clause",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/esbuild": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz",
@@ -546,6 +648,15 @@
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/he": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz",
"integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==",
"license": "MIT",
"bin": {
"he": "bin/he"
}
},
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
@@ -555,6 +666,28 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/node-html-parser": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-7.1.0.tgz",
"integrity": "sha512-iJo8b2uYGT40Y8BTyy5ufL6IVbN8rbm/1QK2xffXU/1a/v3AAa0d1YAoqBNYqaS4R/HajkWIpIfdE6KcyFh1AQ==",
"license": "MIT",
"dependencies": {
"css-select": "^5.1.0",
"he": "1.2.0"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
+2 -1
View File
@@ -10,7 +10,8 @@
"typecheck": "tsc --noEmit"
},
"dependencies": {
"minimist": "^1.2.8"
"minimist": "^1.2.8",
"node-html-parser": "^7.1.0"
},
"devDependencies": {
"@types/minimist": "^1.2.5",
+7 -8
View File
@@ -3,6 +3,7 @@
import minimist from "minimist";
import { fileURLToPath } from "node:url";
import { parseNaturalLanguageRequest } from "./query-parser.js";
import type { ProductFilters, SearchProductsRequest, SearchProductsResponse } from "./types.js";
export interface CliDeps {
@@ -76,12 +77,13 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest {
alias: { h: "help" }
});
const query = String(args.query ?? args._.join(" ")).trim();
if (!query) {
const rawQuery = String(args.query ?? args._.join(" ")).trim();
if (!rawQuery) {
throw new Error("A product query is required");
}
const limit = parsePositiveInteger(args.limit, "limit") ?? 15;
const natural = parseNaturalLanguageRequest(rawQuery);
const limit = parsePositiveInteger(args.limit, "limit") ?? natural.limit ?? 15;
if (limit > 30 && !args["allow-large-limit"]) {
throw new Error("Requested limits above 30 require --allow-large-limit or a batched run");
}
@@ -91,10 +93,7 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest {
throw new Error("max-search-pages must be 5 or less");
}
const filters: ProductFilters = {
includeKeywords: [],
excludeKeywords: []
};
const filters: ProductFilters = { ...natural.filters };
const minRating = parseNumber(args["min-rating"], "min-rating");
const minReviews = parsePositiveInteger(args["min-reviews"], "min-reviews");
const maxPrice = parseNumber(args["max-price"], "max-price");
@@ -108,7 +107,7 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest {
const markdown = Boolean(args.markdown);
return {
query,
query: natural.query || rawQuery,
filters,
limit,
maxSearchPages,
+112
View File
@@ -0,0 +1,112 @@
import type { MoneyValue, StarBreakdown, UnitCountExtraction } from "./types.js";
/**
 * Extracts the first US-dollar amount from a piece of display text.
 *
 * Whitespace runs are collapsed before matching and the collapsed text is
 * returned verbatim as `display`. Thousands separators are stripped and up to
 * two decimal places are read.
 *
 * @param text - Raw text that may contain a `$`-prefixed amount.
 * @returns The amount tagged as USD, or `undefined` when `text` is empty or
 *   contains no `$` amount.
 */
export function parseMoney(text: string | undefined | null): MoneyValue | undefined {
  if (!text) {
    return undefined;
  }

  const display = text.replace(/\s+/g, " ").trim();
  const digits = /\$\s*([0-9][0-9,]*(?:\.[0-9]{1,2})?)/.exec(display)?.[1];
  if (digits === undefined) {
    return undefined;
  }

  const amount = Number(digits.split(",").join(""));
  return Number.isFinite(amount) ? { amount, currency: "USD", display } : undefined;
}
/**
 * Parses a per-unit price such as `$0.38/Count`.
 *
 * The text must contain both an amount marker (a `$` or `/` followed by a
 * digit) and a unit marker (`/`, "per", "each" or "count"); otherwise it is
 * treated as an ordinary price and rejected.
 *
 * @param text - Candidate unit-price text.
 * @returns The result of `parseMoney` on the text, or `undefined` when the
 *   text is empty or does not look like a unit price.
 */
export function parseUnitPrice(text: string | undefined | null): MoneyValue | undefined {
  if (!text) {
    return undefined;
  }

  const hasAmountMarker = /[/]\s*\d|\$\s*\d/.test(text);
  const hasUnitMarker = /(\/|\bper\b|\beach\b|\bcount\b)/i.test(text);
  return hasAmountMarker && hasUnitMarker ? parseMoney(text) : undefined;
}
/**
 * Reads a star rating from text such as "4.6 out of 5 stars" or "rated 4.2".
 *
 * The "N (out of) 5 stars" wording is tried first; the "rated N" wording is
 * only consulted when the first does not match.
 *
 * @param text - Text (typically an aria-label) that may describe a rating.
 * @returns The rating as a number, or `undefined` when none is found.
 */
export function parseRating(text: string | undefined | null): number | undefined {
  if (!text) {
    return undefined;
  }

  const wordings = [
    /([0-5](?:\.[0-9])?)\s*(?:out of\s*)?5\s*stars?/i,
    /\brated\s+([0-5](?:\.[0-9])?)/i
  ];
  for (const wording of wordings) {
    const captured = wording.exec(text)?.[1];
    if (captured !== undefined) {
      const rating = Number(captured);
      return Number.isFinite(rating) ? rating : undefined;
    }
  }
  return undefined;
}
/**
 * Reads a review/rating count from text such as "1,234 ratings".
 *
 * @param text - Text that may contain a number followed by "rating(s)" or
 *   "review(s)".
 * @returns The integer count with thousands separators removed, or
 *   `undefined` when no such phrase is present.
 */
export function parseReviewCount(text: string | undefined | null): number | undefined {
  const captured = text ? /([0-9][0-9,]*)\s*(?:ratings?|reviews?)/i.exec(text)?.[1] : undefined;
  if (captured === undefined) {
    return undefined;
  }

  const count = Number(captured.replace(/,/g, ""));
  return Number.isInteger(count) ? count : undefined;
}
/**
 * Parses a star histogram written as "5 star 72% 4 star 15% ...".
 *
 * Each "N star P%" pair sets the field for that star level; if a level is
 * repeated, the last occurrence wins. Levels that are not mentioned are left
 * unset.
 *
 * @param text - Text containing the visible histogram.
 * @returns The percentages keyed by star level with `basis: "percent"`, or
 *   `undefined` when the text is empty or contains no "N star P%" pair.
 */
export function parseStarBreakdown(text: string | undefined | null): StarBreakdown | undefined {
  if (!text) {
    return undefined;
  }

  const hits = Array.from(text.matchAll(/([1-5])\s*star\s*([0-9]{1,3})\s*%/gi));
  if (hits.length === 0) {
    return undefined;
  }

  const fieldByStar: Record<string, keyof Omit<StarBreakdown, "basis">> = {
    "5": "five",
    "4": "four",
    "3": "three",
    "2": "two",
    "1": "one"
  };
  const percentages: Partial<Omit<StarBreakdown, "basis">> = {};
  hits.forEach((hit) => {
    const field = fieldByStar[hit[1]];
    if (field) {
      percentages[field] = Number(hit[2]);
    }
  });

  return {
    ...percentages,
    basis: "percent"
  };
}
/**
 * Extracts how many units a listing contains from title-like text.
 *
 * Patterns are tried in order of decreasing confidence and the first one that
 * yields a positive integer wins:
 * - high: "100 Count" / "100ct", "Pack of 6", "6-pack"
 * - medium: "Set of 8"
 * - low: "6 bulbs", "3 cables", "10 pieces", "4 pcs"
 *
 * @param text - Product title or similar text.
 * @returns The count, the confidence of the pattern that matched, and the
 *   exact matched phrase as `source`; `undefined` when nothing matches.
 */
export function extractUnitCount(text: string | undefined | null): UnitCountExtraction | undefined {
  if (!text) {
    return undefined;
  }

  const patterns = [
    // The lookbehind stops a longer number from being truncated to its last
    // four digits ("12345 count" must not read as 2345) while still allowing
    // forms like "2x50 Count", which a plain \b would reject.
    { pattern: /(?<!\d)(\d{1,4})\s*[- ]?(?:count|ct)\b/i, confidence: "high" as const },
    { pattern: /\bpack\s+of\s+(\d{1,4})\b/i, confidence: "high" as const },
    { pattern: /\b(\d{1,4})\s*[- ]?pack\b/i, confidence: "high" as const },
    { pattern: /\bset\s+of\s+(\d{1,4})\b/i, confidence: "medium" as const },
    { pattern: /\b(\d{1,4})\s+(?:bulbs?|cables?|pieces?|pcs)\b/i, confidence: "low" as const }
  ];

  for (const { pattern, confidence } of patterns) {
    const match = text.match(pattern);
    if (!match) {
      continue;
    }

    const count = Number(match[1]);
    // A zero count (e.g. "0 count") is not usable; keep looking at weaker patterns.
    if (Number.isInteger(count) && count > 0) {
      return {
        count,
        confidence,
        source: match[0]
      };
    }
  }

  return undefined;
}
@@ -0,0 +1,68 @@
import type { ParsedNaturalLanguageRequest, ProductFilters } from "./types.js";
/**
 * Tidies the text left over after filter phrases were cut out of a request.
 *
 * Connector words ("that", "and", "with", "have") are removed wherever they
 * occur, whitespace is collapsed, and one dangling trailing "and" / "or" / "a"
 * is dropped.
 *
 * @param text - Remaining request text, possibly with gaps and stray connectors.
 * @returns The cleaned search query.
 */
function cleanQuery(text: string): string {
  return text
    .replace(/\b(?:that|and|with|have)\b/gi, " ")
    .replace(/\s+/g, " ")
    // Trim first: removed filters leave trailing whitespace, which would stop
    // the `$`-anchored connector strip below from ever matching.
    .trim()
    .replace(/\s+(and|or|a)$/i, "");
}
/**
 * Replaces the span matched by `match` in `text` with a single space.
 *
 * The span is located by `match.index` so that exactly the matched occurrence
 * is removed. A plain string replace would remove the first literal occurrence
 * instead, which can be an earlier, unrelated substring (e.g. the "top 5"
 * inside "laptop 5 top 5").
 *
 * @param text - The string the match was produced from.
 * @param match - Result of `text.match(...)`, or `null` when nothing matched.
 * @returns `text` with the matched span blanked out, or `text` unchanged when
 *   `match` is `null`.
 */
function removeMatched(text: string, match: RegExpMatchArray | null): string {
  if (!match) {
    return text;
  }

  const matched = match[0];
  const start = match.index;
  if (start !== undefined && text.startsWith(matched, start)) {
    return `${text.slice(0, start)} ${text.slice(start + matched.length)}`;
  }

  // No usable index (e.g. a global-flag match): fall back to first occurrence.
  return text.replace(matched, " ");
}
/**
 * Splits a free-form shopping request into search text and structured filters.
 *
 * Recognized phrases are removed from the text as they are consumed, in this
 * order: result limit ("return 5", "top 10"), per-unit price ceiling
 * ("under $4 each"), total price ceiling ("less than $1,000"), minimum review
 * count, and minimum rating. Whatever is left is cleaned up and returned as
 * the query.
 *
 * "over / more than / above" wording is recorded as an exclusive (`"gt"`)
 * comparison; "at least / minimum / N stars or better" as inclusive (`"gte"`).
 *
 * @param input - The raw request text.
 * @returns The remaining query, the recognized filters, and the requested
 *   limit when a positive one was found.
 */
export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguageRequest {
  let remaining = input.trim();
  const filters: ProductFilters = {
    includeKeywords: [],
    excludeKeywords: []
  };
  let limit: number | undefined;

  // Dollar amounts may carry thousands separators ("$1,000"); strip them before converting.
  const toAmount = (digits: string): number => Number(digits.replace(/,/g, ""));

  const limitMatch = remaining.match(/\b(?:return|limit|top)\s+(\d{1,3})\b/i);
  if (limitMatch) {
    const requested = Number(limitMatch[1]);
    // A limit of 0 is meaningless and, being non-nullish, would override the
    // caller's default in a `??` chain; treat it as "no limit requested".
    if (requested > 0) {
      limit = requested;
    }
    remaining = removeMatched(remaining, limitMatch);
  }

  // Unit price must be checked before total price: both share the "under $N" prefix.
  const unitPriceMatch = remaining.match(/\b(?:cost\s+)?(?:less than|under|below)\s+\$([0-9]+(?:,[0-9]{3})*(?:\.[0-9]{1,2})?)\s*(?:each|per\b|\/\s*(?:count|unit|item))\b/i);
  if (unitPriceMatch) {
    filters.maxUnitPrice = toAmount(unitPriceMatch[1]);
    remaining = removeMatched(remaining, unitPriceMatch);
  }

  const maxPriceMatch = remaining.match(/\b(?:cost\s+)?(?:less than|under|below)\s+\$([0-9]+(?:,[0-9]{3})*(?:\.[0-9]{1,2})?)\b/i);
  if (maxPriceMatch) {
    filters.maxPrice = toAmount(maxPriceMatch[1]);
    remaining = removeMatched(remaining, maxPriceMatch);
  }

  // Reviews are consumed before ratings so "over 200 reviews" is not read as a rating.
  const exclusiveReviews = remaining.match(/\b(?:over|more than|above)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i);
  const inclusiveReviews = remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i);
  const reviewMatch = exclusiveReviews ?? inclusiveReviews;
  if (reviewMatch) {
    filters.minReviews = Number(reviewMatch[1].replace(/,/g, ""));
    filters.reviewCountComparison = exclusiveReviews ? "gt" : "gte";
    remaining = removeMatched(remaining, reviewMatch);
  }

  const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:review score of\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i);
  const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i)
    ?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i);
  const ratingMatch = exclusiveRating ?? inclusiveRating;
  if (ratingMatch) {
    filters.minRating = Number(ratingMatch[1]);
    filters.ratingComparison = exclusiveRating ? "gt" : "gte";
    remaining = removeMatched(remaining, ratingMatch);
  }

  return {
    query: cleanQuery(remaining),
    filters,
    limit
  };
}
+144
View File
@@ -0,0 +1,144 @@
import { HTMLElement, parse } from "node-html-parser";
import { parseMoney, parseRating, parseReviewCount, parseUnitPrice } from "./parsers.js";
import type { DeliverySummary, ProductSearchResult, SearchPageExtraction } from "./types.js";
/**
 * Returns a node's text content with whitespace runs collapsed to single
 * spaces and the ends trimmed; a missing node yields the empty string.
 */
function textOf(node: HTMLElement | null | undefined): string {
  if (!node) {
    return "";
  }
  return node.textContent.replace(/\s+/g, " ").trim();
}
/**
 * Reads attribute `name` from `node`, returning `undefined` when the node is
 * missing or the attribute is absent.
 */
function attrOf(node: HTMLElement | null | undefined, name: string): string | undefined {
  if (!node) {
    return undefined;
  }
  const value = node.getAttribute(name);
  return value == null ? undefined : value;
}
/**
 * Resolves `href` against `currentUrl` and returns it only if it points at
 * `www.amazon.com`.
 *
 * The host is always checked on the parsed URL. A string-prefix test is not
 * sufficient: `https://www.amazon.com.evil.example/` and
 * `https://www.amazon.com@evil.example/` both start with the Amazon origin
 * while pointing somewhere else.
 *
 * @param href - Absolute or relative link taken from the page.
 * @param currentUrl - Base URL used to resolve relative links.
 * @returns The serialized absolute URL, or `undefined` when `href` is empty,
 *   cannot be parsed, or resolves to a different host.
 */
function absoluteAmazonUrl(href: string | undefined, currentUrl = "https://www.amazon.com/"): string | undefined {
  if (!href) {
    return undefined;
  }

  try {
    const parsed = new URL(href, currentUrl);
    return parsed.hostname === "www.amazon.com" ? parsed.toString() : undefined;
  } catch {
    // Unparseable href or base URL: treat as "no usable link".
    return undefined;
  }
}
/**
 * Builds the canonical `https://www.amazon.com/dp/<ASIN>` URL for a product.
 *
 * When `href` resolves to an Amazon URL whose path contains `/dp/<id>` or
 * `/gp/product/<id>`, that id (upper-cased) is used; in every other case the
 * URL is built from the `asin` argument.
 *
 * @param asin - ASIN taken from the result card, used as the fallback id.
 * @param href - Product link from the card, if any.
 * @param currentUrl - Base URL for resolving a relative `href`.
 * @returns A canonical product URL.
 */
function normalizeProductUrl(asin: string, href: string | undefined, currentUrl: string): string {
  const fallback = `https://www.amazon.com/dp/${asin}`;

  const absolute = absoluteAmazonUrl(href, currentUrl);
  if (!absolute) {
    return fallback;
  }

  let pathname: string;
  try {
    pathname = new URL(absolute).pathname;
  } catch {
    return fallback;
  }

  const found = /\/(?:dp|gp\/product)\/([A-Z0-9]{8,14})/i.exec(pathname);
  return found ? `https://www.amazon.com/dp/${found[1].toUpperCase()}` : fallback;
}
/**
 * Reports whether the raw HTML contains any of the phrases used to recognize
 * a bot-check or blocked page (matched case-insensitively anywhere in the
 * markup).
 */
function detectChallenge(html: string): boolean {
  const challengeMarkers = /robot check|enter the characters you see|captcha|automated access|access denied/i;
  return challengeMarkers.test(html);
}
/**
 * Summarizes the delivery promise found in a result card's text.
 *
 * Looks for "delivery" (optionally preceded by "FREE") and, when a day word
 * (Today, Tomorrow or a weekday name) follows within the same sentence,
 * includes everything up to and including that day in `display`.
 *
 * @param text - Visible text of the card.
 * @returns `display` (the matched phrase), `free` (whether the phrase starts
 *   with "FREE"), and `prime` (whether "prime" appears anywhere in the text);
 *   `undefined` when the text does not mention delivery.
 */
function deliveryFromText(text: string): DeliverySummary | undefined {
  const compact = text.replace(/\s+/g, " ").trim();

  // The day is an optional *suffix group*: a lazy run followed by an optional
  // day would always settle for the empty run and drop the day. The run is
  // capped and stops at "." so an unrelated day word further along the card
  // is not pulled in.
  const deliveryMatch = compact.match(
    /((?:FREE\s+)?delivery)(?:[^.]{0,60}?(?:Tomorrow|Today|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday))?/i
  );
  if (!deliveryMatch) {
    return undefined;
  }

  const display = deliveryMatch[0].trim();
  return {
    display,
    // Only the leading "FREE delivery" wording counts, not a "free" that
    // happens to sit between "delivery" and the day.
    free: /\bfree\b/i.test(deliveryMatch[1]),
    prime: /\bprime\b/i.test(compact)
  };
}
/**
 * Returns the normalized text of the first selector (in the given order)
 * whose first matching element has non-empty text, or `""` when none does.
 */
function firstText(card: HTMLElement, selectors: string[]): string {
  const texts = selectors.map((selector) => textOf(card.querySelector(selector)));
  return texts.find((value) => value !== "") ?? "";
}
/**
 * Scans the card's secondary-text, base-size and `span` elements in document
 * order and returns the text of the first one that parses as a unit price,
 * or `""` when none does.
 */
function firstUnitPriceText(card: HTMLElement): string {
  const candidateTexts = Array.from(
    card.querySelectorAll(".a-color-secondary, .a-size-base, span"),
    (node) => textOf(node)
  );
  return candidateTexts.find((value) => Boolean(parseUnitPrice(value))) ?? "";
}
/**
 * Extracts product candidates and the next-page link from search-results HTML.
 *
 * If the HTML looks like a bot-check or blocked page, no parsing is attempted
 * and a `"challenge"` result with a warning is returned. Otherwise every
 * element carrying a well-formed `data-asin` and a non-empty title becomes one
 * product; fields that cannot be found are left `undefined`.
 *
 * @param html - Raw HTML of the search results page.
 * @param currentUrl - URL the HTML was loaded from; used to resolve relative links.
 * @returns Status, extracted products, warnings, and the absolute next-page
 *   URL when a pagination link is present.
 */
export function extractSearchPage(html: string, currentUrl: string): SearchPageExtraction {
  if (detectChallenge(html)) {
    return {
      status: "challenge",
      products: [],
      warnings: ["Amazon returned a challenge or blocked page; stopping without bypass."],
    };
  }

  const root = parse(html);
  // Placeholder cards carry an empty data-asin; keep only ASIN-shaped values.
  const cards = root.querySelectorAll("[data-asin]")
    .filter((card) => /^[A-Z0-9]{8,14}$/i.test(card.getAttribute("data-asin") ?? ""));
  const products: ProductSearchResult[] = [];

  for (const card of cards) {
    const asin = (card.getAttribute("data-asin") ?? "").toUpperCase();
    const link = card.querySelector("h2 a") ?? card.querySelector("a[href*='/dp/']") ?? card.querySelector("a[href*='/gp/product/']");
    const title = textOf(link) || firstText(card, ["h2", "[data-cy='title-recipe']"]);
    if (!title) {
      // A card without a title is not a usable candidate.
      continue;
    }

    const priceText = firstText(card, [".a-price .a-offscreen", ".a-price"]);
    const allText = textOf(card);
    const unitPriceText = firstUnitPriceText(card);
    const ariaText = card.querySelectorAll("[aria-label]")
      .map((node) => attrOf(node, "aria-label") ?? "")
      .join(" ");
    const delivery = deliveryFromText(allText);

    const product: ProductSearchResult = {
      asin,
      title,
      url: normalizeProductUrl(asin, attrOf(link, "href"), currentUrl),
      imageUrl: attrOf(card.querySelector("img"), "src"),
      price: parseMoney(priceText),
      unitPrice: parseUnitPrice(unitPriceText),
      // Prefer aria-labels, but fall back to the visible text whenever they
      // do not yield a value: an unrelated aria-label on the card must not
      // suppress a rating or count that is present in the text.
      rating: parseRating(ariaText) ?? parseRating(allText),
      reviewCount: parseReviewCount(ariaText) ?? parseReviewCount(allText),
      delivery,
      specs: [],
      bullets: [],
      isSponsored: /\bsponsored\b/i.test(allText),
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    };
    products.push(product);
  }

  const nextHref = attrOf(root.querySelector(".s-pagination-next[href]"), "href");

  return {
    status: "ok",
    products,
    warnings: [],
    nextPageUrl: absoluteAmazonUrl(nextHref, currentUrl)
  };
}
+21
View File
@@ -10,7 +10,9 @@ export interface SearchProductsRequest {
export interface ProductFilters {
minRating?: number;
ratingComparison?: "gt" | "gte";
minReviews?: number;
reviewCountComparison?: "gt" | "gte";
maxPrice?: number;
maxUnitPrice?: number;
includeKeywords: string[];
@@ -82,3 +84,22 @@ export interface SearchProductsResponse {
automation: "web-automation/CloakBrowser";
};
}
/** Outcome of splitting a free-form shopping request into search text and structured constraints. */
export interface ParsedNaturalLanguageRequest {
/** Search text that remains once recognized limit and filter phrases have been removed. */
query: string;
/** Constraints recognized in the request text. */
filters: ProductFilters;
/** Result count requested in the text (e.g. "top 5"), when one was recognized. */
limit?: number;
}
/** A unit count pulled out of product text, with how much the matching phrase can be trusted. */
export interface UnitCountExtraction {
/** Number of units (a positive integer). */
count: number;
/** Confidence assigned to the phrase pattern that produced the count. */
confidence: "high" | "medium" | "low";
/** The exact phrase that matched, e.g. "100 Count" or "Pack of 6". */
source: string;
}
/** Result of extracting product candidates from one page of search results. */
export interface SearchPageExtraction {
/** "challenge" when the page looked like a bot-check or blocked page; "ok" otherwise. */
status: "ok" | "challenge";
/** Product candidates found on the page; empty for challenge pages. */
products: ProductSearchResult[];
/** Human-readable notes about problems encountered during extraction. */
warnings: string[];
/** Absolute URL of the next results page, when a pagination link was found. */
nextPageUrl?: string;
}
@@ -0,0 +1,71 @@
import { access } from "node:fs/promises";
import { constants } from "node:fs";
import { dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";
/** Overrides for locating the web-automation runtime; each falls back to a process-derived default. */
export interface RuntimeResolverOptions {
/** Environment to read AMAZON_SHOPPING_WEB_AUTOMATION_DIR from; defaults to process.env. */
env?: NodeJS.ProcessEnv;
/** Home directory under which the ".openclaw" workspace candidate is looked up. */
homeDir?: string;
/** Directory of this skill; the sibling "web-automation/scripts" directory is tried as a candidate. */
skillDir?: string;
}
/** A located web-automation runtime and the command that verifies its installation. */
export interface WebAutomationRuntime {
/** Directory containing the web-automation scripts. */
scriptsDir: string;
/** Command description for the runtime's install check. */
checkInstall: {
/** Working directory to run the command in. */
cwd: string;
/** Executable to invoke. */
command: string;
/** Arguments passed to the executable. */
args: string[];
};
}
/**
 * Resolves when `path` exists; otherwise rejects with an error that names the
 * missing runtime component.
 *
 * @param path - Filesystem path to probe.
 * @param label - Short name of the component, used in the error message.
 * @throws Error when the path cannot be accessed.
 */
async function assertFile(path: string, label: string): Promise<void> {
  try {
    await access(path, constants.F_OK);
    return;
  } catch {
    // Fall through to the descriptive error below.
  }
  throw new Error(`web-automation runtime is missing ${label}: ${path}`);
}
/**
 * Accepts `path` if it is executable, and otherwise if it merely exists.
 *
 * The executable probe is tried first; when it fails the check is relaxed to
 * plain existence, so only a missing path causes a rejection.
 *
 * @param path - Filesystem path to probe.
 * @param label - Short name of the component, used in the error message.
 * @throws Error when the path does not exist.
 */
async function assertExecutableOrFile(path: string, label: string): Promise<void> {
  try {
    await access(path, constants.X_OK);
    return;
  } catch {
    // Not executable (or not present): fall back to the existence check.
  }
  await assertFile(path, label);
}
/** Returns the parent of the directory that contains this module file. */
function defaultSkillDir(): string {
  const modulePath = fileURLToPath(import.meta.url);
  const moduleDir = dirname(modulePath);
  return resolve(moduleDir, "..");
}
/**
 * Locates a usable web-automation scripts directory.
 *
 * Candidates are tried in order and the first one containing
 * `check-install.js`, `package.json` and `node_modules/.bin/tsx` wins:
 * 1. `AMAZON_SHOPPING_WEB_AUTOMATION_DIR` from the environment
 * 2. `<home>/.openclaw/workspace/skills/web-automation/scripts`
 * 3. `<skillDir>/../web-automation/scripts`
 *
 * @param options - Optional overrides for the environment, home directory and
 *   skill directory.
 * @returns The scripts directory and the command that checks its installation.
 * @throws Error listing what each candidate was missing when none is usable.
 */
export async function resolveWebAutomationRuntime(options: RuntimeResolverOptions = {}): Promise<WebAutomationRuntime> {
  const env = options.env ?? process.env;
  // Read HOME from the same environment as everything else so that an
  // injected `env` fully controls resolution (process.env is only the default).
  const homeDir = options.homeDir ?? env.HOME ?? "";
  const skillDir = options.skillDir ?? defaultSkillDir();

  const candidates = [
    env.AMAZON_SHOPPING_WEB_AUTOMATION_DIR,
    homeDir ? join(homeDir, ".openclaw", "workspace", "skills", "web-automation", "scripts") : undefined,
    resolve(skillDir, "..", "web-automation", "scripts")
  ].filter((candidate): candidate is string => Boolean(candidate));

  const errors: string[] = [];
  for (const scriptsDir of candidates) {
    try {
      await assertFile(join(scriptsDir, "check-install.js"), "check-install.js");
      await assertFile(join(scriptsDir, "package.json"), "package.json");
      await assertExecutableOrFile(join(scriptsDir, "node_modules", ".bin", "tsx"), "node_modules/.bin/tsx");
      return {
        scriptsDir,
        checkInstall: {
          cwd: scriptsDir,
          command: "node",
          args: ["check-install.js"]
        }
      };
    } catch (error: unknown) {
      // Remember why this candidate was rejected and try the next one.
      errors.push(error instanceof Error ? error.message : String(error));
    }
  }

  throw new Error(`Unable to locate usable web-automation runtime.\n${errors.join("\n")}`);
}
+14
View File
@@ -64,6 +64,20 @@ describe("amazon-shopping CLI", () => {
assert.equal(parseCliRequest(["usb c cable", "--json", "--markdown"]).output, "both");
});
// Filter phrases embedded in the positional query must be lifted into
// structured filters and stripped from the search text sent to Amazon.
it("normalizes natural-language filters for the target request", () => {
const request = parseCliRequest([
"100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars",
"--dry-run"
]);
assert.equal(request.query, "100w led bulbs");
assert.equal(request.filters.maxUnitPrice, 4);
assert.equal(request.filters.minReviews, 200);
// "over" / "more than" are exclusive comparisons.
assert.equal(request.filters.reviewCountComparison, "gt");
assert.equal(request.filters.minRating, 4.5);
assert.equal(request.filters.ratingComparison, "gt");
});
it("rejects limits below one", () => {
assert.throws(
() => parseCliRequest(["usb c cable", "--limit", "0"]),
+3
View File
@@ -0,0 +1,3 @@
# Fixtures
Fixtures in this directory are hand-crafted sanitized HTML snippets. They are not live Amazon snapshots and contain no cookies, account details, delivery location, scripts, tracking identifiers, or browser profile data.
@@ -0,0 +1,23 @@
<!-- Hand-crafted sanitized fixture. Not a live Amazon snapshot. -->
<html>
<body>
<div data-component-type="s-search-result" data-asin="B0TESTLED1">
<h2><a class="a-link-normal s-line-clamp-2" href="/Bright-Daylight-Equivalent/dp/B0TESTLED1/ref=sr_1_1">Bright Daylight 100W Equivalent LED Bulbs, 50 Count</a></h2>
<span class="a-price"><span class="a-offscreen">$18.99</span></span>
<span class="a-size-base a-color-secondary">$0.38/Count</span>
<span aria-label="4.6 out of 5 stars"></span>
<a aria-label="1,234 ratings"></a>
<div class="a-row a-size-base a-color-secondary">FREE delivery Tomorrow</div>
<img class="s-image" src="https://m.media-amazon.com/images/I/test-led.jpg" />
</div>
<div data-component-type="s-search-result" data-asin="B0TESTLED2">
<span>Sponsored</span>
<h2><a href="https://www.amazon.com/gp/product/B0TESTLED2">Value LED Bulbs Soft White, Pack of 24</a></h2>
<span class="a-price"><span class="a-offscreen">$21.99</span></span>
<span aria-label="4.3 out of 5 stars"></span>
<a aria-label="543 ratings"></a>
<div>Delivery Friday</div>
</div>
<a class="s-pagination-next" href="/s?k=led+bulbs&amp;page=2">Next</a>
</body>
</html>
@@ -0,0 +1,75 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import {
extractUnitCount,
parseMoney,
parseRating,
parseReviewCount,
parseStarBreakdown,
parseUnitPrice
} from "../src/parsers.js";
// Unit tests for the text parsers in src/parsers.ts: money, rating, review
// count, star histogram, unit count and unit price.
describe("parsers", () => {
it("parses USD money", () => {
assert.deepEqual(parseMoney("$19.99"), { amount: 19.99, currency: "USD", display: "$19.99" });
});
it("parses rating text", () => {
assert.equal(parseRating("4.6 out of 5 stars"), 4.6);
});
it("parses review count text", () => {
// Thousands separators are stripped.
assert.equal(parseReviewCount("1,234 ratings"), 1234);
});
it("parses visible star histogram percentages", () => {
assert.deepEqual(parseStarBreakdown("5 star 72% 4 star 15% 3 star 7% 2 star 3% 1 star 3%"), {
five: 72,
four: 15,
three: 7,
two: 3,
one: 3,
basis: "percent"
});
});
it("extracts high-confidence unit counts", () => {
// `source` is the exact phrase that matched, preserving the input's casing.
assert.deepEqual(extractUnitCount("LED bulbs, 100 Count, daylight"), {
count: 100,
confidence: "high",
source: "100 Count"
});
assert.deepEqual(extractUnitCount("Pack of 6 USB-C cables"), {
count: 6,
confidence: "high",
source: "Pack of 6"
});
});
it("distinguishes lower-confidence unit count phrases", () => {
assert.deepEqual(extractUnitCount("Set of 8 replacement filters"), {
count: 8,
confidence: "medium",
source: "Set of 8"
});
assert.deepEqual(extractUnitCount("6 bulbs soft white"), {
count: 6,
confidence: "low",
source: "6 bulbs"
});
});
it("parses visible unit prices", () => {
assert.deepEqual(parseUnitPrice("$0.33/Count"), {
amount: 0.33,
currency: "USD",
display: "$0.33/Count"
});
});
it("parses whole-dollar and one-decimal prices", () => {
assert.deepEqual(parseMoney("$20"), { amount: 20, currency: "USD", display: "$20" });
assert.deepEqual(parseMoney("$19.9"), { amount: 19.9, currency: "USD", display: "$19.9" });
});
});
@@ -0,0 +1,37 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import { parseNaturalLanguageRequest } from "../src/query-parser.js";
// Unit tests for src/query-parser.ts: filter phrases are turned into
// structured filters and removed from the remaining query text.
describe("parseNaturalLanguageRequest", () => {
it("extracts the target LED bulb filters from natural language", () => {
const parsed = parseNaturalLanguageRequest(
"100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars"
);
assert.equal(parsed.query, "100w led bulbs");
// "$4 each" is a per-unit ceiling, not a total-price ceiling.
assert.equal(parsed.filters.maxUnitPrice, 4);
assert.equal(parsed.filters.minReviews, 200);
assert.equal(parsed.filters.reviewCountComparison, "gt");
assert.equal(parsed.filters.minRating, 4.5);
assert.equal(parsed.filters.ratingComparison, "gt");
});
it("distinguishes inclusive review and rating phrasing", () => {
// "at least" and "or better" are inclusive comparisons.
const parsed = parseNaturalLanguageRequest("usb c charger at least 500 reviews and 4.3 stars or better");
assert.equal(parsed.query, "usb c charger");
assert.equal(parsed.filters.minReviews, 500);
assert.equal(parsed.filters.reviewCountComparison, "gte");
assert.equal(parsed.filters.minRating, 4.3);
assert.equal(parsed.filters.ratingComparison, "gte");
});
it("extracts limit and max product price phrases", () => {
const parsed = parseNaturalLanguageRequest("return 5 wireless mouse under $30");
assert.equal(parsed.query, "wireless mouse");
assert.equal(parsed.limit, 5);
assert.equal(parsed.filters.maxPrice, 30);
});
});
@@ -0,0 +1,65 @@
import assert from "node:assert/strict";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import { describe, it } from "node:test";
import { extractSearchPage } from "../src/search-page.js";
const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html");
// Tests for src/search-page.ts against a hand-crafted fixture and small
// inline HTML snippets; no live Amazon pages are involved.
describe("extractSearchPage", () => {
it("extracts normalized product candidates from sanitized search HTML", async () => {
const html = await readFile(fixturePath, "utf8");
const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs");
assert.equal(extracted.status, "ok");
assert.equal(extracted.products.length, 2);
assert.equal(extracted.products[0]?.asin, "B0TESTLED1");
// The tracking suffix on the fixture link is dropped in favor of the canonical /dp/ URL.
assert.equal(extracted.products[0]?.url, "https://www.amazon.com/dp/B0TESTLED1");
assert.equal(extracted.products[0]?.price?.amount, 18.99);
assert.equal(extracted.products[0]?.unitPrice?.amount, 0.38);
assert.equal(extracted.products[0]?.rating, 4.6);
assert.equal(extracted.products[0]?.reviewCount, 1234);
assert.equal(extracted.products[0]?.delivery?.free, true);
assert.equal(extracted.products[0]?.isSponsored, false);
assert.equal(extracted.products[1]?.isSponsored, true);
// The relative pagination href is resolved against the current page URL.
assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2");
});
it("detects Amazon challenge pages", () => {
const extracted = extractSearchPage("<html><title>Robot Check</title><body>Enter the characters you see below</body></html>", "https://www.amazon.com/s?k=x");
assert.equal(extracted.status, "challenge");
assert.match(extracted.warnings[0] ?? "", /challenge/i);
assert.equal(extracted.products.length, 0);
});
it("returns ok with no products for empty or cardless pages", () => {
const extracted = extractSearchPage("<html><body>No results</body></html>", "https://www.amazon.com/s?k=x");
assert.equal(extracted.status, "ok");
assert.deepEqual(extracted.products, []);
assert.equal(extracted.nextPageUrl, undefined);
});
it("skips malformed ASINs and cards without titles", () => {
// First card: ASIN too short. Second card: valid ASIN but no title.
const extracted = extractSearchPage(`
<div data-asin="bad"><h2><a href="/dp/bad">Bad ASIN</a></h2></div>
<div data-asin="B0VALID1234"></div>
`, "https://www.amazon.com/s?k=x");
assert.equal(extracted.status, "ok");
assert.equal(extracted.products.length, 0);
});
it("keeps candidates with missing price and records missing price later", () => {
const extracted = extractSearchPage(`
<div data-asin="B0NOPRICE1">
<h2><a href="/dp/B0NOPRICE1">No Price Product</a></h2>
</div>
`, "https://www.amazon.com/s?k=x");
assert.equal(extracted.products.length, 1);
assert.equal(extracted.products[0]?.price, undefined);
});
});
@@ -0,0 +1,46 @@
import assert from "node:assert/strict";
import { mkdtemp, mkdir, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, it } from "node:test";
import { resolveWebAutomationRuntime } from "../src/web-automation-runtime.js";
/**
 * Creates a throwaway directory laid out like a web-automation scripts
 * directory (`check-install.js`, `package.json`, `node_modules/.bin/tsx`) and
 * returns its path.
 */
async function createRuntime(): Promise<string> {
  const runtimeDir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-"));
  const binDir = join(runtimeDir, "node_modules", ".bin");

  await writeFile(join(runtimeDir, "check-install.js"), "console.log('ok');\n");
  await writeFile(join(runtimeDir, "package.json"), "{\"type\":\"module\"}\n");
  await mkdir(binDir, { recursive: true });
  await writeFile(join(binDir, "tsx"), "#!/usr/bin/env node\n");

  return runtimeDir;
}
// Tests for src/web-automation-runtime.ts. homeDir and skillDir point at
// non-existent paths so only the environment-variable candidate can succeed.
describe("resolveWebAutomationRuntime", () => {
it("uses AMAZON_SHOPPING_WEB_AUTOMATION_DIR first", async () => {
const runtimeDir = await createRuntime();
const resolved = await resolveWebAutomationRuntime({
env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: runtimeDir },
homeDir: "/missing-home",
skillDir: "/missing-skill"
});
assert.equal(resolved.scriptsDir, runtimeDir);
assert.deepEqual(resolved.checkInstall, {
cwd: runtimeDir,
command: "node",
args: ["check-install.js"]
});
});
it("returns a clear error when required files are missing", async () => {
// An empty directory: the first required file, check-install.js, is absent.
const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-missing-"));
await assert.rejects(
() => resolveWebAutomationRuntime({
env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: dir },
homeDir: "/missing-home",
skillDir: "/missing-skill"
}),
/check-install.js/
);
});
});