From ef326896f467abfcad10bbc3561f8950578282bc Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Wed, 15 Apr 2026 18:31:44 -0500 Subject: [PATCH] feat(amazon-shopping): parse filters and extract search candidates --- skills/amazon-shopping/package-lock.json | 135 +++++++++++++++- skills/amazon-shopping/package.json | 3 +- skills/amazon-shopping/src/cli.ts | 15 +- skills/amazon-shopping/src/parsers.ts | 112 ++++++++++++++ skills/amazon-shopping/src/query-parser.ts | 68 +++++++++ skills/amazon-shopping/src/search-page.ts | 144 ++++++++++++++++++ skills/amazon-shopping/src/types.ts | 21 +++ .../src/web-automation-runtime.ts | 71 +++++++++ skills/amazon-shopping/tests/cli.test.ts | 14 ++ .../amazon-shopping/tests/fixtures/README.md | 3 + .../tests/fixtures/search-results.html | 23 +++ skills/amazon-shopping/tests/parsers.test.ts | 75 +++++++++ .../tests/query-parser.test.ts | 37 +++++ .../amazon-shopping/tests/search-page.test.ts | 65 ++++++++ .../tests/web-automation-runtime.test.ts | 46 ++++++ 15 files changed, 822 insertions(+), 10 deletions(-) create mode 100644 skills/amazon-shopping/src/parsers.ts create mode 100644 skills/amazon-shopping/src/query-parser.ts create mode 100644 skills/amazon-shopping/src/search-page.ts create mode 100644 skills/amazon-shopping/src/web-automation-runtime.ts create mode 100644 skills/amazon-shopping/tests/fixtures/README.md create mode 100644 skills/amazon-shopping/tests/fixtures/search-results.html create mode 100644 skills/amazon-shopping/tests/parsers.test.ts create mode 100644 skills/amazon-shopping/tests/query-parser.test.ts create mode 100644 skills/amazon-shopping/tests/search-page.test.ts create mode 100644 skills/amazon-shopping/tests/web-automation-runtime.test.ts diff --git a/skills/amazon-shopping/package-lock.json b/skills/amazon-shopping/package-lock.json index 866a352..595997f 100644 --- a/skills/amazon-shopping/package-lock.json +++ b/skills/amazon-shopping/package-lock.json @@ -8,7 +8,8 @@ "name": "amazon-shopping-scripts", "version": "1.0.0", "dependencies": { - "minimist": "^1.2.8" + "minimist": "^1.2.8", + "node-html-parser": "^7.1.0" }, "devDependencies": { "@types/minimist": "^1.2.5", @@ -476,6 +477,107 @@ "undici-types": "~7.16.0" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/esbuild": { "version": "0.27.7", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", @@ -546,6 +648,15 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, "node_modules/minimist": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", @@ -555,6 +666,28 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/node-html-parser": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-7.1.0.tgz", + "integrity": "sha512-iJo8b2uYGT40Y8BTyy5ufL6IVbN8rbm/1QK2xffXU/1a/v3AAa0d1YAoqBNYqaS4R/HajkWIpIfdE6KcyFh1AQ==", + "license": "MIT", + "dependencies": { + "css-select": "^5.1.0", + "he": "1.2.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", diff --git a/skills/amazon-shopping/package.json b/skills/amazon-shopping/package.json index 6f9c400..e39c656 100644 --- a/skills/amazon-shopping/package.json +++ b/skills/amazon-shopping/package.json @@ -10,7 +10,8 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "minimist": "^1.2.8" + "minimist": "^1.2.8", + "node-html-parser": "^7.1.0" }, "devDependencies": { "@types/minimist": "^1.2.5", diff --git a/skills/amazon-shopping/src/cli.ts b/skills/amazon-shopping/src/cli.ts index a2bd9f1..665fd2e 100644 --- a/skills/amazon-shopping/src/cli.ts +++ b/skills/amazon-shopping/src/cli.ts @@ -3,6 +3,7 @@ import minimist from "minimist"; import { fileURLToPath } from "node:url"; +import { parseNaturalLanguageRequest } from "./query-parser.js"; import type { ProductFilters, SearchProductsRequest, SearchProductsResponse } from "./types.js"; export interface CliDeps { @@ -76,12 +77,13 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest { alias: { h: "help" } }); - const query = String(args.query ?? args._.join(" ")).trim(); - if (!query) { + const rawQuery = String(args.query ?? args._.join(" ")).trim(); + if (!rawQuery) { throw new Error("A product query is required"); } - const limit = parsePositiveInteger(args.limit, "limit") ?? 15; + const natural = parseNaturalLanguageRequest(rawQuery); + const limit = parsePositiveInteger(args.limit, "limit") ?? natural.limit ?? 15; if (limit > 30 && !args["allow-large-limit"]) { throw new Error("Requested limits above 30 require --allow-large-limit or a batched run"); } @@ -91,10 +93,7 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest { throw new Error("max-search-pages must be 5 or less"); } - const filters: ProductFilters = { - includeKeywords: [], - excludeKeywords: [] - }; + const filters: ProductFilters = { ...natural.filters }; const minRating = parseNumber(args["min-rating"], "min-rating"); const minReviews = parsePositiveInteger(args["min-reviews"], "min-reviews"); const maxPrice = parseNumber(args["max-price"], "max-price"); @@ -108,7 +107,7 @@ export function parseCliRequest(argv: string[]): SearchProductsRequest { const markdown = Boolean(args.markdown); return { - query, + query: natural.query || rawQuery, filters, limit, maxSearchPages, diff --git a/skills/amazon-shopping/src/parsers.ts b/skills/amazon-shopping/src/parsers.ts new file mode 100644 index 0000000..ab7dcab --- /dev/null +++ b/skills/amazon-shopping/src/parsers.ts @@ -0,0 +1,112 @@ +import type { MoneyValue, StarBreakdown, UnitCountExtraction } from "./types.js"; + +export function parseMoney(text: string | undefined | null): MoneyValue | undefined { + if (!text) { + return undefined; + } + const compact = text.replace(/\s+/g, " ").trim(); + const match = compact.match(/\$\s*([0-9][0-9,]*(?:\.[0-9]{1,2})?)/); + if (!match) { + return undefined; + } + const amount = Number(match[1].replace(/,/g, "")); + if (!Number.isFinite(amount)) { + return undefined; + } + return { + amount, + currency: "USD", + display: compact + }; +} + +export function parseUnitPrice(text: string | undefined | null): MoneyValue | undefined { + if (!text || !(/[/]\s*\d|\$\s*\d/.test(text))) { + return undefined; + } + if (!/(\/|\bper\b|\beach\b|\bcount\b)/i.test(text)) { + return undefined; + } + return parseMoney(text); +} + +export function parseRating(text: string | undefined | null): number | undefined { + if (!text) { + return undefined; + } + const match = text.match(/([0-5](?:\.[0-9])?)\s*(?:out of\s*)?5\s*stars?/i) + ?? text.match(/\brated\s+([0-5](?:\.[0-9])?)/i); + if (!match) { + return undefined; + } + const rating = Number(match[1]); + return Number.isFinite(rating) ? rating : undefined; +} + +export function parseReviewCount(text: string | undefined | null): number | undefined { + if (!text) { + return undefined; + } + const match = text.match(/([0-9][0-9,]*)\s*(?:ratings?|reviews?)/i); + if (!match) { + return undefined; + } + const count = Number(match[1].replace(/,/g, "")); + return Number.isInteger(count) ? count : undefined; +} + +export function parseStarBreakdown(text: string | undefined | null): StarBreakdown | undefined { + if (!text) { + return undefined; + } + const breakdown: Partial> = {}; + const words: Record> = { + "5": "five", + "4": "four", + "3": "three", + "2": "two", + "1": "one" + }; + const percentMatches = [...text.matchAll(/([1-5])\s*star\s*([0-9]{1,3})\s*%/gi)]; + if (percentMatches.length === 0) { + return undefined; + } + for (const match of percentMatches) { + const key = words[match[1]]; + if (key) { + breakdown[key] = Number(match[2]); + } + } + return { + ...breakdown, + basis: "percent" + }; +} + +export function extractUnitCount(text: string | undefined | null): UnitCountExtraction | undefined { + if (!text) { + return undefined; + } + const patterns = [ + { pattern: /(\d{1,4})\s*[- ]?(?:count|ct)\b/i, confidence: "high" as const }, + { pattern: /\bpack\s+of\s+(\d{1,4})\b/i, confidence: "high" as const }, + { pattern: /\b(\d{1,4})\s*[- ]?pack\b/i, confidence: "high" as const }, + { pattern: /\bset\s+of\s+(\d{1,4})\b/i, confidence: "medium" as const }, + { pattern: /\b(\d{1,4})\s+(?:bulbs?|cables?|pieces?|pcs)\b/i, confidence: "low" as const } + ]; + for (const { pattern, confidence } of patterns) { + const match = text.match(pattern); + if (!match) { + continue; + } + const count = Number(match[1]); + if (Number.isInteger(count) && count > 0) { + return { + count, + confidence, + source: match[0] + }; + } + } + return undefined; +} diff --git a/skills/amazon-shopping/src/query-parser.ts b/skills/amazon-shopping/src/query-parser.ts new file mode 100644 index 0000000..da80e41 --- /dev/null +++ b/skills/amazon-shopping/src/query-parser.ts @@ -0,0 +1,68 @@ +import type { ParsedNaturalLanguageRequest, ProductFilters } from "./types.js"; + +function cleanQuery(text: string): string { + return text + .replace(/\b(?:that|and|with|have)\b/gi, " ") + .replace(/\s+/g, " ") + .replace(/\s+(and|or|a)$/i, "") + .trim(); +} + +function removeMatched(text: string, match: RegExpMatchArray | null): string { + if (!match) { + return text; + } + return text.replace(match[0], " "); +} + +export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguageRequest { + let remaining = input.trim(); + const filters: ProductFilters = { + includeKeywords: [], + excludeKeywords: [] + }; + let limit: number | undefined; + + const limitMatch = remaining.match(/\b(?:return|limit|top)\s+(\d{1,3})\b/i); + if (limitMatch) { + limit = Number(limitMatch[1]); + remaining = removeMatched(remaining, limitMatch); + } + + const unitPriceMatch = remaining.match(/\b(?:cost\s+)?(?:less than|under|below)\s+\$([0-9]+(?:\.[0-9]{1,2})?)\s*(?:each|per\b|\/\s*(?:count|unit|item))\b/i); + if (unitPriceMatch) { + filters.maxUnitPrice = Number(unitPriceMatch[1]); + remaining = removeMatched(remaining, unitPriceMatch); + } + + const maxPriceMatch = remaining.match(/\b(?:cost\s+)?(?:less than|under|below)\s+\$([0-9]+(?:\.[0-9]{1,2})?)\b/i); + if (maxPriceMatch) { + filters.maxPrice = Number(maxPriceMatch[1]); + remaining = removeMatched(remaining, maxPriceMatch); + } + + const exclusiveReviews = remaining.match(/\b(?:over|more than|above)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i); + const inclusiveReviews = remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-9][0-9,]*)\s*(?:reviews?|ratings?)\b/i); + const reviewMatch = exclusiveReviews ?? inclusiveReviews; + if (reviewMatch) { + filters.minReviews = Number(reviewMatch[1].replace(/,/g, "")); + filters.reviewCountComparison = exclusiveReviews ? "gt" : "gte"; + remaining = removeMatched(remaining, reviewMatch); + } + + const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:review score of\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i); + const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i) + ?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i); + const ratingMatch = exclusiveRating ?? inclusiveRating; + if (ratingMatch) { + filters.minRating = Number(ratingMatch[1]); + filters.ratingComparison = exclusiveRating ? "gt" : "gte"; + remaining = removeMatched(remaining, ratingMatch); + } + + return { + query: cleanQuery(remaining), + filters, + limit + }; +} diff --git a/skills/amazon-shopping/src/search-page.ts b/skills/amazon-shopping/src/search-page.ts new file mode 100644 index 0000000..ee0ece8 --- /dev/null +++ b/skills/amazon-shopping/src/search-page.ts @@ -0,0 +1,144 @@ +import { HTMLElement, parse } from "node-html-parser"; + +import { parseMoney, parseRating, parseReviewCount, parseUnitPrice } from "./parsers.js"; +import type { DeliverySummary, ProductSearchResult, SearchPageExtraction } from "./types.js"; + +function textOf(node: HTMLElement | null | undefined): string { + return node?.textContent.replace(/\s+/g, " ").trim() ?? ""; +} + +function attrOf(node: HTMLElement | null | undefined, name: string): string | undefined { + return node?.getAttribute(name) ?? undefined; +} + +function absoluteAmazonUrl(href: string | undefined, currentUrl = "https://www.amazon.com/"): string | undefined { + if (!href) { + return undefined; + } + if (href.startsWith("https://www.amazon.com")) { + return href; + } + try { + const parsed = new URL(href, currentUrl); + if (parsed.hostname !== "www.amazon.com") { + return undefined; + } + return parsed.toString(); + } catch { + return undefined; + } +} + +function normalizeProductUrl(asin: string, href: string | undefined, currentUrl: string): string { + const absolute = absoluteAmazonUrl(href, currentUrl); + if (!absolute) { + return `https://www.amazon.com/dp/${asin}`; + } + try { + const url = new URL(absolute); + const match = url.pathname.match(/\/(?:dp|gp\/product)\/([A-Z0-9]{8,14})/i); + if (match) { + return `https://www.amazon.com/dp/${match[1].toUpperCase()}`; + } + } catch { + return `https://www.amazon.com/dp/${asin}`; + } + return `https://www.amazon.com/dp/${asin}`; +} + +function detectChallenge(html: string): boolean { + return /robot check|enter the characters you see|captcha|automated access|access denied/i.test(html); +} + +function deliveryFromText(text: string): DeliverySummary | undefined { + const compact = text.replace(/\s+/g, " ").trim(); + const deliveryMatch = compact.match(/((?:FREE\s+)?delivery[^.]*?(?:Tomorrow|Today|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)?)/i); + if (!deliveryMatch) { + return undefined; + } + const display = deliveryMatch[1].trim(); + return { + display, + free: /\bfree\b/i.test(display), + prime: /\bprime\b/i.test(compact) + }; +} + +function firstText(card: HTMLElement, selectors: string[]): string { + for (const selector of selectors) { + const value = textOf(card.querySelector(selector)); + if (value) { + return value; + } + } + return ""; +} + +function firstUnitPriceText(card: HTMLElement): string { + for (const node of card.querySelectorAll(".a-color-secondary, .a-size-base, span")) { + const value = textOf(node); + if (parseUnitPrice(value)) { + return value; + } + } + return ""; +} + +export function extractSearchPage(html: string, currentUrl: string): SearchPageExtraction { + if (detectChallenge(html)) { + return { + status: "challenge", + products: [], + warnings: ["Amazon returned a challenge or blocked page; stopping without bypass."], + }; + } + + const root = parse(html); + const cards = root.querySelectorAll("[data-asin]") + .filter((card) => /^[A-Z0-9]{8,14}$/i.test(card.getAttribute("data-asin") ?? "")); + const products: ProductSearchResult[] = []; + + for (const card of cards) { + const asin = (card.getAttribute("data-asin") ?? "").toUpperCase(); + const link = card.querySelector("h2 a") ?? card.querySelector("a[href*='/dp/']") ?? card.querySelector("a[href*='/gp/product/']"); + const title = textOf(link) || firstText(card, ["h2", "[data-cy='title-recipe']"]); + if (!title) { + continue; + } + const priceText = firstText(card, [".a-price .a-offscreen", ".a-price"]); + const allText = textOf(card); + const unitPriceText = firstUnitPriceText(card); + const ariaText = card.querySelectorAll("[aria-label]") + .map((node) => attrOf(node, "aria-label") ?? "") + .join(" "); + const delivery = deliveryFromText(allText); + const product: ProductSearchResult = { + asin, + title, + url: normalizeProductUrl(asin, attrOf(link, "href"), currentUrl), + imageUrl: attrOf(card.querySelector("img"), "src"), + price: parseMoney(priceText), + unitPrice: parseUnitPrice(unitPriceText), + rating: parseRating(ariaText || allText), + reviewCount: parseReviewCount(ariaText || allText), + delivery, + specs: [], + bullets: [], + isSponsored: /\bsponsored\b/i.test(allText), + matchedFilters: [], + missingFields: [], + extractionNotes: [] + }; + products.push(product); + } + + const nextHref = attrOf(root.querySelector(".s-pagination-next[href]"), "href"); + const nextPageUrl = absoluteAmazonUrl(nextHref, currentUrl); + + return { + status: "ok", + products, + warnings: [], + nextPageUrl: nextPageUrl ?? undefined + }; +} diff --git a/skills/amazon-shopping/src/types.ts b/skills/amazon-shopping/src/types.ts index c86f804..258d462 100644 --- a/skills/amazon-shopping/src/types.ts +++ b/skills/amazon-shopping/src/types.ts @@ -10,7 +10,9 @@ export interface SearchProductsRequest { export interface ProductFilters { minRating?: number; + ratingComparison?: "gt" | "gte"; minReviews?: number; + reviewCountComparison?: "gt" | "gte"; maxPrice?: number; maxUnitPrice?: number; includeKeywords: string[]; @@ -82,3 +84,22 @@ export interface SearchProductsResponse { automation: "web-automation/CloakBrowser"; }; } + +export interface ParsedNaturalLanguageRequest { + query: string; + filters: ProductFilters; + limit?: number; +} + +export interface UnitCountExtraction { + count: number; + confidence: "high" | "medium" | "low"; + source: string; +} + +export interface SearchPageExtraction { + status: "ok" | "challenge"; + products: ProductSearchResult[]; + warnings: string[]; + nextPageUrl?: string; +} diff --git a/skills/amazon-shopping/src/web-automation-runtime.ts b/skills/amazon-shopping/src/web-automation-runtime.ts new file mode 100644 index 0000000..f085068 --- /dev/null +++ b/skills/amazon-shopping/src/web-automation-runtime.ts @@ -0,0 +1,71 @@ +import { access } from "node:fs/promises"; +import { constants } from "node:fs"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +export interface RuntimeResolverOptions { + env?: NodeJS.ProcessEnv; + homeDir?: string; + skillDir?: string; +} + +export interface WebAutomationRuntime { + scriptsDir: string; + checkInstall: { + cwd: string; + command: string; + args: string[]; + }; +} + +async function assertFile(path: string, label: string): Promise { + try { + await access(path, constants.F_OK); + } catch { + throw new Error(`web-automation runtime is missing ${label}: ${path}`); + } +} + +async function assertExecutableOrFile(path: string, label: string): Promise { + try { + await access(path, constants.X_OK); + } catch { + await assertFile(path, label); + } +} + +function defaultSkillDir(): string { + return resolve(dirname(fileURLToPath(import.meta.url)), ".."); +} + +export async function resolveWebAutomationRuntime(options: RuntimeResolverOptions = {}): Promise { + const env = options.env ?? process.env; + const homeDir = options.homeDir ?? process.env.HOME ?? ""; + const skillDir = options.skillDir ?? defaultSkillDir(); + const candidates = [ + env.AMAZON_SHOPPING_WEB_AUTOMATION_DIR, + homeDir ? join(homeDir, ".openclaw", "workspace", "skills", "web-automation", "scripts") : undefined, + resolve(skillDir, "..", "web-automation", "scripts") + ].filter((candidate): candidate is string => Boolean(candidate)); + + const errors: string[] = []; + for (const scriptsDir of candidates) { + try { + await assertFile(join(scriptsDir, "check-install.js"), "check-install.js"); + await assertFile(join(scriptsDir, "package.json"), "package.json"); + await assertExecutableOrFile(join(scriptsDir, "node_modules", ".bin", "tsx"), "node_modules/.bin/tsx"); + return { + scriptsDir, + checkInstall: { + cwd: scriptsDir, + command: "node", + args: ["check-install.js"] + } + }; + } catch (error: unknown) { + errors.push(error instanceof Error ? error.message : String(error)); + } + } + + throw new Error(`Unable to locate usable web-automation runtime.\n${errors.join("\n")}`); +} diff --git a/skills/amazon-shopping/tests/cli.test.ts b/skills/amazon-shopping/tests/cli.test.ts index 5a3ebf2..4e6610c 100644 --- a/skills/amazon-shopping/tests/cli.test.ts +++ b/skills/amazon-shopping/tests/cli.test.ts @@ -64,6 +64,20 @@ describe("amazon-shopping CLI", () => { assert.equal(parseCliRequest(["usb c cable", "--json", "--markdown"]).output, "both"); }); + it("normalizes natural-language filters for the target request", () => { + const request = parseCliRequest([ + "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars", + "--dry-run" + ]); + + assert.equal(request.query, "100w led bulbs"); + assert.equal(request.filters.maxUnitPrice, 4); + assert.equal(request.filters.minReviews, 200); + assert.equal(request.filters.reviewCountComparison, "gt"); + assert.equal(request.filters.minRating, 4.5); + assert.equal(request.filters.ratingComparison, "gt"); + }); + it("rejects limits below one", () => { assert.throws( () => parseCliRequest(["usb c cable", "--limit", "0"]), diff --git a/skills/amazon-shopping/tests/fixtures/README.md b/skills/amazon-shopping/tests/fixtures/README.md new file mode 100644 index 0000000..7261407 --- /dev/null +++ b/skills/amazon-shopping/tests/fixtures/README.md @@ -0,0 +1,3 @@ +# Fixtures + +Fixtures in this directory are hand-crafted sanitized HTML snippets. They are not live Amazon snapshots and contain no cookies, account details, delivery location, scripts, tracking identifiers, or browser profile data. diff --git a/skills/amazon-shopping/tests/fixtures/search-results.html b/skills/amazon-shopping/tests/fixtures/search-results.html new file mode 100644 index 0000000..1d64819 --- /dev/null +++ b/skills/amazon-shopping/tests/fixtures/search-results.html @@ -0,0 +1,23 @@ + + + +
+

Bright Daylight 100W Equivalent LED Bulbs, 50 Count

+ $18.99 + $0.38/Count + + +
FREE delivery Tomorrow
+ +
+
+ Sponsored +

Value LED Bulbs Soft White, Pack of 24

+ $21.99 + + +
Delivery Friday
+
+ Next + + diff --git a/skills/amazon-shopping/tests/parsers.test.ts b/skills/amazon-shopping/tests/parsers.test.ts new file mode 100644 index 0000000..3daea47 --- /dev/null +++ b/skills/amazon-shopping/tests/parsers.test.ts @@ -0,0 +1,75 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { + extractUnitCount, + parseMoney, + parseRating, + parseReviewCount, + parseStarBreakdown, + parseUnitPrice +} from "../src/parsers.js"; + +describe("parsers", () => { + it("parses USD money", () => { + assert.deepEqual(parseMoney("$19.99"), { amount: 19.99, currency: "USD", display: "$19.99" }); + }); + + it("parses rating text", () => { + assert.equal(parseRating("4.6 out of 5 stars"), 4.6); + }); + + it("parses review count text", () => { + assert.equal(parseReviewCount("1,234 ratings"), 1234); + }); + + it("parses visible star histogram percentages", () => { + assert.deepEqual(parseStarBreakdown("5 star 72% 4 star 15% 3 star 7% 2 star 3% 1 star 3%"), { + five: 72, + four: 15, + three: 7, + two: 3, + one: 3, + basis: "percent" + }); + }); + + it("extracts high-confidence unit counts", () => { + assert.deepEqual(extractUnitCount("LED bulbs, 100 Count, daylight"), { + count: 100, + confidence: "high", + source: "100 Count" + }); + assert.deepEqual(extractUnitCount("Pack of 6 USB-C cables"), { + count: 6, + confidence: "high", + source: "Pack of 6" + }); + }); + + it("distinguishes lower-confidence unit count phrases", () => { + assert.deepEqual(extractUnitCount("Set of 8 replacement filters"), { + count: 8, + confidence: "medium", + source: "Set of 8" + }); + assert.deepEqual(extractUnitCount("6 bulbs soft white"), { + count: 6, + confidence: "low", + source: "6 bulbs" + }); + }); + + it("parses visible unit prices", () => { + assert.deepEqual(parseUnitPrice("$0.33/Count"), { + amount: 0.33, + currency: "USD", + display: "$0.33/Count" + }); + }); + + it("parses whole-dollar and one-decimal prices", () => { + assert.deepEqual(parseMoney("$20"), { amount: 20, currency: "USD", display: "$20" }); + assert.deepEqual(parseMoney("$19.9"), { amount: 19.9, currency: "USD", display: "$19.9" }); + }); +}); diff --git a/skills/amazon-shopping/tests/query-parser.test.ts b/skills/amazon-shopping/tests/query-parser.test.ts new file mode 100644 index 0000000..7088018 --- /dev/null +++ b/skills/amazon-shopping/tests/query-parser.test.ts @@ -0,0 +1,37 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { parseNaturalLanguageRequest } from "../src/query-parser.js"; + +describe("parseNaturalLanguageRequest", () => { + it("extracts the target LED bulb filters from natural language", () => { + const parsed = parseNaturalLanguageRequest( + "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars" + ); + + assert.equal(parsed.query, "100w led bulbs"); + assert.equal(parsed.filters.maxUnitPrice, 4); + assert.equal(parsed.filters.minReviews, 200); + assert.equal(parsed.filters.reviewCountComparison, "gt"); + assert.equal(parsed.filters.minRating, 4.5); + assert.equal(parsed.filters.ratingComparison, "gt"); + }); + + it("distinguishes inclusive review and rating phrasing", () => { + const parsed = parseNaturalLanguageRequest("usb c charger at least 500 reviews and 4.3 stars or better"); + + assert.equal(parsed.query, "usb c charger"); + assert.equal(parsed.filters.minReviews, 500); + assert.equal(parsed.filters.reviewCountComparison, "gte"); + assert.equal(parsed.filters.minRating, 4.3); + assert.equal(parsed.filters.ratingComparison, "gte"); + }); + + it("extracts limit and max product price phrases", () => { + const parsed = parseNaturalLanguageRequest("return 5 wireless mouse under $30"); + + assert.equal(parsed.query, "wireless mouse"); + assert.equal(parsed.limit, 5); + assert.equal(parsed.filters.maxPrice, 30); + }); +}); diff --git a/skills/amazon-shopping/tests/search-page.test.ts b/skills/amazon-shopping/tests/search-page.test.ts new file mode 100644 index 0000000..89688cd --- /dev/null +++ b/skills/amazon-shopping/tests/search-page.test.ts @@ -0,0 +1,65 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; +import { describe, it } from "node:test"; + +import { extractSearchPage } from "../src/search-page.js"; + +const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html"); + +describe("extractSearchPage", () => { + it("extracts normalized product candidates from sanitized search HTML", async () => { + const html = await readFile(fixturePath, "utf8"); + const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs"); + + assert.equal(extracted.status, "ok"); + assert.equal(extracted.products.length, 2); + assert.equal(extracted.products[0]?.asin, "B0TESTLED1"); + assert.equal(extracted.products[0]?.url, "https://www.amazon.com/dp/B0TESTLED1"); + assert.equal(extracted.products[0]?.price?.amount, 18.99); + assert.equal(extracted.products[0]?.unitPrice?.amount, 0.38); + assert.equal(extracted.products[0]?.rating, 4.6); + assert.equal(extracted.products[0]?.reviewCount, 1234); + assert.equal(extracted.products[0]?.delivery?.free, true); + assert.equal(extracted.products[0]?.isSponsored, false); + assert.equal(extracted.products[1]?.isSponsored, true); + assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2"); + }); + + it("detects Amazon challenge pages", () => { + const extracted = extractSearchPage("Robot CheckEnter the characters you see below", "https://www.amazon.com/s?k=x"); + + assert.equal(extracted.status, "challenge"); + assert.match(extracted.warnings[0] ?? "", /challenge/i); + assert.equal(extracted.products.length, 0); + }); + + it("returns ok with no products for empty or cardless pages", () => { + const extracted = extractSearchPage("No results", "https://www.amazon.com/s?k=x"); + + assert.equal(extracted.status, "ok"); + assert.deepEqual(extracted.products, []); + assert.equal(extracted.nextPageUrl, undefined); + }); + + it("skips malformed ASINs and cards without titles", () => { + const extracted = extractSearchPage(` + +
+ `, "https://www.amazon.com/s?k=x"); + + assert.equal(extracted.status, "ok"); + assert.equal(extracted.products.length, 0); + }); + + it("keeps candidates with missing price and records missing price later", () => { + const extracted = extractSearchPage(` + + `, "https://www.amazon.com/s?k=x"); + + assert.equal(extracted.products.length, 1); + assert.equal(extracted.products[0]?.price, undefined); + }); +}); diff --git a/skills/amazon-shopping/tests/web-automation-runtime.test.ts b/skills/amazon-shopping/tests/web-automation-runtime.test.ts new file mode 100644 index 0000000..d46ad97 --- /dev/null +++ b/skills/amazon-shopping/tests/web-automation-runtime.test.ts @@ -0,0 +1,46 @@ +import assert from "node:assert/strict"; +import { mkdtemp, mkdir, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, it } from "node:test"; + +import { resolveWebAutomationRuntime } from "../src/web-automation-runtime.js"; + +async function createRuntime() { + const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-")); + await writeFile(join(dir, "check-install.js"), "console.log('ok');\n"); + await writeFile(join(dir, "package.json"), "{\"type\":\"module\"}\n"); + await mkdir(join(dir, "node_modules", ".bin"), { recursive: true }); + await writeFile(join(dir, "node_modules", ".bin", "tsx"), "#!/usr/bin/env node\n"); + return dir; +} + +describe("resolveWebAutomationRuntime", () => { + it("uses AMAZON_SHOPPING_WEB_AUTOMATION_DIR first", async () => { + const runtimeDir = await createRuntime(); + const resolved = await resolveWebAutomationRuntime({ + env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: runtimeDir }, + homeDir: "/missing-home", + skillDir: "/missing-skill" + }); + + assert.equal(resolved.scriptsDir, runtimeDir); + assert.deepEqual(resolved.checkInstall, { + cwd: runtimeDir, + command: "node", + args: ["check-install.js"] + }); + }); + + it("returns a clear error when required files are missing", async () => { + const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-missing-")); + await assert.rejects( + () => resolveWebAutomationRuntime({ + env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: dir }, + homeDir: "/missing-home", + skillDir: "/missing-skill" + }), + /check-install.js/ + ); + }); +});