diff --git a/skills/amazon-shopping/src/browser.ts b/skills/amazon-shopping/src/browser.ts new file mode 100644 index 0000000..a07db5a --- /dev/null +++ b/skills/amazon-shopping/src/browser.ts @@ -0,0 +1,228 @@ +import { execFile } from "node:child_process"; +import { pathToFileURL } from "node:url"; +import { join } from "node:path"; +import { promisify } from "node:util"; + +import { extractDetailPage } from "./detail-page.js"; +import { applyFiltersAndLimit } from "./filters.js"; +import { createResponse } from "./report.js"; +import { extractSearchPage } from "./search-page.js"; +import type { ProductSearchResult, SearchProductsRequest, SearchProductsResponse } from "./types.js"; +import { resolveWebAutomationRuntime } from "./web-automation-runtime.js"; + +const execFileAsync = promisify(execFile); +const AMAZON_ROOT = "https://www.amazon.com"; +const DEFAULT_WAIT_MS = 4500; + +export type HttpClassification = "ok" | "retryable" | "challenge"; + +interface BrowserDeps { + fetchText?: (url: string) => Promise; + sleep?: (ms: number) => Promise; + now?: () => Date; +} + +export function plannedAmazonPaths(asins: string[]): string[] { + return [ + "/s", + ...asins.flatMap((asin) => [`/dp/${asin}`, `/gp/product/${asin}`]) + ]; +} + +export function classifyHttpStatus(status: number | null | undefined): HttpClassification { + if (status === 429 || status === 503) return "retryable"; + if (status === 401 || status === 403) return "challenge"; + return "ok"; +} + +export function isPathAllowedByRobots(robots: string, userAgent: string, path: string): boolean { + const groups: Array<{ agents: string[]; disallows: string[] }> = []; + let current: { agents: string[]; disallows: string[] } | undefined; + let hasDirectives = false; + + const lines = robots.split(/\r?\n/); + + for (const rawLine of lines) { + const line = rawLine.replace(/#.*/, "").trim(); + if (!line) continue; + const [rawKey, ...rest] = line.split(":"); + const key = rawKey.trim().toLowerCase(); + const value = rest.join(":").trim(); + + if (key === "user-agent") { + if (!current || hasDirectives) { + current = { agents: [], disallows: [] }; + groups.push(current); + hasDirectives = false; + } + current.agents.push(value.toLowerCase()); + continue; + } + + if (key === "disallow") { + hasDirectives = true; + if (current && value) { + current.disallows.push(value); + } + } + } + + const normalizedAgent = userAgent.toLowerCase(); + const exactGroups = groups.filter((group) => group.agents.includes(normalizedAgent)); + const matchedGroups = exactGroups.length > 0 ? exactGroups : groups.filter((group) => group.agents.includes("*")); + const disallows = matchedGroups.flatMap((group) => group.disallows); + + return !disallows.some((rule) => path.startsWith(rule)); +} + +async function defaultFetchText(url: string): Promise { + const response = await fetch(url); + return response.text(); +} + +async function checkRobots(paths: string[], deps: BrowserDeps): Promise { + const warnings: string[] = []; + const robots = await (deps.fetchText ?? defaultFetchText)(`${AMAZON_ROOT}/robots.txt`); + for (const path of paths) { + if (!isPathAllowedByRobots(robots, "*", path)) { + warnings.push(`Amazon robots directives disallow planned path: ${path}`); + } + } + return warnings; +} + +async function loadCloakBrowser(runtimeDir: string): Promise<{ + ensureBinary?: () => Promise; + launchContext: (options: Record) => Promise; +}> { + const moduleUrl = pathToFileURL(join(runtimeDir, "node_modules", "cloakbrowser", "dist", "index.js")).toString(); + return import(moduleUrl) as Promise; +} + +async function checkRuntime(): Promise { + const runtime = await resolveWebAutomationRuntime(); + await execFileAsync(runtime.checkInstall.command, runtime.checkInstall.args, { cwd: runtime.checkInstall.cwd }); + return runtime.scriptsDir; +} + +function searchUrl(query: string, pageNumber: number): string { + const url = new URL("/s", AMAZON_ROOT); + url.searchParams.set("k", query); + if (pageNumber > 1) { + url.searchParams.set("page", String(pageNumber)); + } + return url.toString(); +} + +async function pageHtml(page: any, url: string, deps: BrowserDeps): Promise<{ html: string; status: number | null }> { + let lastStatus: number | null = null; + for (let attempt = 0; attempt < 3; attempt += 1) { + const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45000 }); + await page.waitForTimeout?.(DEFAULT_WAIT_MS); + lastStatus = response?.status?.() ?? null; + if (classifyHttpStatus(lastStatus) !== "retryable") { + return { + html: await page.content(), + status: lastStatus + }; + } + await (deps.sleep ?? ((ms) => new Promise((resolve) => setTimeout(resolve, ms))))((2 ** attempt) * 1000 + Math.floor(Math.random() * 500)); + } + return { + html: await page.content(), + status: lastStatus + }; +} + +async function enrichDetails(page: any, products: ProductSearchResult[], deps: BrowserDeps): Promise { + const enriched: ProductSearchResult[] = []; + for (const product of products) { + await (deps.sleep ?? ((ms) => new Promise((resolve) => setTimeout(resolve, ms))))(1500 + Math.floor(Math.random() * 1500)); + const loaded = await pageHtml(page, product.url, deps); + const classification = classifyHttpStatus(loaded.status); + if (classification === "challenge") { + enriched.push({ + ...product, + extractionNotes: [...product.extractionNotes, "Detail page returned a challenge/block status."] + }); + continue; + } + enriched.push(extractDetailPage(loaded.html, product)); + } + return enriched; +} + +export async function searchProducts(request: SearchProductsRequest, deps: BrowserDeps = {}): Promise { + const warnings: string[] = []; + const robotsWarnings = await checkRobots(plannedAmazonPaths([]), deps); + if (robotsWarnings.length > 0) { + return createResponse({ + query: request.query, + filters: request.filters, + limit: request.limit, + maxSearchPages: request.maxSearchPages, + results: [], + filteredOutCount: 0, + warnings: robotsWarnings, + now: deps.now + }); + } + + const runtimeDir = await checkRuntime(); + const cloak = await loadCloakBrowser(runtimeDir); + await cloak.ensureBinary?.(); + const context = await cloak.launchContext({ + headless: process.env.CLOAKBROWSER_HEADLESS !== "false", + locale: "en-US", + viewport: { width: 1440, height: 900 }, + humanize: true + }); + const page = await context.newPage(); + + try { + const candidates: ProductSearchResult[] = []; + let nextUrl: string | undefined = searchUrl(request.query, 1); + for (let pageNumber = 1; pageNumber <= request.maxSearchPages && nextUrl; pageNumber += 1) { + const loaded = await pageHtml(page, nextUrl, deps); + const classification = classifyHttpStatus(loaded.status); + if (classification === "challenge" || classification === "retryable") { + warnings.push(`Amazon returned status ${loaded.status}; stopping without bypass.`); + break; + } + const extracted = extractSearchPage(loaded.html, nextUrl); + warnings.push(...extracted.warnings); + if (extracted.status === "challenge") { + break; + } + candidates.push(...extracted.products); + if (candidates.length >= request.limit * 3) { + break; + } + nextUrl = extracted.nextPageUrl ?? (pageNumber + 1 <= request.maxSearchPages ? searchUrl(request.query, pageNumber + 1) : undefined); + } + + let detailCandidates = candidates; + if (!request.skipDetails) { + const detailPaths = plannedAmazonPaths(candidates.map((candidate) => candidate.asin)).filter((path) => path !== "/s"); + const detailRobotsWarnings = await checkRobots(detailPaths, deps); + if (detailRobotsWarnings.length > 0) { + warnings.push(...detailRobotsWarnings, "Detail enrichment skipped because robots directives disallow at least one planned detail path."); + } else { + detailCandidates = await enrichDetails(page, candidates.slice(0, request.limit * 3), deps); + } + } + const filtered = applyFiltersAndLimit(detailCandidates, request.filters, request.limit); + return createResponse({ + query: request.query, + filters: request.filters, + limit: request.limit, + maxSearchPages: request.maxSearchPages, + results: filtered.results, + filteredOutCount: filtered.filteredOutCount, + warnings, + now: deps.now + }); + } finally { + await context.close(); + } +} diff --git a/skills/amazon-shopping/src/cli.ts b/skills/amazon-shopping/src/cli.ts index 665fd2e..cf1331c 100644 --- a/skills/amazon-shopping/src/cli.ts +++ b/skills/amazon-shopping/src/cli.ts @@ -3,7 +3,9 @@ import minimist from "minimist"; import { fileURLToPath } from "node:url"; +import { searchProducts } from "./browser.js"; import { parseNaturalLanguageRequest } from "./query-parser.js"; +import { createMarkdownReport } from "./report.js"; import type { ProductFilters, SearchProductsRequest, SearchProductsResponse } from "./types.js"; export interface CliDeps { @@ -138,7 +140,19 @@ async function defaultSearchProducts(request: SearchProductsRequest, deps: CliDe if (request.dryRun) { return createDryRunResponse(request, deps.now ?? (() => new Date())); } - throw new Error("Live Amazon search is not implemented yet. Use --dry-run until browser orchestration is installed."); + return searchProducts(request, { now: deps.now }); +} + +function writeResponse(response: SearchProductsResponse, output: SearchProductsRequest["output"], deps: CliDeps): void { + if (output === "markdown") { + deps.stdout.write(createMarkdownReport(response)); + return; + } + if (output === "both") { + deps.stdout.write(`${JSON.stringify(response, null, 2)}\n\n${createMarkdownReport(response)}`); + return; + } + deps.stdout.write(`${JSON.stringify(response, null, 2)}\n`); } export async function runCli( @@ -156,7 +170,7 @@ export async function runCli( const response = deps.searchProducts ? await deps.searchProducts(request) : await defaultSearchProducts(request, deps); - deps.stdout.write(`${JSON.stringify(response, null, 2)}\n`); + writeResponse(response, request.output, deps); return 0; } catch (error: unknown) { const message = error instanceof Error ? error.message : String(error); diff --git a/skills/amazon-shopping/src/detail-page.ts b/skills/amazon-shopping/src/detail-page.ts new file mode 100644 index 0000000..bc66726 --- /dev/null +++ b/skills/amazon-shopping/src/detail-page.ts @@ -0,0 +1,133 @@ +import { HTMLElement, parse } from "node-html-parser"; + +import { parseMoney, parseRating, parseReviewCount, parseStarBreakdown } from "./parsers.js"; +import type { DeliverySummary, ProductSearchResult, ProductSpec } from "./types.js"; + +function textOf(node: HTMLElement | null | undefined): string { + return cleanText(node?.textContent ?? ""); +} + +function attrOf(node: HTMLElement | null | undefined, name: string): string { + return cleanText(node?.getAttribute(name) ?? ""); +} + +function cleanText(text: string): string { + return text + .replace(/\s+/g, " ") + .replace(/\s*\{".*$/g, "") + .trim(); +} + +function isScriptLike(text: string): boolean { + return /\(function\s*\(|window\.|P\.when|ue\.count|tracking\(\)|logShoppableMetrics|buying options|add to cart/i.test(text); +} + +function firstText(root: HTMLElement, selectors: string[]): string { + for (const selector of selectors) { + const text = textOf(root.querySelector(selector)); + if (text) { + return text; + } + } + return ""; +} + +function extractBullets(root: HTMLElement): string[] { + const spanBullets = root.querySelectorAll("#feature-bullets li span") + .map((node) => textOf(node)) + .filter((text) => text && !/make sure this fits/i.test(text)); + if (spanBullets.length > 0) { + return spanBullets; + } + return root.querySelectorAll("#feature-bullets li") + .map((node) => textOf(node)) + .filter((text) => text && !/make sure this fits/i.test(text)); +} + +function extractSpecs(root: HTMLElement): ProductSpec[] { + const specs: ProductSpec[] = []; + const seen = new Set(); + const excludedNames = new Set(["customer reviews"]); + for (const row of root.querySelectorAll("tr")) { + const cells = row.querySelectorAll("th,td").map((cell) => textOf(cell)).filter(Boolean); + if (cells.length >= 2) { + const name = cells[0]; + const value = cells.slice(1).join(" "); + const key = name.toLowerCase(); + if (seen.has(key) || excludedNames.has(key) || isScriptLike(name) || isScriptLike(value)) { + continue; + } + seen.add(key); + specs.push({ name, value }); + } + } + return specs; +} + +function extractHistogramText(root: HTMLElement): string { + const rows = root.querySelectorAll("#histogramTable tr, [aria-label*='star'] tr"); + const parts: string[] = []; + for (const row of rows) { + const cells = row.querySelectorAll("th,td").map((cell) => textOf(cell)).filter(Boolean); + if (cells.length >= 2) { + parts.push(`${cells[0]} ${cells[1]}`); + } + } + return parts.join(" "); +} + +function deliveryFromText(text: string): DeliverySummary | undefined { + const display = text.replace(/\s+/g, " ").trim(); + if (!display) { + return undefined; + } + return { + display, + free: /\bfree\b/i.test(display), + prime: /\bprime\b/i.test(display) + }; +} + +export function extractDetailPage(html: string, base: ProductSearchResult): ProductSearchResult { + const root = parse(html); + const title = firstText(root, ["#productTitle", "h1"]) || base.title; + const priceText = firstText(root, [ + "#corePriceDisplay_desktop_feature_div .a-offscreen", + ".a-price .a-offscreen", + ".a-price" + ]); + const deliveryText = firstText(root, [ + "#mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE", + "#deliveryBlockMessage", + "[data-csa-c-delivery-price]" + ]); + const availability = firstText(root, ["#availability", "#availabilityInsideBuyBox_feature_div"]); + const seller = firstText(root, ["#merchant-info", "#sellerProfileTriggerId"]); + const ratingText = attrOf(root.querySelector("#acrPopover"), "title") || textOf(root.querySelector("#acrPopover")); + const reviewText = textOf(root.querySelector("#acrCustomerReviewText")); + const histogram = parseStarBreakdown(extractHistogramText(root)); + + const product: ProductSearchResult = { + ...base, + title, + price: parseMoney(priceText) ?? base.price, + rating: parseRating(ratingText) ?? base.rating, + reviewCount: parseReviewCount(reviewText) ?? base.reviewCount, + delivery: deliveryFromText(deliveryText) ?? base.delivery, + availability: availability || base.availability, + seller: seller || base.seller, + bullets: extractBullets(root), + specs: extractSpecs(root), + starBreakdown: histogram ?? base.starBreakdown, + missingFields: [...base.missingFields], + extractionNotes: [...base.extractionNotes] + }; + + for (const field of ["price", "delivery", "rating", "reviewCount", "starBreakdown"] as const) { + if (product[field] === undefined && !product.missingFields.includes(field)) { + product.missingFields.push(field); + } + } + + return product; +} diff --git a/skills/amazon-shopping/src/filters.ts b/skills/amazon-shopping/src/filters.ts new file mode 100644 index 0000000..00a60bb --- /dev/null +++ b/skills/amazon-shopping/src/filters.ts @@ -0,0 +1,86 @@ +import type { FilteredProducts, ProductFilters, ProductSearchResult } from "./types.js"; + +function passesMin(value: number | undefined, threshold: number, comparison: "gt" | "gte" | undefined): boolean { + if (value === undefined) { + return false; + } + return comparison === "gt" ? value > threshold : value >= threshold; +} + +function filterReasons(product: ProductSearchResult, filters: ProductFilters): string[] { + const reasons: string[] = []; + if (filters.minRating !== undefined && !passesMin(product.rating, filters.minRating, filters.ratingComparison)) { + reasons.push(product.rating === undefined ? "rating unknown" : `rating ${product.rating} below filter`); + } + if (filters.minReviews !== undefined && !passesMin(product.reviewCount, filters.minReviews, filters.reviewCountComparison)) { + reasons.push(product.reviewCount === undefined ? "review count unknown" : `review count ${product.reviewCount} below filter`); + } + if (filters.maxPrice !== undefined) { + if (!product.price) { + reasons.push("price unknown"); + } else if (product.price.amount > filters.maxPrice) { + reasons.push(`price ${product.price.display} above filter`); + } + } + if (filters.maxUnitPrice !== undefined) { + if (!product.unitPrice) { + reasons.push("unit price unknown"); + } else if (product.unitPrice.amount > filters.maxUnitPrice) { + reasons.push(`unit price ${product.unitPrice.display} above filter`); + } + } + if (filters.requirePrime && !product.delivery?.prime) { + reasons.push("Prime delivery not verified"); + } + if (filters.requireFreeDelivery && !product.delivery?.free) { + reasons.push("free delivery not verified"); + } + return reasons; +} + +function rankProducts(a: ProductSearchResult, b: ProductSearchResult): number { + const ratingDiff = (b.rating ?? -1) - (a.rating ?? -1); + if (ratingDiff !== 0) return ratingDiff; + const reviewDiff = (b.reviewCount ?? -1) - (a.reviewCount ?? -1); + if (reviewDiff !== 0) return reviewDiff; + return (a.price?.amount ?? Number.POSITIVE_INFINITY) - (b.price?.amount ?? Number.POSITIVE_INFINITY); +} + +export function applyFiltersAndLimit( + products: ProductSearchResult[], + filters: ProductFilters, + limit: number +): FilteredProducts { + const filteredOutReasons: Record = {}; + const uniqueProducts = new Map(); + for (const product of products) { + if (!uniqueProducts.has(product.asin)) { + uniqueProducts.set(product.asin, product); + } + } + const passing: ProductSearchResult[] = []; + + for (const product of uniqueProducts.values()) { + const reasons = filterReasons(product, filters); + if (reasons.length > 0) { + filteredOutReasons[product.asin] = reasons; + continue; + } + passing.push({ + ...product, + matchedFilters: [ + ...product.matchedFilters, + ...(filters.minRating !== undefined ? [`rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}`] : []), + ...(filters.minReviews !== undefined ? [`reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}`] : []), + ...(filters.maxPrice !== undefined ? [`price <= ${filters.maxPrice}`] : []), + ...(filters.maxUnitPrice !== undefined ? [`unit price <= ${filters.maxUnitPrice}`] : []) + ] + }); + } + + return { + results: passing.sort(rankProducts).slice(0, limit), + filteredOutCount: uniqueProducts.size - passing.length, + filteredOutReasons + }; +} diff --git a/skills/amazon-shopping/src/parsers.ts b/skills/amazon-shopping/src/parsers.ts index ab7dcab..1c37183 100644 --- a/skills/amazon-shopping/src/parsers.ts +++ b/skills/amazon-shopping/src/parsers.ts @@ -21,13 +21,19 @@ export function parseMoney(text: string | undefined | null): MoneyValue | undefi } export function parseUnitPrice(text: string | undefined | null): MoneyValue | undefined { - if (!text || !(/[/]\s*\d|\$\s*\d/.test(text))) { + if (!text) { return undefined; } - if (!/(\/|\bper\b|\beach\b|\bcount\b)/i.test(text)) { + const compact = text.replace(/\s+/g, " ").trim(); + const unitMatch = compact.match(/(\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)(?:\s*\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)?\s*(?:\/|\bper\b\s*)\s*(?:count|unit|item|piece|pack|each)\b/i) + ?? compact.match(/(\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)\s*(?:each)\b/i); + if (!unitMatch) { return undefined; } - return parseMoney(text); + const display = unitMatch[0] + .replace(/\$\s*([0-9][0-9,]*(?:\.[0-9]{1,2})?)\s*\$\s*\1/i, "$$$1") + .replace(/\s+/g, ""); + return parseMoney(display); } export function parseRating(text: string | undefined | null): number | undefined { diff --git a/skills/amazon-shopping/src/query-parser.ts b/skills/amazon-shopping/src/query-parser.ts index da80e41..ed9be69 100644 --- a/skills/amazon-shopping/src/query-parser.ts +++ b/skills/amazon-shopping/src/query-parser.ts @@ -50,7 +50,7 @@ export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguag remaining = removeMatched(remaining, reviewMatch); } - const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:review score of\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i); + const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:(?:review score|rating)\s+of\s+|rating\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i); const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i) ?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i); const ratingMatch = exclusiveRating ?? inclusiveRating; diff --git a/skills/amazon-shopping/src/report.ts b/skills/amazon-shopping/src/report.ts new file mode 100644 index 0000000..9c7f8b5 --- /dev/null +++ b/skills/amazon-shopping/src/report.ts @@ -0,0 +1,69 @@ +import type { ProductFilters, ProductSearchResult, SearchProductsResponse } from "./types.js"; + +export interface ResponseInput { + query: string; + filters: ProductFilters; + limit: number; + maxSearchPages: number; + results: ProductSearchResult[]; + filteredOutCount: number; + warnings: string[]; + now?: () => Date; +} + +export function createResponse(input: ResponseInput): SearchProductsResponse { + return { + query: input.query, + filters: input.filters, + limit: input.limit, + maxSearchPages: input.maxSearchPages, + results: input.results, + filteredOutCount: input.filteredOutCount, + warnings: input.warnings, + source: { + site: "amazon.com", + scrapedAt: (input.now ?? (() => new Date()))().toISOString(), + automation: "web-automation/CloakBrowser" + } + }; +} + +function formatFilters(filters: ProductFilters): string { + const parts = [ + filters.minRating !== undefined ? `rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}` : "", + filters.minReviews !== undefined ? `reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}` : "", + filters.maxPrice !== undefined ? `price <= $${filters.maxPrice}` : "", + filters.maxUnitPrice !== undefined ? `unit price <= $${filters.maxUnitPrice}` : "" + ].filter(Boolean); + return parts.length > 0 ? parts.join(", ") : "none"; +} + +function formatProduct(product: ProductSearchResult, index: number): string { + const specs = product.specs.slice(0, 3).map((spec) => `${spec.name}: ${spec.value}`).join("; "); + const lines = [ + `${index}. ${product.title}`, + ` Link: ${product.url}`, + ` Price: ${product.price?.display ?? "unknown"}${product.unitPrice ? ` (${product.unitPrice.display})` : ""}`, + ` Rating: ${product.rating ?? "unknown"} stars; reviews: ${product.reviewCount ?? "unknown"}`, + ` Delivery: ${product.delivery?.display ?? "unknown"}`, + specs ? ` Specs: ${specs}` : "", + product.bullets[0] ? ` Notes: ${product.bullets.slice(0, 2).join(" ")}` : "", + product.missingFields.length > 0 ? ` Missing: ${product.missingFields.join(", ")}` : "", + product.isSponsored ? " Sponsored: yes" : "" + ].filter(Boolean); + return lines.join("\n"); +} + +export function createMarkdownReport(response: SearchProductsResponse): string { + const lines = [ + `# Amazon Shopping Results`, + "", + `Query: ${response.query}`, + `Filters: ${formatFilters(response.filters)}`, + `Results returned: ${response.results.length} (filtered out: ${response.filteredOutCount})`, + response.warnings.length > 0 ? `Warnings: ${response.warnings.join("; ")}` : "", + "", + ...response.results.map((product, index) => formatProduct(product, index + 1)) + ].filter((line) => line !== ""); + return `${lines.join("\n")}\n`; +} diff --git a/skills/amazon-shopping/src/types.ts b/skills/amazon-shopping/src/types.ts index 258d462..14eff23 100644 --- a/skills/amazon-shopping/src/types.ts +++ b/skills/amazon-shopping/src/types.ts @@ -103,3 +103,9 @@ export interface SearchPageExtraction { warnings: string[]; nextPageUrl?: string; } + +export interface FilteredProducts { + results: ProductSearchResult[]; + filteredOutCount: number; + filteredOutReasons: Record; +} diff --git a/skills/amazon-shopping/tests/browser.test.ts b/skills/amazon-shopping/tests/browser.test.ts new file mode 100644 index 0000000..acc5742 --- /dev/null +++ b/skills/amazon-shopping/tests/browser.test.ts @@ -0,0 +1,43 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { classifyHttpStatus, isPathAllowedByRobots, plannedAmazonPaths } from "../src/browser.js"; + +describe("browser compliance helpers", () => { + it("plans only search and product-detail paths", () => { + assert.deepEqual(plannedAmazonPaths(["B0TEST0001"]), ["/s", "/dp/B0TEST0001", "/gp/product/B0TEST0001"]); + }); + + it("honors robots disallow rules for planned paths", () => { + const robots = ` +User-agent: * +Disallow: /cart +Disallow: /product-reviews +Disallow: /dp/private +`; + + assert.equal(isPathAllowedByRobots(robots, "*", "/s"), true); + assert.equal(isPathAllowedByRobots(robots, "*", "/product-reviews/B0TEST0001"), false); + assert.equal(isPathAllowedByRobots(robots, "*", "/dp/private/B0TEST0001"), false); + }); + + it("does not leak disallow rules from other user-agent groups", () => { + const robots = ` +User-agent: specialbot +Disallow: /dp + +User-agent: * +Disallow: /cart +`; + + assert.equal(isPathAllowedByRobots(robots, "*", "/dp/B0TEST0001"), true); + assert.equal(isPathAllowedByRobots(robots, "specialbot", "/dp/B0TEST0001"), false); + }); + + it("classifies retryable and challenge statuses", () => { + assert.equal(classifyHttpStatus(429), "retryable"); + assert.equal(classifyHttpStatus(503), "retryable"); + assert.equal(classifyHttpStatus(403), "challenge"); + assert.equal(classifyHttpStatus(200), "ok"); + }); +}); diff --git a/skills/amazon-shopping/tests/detail-page.test.ts b/skills/amazon-shopping/tests/detail-page.test.ts new file mode 100644 index 0000000..69be12e --- /dev/null +++ b/skills/amazon-shopping/tests/detail-page.test.ts @@ -0,0 +1,77 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; +import { describe, it } from "node:test"; + +import { extractDetailPage } from "../src/detail-page.js"; + +const fixturePath = join(import.meta.dirname, "fixtures", "product-detail.html"); + +describe("extractDetailPage", () => { + it("extracts visible product detail fields from sanitized HTML", async () => { + const html = await readFile(fixturePath, "utf8"); + const details = extractDetailPage(html, { + asin: "B0TESTLED1", + title: "Search title", + url: "https://www.amazon.com/dp/B0TESTLED1", + specs: [], + bullets: [], + matchedFilters: [], + missingFields: [], + extractionNotes: [] + }); + + assert.equal(details.title, "Bright Daylight LED Bulbs 100W Equivalent, 50 Count"); + assert.equal(details.price?.amount, 18.99); + assert.equal(details.delivery?.free, true); + assert.equal(details.availability, "In Stock"); + assert.equal(details.seller, "Ships from Amazon.com"); + assert.equal(details.bullets.length, 2); + assert.deepEqual(details.specs[0], { name: "Brand", value: "BrightCo" }); + assert.equal(details.rating, 4.6); + assert.equal(details.reviewCount, 1234); + assert.equal(details.starBreakdown?.five, 72); + }); + + it("records missing detail-only fields", () => { + const details = extractDetailPage("

Sparse Product

", { + asin: "B0SPARSE01", + title: "Sparse", + url: "https://www.amazon.com/dp/B0SPARSE01", + specs: [], + bullets: [], + matchedFilters: [], + missingFields: [], + extractionNotes: [] + }); + + assert.equal(details.price, undefined); + assert.ok(details.missingFields.includes("price")); + assert.ok(details.missingFields.includes("starBreakdown")); + }); + + it("drops script-like spec rows and trims availability metadata", () => { + const details = extractDetailPage(` +

Messy Product

+
In Stock {"merchantId":"secretish"}
+ + + + + +
Special Feature(function(P) { tracking(); }) Real feature text
A19 Add to Cart logShoppableMetrics("x", true)Buying Options
Wattage15 watts
Customer Reviews4.7 out of 5 stars tracking payload
+ `, { + asin: "B0MESSY001", + title: "Messy", + url: "https://www.amazon.com/dp/B0MESSY001", + specs: [], + bullets: [], + matchedFilters: [], + missingFields: [], + extractionNotes: [] + }); + + assert.equal(details.availability, "In Stock"); + assert.deepEqual(details.specs, [{ name: "Wattage", value: "15 watts" }]); + }); +}); diff --git a/skills/amazon-shopping/tests/filters.test.ts b/skills/amazon-shopping/tests/filters.test.ts new file mode 100644 index 0000000..8d4a97c --- /dev/null +++ b/skills/amazon-shopping/tests/filters.test.ts @@ -0,0 +1,75 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { applyFiltersAndLimit } from "../src/filters.js"; +import type { ProductSearchResult } from "../src/types.js"; + +function product(overrides: Partial): ProductSearchResult { + return { + asin: "B0BASE0001", + title: "Base Product", + url: "https://www.amazon.com/dp/B0BASE0001", + specs: [], + bullets: [], + matchedFilters: [], + missingFields: [], + extractionNotes: [], + ...overrides + }; +} + +describe("applyFiltersAndLimit", () => { + it("applies strict rating, review, and unit-price filters", () => { + const result = applyFiltersAndLimit([ + product({ + asin: "B0PASS0001", + rating: 4.6, + reviewCount: 201, + unitPrice: { amount: 3.99, currency: "USD", display: "$3.99/Count" } + }), + product({ + asin: "B0FAIL0001", + rating: 4.5, + reviewCount: 200, + unitPrice: { amount: 3.99, currency: "USD", display: "$3.99/Count" } + }), + product({ + asin: "B0UNKNOWN1", + rating: 4.7, + reviewCount: 300 + }) + ], { + includeKeywords: [], + excludeKeywords: [], + minRating: 4.5, + ratingComparison: "gt", + minReviews: 200, + reviewCountComparison: "gt", + maxUnitPrice: 4 + }, 10); + + assert.deepEqual(result.results.map((item) => item.asin), ["B0PASS0001"]); + assert.equal(result.filteredOutCount, 2); + assert.match(result.filteredOutReasons["B0UNKNOWN1"]?.join(" ") ?? "", /unit price unknown/i); + }); + + it("sorts by rating, reviews, then price", () => { + const result = applyFiltersAndLimit([ + product({ asin: "B0LOWPRICE", rating: 4.7, reviewCount: 1000, price: { amount: 15, currency: "USD", display: "$15.00" } }), + product({ asin: "B0HIGHRATE", rating: 4.9, reviewCount: 100, price: { amount: 40, currency: "USD", display: "$40.00" } }), + product({ asin: "B0MOREREV", rating: 4.7, reviewCount: 2000, price: { amount: 20, currency: "USD", display: "$20.00" } }) + ], { includeKeywords: [], excludeKeywords: [] }, 2); + + assert.deepEqual(result.results.map((item) => item.asin), ["B0HIGHRATE", "B0MOREREV"]); + }); + + it("deduplicates repeated ASINs before limiting", () => { + const result = applyFiltersAndLimit([ + product({ asin: "B0DUP0001", rating: 4.8, reviewCount: 1000 }), + product({ asin: "B0DUP0001", rating: 4.8, reviewCount: 1000 }), + product({ asin: "B0UNIQUE1", rating: 4.7, reviewCount: 900 }) + ], { includeKeywords: [], excludeKeywords: [] }, 10); + + assert.deepEqual(result.results.map((item) => item.asin), ["B0DUP0001", "B0UNIQUE1"]); + }); +}); diff --git a/skills/amazon-shopping/tests/fixtures/product-detail.html b/skills/amazon-shopping/tests/fixtures/product-detail.html new file mode 100644 index 0000000..f8a95c4 --- /dev/null +++ b/skills/amazon-shopping/tests/fixtures/product-detail.html @@ -0,0 +1,30 @@ + + + +

Bright Daylight LED Bulbs 100W Equivalent, 50 Count

+ + $18.99 +
FREE delivery Tomorrow
+
In Stock
+
Ships from Amazon.com
+
+
    +
  • Energy efficient 100W equivalent bulbs.
  • +
  • Daylight color temperature for kitchens and garages.
  • +
+
+ + + +
BrandBrightCo
Light TypeLED
+ + 1,234 ratings + + + + + + +
5 star72%
4 star15%
3 star7%
2 star3%
1 star3%
+ + diff --git a/skills/amazon-shopping/tests/parsers.test.ts b/skills/amazon-shopping/tests/parsers.test.ts index 3daea47..49ee2e8 100644 --- a/skills/amazon-shopping/tests/parsers.test.ts +++ b/skills/amazon-shopping/tests/parsers.test.ts @@ -68,6 +68,14 @@ describe("parsers", () => { }); }); + it("prefers the unit price when product price appears first", () => { + assert.deepEqual(parseUnitPrice("$9.99 ($5.00$5.00/count)"), { + amount: 5, + currency: "USD", + display: "$5.00/count" + }); + }); + it("parses whole-dollar and one-decimal prices", () => { assert.deepEqual(parseMoney("$20"), { amount: 20, currency: "USD", display: "$20" }); assert.deepEqual(parseMoney("$19.9"), { amount: 19.9, currency: "USD", display: "$19.9" }); diff --git a/skills/amazon-shopping/tests/query-parser.test.ts b/skills/amazon-shopping/tests/query-parser.test.ts index 7088018..ecf1dc7 100644 --- a/skills/amazon-shopping/tests/query-parser.test.ts +++ b/skills/amazon-shopping/tests/query-parser.test.ts @@ -27,6 +27,14 @@ describe("parseNaturalLanguageRequest", () => { assert.equal(parsed.filters.ratingComparison, "gte"); }); + it("cleans rating filter phrases from search query text", () => { + const parsed = parseNaturalLanguageRequest("usb c cable with over 1000 reviews and rating over 4 stars"); + + assert.equal(parsed.query, "usb c cable"); + assert.equal(parsed.filters.minReviews, 1000); + assert.equal(parsed.filters.minRating, 4); + }); + it("extracts limit and max product price phrases", () => { const parsed = parseNaturalLanguageRequest("return 5 wireless mouse under $30"); diff --git a/skills/amazon-shopping/tests/report.test.ts b/skills/amazon-shopping/tests/report.test.ts new file mode 100644 index 0000000..38c8bce --- /dev/null +++ b/skills/amazon-shopping/tests/report.test.ts @@ -0,0 +1,55 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; + +import { createMarkdownReport, createResponse } from "../src/report.js"; + +describe("report", () => { + it("creates a structured JSON response", () => { + const response = createResponse({ + query: "usb c cable", + filters: { includeKeywords: [], excludeKeywords: [], minReviews: 1000 }, + limit: 1, + maxSearchPages: 2, + results: [], + filteredOutCount: 4, + warnings: ["partial extraction"], + now: () => new Date("2026-04-15T00:00:00.000Z") + }); + + assert.equal(response.source.site, "amazon.com"); + assert.equal(response.filteredOutCount, 4); + assert.equal(response.source.scrapedAt, "2026-04-15T00:00:00.000Z"); + }); + + it("creates concise markdown with product details and warnings", () => { + const markdown = createMarkdownReport(createResponse({ + query: "usb c cable", + filters: { includeKeywords: [], excludeKeywords: [] }, + limit: 1, + maxSearchPages: 2, + filteredOutCount: 0, + warnings: ["price missing for one item"], + now: () => new Date("2026-04-15T00:00:00.000Z"), + results: [{ + asin: "B0TEST0001", + title: "USB-C Cable", + url: "https://www.amazon.com/dp/B0TEST0001", + price: { amount: 9.99, currency: "USD", display: "$9.99" }, + rating: 4.7, + reviewCount: 1234, + delivery: { display: "FREE delivery Tomorrow", free: true }, + specs: [{ name: "Length", value: "6 ft" }], + bullets: ["Braided cable"], + matchedFilters: [], + missingFields: ["starBreakdown"], + extractionNotes: [] + }] + })); + + assert.match(markdown, /USB-C Cable/); + assert.match(markdown, /\$9\.99/); + assert.match(markdown, /4\.7 stars/); + assert.match(markdown, /price missing/); + assert.match(markdown, /https:\/\/www\.amazon\.com\/dp\/B0TEST0001/); + }); +});