feat(amazon-shopping): scrape and filter amazon product results
This commit is contained in:
@@ -0,0 +1,228 @@
|
|||||||
|
import { execFile } from "node:child_process";
|
||||||
|
import { pathToFileURL } from "node:url";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { promisify } from "node:util";
|
||||||
|
|
||||||
|
import { extractDetailPage } from "./detail-page.js";
|
||||||
|
import { applyFiltersAndLimit } from "./filters.js";
|
||||||
|
import { createResponse } from "./report.js";
|
||||||
|
import { extractSearchPage } from "./search-page.js";
|
||||||
|
import type { ProductSearchResult, SearchProductsRequest, SearchProductsResponse } from "./types.js";
|
||||||
|
import { resolveWebAutomationRuntime } from "./web-automation-runtime.js";
|
||||||
|
|
||||||
|
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);
|
||||||
|
// Origin used to build the robots.txt URL and the search URLs.
const AMAZON_ROOT = "https://www.amazon.com";
|
||||||
|
// Milliseconds passed to page.waitForTimeout after each navigation.
const DEFAULT_WAIT_MS = 4500;
|
||||||
|
|
||||||
|
/** Result of classifying an HTTP status code; see classifyHttpStatus for the mapping. */
export type HttpClassification = "ok" | "retryable" | "challenge";
|
||||||
|
|
||||||
|
/**
 * Optional overrides for the side-effecting operations used in this module.
 * `fetchText` and `sleep` fall back to built-in defaults at their call sites.
 */
interface BrowserDeps {
|
||||||
|
// Fetches a URL and resolves with its body text (used to load robots.txt).
fetchText?: (url: string) => Promise<string>;
|
||||||
|
// Resolves after the given number of milliseconds.
sleep?: (ms: number) => Promise<void>;
|
||||||
|
// Clock forwarded to createResponse.
now?: () => Date;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Lists the Amazon URL paths a run intends to visit: the search endpoint
 * first, followed by both product-detail path forms for every ASIN.
 *
 * @param asins - Product identifiers to plan detail paths for.
 * @returns `"/s"` followed by `/dp/<asin>` and `/gp/product/<asin>` per ASIN.
 */
export function plannedAmazonPaths(asins: string[]): string[] {
  const planned = ["/s"];
  for (const asin of asins) {
    planned.push(`/dp/${asin}`, `/gp/product/${asin}`);
  }
  return planned;
}
|
||||||
|
|
||||||
|
/**
 * Maps an HTTP status code onto the action the caller should take.
 *
 * @param status - Status code, or null/undefined when none was observed.
 * @returns `"retryable"` for 429/503, `"challenge"` for 401/403, and `"ok"`
 *   for everything else (including a missing status).
 */
export function classifyHttpStatus(status: number | null | undefined): HttpClassification {
  switch (status) {
    case 429:
    case 503:
      return "retryable";
    case 401:
    case 403:
      return "challenge";
    default:
      return "ok";
  }
}
|
||||||
|
|
||||||
|
/**
 * Decides whether `path` may be fetched according to a robots.txt document.
 *
 * Groups are selected by exact (case-insensitive) user-agent match, falling
 * back to the `*` groups when no group names the agent. Within the selected
 * groups both `Allow` and `Disallow` rules are honored: the longest matching
 * pattern wins, and an `Allow` wins a tie. Patterns may use `*` (any run of
 * characters) and a trailing `$` (end of path). A path that matches no rule
 * is allowed.
 *
 * @param robots - Raw robots.txt text.
 * @param userAgent - Agent name to select groups for (`"*"` for the default group).
 * @param path - URL path to test, e.g. `/dp/B000000000`.
 * @returns `true` when the path is allowed, `false` when it is disallowed.
 */
export function isPathAllowedByRobots(robots: string, userAgent: string, path: string): boolean {
  type RobotsRule = { allow: boolean; pattern: string };
  type RobotsGroup = { agents: string[]; rules: RobotsRule[] };

  const groups: RobotsGroup[] = [];
  let current: RobotsGroup | undefined;
  // True once the current group has seen a rule line; the next User-agent
  // line then starts a new group instead of extending the agent list.
  let hasDirectives = false;

  for (const rawLine of robots.split(/\r?\n/)) {
    const line = rawLine.replace(/#.*/, "").trim();
    if (!line) continue;
    const [rawKey, ...rest] = line.split(":");
    const key = rawKey.trim().toLowerCase();
    const value = rest.join(":").trim();

    if (key === "user-agent") {
      if (!current || hasDirectives) {
        current = { agents: [], rules: [] };
        groups.push(current);
        hasDirectives = false;
      }
      current.agents.push(value.toLowerCase());
      continue;
    }

    if (key === "allow" || key === "disallow") {
      hasDirectives = true;
      // An empty value carries no restriction, so no rule is recorded.
      if (current && value) {
        current.rules.push({ allow: key === "allow", pattern: value });
      }
    }
  }

  const normalizedAgent = userAgent.toLowerCase();
  const exactGroups = groups.filter((group) => group.agents.includes(normalizedAgent));
  const matchedGroups = exactGroups.length > 0 ? exactGroups : groups.filter((group) => group.agents.includes("*"));
  const rules = matchedGroups.flatMap((group) => group.rules);

  // Returns the pattern length when it matches `path` from the start, else -1.
  const matchLength = (pattern: string): number => {
    const anchored = pattern.endsWith("$");
    const body = anchored ? pattern.slice(0, -1) : pattern;
    const source = body
      .split("*")
      .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join(".*");
    return new RegExp(`^${source}${anchored ? "$" : ""}`).test(path) ? pattern.length : -1;
  };

  let bestLength = -1;
  let allowed = true;
  for (const rule of rules) {
    const length = matchLength(rule.pattern);
    if (length > bestLength || (length >= 0 && length === bestLength && rule.allow)) {
      bestLength = length;
      allowed = rule.allow;
    }
  }
  return allowed;
}
|
||||||
|
|
||||||
|
/**
 * Default text fetcher used when no `fetchText` override is supplied.
 *
 * @param url - Absolute URL to request.
 * @returns The response body as text.
 * @throws Error when the response status is not in the 2xx range, so that an
 *   error or block page is never mistaken for the requested document.
 */
async function defaultFetchText(url: string): Promise<string> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Request for ${url} failed with HTTP status ${response.status}.`);
  }
  return response.text();
}
|
||||||
|
|
||||||
|
/**
 * Fetches Amazon's robots.txt and reports every planned path that the `*`
 * user-agent group disallows.
 *
 * @param paths - URL paths the run intends to visit.
 * @param deps - Optional `fetchText` override; defaults to `defaultFetchText`.
 * @returns One warning string per disallowed path (empty when all are allowed).
 */
async function checkRobots(paths: string[], deps: BrowserDeps): Promise<string[]> {
  const fetchText = deps.fetchText ?? defaultFetchText;
  const robots = await fetchText(`${AMAZON_ROOT}/robots.txt`);
  return paths
    .filter((path) => !isPathAllowedByRobots(robots, "*", path))
    .map((path) => `Amazon robots directives disallow planned path: ${path}`);
}
|
||||||
|
|
||||||
|
/**
 * Dynamically imports the `cloakbrowser` package from the `node_modules`
 * folder under the given directory.
 *
 * @param runtimeDir - Directory whose `node_modules/cloakbrowser/dist/index.js` is loaded.
 * @returns The imported module, typed as exposing `launchContext` and an
 *   optional `ensureBinary`.
 */
async function loadCloakBrowser(runtimeDir: string): Promise<{
  ensureBinary?: () => Promise<void>;
  launchContext: (options: Record<string, unknown>) => Promise<any>;
}> {
  const entryPath = join(runtimeDir, "node_modules", "cloakbrowser", "dist", "index.js");
  // import() needs a file:// URL rather than a bare filesystem path.
  const entryUrl = pathToFileURL(entryPath);
  return import(entryUrl.toString()) as Promise<any>;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the web-automation runtime, runs its install-check command, and
 * returns the runtime's scripts directory.
 *
 * @returns `scriptsDir` of the resolved runtime.
 * @throws Whatever `execFile` rejects with when the check command fails.
 */
async function checkRuntime(): Promise<string> {
  const runtime = await resolveWebAutomationRuntime();
  const { command, args, cwd } = runtime.checkInstall;
  await execFileAsync(command, args, { cwd });
  return runtime.scriptsDir;
}
|
||||||
|
|
||||||
|
/**
 * Builds the Amazon search URL for a query and result page.
 *
 * @param query - Search keywords, stored in the `k` parameter.
 * @param pageNumber - 1-based page; the `page` parameter is only added for pages after the first.
 * @returns The absolute search URL.
 */
function searchUrl(query: string, pageNumber: number): string {
  const target = new URL("/s", AMAZON_ROOT);
  const params = target.searchParams;
  params.set("k", query);
  const isLaterPage = pageNumber > 1;
  if (isLaterPage) {
    params.set("page", String(pageNumber));
  }
  return target.toString();
}
|
||||||
|
|
||||||
|
/**
 * Navigates `page` to `url` and returns the resulting HTML and HTTP status,
 * retrying with exponential backoff plus jitter while the status is retryable.
 *
 * @param page - Browser page exposing `goto`, `content`, and optionally `waitForTimeout`.
 * @param url - Absolute URL to load.
 * @param deps - Optional `sleep` override used for the backoff delay.
 * @returns The page HTML after the last attempt and that attempt's status
 *   (null when the navigation produced no response/status).
 */
async function pageHtml(page: any, url: string, deps: BrowserDeps): Promise<{ html: string; status: number | null }> {
  const maxAttempts = 3;
  const sleep = deps.sleep ?? ((ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)));
  let lastStatus: number | null = null;

  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    const response = await page.goto(url, { waitUntil: "domcontentloaded", timeout: 45000 });
    await page.waitForTimeout?.(DEFAULT_WAIT_MS);
    lastStatus = response?.status?.() ?? null;
    if (classifyHttpStatus(lastStatus) !== "retryable") {
      break;
    }
    // Back off only when another attempt follows; sleeping after the final
    // attempt would just delay returning the failure to the caller.
    if (attempt < maxAttempts - 1) {
      await sleep((2 ** attempt) * 1000 + Math.floor(Math.random() * 500));
    }
  }

  return {
    html: await page.content(),
    status: lastStatus
  };
}
|
||||||
|
|
||||||
|
/**
 * Visits each product's detail page in sequence and merges the extracted
 * detail fields into the search-page result.
 *
 * A randomized 1.5–3 s pause precedes every page load. When the detail page
 * comes back with a challenge status, or is still retryable after `pageHtml`
 * exhausted its retries, the search-page data is kept unchanged apart from an
 * added extraction note; the error page is never parsed as a product page.
 *
 * @param page - Browser page reused for every navigation.
 * @param products - Search results to enrich; each must carry a `url`.
 * @param deps - Optional `sleep` override.
 * @returns One result per input product, in the same order.
 */
async function enrichDetails(page: any, products: ProductSearchResult[], deps: BrowserDeps): Promise<ProductSearchResult[]> {
  const sleep = deps.sleep ?? ((ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)));
  const enriched: ProductSearchResult[] = [];

  for (const product of products) {
    await sleep(1500 + Math.floor(Math.random() * 1500));
    const loaded = await pageHtml(page, product.url, deps);
    const classification = classifyHttpStatus(loaded.status);

    if (classification === "challenge") {
      enriched.push({
        ...product,
        extractionNotes: [...product.extractionNotes, "Detail page returned a challenge/block status."]
      });
      continue;
    }

    if (classification === "retryable") {
      // The HTML here is a throttling/error page, not a product page.
      enriched.push({
        ...product,
        extractionNotes: [
          ...product.extractionNotes,
          `Detail page still returned status ${loaded.status} after retries; kept search-page data.`
        ]
      });
      continue;
    }

    enriched.push(extractDetailPage(loaded.html, product));
  }

  return enriched;
}
|
||||||
|
|
||||||
|
/**
 * Runs a live Amazon product search: checks robots.txt, launches the browser,
 * walks search result pages, optionally enriches candidates from their detail
 * pages, then filters, ranks, and limits the results.
 *
 * The run stops early (without attempting any bypass) when robots.txt
 * disallows the search path or when Amazon answers with a challenge or
 * retryable status. Detail enrichment is skipped when robots.txt disallows
 * any planned detail path or when `request.skipDetails` is set.
 *
 * @param request - Query, filters, result limit, and page budget.
 * @param deps - Optional overrides for fetching robots.txt, sleeping, and the clock.
 * @returns The response built by `createResponse`, including collected warnings.
 */
export async function searchProducts(request: SearchProductsRequest, deps: BrowserDeps = {}): Promise<SearchProductsResponse> {
  const warnings: string[] = [];
  const robotsWarnings = await checkRobots(plannedAmazonPaths([]), deps);
  if (robotsWarnings.length > 0) {
    return createResponse({
      query: request.query,
      filters: request.filters,
      limit: request.limit,
      maxSearchPages: request.maxSearchPages,
      results: [],
      filteredOutCount: 0,
      warnings: robotsWarnings,
      now: deps.now
    });
  }

  const runtimeDir = await checkRuntime();
  const cloak = await loadCloakBrowser(runtimeDir);
  await cloak.ensureBinary?.();
  const context = await cloak.launchContext({
    headless: process.env.CLOAKBROWSER_HEADLESS !== "false",
    locale: "en-US",
    viewport: { width: 1440, height: 900 },
    humanize: true
  });

  try {
    // Created inside the try so the context is still closed if this throws.
    const page = await context.newPage();

    const candidates: ProductSearchResult[] = [];
    let nextUrl: string | undefined = searchUrl(request.query, 1);
    for (let pageNumber = 1; pageNumber <= request.maxSearchPages && nextUrl; pageNumber += 1) {
      const loaded = await pageHtml(page, nextUrl, deps);
      const classification = classifyHttpStatus(loaded.status);
      if (classification === "challenge" || classification === "retryable") {
        warnings.push(`Amazon returned status ${loaded.status}; stopping without bypass.`);
        break;
      }
      const extracted = extractSearchPage(loaded.html, nextUrl);
      warnings.push(...extracted.warnings);
      if (extracted.status === "challenge") {
        break;
      }
      candidates.push(...extracted.products);
      // Collect up to three times the limit so filtering still leaves enough results.
      if (candidates.length >= request.limit * 3) {
        break;
      }
      nextUrl = extracted.nextPageUrl ?? (pageNumber + 1 <= request.maxSearchPages ? searchUrl(request.query, pageNumber + 1) : undefined);
    }

    let detailCandidates = candidates;
    if (!request.skipDetails) {
      const detailPaths = plannedAmazonPaths(candidates.map((candidate) => candidate.asin)).filter((path) => path !== "/s");
      const detailRobotsWarnings = await checkRobots(detailPaths, deps);
      if (detailRobotsWarnings.length > 0) {
        warnings.push(...detailRobotsWarnings, "Detail enrichment skipped because robots directives disallow at least one planned detail path.");
      } else {
        detailCandidates = await enrichDetails(page, candidates.slice(0, request.limit * 3), deps);
      }
    }

    const filtered = applyFiltersAndLimit(detailCandidates, request.filters, request.limit);
    return createResponse({
      query: request.query,
      filters: request.filters,
      limit: request.limit,
      maxSearchPages: request.maxSearchPages,
      results: filtered.results,
      filteredOutCount: filtered.filteredOutCount,
      warnings,
      now: deps.now
    });
  } finally {
    await context.close();
  }
}
|
||||||
@@ -3,7 +3,9 @@
|
|||||||
import minimist from "minimist";
|
import minimist from "minimist";
|
||||||
import { fileURLToPath } from "node:url";
|
import { fileURLToPath } from "node:url";
|
||||||
|
|
||||||
|
import { searchProducts } from "./browser.js";
|
||||||
import { parseNaturalLanguageRequest } from "./query-parser.js";
|
import { parseNaturalLanguageRequest } from "./query-parser.js";
|
||||||
|
import { createMarkdownReport } from "./report.js";
|
||||||
import type { ProductFilters, SearchProductsRequest, SearchProductsResponse } from "./types.js";
|
import type { ProductFilters, SearchProductsRequest, SearchProductsResponse } from "./types.js";
|
||||||
|
|
||||||
export interface CliDeps {
|
export interface CliDeps {
|
||||||
@@ -138,7 +140,19 @@ async function defaultSearchProducts(request: SearchProductsRequest, deps: CliDe
|
|||||||
if (request.dryRun) {
|
if (request.dryRun) {
|
||||||
return createDryRunResponse(request, deps.now ?? (() => new Date()));
|
return createDryRunResponse(request, deps.now ?? (() => new Date()));
|
||||||
}
|
}
|
||||||
throw new Error("Live Amazon search is not implemented yet. Use --dry-run until browser orchestration is installed.");
|
return searchProducts(request, { now: deps.now });
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Writes the search response to `deps.stdout` in the requested format:
 * Markdown only, JSON followed by Markdown (`"both"`), or pretty-printed JSON
 * for any other value.
 */
function writeResponse(response: SearchProductsResponse, output: SearchProductsRequest["output"], deps: CliDeps): void {
  // Serialized lazily so the Markdown-only path never stringifies the response.
  const asJson = (): string => JSON.stringify(response, null, 2);

  switch (output) {
    case "markdown":
      deps.stdout.write(createMarkdownReport(response));
      break;
    case "both":
      deps.stdout.write(`${asJson()}\n\n${createMarkdownReport(response)}`);
      break;
    default:
      deps.stdout.write(`${asJson()}\n`);
  }
}
|
||||||
|
|
||||||
export async function runCli(
|
export async function runCli(
|
||||||
@@ -156,7 +170,7 @@ export async function runCli(
|
|||||||
const response = deps.searchProducts
|
const response = deps.searchProducts
|
||||||
? await deps.searchProducts(request)
|
? await deps.searchProducts(request)
|
||||||
: await defaultSearchProducts(request, deps);
|
: await defaultSearchProducts(request, deps);
|
||||||
deps.stdout.write(`${JSON.stringify(response, null, 2)}\n`);
|
writeResponse(response, request.output, deps);
|
||||||
return 0;
|
return 0;
|
||||||
} catch (error: unknown) {
|
} catch (error: unknown) {
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
|
|||||||
@@ -0,0 +1,133 @@
|
|||||||
|
import { HTMLElement, parse } from "node-html-parser";
|
||||||
|
|
||||||
|
import { parseMoney, parseRating, parseReviewCount, parseStarBreakdown } from "./parsers.js";
|
||||||
|
import type { DeliverySummary, ProductSearchResult, ProductSpec } from "./types.js";
|
||||||
|
|
||||||
|
/** Returns the cleaned text content of a node, or "" when the node is missing. */
function textOf(node: HTMLElement | null | undefined): string {
  const raw = node?.textContent ?? "";
  return cleanText(raw);
}
|
||||||
|
|
||||||
|
/** Returns the cleaned value of attribute `name`, or "" when the node or attribute is missing. */
function attrOf(node: HTMLElement | null | undefined, name: string): string {
  const raw = node?.getAttribute(name) ?? "";
  return cleanText(raw);
}
|
||||||
|
|
||||||
|
/**
 * Normalizes extracted text: collapses whitespace runs to single spaces,
 * drops everything from the first `{"` onward (embedded JSON metadata), and
 * trims the result.
 */
function cleanText(text: string): string {
  const collapsed = text.replace(/\s+/g, " ");
  const withoutJsonTail = collapsed.replace(/\s*\{".*$/g, "");
  return withoutJsonTail.trim();
}
|
||||||
|
|
||||||
|
/**
 * Reports whether text looks like inline script or buy-box boilerplate rather
 * than product data (case-insensitive match on a fixed set of markers).
 */
function isScriptLike(text: string): boolean {
  const markers = /\(function\s*\(|window\.|P\.when|ue\.count|tracking\(\)|logShoppableMetrics|buying options|add to cart/i;
  return text.search(markers) !== -1;
}
|
||||||
|
|
||||||
|
/**
 * Tries each selector in order and returns the first non-empty cleaned text.
 *
 * @returns The matched text, or "" when no selector yields any text.
 */
function firstText(root: HTMLElement, selectors: string[]): string {
  let found = "";
  // some() stops at the first selector that produces text.
  selectors.some((selector) => {
    found = textOf(root.querySelector(selector));
    return found !== "";
  });
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Collects feature-bullet texts, preferring the `<span>` elements inside the
 * list items and falling back to the list items themselves. Empty entries and
 * the "make sure this fits" notice are dropped.
 */
function extractBullets(root: HTMLElement): string[] {
  const collect = (selector: string): string[] => {
    const texts: string[] = [];
    for (const node of root.querySelectorAll(selector)) {
      const text = textOf(node);
      if (text && !/make sure this fits/i.test(text)) {
        texts.push(text);
      }
    }
    return texts;
  };

  const fromSpans = collect("#feature-bullets li span");
  return fromSpans.length > 0 ? fromSpans : collect("#feature-bullets li");
}
|
||||||
|
|
||||||
|
/**
 * Reads name/value specs from every table row with at least two non-empty
 * cells. The first cell is the name and the remaining cells are joined into
 * the value. Rows are skipped when the name repeats (case-insensitively), is
 * "customer reviews", or when either side looks like script text.
 */
function extractSpecs(root: HTMLElement): ProductSpec[] {
  const excludedNames = new Set(["customer reviews"]);
  // Insertion-ordered map doubles as the "already seen" check.
  const byKey = new Map<string, ProductSpec>();

  for (const row of root.querySelectorAll("tr")) {
    const cellTexts = row.querySelectorAll("th,td").map((cell) => textOf(cell)).filter(Boolean);
    if (cellTexts.length < 2) {
      continue;
    }
    const [name, ...valueParts] = cellTexts;
    const value = valueParts.join(" ");
    const key = name.toLowerCase();
    const rejected = byKey.has(key) || excludedNames.has(key) || isScriptLike(name) || isScriptLike(value);
    if (!rejected) {
      byKey.set(key, { name, value });
    }
  }

  return [...byKey.values()];
}
|
||||||
|
|
||||||
|
/**
 * Flattens the rating histogram rows into one string: for each row with at
 * least two non-empty cells, the first two cell texts are joined by a space,
 * and all rows are joined by spaces.
 */
function extractHistogramText(root: HTMLElement): string {
  return root
    .querySelectorAll("#histogramTable tr, [aria-label*='star'] tr")
    .map((row) => row.querySelectorAll("th,td").map((cell) => textOf(cell)).filter(Boolean))
    .filter((cells) => cells.length >= 2)
    .map((cells) => `${cells[0]} ${cells[1]}`)
    .join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Builds a delivery summary from free-form delivery text.
 *
 * @returns `undefined` for blank text; otherwise the whitespace-normalized
 *   text plus flags for whether it mentions "free" and "prime" as whole words.
 */
function deliveryFromText(text: string): DeliverySummary | undefined {
  const display = text.replace(/\s+/g, " ").trim();
  if (display.length === 0) {
    return undefined;
  }
  const free = /\bfree\b/i.test(display);
  const prime = /\bprime\b/i.test(display);
  return { display, free, prime };
}
|
||||||
|
|
||||||
|
/**
 * Merges fields extracted from a product detail page into a search result.
 *
 * Every extracted field falls back to the value already on `base` when the
 * detail page does not provide it; this includes bullets and specs, which
 * keep the base lists when the page yields none. `missingFields` is then
 * reconciled for price, delivery, rating, reviewCount, and starBreakdown:
 * fields that are now known are removed and fields still undefined are added.
 *
 * @param html - Detail page HTML.
 * @param base - Result produced from the search page; not mutated.
 * @returns A new result combining `base` with the detail-page data.
 */
export function extractDetailPage(html: string, base: ProductSearchResult): ProductSearchResult {
  const root = parse(html);
  const title = firstText(root, ["#productTitle", "h1"]) || base.title;
  const priceText = firstText(root, [
    "#corePriceDisplay_desktop_feature_div .a-offscreen",
    ".a-price .a-offscreen",
    ".a-price"
  ]);
  const deliveryText = firstText(root, [
    "#mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE",
    "#deliveryBlockMessage",
    "[data-csa-c-delivery-price]"
  ]);
  const availability = firstText(root, ["#availability", "#availabilityInsideBuyBox_feature_div"]);
  const seller = firstText(root, ["#merchant-info", "#sellerProfileTriggerId"]);
  const ratingText = attrOf(root.querySelector("#acrPopover"), "title") || textOf(root.querySelector("#acrPopover"));
  const reviewText = textOf(root.querySelector("#acrCustomerReviewText"));
  const histogram = parseStarBreakdown(extractHistogramText(root));
  const bullets = extractBullets(root);
  const specs = extractSpecs(root);

  const product: ProductSearchResult = {
    ...base,
    title,
    price: parseMoney(priceText) ?? base.price,
    rating: parseRating(ratingText) ?? base.rating,
    reviewCount: parseReviewCount(reviewText) ?? base.reviewCount,
    delivery: deliveryFromText(deliveryText) ?? base.delivery,
    availability: availability || base.availability,
    seller: seller || base.seller,
    // Keep search-page bullets/specs when the detail page has none.
    bullets: bullets.length > 0 ? bullets : [...base.bullets],
    specs: specs.length > 0 ? specs : [...base.specs],
    starBreakdown: histogram ?? base.starBreakdown,
    missingFields: [...base.missingFields],
    extractionNotes: [...base.extractionNotes]
  };

  const trackedFields = ["price", "delivery", "rating", "reviewCount", "starBreakdown"] as const;
  // Drop stale entries for fields this page resolved.
  const resolved = new Set<string>(trackedFields.filter((field) => product[field] !== undefined));
  product.missingFields = product.missingFields.filter((field) => !resolved.has(field));
  for (const field of trackedFields) {
    if (product[field] === undefined && !product.missingFields.includes(field)) {
      product.missingFields.push(field);
    }
  }

  return product;
}
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
import type { FilteredProducts, ProductFilters, ProductSearchResult } from "./types.js";
|
||||||
|
|
||||||
|
/**
 * Checks a value against a minimum threshold.
 *
 * @param value - Value to test; `undefined` never passes.
 * @param threshold - Minimum to compare against.
 * @param comparison - `"gt"` for strictly greater; anything else means greater-or-equal.
 */
function passesMin(value: number | undefined, threshold: number, comparison: "gt" | "gte" | undefined): boolean {
  if (value === undefined) return false;
  if (comparison === "gt") return value > threshold;
  return value >= threshold;
}
|
||||||
|
|
||||||
|
/**
 * Lists every reason a product fails the given filters, in a fixed order:
 * rating, review count, price, unit price, Prime, free delivery. A value that
 * is unknown counts as a failure for any filter that needs it.
 *
 * @returns The failure reasons; empty when the product passes all filters.
 */
function filterReasons(product: ProductSearchResult, filters: ProductFilters): string[] {
  const { rating, reviewCount, price, unitPrice, delivery } = product;
  const reasons: string[] = [];

  if (filters.minRating !== undefined && !passesMin(rating, filters.minRating, filters.ratingComparison)) {
    reasons.push(rating === undefined ? "rating unknown" : `rating ${rating} below filter`);
  }

  if (filters.minReviews !== undefined && !passesMin(reviewCount, filters.minReviews, filters.reviewCountComparison)) {
    reasons.push(reviewCount === undefined ? "review count unknown" : `review count ${reviewCount} below filter`);
  }

  if (filters.maxPrice !== undefined) {
    const reason = !price
      ? "price unknown"
      : price.amount > filters.maxPrice
        ? `price ${price.display} above filter`
        : undefined;
    if (reason !== undefined) reasons.push(reason);
  }

  if (filters.maxUnitPrice !== undefined) {
    const reason = !unitPrice
      ? "unit price unknown"
      : unitPrice.amount > filters.maxUnitPrice
        ? `unit price ${unitPrice.display} above filter`
        : undefined;
    if (reason !== undefined) reasons.push(reason);
  }

  if (filters.requirePrime && !delivery?.prime) {
    reasons.push("Prime delivery not verified");
  }

  if (filters.requireFreeDelivery && !delivery?.free) {
    reasons.push("free delivery not verified");
  }

  return reasons;
}
|
||||||
|
|
||||||
|
/**
 * Sort comparator: higher rating first, then more reviews, then lower price.
 * A missing rating or review count ranks as -1; a missing price ranks as
 * infinitely expensive.
 */
function rankProducts(a: ProductSearchResult, b: ProductSearchResult): number {
  const byRating = (b.rating ?? -1) - (a.rating ?? -1);
  const byReviews = (b.reviewCount ?? -1) - (a.reviewCount ?? -1);
  const byPrice = (a.price?.amount ?? Number.POSITIVE_INFINITY) - (b.price?.amount ?? Number.POSITIVE_INFINITY);

  // The first non-zero difference decides; price is the final tie-breaker.
  for (const difference of [byRating, byReviews]) {
    if (difference !== 0) return difference;
  }
  return byPrice;
}
|
||||||
|
|
||||||
|
/**
 * De-duplicates products by ASIN (first occurrence wins), drops those that
 * fail the filters, then ranks the survivors and keeps the first `limit`.
 *
 * @param products - Candidate products, possibly containing duplicate ASINs.
 * @param filters - Filters to apply.
 * @param limit - Maximum number of results to return.
 * @returns The ranked results, the number of unique products filtered out,
 *   and the failure reasons keyed by ASIN.
 */
export function applyFiltersAndLimit(
  products: ProductSearchResult[],
  filters: ProductFilters,
  limit: number
): FilteredProducts {
  const firstByAsin = new Map<string, ProductSearchResult>();
  for (const product of products) {
    if (!firstByAsin.has(product.asin)) {
      firstByAsin.set(product.asin, product);
    }
  }

  // These labels depend only on the filters, so build them once.
  const filterLabels: string[] = [];
  if (filters.minRating !== undefined) {
    filterLabels.push(`rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}`);
  }
  if (filters.minReviews !== undefined) {
    filterLabels.push(`reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}`);
  }
  if (filters.maxPrice !== undefined) {
    filterLabels.push(`price <= ${filters.maxPrice}`);
  }
  if (filters.maxUnitPrice !== undefined) {
    filterLabels.push(`unit price <= ${filters.maxUnitPrice}`);
  }

  const filteredOutReasons: Record<string, string[]> = {};
  const passing: ProductSearchResult[] = [];
  firstByAsin.forEach((product, asin) => {
    const reasons = filterReasons(product, filters);
    if (reasons.length > 0) {
      filteredOutReasons[asin] = reasons;
      return;
    }
    passing.push({ ...product, matchedFilters: [...product.matchedFilters, ...filterLabels] });
  });

  passing.sort(rankProducts);
  return {
    results: passing.slice(0, limit),
    filteredOutCount: firstByAsin.size - passing.length,
    filteredOutReasons
  };
}
|
||||||
@@ -21,13 +21,19 @@ export function parseMoney(text: string | undefined | null): MoneyValue | undefi
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function parseUnitPrice(text: string | undefined | null): MoneyValue | undefined {
|
export function parseUnitPrice(text: string | undefined | null): MoneyValue | undefined {
|
||||||
if (!text || !(/[/]\s*\d|\$\s*\d/.test(text))) {
|
if (!text) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
if (!/(\/|\bper\b|\beach\b|\bcount\b)/i.test(text)) {
|
const compact = text.replace(/\s+/g, " ").trim();
|
||||||
|
const unitMatch = compact.match(/(\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)(?:\s*\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)?\s*(?:\/|\bper\b\s*)\s*(?:count|unit|item|piece|pack|each)\b/i)
|
||||||
|
?? compact.match(/(\$\s*[0-9][0-9,]*(?:\.[0-9]{1,2})?)\s*(?:each)\b/i);
|
||||||
|
if (!unitMatch) {
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
return parseMoney(text);
|
const display = unitMatch[0]
|
||||||
|
.replace(/\$\s*([0-9][0-9,]*(?:\.[0-9]{1,2})?)\s*\$\s*\1/i, "$$$1")
|
||||||
|
.replace(/\s+/g, "");
|
||||||
|
return parseMoney(display);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parseRating(text: string | undefined | null): number | undefined {
|
export function parseRating(text: string | undefined | null): number | undefined {
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ export function parseNaturalLanguageRequest(input: string): ParsedNaturalLanguag
|
|||||||
remaining = removeMatched(remaining, reviewMatch);
|
remaining = removeMatched(remaining, reviewMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:review score of\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i);
|
const exclusiveRating = remaining.match(/\b(?:a\s+)?(?:(?:review score|rating)\s+of\s+|rating\s+)?(?:more than|over|above|rated above)\s+([0-5](?:\.[0-9])?)\s*(?:stars?)?\b/i);
|
||||||
const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i)
|
const inclusiveRating = remaining.match(/\b([0-5](?:\.[0-9])?)\s*stars?\s+or\s+better\b/i)
|
||||||
?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i);
|
?? remaining.match(/\b(?:at least|minimum|min\.?)\s+([0-5](?:\.[0-9])?)\s*(?:stars?|rating)?\b/i);
|
||||||
const ratingMatch = exclusiveRating ?? inclusiveRating;
|
const ratingMatch = exclusiveRating ?? inclusiveRating;
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
import type { ProductFilters, ProductSearchResult, SearchProductsResponse } from "./types.js";
|
||||||
|
|
||||||
|
/**
 * Input to createResponse: the fields copied verbatim into the response, plus
 * an optional clock used for the `scrapedAt` timestamp.
 */
export interface ResponseInput {
|
||||||
|
query: string;
|
||||||
|
filters: ProductFilters;
|
||||||
|
limit: number;
|
||||||
|
maxSearchPages: number;
|
||||||
|
results: ProductSearchResult[];
|
||||||
|
filteredOutCount: number;
|
||||||
|
warnings: string[];
|
||||||
|
// Clock override; createResponse uses `new Date()` when omitted.
now?: () => Date;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Assembles a search response from the given input, adding `source` metadata
 * with the site name, an ISO-8601 scrape timestamp, and the automation label.
 *
 * @param input - Response fields plus an optional clock (defaults to `new Date()`).
 */
export function createResponse(input: ResponseInput): SearchProductsResponse {
  const { query, filters, limit, maxSearchPages, results, filteredOutCount, warnings } = input;
  const clock = input.now ?? (() => new Date());
  const source = {
    site: "amazon.com",
    scrapedAt: clock().toISOString(),
    automation: "web-automation/CloakBrowser"
  };
  return { query, filters, limit, maxSearchPages, results, filteredOutCount, warnings, source };
}
|
||||||
|
|
||||||
|
/**
 * Renders the active filters as a comma-separated summary for the report.
 *
 * Covers every filter that is actually applied to products, including the
 * Prime and free-delivery requirements, so the report never claims "none"
 * while a delivery filter is in effect.
 *
 * @returns The summary, or `"none"` when no filter is set.
 */
function formatFilters(filters: ProductFilters): string {
  const parts: string[] = [];
  if (filters.minRating !== undefined) {
    parts.push(`rating ${filters.ratingComparison ?? "gte"} ${filters.minRating}`);
  }
  if (filters.minReviews !== undefined) {
    parts.push(`reviews ${filters.reviewCountComparison ?? "gte"} ${filters.minReviews}`);
  }
  if (filters.maxPrice !== undefined) {
    parts.push(`price <= $${filters.maxPrice}`);
  }
  if (filters.maxUnitPrice !== undefined) {
    parts.push(`unit price <= $${filters.maxUnitPrice}`);
  }
  if (filters.requirePrime) {
    parts.push("Prime delivery required");
  }
  if (filters.requireFreeDelivery) {
    parts.push("free delivery required");
  }
  return parts.length > 0 ? parts.join(", ") : "none";
}
|
||||||
|
|
||||||
|
/**
 * Renders one product as a numbered multi-line report entry. Title, link,
 * price, rating, and delivery lines are always present; specs (first three),
 * notes (first two bullets), missing fields, and the sponsored marker are
 * added only when there is something to show.
 *
 * @param product - Product to render.
 * @param index - Number printed in front of the title.
 */
function formatProduct(product: ProductSearchResult, index: number): string {
  const unitPriceSuffix = product.unitPrice ? ` (${product.unitPrice.display})` : "";
  const lines: string[] = [
    `${index}. ${product.title}`,
    ` Link: ${product.url}`,
    ` Price: ${product.price?.display ?? "unknown"}${unitPriceSuffix}`,
    ` Rating: ${product.rating ?? "unknown"} stars; reviews: ${product.reviewCount ?? "unknown"}`,
    ` Delivery: ${product.delivery?.display ?? "unknown"}`
  ];

  const specs = product.specs.slice(0, 3).map((spec) => `${spec.name}: ${spec.value}`).join("; ");
  if (specs) {
    lines.push(` Specs: ${specs}`);
  }
  if (product.bullets[0]) {
    lines.push(` Notes: ${product.bullets.slice(0, 2).join(" ")}`);
  }
  if (product.missingFields.length > 0) {
    lines.push(` Missing: ${product.missingFields.join(", ")}`);
  }
  if (product.isSponsored) {
    lines.push(" Sponsored: yes");
  }

  // Same final pass as before: drop any line that ended up empty.
  return lines.filter(Boolean).join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Renders a search response as a Markdown report: a heading, a summary
 * section (query, filters, counts, optional warnings), and one numbered entry
 * per result.
 *
 * A blank line separates the heading from the summary and the summary from
 * the results. The warnings line is emitted only when there are warnings, and
 * the second blank line only when there are results to list.
 *
 * @returns The report text, terminated by a newline.
 */
export function createMarkdownReport(response: SearchProductsResponse): string {
  const lines: string[] = [
    `# Amazon Shopping Results`,
    "",
    `Query: ${response.query}`,
    `Filters: ${formatFilters(response.filters)}`,
    `Results returned: ${response.results.length} (filtered out: ${response.filteredOutCount})`,
    // Optional parts are spliced in conditionally rather than filtered out
    // afterwards, so the intentional blank separator lines survive.
    ...(response.warnings.length > 0 ? [`Warnings: ${response.warnings.join("; ")}`] : []),
    ...(response.results.length > 0 ? [""] : []),
    ...response.results.map((product, index) => formatProduct(product, index + 1))
  ];
  return `${lines.join("\n")}\n`;
}
|
||||||
@@ -103,3 +103,9 @@ export interface SearchPageExtraction {
|
|||||||
warnings: string[];
|
warnings: string[];
|
||||||
nextPageUrl?: string;
|
nextPageUrl?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Result of applyFiltersAndLimit. */
export interface FilteredProducts {
|
||||||
|
// Products that passed every filter, ranked and truncated to the limit.
results: ProductSearchResult[];
|
||||||
|
// Number of unique (by ASIN) products that failed at least one filter.
filteredOutCount: number;
|
||||||
|
// Failure reasons for each filtered-out product, keyed by ASIN.
filteredOutReasons: Record<string, string[]>;
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,43 @@
|
|||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { describe, it } from "node:test";
|
||||||
|
|
||||||
|
import { classifyHttpStatus, isPathAllowedByRobots, plannedAmazonPaths } from "../src/browser.js";
|
||||||
|
|
||||||
|
// Unit tests for the pure helpers that gate which Amazon paths may be visited
// and how HTTP statuses are interpreted.
describe("browser compliance helpers", () => {
  it("plans only search and product-detail paths", () => {
    const planned = plannedAmazonPaths(["B0TEST0001"]);
    assert.deepEqual(planned, ["/s", "/dp/B0TEST0001", "/gp/product/B0TEST0001"]);
  });

  it("honors robots disallow rules for planned paths", () => {
    const robots = `
User-agent: *
Disallow: /cart
Disallow: /product-reviews
Disallow: /dp/private
`;

    const expectations: Array<[path: string, allowed: boolean]> = [
      ["/s", true],
      ["/product-reviews/B0TEST0001", false],
      ["/dp/private/B0TEST0001", false]
    ];
    for (const [path, allowed] of expectations) {
      assert.equal(isPathAllowedByRobots(robots, "*", path), allowed);
    }
  });

  it("does not leak disallow rules from other user-agent groups", () => {
    const robots = `
User-agent: specialbot
Disallow: /dp

User-agent: *
Disallow: /cart
`;

    const path = "/dp/B0TEST0001";
    assert.equal(isPathAllowedByRobots(robots, "*", path), true);
    assert.equal(isPathAllowedByRobots(robots, "specialbot", path), false);
  });

  it("classifies retryable and challenge statuses", () => {
    const expectations: Array<[status: number, classification: string]> = [
      [429, "retryable"],
      [503, "retryable"],
      [403, "challenge"],
      [200, "ok"]
    ];
    for (const [status, classification] of expectations) {
      assert.equal(classifyHttpStatus(status), classification);
    }
  });
});
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { readFile } from "node:fs/promises";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { describe, it } from "node:test";
|
||||||
|
|
||||||
|
import { extractDetailPage } from "../src/detail-page.js";
|
||||||
|
|
||||||
|
// Path of the sanitized product-detail fixture inside the `fixtures` directory
// beside this module.
const fixturePath = join(import.meta.dirname, "fixtures", "product-detail.html");

describe("extractDetailPage", () => {
  // Happy path: every field present in the fixture is extracted, and the
  // title found on the page replaces the title carried over from search.
  it("extracts visible product detail fields from sanitized HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const details = extractDetailPage(html, {
      asin: "B0TESTLED1",
      title: "Search title",
      url: "https://www.amazon.com/dp/B0TESTLED1",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.title, "Bright Daylight LED Bulbs 100W Equivalent, 50 Count");
    assert.equal(details.price?.amount, 18.99);
    assert.equal(details.delivery?.free, true);
    assert.equal(details.availability, "In Stock");
    assert.equal(details.seller, "Ships from Amazon.com");
    assert.equal(details.bullets.length, 2);
    assert.deepEqual(details.specs[0], { name: "Brand", value: "BrightCo" });
    assert.equal(details.rating, 4.6);
    assert.equal(details.reviewCount, 1234);
    assert.equal(details.starBreakdown?.five, 72);
  });

  // A page that only contains a title must leave the price undefined and
  // list the absent fields in `missingFields`.
  it("records missing detail-only fields", () => {
    const details = extractDetailPage("<html><body><h1 id=\"productTitle\">Sparse Product</h1></body></html>", {
      asin: "B0SPARSE01",
      title: "Sparse",
      url: "https://www.amazon.com/dp/B0SPARSE01",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.price, undefined);
    assert.ok(details.missingFields.includes("price"));
    assert.ok(details.missingFields.includes("starBreakdown"));
  });

  // Noisy markup: the availability text carries a trailing JSON blob, and three
  // of the four table rows contain script fragments, buy-box text, or review
  // text. Only the clean "Wattage" row is expected to survive as a spec.
  it("drops script-like spec rows and trims availability metadata", () => {
    const details = extractDetailPage(`
<h1 id="productTitle">Messy Product</h1>
<div id="availability">In Stock {"merchantId":"secretish"}</div>
<table>
<tr><td>Special Feature</td><td>(function(P) { tracking(); }) Real feature text</td></tr>
<tr><td>A19 Add to Cart logShoppableMetrics("x", true)</td><td>Buying Options</td></tr>
<tr><td>Wattage</td><td>15 watts</td></tr>
<tr><td>Customer Reviews</td><td>4.7 out of 5 stars tracking payload</td></tr>
</table>
`, {
      asin: "B0MESSY001",
      title: "Messy",
      url: "https://www.amazon.com/dp/B0MESSY001",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.availability, "In Stock");
    assert.deepEqual(details.specs, [{ name: "Wattage", value: "15 watts" }]);
  });
});
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { describe, it } from "node:test";
|
||||||
|
|
||||||
|
import { applyFiltersAndLimit } from "../src/filters.js";
|
||||||
|
import type { ProductSearchResult } from "../src/types.js";
|
||||||
|
|
||||||
|
/**
 * Builds a `ProductSearchResult` test fixture.
 *
 * Starts from a minimal baseline product (fixed ASIN, title and URL, with all
 * list fields empty) and applies `overrides` on top, so each test only spells
 * out the fields it cares about.
 *
 * @param overrides Fields that replace the baseline values; defaults to none.
 * @returns A new object on every call, with fresh arrays for the list fields.
 */
function product(overrides: Partial<ProductSearchResult> = {}): ProductSearchResult {
  return {
    asin: "B0BASE0001",
    title: "Base Product",
    url: "https://www.amazon.com/dp/B0BASE0001",
    specs: [],
    bullets: [],
    matchedFilters: [],
    missingFields: [],
    extractionNotes: [],
    // Spread last so caller-supplied fields win over the baseline.
    ...overrides
  };
}
|
||||||
|
|
||||||
|
describe("applyFiltersAndLimit", () => {
  // With "gt" comparisons a product sitting exactly on a threshold is rejected,
  // and a product without a unit price is rejected when `maxUnitPrice` is set.
  it("applies strict rating, review, and unit-price filters", () => {
    const result = applyFiltersAndLimit([
      // Strictly above both thresholds and under the unit-price cap.
      product({
        asin: "B0PASS0001",
        rating: 4.6,
        reviewCount: 201,
        unitPrice: { amount: 3.99, currency: "USD", display: "$3.99/Count" }
      }),
      // Exactly on both thresholds: must fail under "gt".
      product({
        asin: "B0FAIL0001",
        rating: 4.5,
        reviewCount: 200,
        unitPrice: { amount: 3.99, currency: "USD", display: "$3.99/Count" }
      }),
      // Passes rating and reviews but has no unit price at all.
      product({
        asin: "B0UNKNOWN1",
        rating: 4.7,
        reviewCount: 300
      })
    ], {
      includeKeywords: [],
      excludeKeywords: [],
      minRating: 4.5,
      ratingComparison: "gt",
      minReviews: 200,
      reviewCountComparison: "gt",
      maxUnitPrice: 4
    }, 10);

    assert.deepEqual(result.results.map((item) => item.asin), ["B0PASS0001"]);
    assert.equal(result.filteredOutCount, 2);
    assert.match(result.filteredOutReasons["B0UNKNOWN1"]?.join(" ") ?? "", /unit price unknown/i);
  });

  // Highest rating comes first; among equal ratings the larger review count
  // wins. The limit of 2 drops the remaining product.
  // NOTE(review): the price tie-break named in the title is not exercised by
  // these inputs (no two products share both rating and review count).
  it("sorts by rating, reviews, then price", () => {
    const result = applyFiltersAndLimit([
      product({ asin: "B0LOWPRICE", rating: 4.7, reviewCount: 1000, price: { amount: 15, currency: "USD", display: "$15.00" } }),
      product({ asin: "B0HIGHRATE", rating: 4.9, reviewCount: 100, price: { amount: 40, currency: "USD", display: "$40.00" } }),
      product({ asin: "B0MOREREV", rating: 4.7, reviewCount: 2000, price: { amount: 20, currency: "USD", display: "$20.00" } })
    ], { includeKeywords: [], excludeKeywords: [] }, 2);

    assert.deepEqual(result.results.map((item) => item.asin), ["B0HIGHRATE", "B0MOREREV"]);
  });

  // The same ASIN supplied twice must appear only once in the output.
  it("deduplicates repeated ASINs before limiting", () => {
    const result = applyFiltersAndLimit([
      product({ asin: "B0DUP0001", rating: 4.8, reviewCount: 1000 }),
      product({ asin: "B0DUP0001", rating: 4.8, reviewCount: 1000 }),
      product({ asin: "B0UNIQUE1", rating: 4.7, reviewCount: 900 })
    ], { includeKeywords: [], excludeKeywords: [] }, 10);

    assert.deepEqual(result.results.map((item) => item.asin), ["B0DUP0001", "B0UNIQUE1"]);
  });
});
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
<!-- Hand-crafted sanitized fixture. Not a live Amazon snapshot. -->
<html>
  <body>
    <!-- Title, price, delivery, availability and seller blocks. -->
    <h1 id="productTitle">Bright Daylight LED Bulbs 100W Equivalent, 50 Count</h1>
    <span id="productTitle_feature_div"></span>
    <span class="a-price"><span class="a-offscreen">$18.99</span></span>
    <div id="mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE">FREE delivery Tomorrow</div>
    <div id="availability">In Stock</div>
    <div id="merchant-info">Ships from Amazon.com</div>
    <!-- Feature bullets. -->
    <div id="feature-bullets">
      <ul>
        <li><span>Energy efficient 100W equivalent bulbs.</span></li>
        <li><span>Daylight color temperature for kitchens and garages.</span></li>
      </ul>
    </div>
    <!-- Product overview (spec) rows. -->
    <table id="productOverview_feature_div">
      <tr><td>Brand</td><td>BrightCo</td></tr>
      <tr><td>Light Type</td><td>LED</td></tr>
    </table>
    <!-- Rating summary and per-star histogram. -->
    <span id="acrPopover" title="4.6 out of 5 stars"></span>
    <span id="acrCustomerReviewText">1,234 ratings</span>
    <table id="histogramTable">
      <tr><td>5 star</td><td>72%</td></tr>
      <tr><td>4 star</td><td>15%</td></tr>
      <tr><td>3 star</td><td>7%</td></tr>
      <tr><td>2 star</td><td>3%</td></tr>
      <tr><td>1 star</td><td>3%</td></tr>
    </table>
  </body>
</html>
|
||||||
@@ -68,6 +68,14 @@ describe("parsers", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// When the text starts with the product price and the per-unit price follows
// in parentheses (with the amount repeated), the per-unit amount must be chosen.
it("prefers the unit price when product price appears first", () => {
  assert.deepEqual(parseUnitPrice("$9.99 ($5.00$5.00/count)"), {
    amount: 5,
    currency: "USD",
    display: "$5.00/count"
  });
});
|
||||||
|
|
||||||
it("parses whole-dollar and one-decimal prices", () => {
|
it("parses whole-dollar and one-decimal prices", () => {
|
||||||
assert.deepEqual(parseMoney("$20"), { amount: 20, currency: "USD", display: "$20" });
|
assert.deepEqual(parseMoney("$20"), { amount: 20, currency: "USD", display: "$20" });
|
||||||
assert.deepEqual(parseMoney("$19.9"), { amount: 19.9, currency: "USD", display: "$19.9" });
|
assert.deepEqual(parseMoney("$19.9"), { amount: 19.9, currency: "USD", display: "$19.9" });
|
||||||
|
|||||||
@@ -27,6 +27,14 @@ describe("parseNaturalLanguageRequest", () => {
|
|||||||
assert.equal(parsed.filters.ratingComparison, "gte");
|
assert.equal(parsed.filters.ratingComparison, "gte");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Review-count and rating phrases must be turned into filters and removed
// from the free-text query, leaving only the product keywords.
it("cleans rating filter phrases from search query text", () => {
  const parsed = parseNaturalLanguageRequest("usb c cable with over 1000 reviews and rating over 4 stars");

  assert.equal(parsed.query, "usb c cable");
  assert.equal(parsed.filters.minReviews, 1000);
  assert.equal(parsed.filters.minRating, 4);
});
|
||||||
|
|
||||||
it("extracts limit and max product price phrases", () => {
|
it("extracts limit and max product price phrases", () => {
|
||||||
const parsed = parseNaturalLanguageRequest("return 5 wireless mouse under $30");
|
const parsed = parseNaturalLanguageRequest("return 5 wireless mouse under $30");
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import assert from "node:assert/strict";
|
||||||
|
import { describe, it } from "node:test";
|
||||||
|
|
||||||
|
import { createMarkdownReport, createResponse } from "../src/report.js";
|
||||||
|
|
||||||
|
describe("report", () => {
  // The response must carry the fixed source site, pass the filtered-out count
  // through, and stamp `scrapedAt` from the injected clock.
  it("creates a structured JSON response", () => {
    const response = createResponse({
      query: "usb c cable",
      filters: { includeKeywords: [], excludeKeywords: [], minReviews: 1000 },
      limit: 1,
      maxSearchPages: 2,
      results: [],
      filteredOutCount: 4,
      warnings: ["partial extraction"],
      // Fixed clock keeps the timestamp assertion deterministic.
      now: () => new Date("2026-04-15T00:00:00.000Z")
    });

    assert.equal(response.source.site, "amazon.com");
    assert.equal(response.filteredOutCount, 4);
    assert.equal(response.source.scrapedAt, "2026-04-15T00:00:00.000Z");
  });

  // The markdown rendering must mention the product title, price, rating,
  // the warning text and the product URL.
  it("creates concise markdown with product details and warnings", () => {
    const markdown = createMarkdownReport(createResponse({
      query: "usb c cable",
      filters: { includeKeywords: [], excludeKeywords: [] },
      limit: 1,
      maxSearchPages: 2,
      filteredOutCount: 0,
      warnings: ["price missing for one item"],
      now: () => new Date("2026-04-15T00:00:00.000Z"),
      results: [{
        asin: "B0TEST0001",
        title: "USB-C Cable",
        url: "https://www.amazon.com/dp/B0TEST0001",
        price: { amount: 9.99, currency: "USD", display: "$9.99" },
        rating: 4.7,
        reviewCount: 1234,
        delivery: { display: "FREE delivery Tomorrow", free: true },
        specs: [{ name: "Length", value: "6 ft" }],
        bullets: ["Braided cable"],
        matchedFilters: [],
        missingFields: ["starBreakdown"],
        extractionNotes: []
      }]
    }));

    assert.match(markdown, /USB-C Cable/);
    assert.match(markdown, /\$9\.99/);
    assert.match(markdown, /4\.7 stars/);
    assert.match(markdown, /price missing/);
    assert.match(markdown, /https:\/\/www\.amazon\.com\/dp\/B0TEST0001/);
  });
});
|
||||||
Reference in New Issue
Block a user