feat(amazon-shopping): parse filters and extract search candidates

This commit is contained in:
2026-04-15 18:31:44 -05:00
parent 8ad532545d
commit ef326896f4
15 changed files with 822 additions and 10 deletions
+14
View File
@@ -64,6 +64,20 @@ describe("amazon-shopping CLI", () => {
assert.equal(parseCliRequest(["usb c cable", "--json", "--markdown"]).output, "both");
});
// The CLI entry point should split a free-form request into a bare search
// query plus structured filters, keeping strict ("gt") comparisons distinct.
it("normalizes natural-language filters for the target request", () => {
  const naturalLanguageRequest =
    "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars";
  const argv = [naturalLanguageRequest, "--dry-run"];

  const { query, filters } = parseCliRequest(argv);

  assert.equal(query, "100w led bulbs");
  assert.equal(filters.maxUnitPrice, 4);
  assert.equal(filters.minReviews, 200);
  assert.equal(filters.reviewCountComparison, "gt");
  assert.equal(filters.minRating, 4.5);
  assert.equal(filters.ratingComparison, "gt");
});
it("rejects limits below one", () => {
assert.throws(
() => parseCliRequest(["usb c cable", "--limit", "0"]),
+3
View File
@@ -0,0 +1,3 @@
# Fixtures
Fixtures in this directory are hand-crafted sanitized HTML snippets. They are not live Amazon snapshots and contain no cookies, account details, delivery location, scripts, tracking identifiers, or browser profile data.
@@ -0,0 +1,23 @@
<!--
Hand-crafted sanitized fixture. Not a live Amazon snapshot.

Layout of this fixture:
- Two result cards, each carrying its own ASIN attribute.
- The first card has a relative product link, a price, a per-unit price,
  a rating label, a ratings-count label, a shipping line, and an image.
- The second card has an ad label and an absolute product link; it has
  no per-unit price and no image.
- A next-page link follows the cards; its href uses an HTML-escaped ampersand.
-->
<html>
<body>
<div data-component-type="s-search-result" data-asin="B0TESTLED1">
<h2><a class="a-link-normal s-line-clamp-2" href="/Bright-Daylight-Equivalent/dp/B0TESTLED1/ref=sr_1_1">Bright Daylight 100W Equivalent LED Bulbs, 50 Count</a></h2>
<span class="a-price"><span class="a-offscreen">$18.99</span></span>
<span class="a-size-base a-color-secondary">$0.38/Count</span>
<span aria-label="4.6 out of 5 stars"></span>
<a aria-label="1,234 ratings"></a>
<div class="a-row a-size-base a-color-secondary">FREE delivery Tomorrow</div>
<img class="s-image" src="https://m.media-amazon.com/images/I/test-led.jpg" />
</div>
<div data-component-type="s-search-result" data-asin="B0TESTLED2">
<span>Sponsored</span>
<h2><a href="https://www.amazon.com/gp/product/B0TESTLED2">Value LED Bulbs Soft White, Pack of 24</a></h2>
<span class="a-price"><span class="a-offscreen">$21.99</span></span>
<span aria-label="4.3 out of 5 stars"></span>
<a aria-label="543 ratings"></a>
<div>Delivery Friday</div>
</div>
<a class="s-pagination-next" href="/s?k=led+bulbs&amp;page=2">Next</a>
</body>
</html>
@@ -0,0 +1,75 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import {
extractUnitCount,
parseMoney,
parseRating,
parseReviewCount,
parseStarBreakdown,
parseUnitPrice
} from "../src/parsers.js";
// Unit tests for the text-level parsers that turn visible listing strings
// (prices, ratings, review counts, histograms, pack sizes) into structured values.
describe("parsers", () => {
  it("parses USD money", () => {
    const parsed = parseMoney("$19.99");
    assert.deepEqual(parsed, { amount: 19.99, currency: "USD", display: "$19.99" });
  });

  it("parses rating text", () => {
    const rating = parseRating("4.6 out of 5 stars");
    assert.equal(rating, 4.6);
  });

  it("parses review count text", () => {
    const reviewCount = parseReviewCount("1,234 ratings");
    assert.equal(reviewCount, 1234);
  });

  it("parses visible star histogram percentages", () => {
    const histogramText = "5 star 72% 4 star 15% 3 star 7% 2 star 3% 1 star 3%";
    const breakdown = parseStarBreakdown(histogramText);
    assert.deepEqual(breakdown, { five: 72, four: 15, three: 7, two: 3, one: 3, basis: "percent" });
  });

  it("extracts high-confidence unit counts", () => {
    // Explicit "N Count" and "Pack of N" phrasing is treated as high confidence.
    const cases = [
      ["LED bulbs, 100 Count, daylight", { count: 100, confidence: "high", source: "100 Count" }],
      ["Pack of 6 USB-C cables", { count: 6, confidence: "high", source: "Pack of 6" }]
    ];
    for (const [title, expected] of cases) {
      assert.deepEqual(extractUnitCount(title), expected);
    }
  });

  it("distinguishes lower-confidence unit count phrases", () => {
    // "Set of N" ranks as medium; a bare "N <noun>" ranks as low.
    const cases = [
      ["Set of 8 replacement filters", { count: 8, confidence: "medium", source: "Set of 8" }],
      ["6 bulbs soft white", { count: 6, confidence: "low", source: "6 bulbs" }]
    ];
    for (const [title, expected] of cases) {
      assert.deepEqual(extractUnitCount(title), expected);
    }
  });

  it("parses visible unit prices", () => {
    const unitPrice = parseUnitPrice("$0.33/Count");
    assert.deepEqual(unitPrice, { amount: 0.33, currency: "USD", display: "$0.33/Count" });
  });

  it("parses whole-dollar and one-decimal prices", () => {
    const cases = [
      ["$20", { amount: 20, currency: "USD", display: "$20" }],
      ["$19.9", { amount: 19.9, currency: "USD", display: "$19.9" }]
    ];
    for (const [text, expected] of cases) {
      assert.deepEqual(parseMoney(text), expected);
    }
  });
});
@@ -0,0 +1,37 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import { parseNaturalLanguageRequest } from "../src/query-parser.js";
// Tests for turning a free-form shopping request into a search query plus
// structured filters (price caps, review thresholds, rating thresholds, limit).
describe("parseNaturalLanguageRequest", () => {
  it("extracts the target LED bulb filters from natural language", () => {
    const requestText =
      "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars";

    const { query, filters } = parseNaturalLanguageRequest(requestText);

    assert.equal(query, "100w led bulbs");
    assert.equal(filters.maxUnitPrice, 4);
    // "over" / "more than" are exclusive bounds.
    assert.equal(filters.minReviews, 200);
    assert.equal(filters.reviewCountComparison, "gt");
    assert.equal(filters.minRating, 4.5);
    assert.equal(filters.ratingComparison, "gt");
  });

  it("distinguishes inclusive review and rating phrasing", () => {
    const requestText = "usb c charger at least 500 reviews and 4.3 stars or better";

    const { query, filters } = parseNaturalLanguageRequest(requestText);

    assert.equal(query, "usb c charger");
    // "at least" / "or better" are inclusive bounds.
    assert.equal(filters.minReviews, 500);
    assert.equal(filters.reviewCountComparison, "gte");
    assert.equal(filters.minRating, 4.3);
    assert.equal(filters.ratingComparison, "gte");
  });

  it("extracts limit and max product price phrases", () => {
    const requestText = "return 5 wireless mouse under $30";

    const { query, limit, filters } = parseNaturalLanguageRequest(requestText);

    assert.equal(query, "wireless mouse");
    assert.equal(limit, 5);
    assert.equal(filters.maxPrice, 30);
  });
});
@@ -0,0 +1,65 @@
import assert from "node:assert/strict";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import { describe, it } from "node:test";
import { extractSearchPage } from "../src/search-page.js";
// Sanitized search-results fixture that lives next to this test file.
const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html");

// Tests for extractSearchPage: given raw search-results HTML and the URL it
// was loaded from, it reports a status, warnings, product candidates, and the
// next-page URL.
describe("extractSearchPage", () => {
  it("extracts normalized product candidates from sanitized search HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 2);
    assert.equal(extracted.products[0]?.asin, "B0TESTLED1");
    // The fixture link carries a slug and a ref suffix; the expected URL is the bare /dp/<ASIN> form.
    assert.equal(extracted.products[0]?.url, "https://www.amazon.com/dp/B0TESTLED1");
    assert.equal(extracted.products[0]?.price?.amount, 18.99);
    assert.equal(extracted.products[0]?.unitPrice?.amount, 0.38);
    assert.equal(extracted.products[0]?.rating, 4.6);
    assert.equal(extracted.products[0]?.reviewCount, 1234);
    assert.equal(extracted.products[0]?.delivery?.free, true);
    assert.equal(extracted.products[0]?.isSponsored, false);
    assert.equal(extracted.products[1]?.isSponsored, true);
    // The relative, HTML-escaped pagination href is expected back as an absolute, unescaped URL.
    assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2");
  });

  it("detects Amazon challenge pages", () => {
    const extracted = extractSearchPage(
      "<html><title>Robot Check</title><body>Enter the characters you see below</body></html>",
      "https://www.amazon.com/s?k=x"
    );

    assert.equal(extracted.status, "challenge");
    assert.match(extracted.warnings[0] ?? "", /challenge/i);
    assert.equal(extracted.products.length, 0);
  });

  it("returns ok with no products for empty or cardless pages", () => {
    const extracted = extractSearchPage("<html><body>No results</body></html>", "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.deepEqual(extracted.products, []);
    assert.equal(extracted.nextPageUrl, undefined);
  });

  it("skips malformed ASINs and cards without titles", () => {
    // The second card uses a well-formed 10-character ASIN (matching the other
    // fixtures) so that it can only be skipped for its missing title, not for
    // its identifier. An over-long ASIN here would leave the title check untested.
    const extracted = extractSearchPage(`
      <div data-asin="bad"><h2><a href="/dp/bad">Bad ASIN</a></h2></div>
      <div data-asin="B0VALID123"></div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 0);
  });

  it("keeps candidates with missing price and records missing price later", () => {
    const extracted = extractSearchPage(`
      <div data-asin="B0NOPRICE1">
        <h2><a href="/dp/B0NOPRICE1">No Price Product</a></h2>
      </div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.products.length, 1);
    assert.equal(extracted.products[0]?.price, undefined);
  });
});
@@ -0,0 +1,46 @@
import assert from "node:assert/strict";
import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, it } from "node:test";
import { resolveWebAutomationRuntime } from "../src/web-automation-runtime.js";
/**
 * Creates a scratch runtime directory under the OS temp dir and fills it with
 * stub files: check-install.js, package.json, and node_modules/.bin/tsx.
 *
 * @returns {Promise<string>} Path of the newly created directory.
 */
async function createRuntime() {
  const runtimeRoot = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-"));
  const binDir = join(runtimeRoot, "node_modules", ".bin");

  const topLevelStubs = [
    ["check-install.js", "console.log('ok');\n"],
    ["package.json", "{\"type\":\"module\"}\n"]
  ];
  for (const [fileName, contents] of topLevelStubs) {
    await writeFile(join(runtimeRoot, fileName), contents);
  }

  // The bin directory must exist before the tsx stub can be written into it.
  await mkdir(binDir, { recursive: true });
  await writeFile(join(binDir, "tsx"), "#!/usr/bin/env node\n");

  return runtimeRoot;
}
// Tests for resolveWebAutomationRuntime: the directory named by
// AMAZON_SHOPPING_WEB_AUTOMATION_DIR is used when it holds the required files,
// and a directory missing them is rejected with an error naming the file.
describe("resolveWebAutomationRuntime", () => {
  it("uses AMAZON_SHOPPING_WEB_AUTOMATION_DIR first", async () => {
    const runtimeDir = await createRuntime();
    try {
      // homeDir and skillDir point at paths that do not exist, so only the
      // environment variable can supply a usable runtime.
      const resolved = await resolveWebAutomationRuntime({
        env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: runtimeDir },
        homeDir: "/missing-home",
        skillDir: "/missing-skill"
      });

      assert.equal(resolved.scriptsDir, runtimeDir);
      assert.deepEqual(resolved.checkInstall, {
        cwd: runtimeDir,
        command: "node",
        args: ["check-install.js"]
      });
    } finally {
      // Remove the scratch runtime so repeated runs do not accumulate temp directories.
      await rm(runtimeDir, { recursive: true, force: true });
    }
  });

  it("returns a clear error when required files are missing", async () => {
    const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-missing-"));
    try {
      await assert.rejects(
        () => resolveWebAutomationRuntime({
          env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: dir },
          homeDir: "/missing-home",
          skillDir: "/missing-skill"
        }),
        // Escape the dot so only the literal file name satisfies the check.
        /check-install\.js/
      );
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  });
});