feat(amazon-shopping): parse filters and extract search candidates
This commit is contained in:
@@ -64,6 +64,20 @@ describe("amazon-shopping CLI", () => {
|
||||
assert.equal(parseCliRequest(["usb c cable", "--json", "--markdown"]).output, "both");
|
||||
});
|
||||
|
||||
// A single free-text positional argument should be split into the bare search
// query plus structured price / review-count / rating filters.
it("normalizes natural-language filters for the target request", () => {
  const naturalLanguage =
    "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars";
  const { query, filters } = parseCliRequest([naturalLanguage, "--dry-run"]);

  // Only the product phrase survives as the search query.
  assert.equal(query, "100w led bulbs");

  // "less than $4 each" is a per-unit ceiling.
  assert.equal(filters.maxUnitPrice, 4);

  // "over 200 reviews" is a strict (gt) lower bound.
  assert.equal(filters.minReviews, 200);
  assert.equal(filters.reviewCountComparison, "gt");

  // "more than 4.5 stars" is a strict (gt) lower bound as well.
  assert.equal(filters.minRating, 4.5);
  assert.equal(filters.ratingComparison, "gt");
});
|
||||
|
||||
it("rejects limits below one", () => {
|
||||
assert.throws(
|
||||
() => parseCliRequest(["usb c cable", "--limit", "0"]),
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# Fixtures
|
||||
|
||||
Fixtures in this directory are hand-crafted sanitized HTML snippets. They are not live Amazon snapshots and contain no cookies, account details, delivery location, scripts, tracking identifiers, or browser profile data.
|
||||
@@ -0,0 +1,23 @@
|
||||
<!-- Hand-crafted sanitized fixture. Not a live Amazon snapshot. -->
|
||||
<html>
|
||||
<body>
|
||||
<div data-component-type="s-search-result" data-asin="B0TESTLED1">
|
||||
<h2><a class="a-link-normal s-line-clamp-2" href="/Bright-Daylight-Equivalent/dp/B0TESTLED1/ref=sr_1_1">Bright Daylight 100W Equivalent LED Bulbs, 50 Count</a></h2>
|
||||
<span class="a-price"><span class="a-offscreen">$18.99</span></span>
|
||||
<span class="a-size-base a-color-secondary">$0.38/Count</span>
|
||||
<span aria-label="4.6 out of 5 stars"></span>
|
||||
<a aria-label="1,234 ratings"></a>
|
||||
<div class="a-row a-size-base a-color-secondary">FREE delivery Tomorrow</div>
|
||||
<img class="s-image" src="https://m.media-amazon.com/images/I/test-led.jpg" />
|
||||
</div>
|
||||
<div data-component-type="s-search-result" data-asin="B0TESTLED2">
|
||||
<span>Sponsored</span>
|
||||
<h2><a href="https://www.amazon.com/gp/product/B0TESTLED2">Value LED Bulbs Soft White, Pack of 24</a></h2>
|
||||
<span class="a-price"><span class="a-offscreen">$21.99</span></span>
|
||||
<span aria-label="4.3 out of 5 stars"></span>
|
||||
<a aria-label="543 ratings"></a>
|
||||
<div>Delivery Friday</div>
|
||||
</div>
|
||||
<a class="s-pagination-next" href="/s?k=led+bulbs&page=2">Next</a>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,75 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import {
|
||||
extractUnitCount,
|
||||
parseMoney,
|
||||
parseRating,
|
||||
parseReviewCount,
|
||||
parseStarBreakdown,
|
||||
parseUnitPrice
|
||||
} from "../src/parsers.js";
|
||||
|
||||
// Unit tests for the text parsers used on Amazon listing snippets.
describe("parsers", () => {
  it("parses USD money", () => {
    const parsed = parseMoney("$19.99");

    assert.deepEqual(parsed, { amount: 19.99, currency: "USD", display: "$19.99" });
  });

  it("parses rating text", () => {
    const rating = parseRating("4.6 out of 5 stars");

    assert.equal(rating, 4.6);
  });

  it("parses review count text", () => {
    // The thousands separator must not truncate the number.
    const reviewCount = parseReviewCount("1,234 ratings");

    assert.equal(reviewCount, 1234);
  });

  it("parses visible star histogram percentages", () => {
    const histogramText = "5 star 72% 4 star 15% 3 star 7% 2 star 3% 1 star 3%";
    const expected = {
      five: 72,
      four: 15,
      three: 7,
      two: 3,
      one: 3,
      basis: "percent"
    };

    assert.deepEqual(parseStarBreakdown(histogramText), expected);
  });

  it("extracts high-confidence unit counts", () => {
    // Each entry pairs a listing title with the exact extraction expected from it.
    const cases = [
      ["LED bulbs, 100 Count, daylight", { count: 100, confidence: "high", source: "100 Count" }],
      ["Pack of 6 USB-C cables", { count: 6, confidence: "high", source: "Pack of 6" }]
    ];

    for (const [title, expected] of cases) {
      assert.deepEqual(extractUnitCount(title), expected);
    }
  });

  it("distinguishes lower-confidence unit count phrases", () => {
    // "Set of N" is expected at medium confidence, a bare "N bulbs" at low.
    const cases = [
      ["Set of 8 replacement filters", { count: 8, confidence: "medium", source: "Set of 8" }],
      ["6 bulbs soft white", { count: 6, confidence: "low", source: "6 bulbs" }]
    ];

    for (const [title, expected] of cases) {
      assert.deepEqual(extractUnitCount(title), expected);
    }
  });

  it("parses visible unit prices", () => {
    const unitPrice = parseUnitPrice("$0.33/Count");

    assert.deepEqual(unitPrice, {
      amount: 0.33,
      currency: "USD",
      display: "$0.33/Count"
    });
  });

  it("parses whole-dollar and one-decimal prices", () => {
    // Prices without a full two-digit cents part must still parse.
    const wholeDollar = parseMoney("$20");
    const oneDecimal = parseMoney("$19.9");

    assert.deepEqual(wholeDollar, { amount: 20, currency: "USD", display: "$20" });
    assert.deepEqual(oneDecimal, { amount: 19.9, currency: "USD", display: "$19.9" });
  });
});
|
||||
@@ -0,0 +1,37 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import { parseNaturalLanguageRequest } from "../src/query-parser.js";
|
||||
|
||||
// Tests for turning a free-text shopping request into a query plus filters.
describe("parseNaturalLanguageRequest", () => {
  it("extracts the target LED bulb filters from natural language", () => {
    const request =
      "100w led bulbs that cost less than $4 each and have over 200 reviews with a review score of more than 4.5 stars";
    const { query, filters } = parseNaturalLanguageRequest(request);

    assert.equal(query, "100w led bulbs");

    // "less than" / "over" / "more than" are all strict bounds.
    assert.equal(filters.maxUnitPrice, 4);
    assert.equal(filters.minReviews, 200);
    assert.equal(filters.reviewCountComparison, "gt");
    assert.equal(filters.minRating, 4.5);
    assert.equal(filters.ratingComparison, "gt");
  });

  it("distinguishes inclusive review and rating phrasing", () => {
    const { query, filters } = parseNaturalLanguageRequest(
      "usb c charger at least 500 reviews and 4.3 stars or better"
    );

    assert.equal(query, "usb c charger");

    // "at least" and "or better" are inclusive, so both comparisons are gte.
    assert.equal(filters.minReviews, 500);
    assert.equal(filters.reviewCountComparison, "gte");
    assert.equal(filters.minRating, 4.3);
    assert.equal(filters.ratingComparison, "gte");
  });

  it("extracts limit and max product price phrases", () => {
    const { query, limit, filters } = parseNaturalLanguageRequest("return 5 wireless mouse under $30");

    // "return 5" becomes the result limit and "under $30" a total-price ceiling;
    // neither phrase stays in the query.
    assert.equal(query, "wireless mouse");
    assert.equal(limit, 5);
    assert.equal(filters.maxPrice, 30);
  });
});
|
||||
@@ -0,0 +1,65 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import { extractSearchPage } from "../src/search-page.js";
|
||||
|
||||
// Hand-crafted search-results fixture, resolved relative to this test file.
const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html");

// Tests for extracting product candidates and page status from search-result HTML.
describe("extractSearchPage", () => {
  it("extracts normalized product candidates from sanitized search HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 2);

    // First candidate: the product link is expected as a canonical /dp/<ASIN> URL.
    assert.equal(extracted.products[0]?.asin, "B0TESTLED1");
    assert.equal(extracted.products[0]?.url, "https://www.amazon.com/dp/B0TESTLED1");
    assert.equal(extracted.products[0]?.price?.amount, 18.99);
    assert.equal(extracted.products[0]?.unitPrice?.amount, 0.38);
    assert.equal(extracted.products[0]?.rating, 4.6);
    assert.equal(extracted.products[0]?.reviewCount, 1234);
    assert.equal(extracted.products[0]?.delivery?.free, true);

    // Sponsored placements are kept but flagged.
    assert.equal(extracted.products[0]?.isSponsored, false);
    assert.equal(extracted.products[1]?.isSponsored, true);

    // The pagination link is resolved to an absolute URL.
    assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2");
  });

  it("detects Amazon challenge pages", () => {
    const extracted = extractSearchPage(
      "<html><title>Robot Check</title><body>Enter the characters you see below</body></html>",
      "https://www.amazon.com/s?k=x"
    );

    assert.equal(extracted.status, "challenge");
    assert.match(extracted.warnings[0] ?? "", /challenge/i);
    assert.equal(extracted.products.length, 0);
  });

  it("returns ok with no products for empty or cardless pages", () => {
    const extracted = extractSearchPage("<html><body>No results</body></html>", "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.deepEqual(extracted.products, []);
    assert.equal(extracted.nextPageUrl, undefined);
  });

  it("skips malformed ASINs and cards without titles", () => {
    // Card 1 has a title but a malformed ASIN. Card 2 has a well-formed
    // 10-character ASIN (same length as the other ASINs in these tests) but no
    // title. The previous 11-character value could be rejected for its ASIN
    // alone, which would leave the "no title" case unexercised.
    const extracted = extractSearchPage(`
      <div data-asin="bad"><h2><a href="/dp/bad">Bad ASIN</a></h2></div>
      <div data-asin="B0VALID123"></div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 0);
  });

  it("keeps candidates with missing price and records missing price later", () => {
    const extracted = extractSearchPage(`
      <div data-asin="B0NOPRICE1">
        <h2><a href="/dp/B0NOPRICE1">No Price Product</a></h2>
      </div>
    `, "https://www.amazon.com/s?k=x");

    // A card without a price is still a candidate; its price is simply absent.
    assert.equal(extracted.products.length, 1);
    assert.equal(extracted.products[0]?.price, undefined);
  });
});
|
||||
@@ -0,0 +1,46 @@
|
||||
import assert from "node:assert/strict";
import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { describe, it } from "node:test";
|
||||
|
||||
import { resolveWebAutomationRuntime } from "../src/web-automation-runtime.js";
|
||||
|
||||
/**
 * Creates a throwaway directory laid out like a web-automation runtime.
 *
 * The directory is created under the OS temp dir with the
 * "amazon-shopping-runtime-" prefix and populated with:
 *   - check-install.js
 *   - package.json (declares "type": "module")
 *   - node_modules/.bin/tsx (placeholder containing only a shebang line)
 *
 * This function does not remove the directory; cleanup is left to the caller.
 *
 * @returns {Promise<string>} Path of the newly created directory.
 */
async function createRuntime() {
  const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-"));
  const binDir = join(dir, "node_modules", ".bin");

  // The two top-level files and the .bin directory do not depend on each
  // other, so create them concurrently instead of one await at a time.
  await Promise.all([
    writeFile(join(dir, "check-install.js"), "console.log('ok');\n"),
    writeFile(join(dir, "package.json"), "{\"type\":\"module\"}\n"),
    mkdir(binDir, { recursive: true })
  ]);

  // The tsx placeholder can only be written once its directory exists.
  await writeFile(join(binDir, "tsx"), "#!/usr/bin/env node\n");

  return dir;
}
|
||||
|
||||
// Tests for locating the web-automation runtime directory. Each test removes
// the temp directory it creates, even when an assertion fails.
describe("resolveWebAutomationRuntime", () => {
  it("uses AMAZON_SHOPPING_WEB_AUTOMATION_DIR first", async () => {
    const runtimeDir = await createRuntime();

    try {
      // homeDir and skillDir point at nonexistent paths, so only the
      // environment variable can supply a usable runtime.
      const resolved = await resolveWebAutomationRuntime({
        env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: runtimeDir },
        homeDir: "/missing-home",
        skillDir: "/missing-skill"
      });

      assert.equal(resolved.scriptsDir, runtimeDir);
      assert.deepEqual(resolved.checkInstall, {
        cwd: runtimeDir,
        command: "node",
        args: ["check-install.js"]
      });
    } finally {
      await rm(runtimeDir, { recursive: true, force: true });
    }
  });

  it("returns a clear error when required files are missing", async () => {
    // An empty directory: it exists but contains none of the runtime files.
    const dir = await mkdtemp(join(tmpdir(), "amazon-shopping-runtime-missing-"));

    try {
      await assert.rejects(
        () => resolveWebAutomationRuntime({
          env: { AMAZON_SHOPPING_WEB_AUTOMATION_DIR: dir },
          homeDir: "/missing-home",
          skillDir: "/missing-skill"
        }),
        // The rejection message must name the missing file.
        /check-install.js/
      );
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  });
});
|
||||
Reference in New Issue
Block a user