feat(amazon-shopping): parse filters and extract search candidates
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import { extractSearchPage } from "../src/search-page.js";
|
||||
|
||||
const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html");
|
||||
|
||||
/**
 * Tests for `extractSearchPage(html, pageUrl)`.
 *
 * The cases below pin the shape of the returned object as exercised here:
 * `status`, `products`, `warnings` and `nextPageUrl`.
 */
describe("extractSearchPage", () => {
  it("extracts normalized product candidates from sanitized search HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 2);

    // Field-by-field checks on the first candidate in the fixture.
    assert.equal(extracted.products[0]?.asin, "B0TESTLED1");
    assert.equal(extracted.products[0]?.url, "https://www.amazon.com/dp/B0TESTLED1");
    assert.equal(extracted.products[0]?.price?.amount, 18.99);
    assert.equal(extracted.products[0]?.unitPrice?.amount, 0.38);
    assert.equal(extracted.products[0]?.rating, 4.6);
    assert.equal(extracted.products[0]?.reviewCount, 1234);
    assert.equal(extracted.products[0]?.delivery?.free, true);

    // The fixture holds one non-sponsored and one sponsored card, in that order.
    assert.equal(extracted.products[0]?.isSponsored, false);
    assert.equal(extracted.products[1]?.isSponsored, true);

    assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2");
  });

  it("detects Amazon challenge pages", () => {
    const extracted = extractSearchPage(
      "<html><title>Robot Check</title><body>Enter the characters you see below</body></html>",
      "https://www.amazon.com/s?k=x",
    );

    assert.equal(extracted.status, "challenge");
    // `?? ""` lets a missing warning fail the pattern match instead of throwing on undefined.
    assert.match(extracted.warnings[0] ?? "", /challenge/i);
    assert.equal(extracted.products.length, 0);
  });

  it("returns ok with no products for empty or cardless pages", () => {
    const extracted = extractSearchPage(
      "<html><body>No results</body></html>",
      "https://www.amazon.com/s?k=x",
    );

    assert.equal(extracted.status, "ok");
    assert.deepEqual(extracted.products, []);
    assert.equal(extracted.nextPageUrl, undefined);
  });

  it("skips malformed ASINs and cards without titles", () => {
    // First card: malformed ASIN but a usable title.
    // Second card: a 10-character ASIN (same length as the other well-formed
    // ASINs in this file) and no title, so it is well-formed apart from the
    // missing title and exercises the title check rather than the ASIN check.
    const extracted = extractSearchPage(`
      <div data-asin="bad"><h2><a href="/dp/bad">Bad ASIN</a></h2></div>
      <div data-asin="B0VALID123"></div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 0);
  });

  it("keeps candidates with missing price and records missing price later", () => {
    // A card with a valid ASIN and title but no price markup at all.
    const extracted = extractSearchPage(`
      <div data-asin="B0NOPRICE1">
        <h2><a href="/dp/B0NOPRICE1">No Price Product</a></h2>
      </div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.products.length, 1);
    assert.equal(extracted.products[0]?.price, undefined);
  });
});
|
||||
Reference in New Issue
Block a user