Files

96 lines
4.0 KiB
TypeScript

import assert from "node:assert/strict";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import { describe, it } from "node:test";
import { extractSearchPage } from "../src/search-page.js";
// Path to the sanitized search-results HTML fixture, resolved relative to
// the directory containing this test file.
const fixturePath = join(import.meta.dirname, "fixtures", "search-results.html");

/**
 * Behavioural tests for `extractSearchPage(html, pageUrl)`.
 *
 * Each case feeds a small HTML document (or the on-disk fixture) plus the URL
 * it was notionally fetched from, then asserts on the returned object's
 * `status`, `products`, `warnings` and `nextPageUrl` fields.
 */
describe("extractSearchPage", () => {
  it("extracts normalized product candidates from sanitized search HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const extracted = extractSearchPage(html, "https://www.amazon.com/s?k=led+bulbs");
    const [first, second] = extracted.products;

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 2);

    // First card: an organic (non-sponsored) listing with every field populated.
    assert.equal(first?.asin, "B0TESTLED1");
    assert.equal(first?.url, "https://www.amazon.com/dp/B0TESTLED1");
    assert.equal(first?.price?.amount, 18.99);
    assert.equal(first?.unitPrice?.amount, 0.38);
    assert.equal(first?.rating, 4.6);
    assert.equal(first?.reviewCount, 1234);
    assert.equal(first?.delivery?.free, true);
    assert.equal(first?.isSponsored, false);

    // Second card: only the sponsored flag is checked.
    assert.equal(second?.isSponsored, true);

    assert.equal(extracted.nextPageUrl, "https://www.amazon.com/s?k=led+bulbs&page=2");
  });

  it("detects Amazon challenge pages", () => {
    const extracted = extractSearchPage(
      "<html><title>Robot Check</title><body>Enter the characters you see below</body></html>",
      "https://www.amazon.com/s?k=x",
    );

    assert.equal(extracted.status, "challenge");
    // `?? ""` keeps the failure message readable if no warning was recorded.
    assert.match(extracted.warnings[0] ?? "", /challenge/i);
    assert.equal(extracted.products.length, 0);
  });

  it("returns ok with no products for empty or cardless pages", () => {
    const extracted = extractSearchPage(
      "<html><body>No results</body></html>",
      "https://www.amazon.com/s?k=x",
    );

    assert.equal(extracted.status, "ok");
    assert.deepEqual(extracted.products, []);
    assert.equal(extracted.nextPageUrl, undefined);
  });

  it("skips malformed ASINs and cards without titles", () => {
    // Card 1 has a title but a malformed ASIN.
    // Card 2 has a well-formed 10-character ASIN (same shape as every other
    // ASIN in this suite) but no title. It previously used an 11-character
    // value, which presumably would have been rejected as a malformed ASIN
    // and left the "no title" path untested.
    const extracted = extractSearchPage(`
      <div data-asin="bad"><h2><a href="/dp/bad">Bad ASIN</a></h2></div>
      <div data-asin="B0VALID123"></div>
    `, "https://www.amazon.com/s?k=x");

    assert.equal(extracted.status, "ok");
    assert.equal(extracted.products.length, 0);
  });

  it("keeps candidates with missing price and records missing price later", () => {
    const extracted = extractSearchPage(`
      <div data-asin="B0NOPRICE1">
        <h2><a href="/dp/B0NOPRICE1">No Price Product</a></h2>
      </div>
    `, "https://www.amazon.com/s?k=x");

    // The card is still returned; its price is simply absent.
    assert.equal(extracted.products.length, 1);
    assert.equal(extracted.products[0]?.price, undefined);
  });

  it("detects Prime badges even when visible delivery text omits the word Prime", () => {
    // The only Prime signal here is the badge icon; the delivery text itself
    // never mentions Prime.
    const extracted = extractSearchPage(`
      <div data-asin="B0PRIME123">
        <h2><a href="/dp/B0PRIME123">Prime Sofa Bed</a></h2>
        <span class="a-price"><span class="a-offscreen">$299.99</span></span>
        <span aria-label="4.4 out of 5 stars"></span>
        <span aria-label="246 ratings"></span>
        <i class="a-icon a-icon-prime" aria-label="Amazon Prime"></i>
        <span>FREE delivery Tomorrow</span>
      </div>
    `, "https://www.amazon.com/s?k=sofa+bed");
    const delivery = extracted.products[0]?.delivery;

    assert.equal(extracted.products.length, 1);
    assert.equal(delivery?.prime, true);
    assert.equal(delivery?.free, true);
    assert.match(delivery?.display ?? "", /Tomorrow/);
  });

  it("does not treat Prime in a product title as Prime delivery", () => {
    // Same title as the badge test above, but with no Prime badge: the word
    // "Prime" in the title alone must not set the delivery flag.
    const extracted = extractSearchPage(`
      <div data-asin="B0TITLE123">
        <h2><a href="/dp/B0TITLE123">Prime Sofa Bed</a></h2>
        <span>FREE delivery Tomorrow</span>
      </div>
    `, "https://www.amazon.com/s?k=sofa+bed");

    assert.equal(extracted.products.length, 1);
    assert.equal(extracted.products[0]?.delivery?.prime, false);
  });
});