// Unit tests for extractDetailPage (TypeScript, node:test runner).
import assert from "node:assert/strict";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import { describe, it } from "node:test";
import { fileURLToPath } from "node:url";

import { extractDetailPage } from "../src/detail-page.js";

// Absolute path to the product detail HTML fixture, resolved relative to this
// test file. The directory is derived from import.meta.url instead of
// import.meta.dirname because the latter only exists on Node.js 20.11+/21.2+;
// on older runtimes it is undefined and join() would throw at module load.
const fixturePath = join(fileURLToPath(new URL(".", import.meta.url)), "fixtures", "product-detail.html");

// Test suite for extractDetailPage. Every case passes the page HTML plus a seed
// record (asin, title, url and empty specs/bullets/matchedFilters/missingFields/
// extractionNotes arrays) and asserts on the returned details object.
describe("extractDetailPage", () => {
  // Full fixture page: the returned title is the fixture's product title rather
  // than the seed's "Search title", and price, delivery, availability, seller,
  // bullets, specs, rating, review count and star breakdown are all populated.
  it("extracts visible product detail fields from sanitized HTML", async () => {
    const html = await readFile(fixturePath, "utf8");
    const details = extractDetailPage(html, {
      asin: "B0TESTLED1",
      title: "Search title",
      url: "https://www.amazon.com/dp/B0TESTLED1",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.title, "Bright Daylight LED Bulbs 100W Equivalent, 50 Count");
    assert.equal(details.price?.amount, 18.99);
    assert.equal(details.delivery?.free, true);
    assert.equal(details.availability, "In Stock");
    assert.equal(details.seller, "Ships from Amazon.com");
    assert.equal(details.bullets.length, 2);
    assert.deepEqual(details.specs[0], { name: "Brand", value: "BrightCo" });
    assert.equal(details.rating, 4.6);
    assert.equal(details.reviewCount, 1234);
    assert.equal(details.starBreakdown?.five, 72);
  });

  // Page containing only a product title: price stays undefined and both
  // "price" and "starBreakdown" are reported in missingFields.
  it("records missing detail-only fields", () => {
    const details = extractDetailPage("<html><body><h1 id=\"productTitle\">Sparse Product</h1></body></html>", {
      asin: "B0SPARSE01",
      title: "Sparse",
      url: "https://www.amazon.com/dp/B0SPARSE01",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.price, undefined);
    assert.ok(details.missingFields.includes("price"));
    assert.ok(details.missingFields.includes("starBreakdown"));
  });

  // Noisy page: the availability text carries a trailing JSON blob and the spec
  // table mixes script/tracking text into several rows. Only the clean
  // "Wattage" row is expected to survive, and availability is reduced to
  // "In Stock".
  it("drops script-like spec rows and trims availability metadata", () => {
    const details = extractDetailPage(`
      <h1 id="productTitle">Messy Product</h1>
      <div id="availability">In Stock {"merchantId":"secretish"}</div>
      <table>
        <tr><td>Special Feature</td><td>(function(P) { tracking(); }) Real feature text</td></tr>
        <tr><td>A19 Add to Cart logShoppableMetrics("x", true)</td><td>Buying Options</td></tr>
        <tr><td>Wattage</td><td>15 watts</td></tr>
        <tr><td>Customer Reviews</td><td>4.7 out of 5 stars tracking payload</td></tr>
      </table>
    `, {
      asin: "B0MESSY001",
      title: "Messy",
      url: "https://www.amazon.com/dp/B0MESSY001",
      specs: [],
      bullets: [],
      matchedFilters: [],
      missingFields: [],
      extractionNotes: []
    });

    assert.equal(details.availability, "In Stock");
    assert.deepEqual(details.specs, [{ name: "Wattage", value: "15 watts" }]);
  });
});