feat(amazon-shopping): scrape and filter amazon product results
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import { extractDetailPage } from "../src/detail-page.js";
|
||||
|
||||
// Location of the HTML fixture read by the first test case below.
const fixturePath = join(
  import.meta.dirname,
  "fixtures",
  "product-detail.html"
);

/**
 * Builds the seed record passed as the second argument to extractDetailPage.
 *
 * The identifying fields come from the caller; every collection field starts
 * out as a fresh empty array so no state is shared between test cases.
 *
 * @param {{ asin: string, title: string, url: string }} identity - Fields
 *   that identify the product the seed stands for.
 * @returns {object} The identity fields followed by empty `specs`, `bullets`,
 *   `matchedFilters`, `missingFields` and `extractionNotes` arrays.
 */
const blankSeed = (identity) => ({
  ...identity,
  specs: [],
  bullets: [],
  matchedFilters: [],
  missingFields: [],
  extractionNotes: [],
});

describe("extractDetailPage", () => {
  it("extracts visible product detail fields from sanitized HTML", async () => {
    const fixtureHtml = await readFile(fixturePath, "utf8");
    const seed = blankSeed({
      asin: "B0TESTLED1",
      title: "Search title",
      url: "https://www.amazon.com/dp/B0TESTLED1",
    });

    const product = extractDetailPage(fixtureHtml, seed);

    // The title asserted here differs from the seed's "Search title".
    assert.equal(
      product.title,
      "Bright Daylight LED Bulbs 100W Equivalent, 50 Count"
    );
    assert.equal(product.price?.amount, 18.99);
    assert.equal(product.delivery?.free, true);
    assert.equal(product.availability, "In Stock");
    assert.equal(product.seller, "Ships from Amazon.com");
    assert.equal(product.bullets.length, 2);
    assert.deepEqual(product.specs[0], { name: "Brand", value: "BrightCo" });
    assert.equal(product.rating, 4.6);
    assert.equal(product.reviewCount, 1234);
    assert.equal(product.starBreakdown?.five, 72);
  });

  it("records missing detail-only fields", () => {
    // A page carrying nothing but a product title.
    const sparseHtml =
      "<html><body><h1 id=\"productTitle\">Sparse Product</h1></body></html>";
    const seed = blankSeed({
      asin: "B0SPARSE01",
      title: "Sparse",
      url: "https://www.amazon.com/dp/B0SPARSE01",
    });

    const product = extractDetailPage(sparseHtml, seed);

    assert.equal(product.price, undefined);
    assert.ok(product.missingFields.includes("price"));
    assert.ok(product.missingFields.includes("starBreakdown"));
  });

  it("drops script-like spec rows and trims availability metadata", () => {
    // Availability carries a trailing JSON blob, and three of the four table
    // rows contain script/tracking-looking text; only the Wattage row is
    // expected to survive as a spec.
    const messyHtml = `
<h1 id="productTitle">Messy Product</h1>
<div id="availability">In Stock {"merchantId":"secretish"}</div>
<table>
<tr><td>Special Feature</td><td>(function(P) { tracking(); }) Real feature text</td></tr>
<tr><td>A19 Add to Cart logShoppableMetrics("x", true)</td><td>Buying Options</td></tr>
<tr><td>Wattage</td><td>15 watts</td></tr>
<tr><td>Customer Reviews</td><td>4.7 out of 5 stars tracking payload</td></tr>
</table>
`;
    const seed = blankSeed({
      asin: "B0MESSY001",
      title: "Messy",
      url: "https://www.amazon.com/dp/B0MESSY001",
    });

    const product = extractDetailPage(messyHtml, seed);

    assert.equal(product.availability, "In Stock");
    assert.deepEqual(product.specs, [{ name: "Wattage", value: "15 watts" }]);
  });
});
|
||||
Reference in New Issue
Block a user