feat(amazon-shopping): scrape and filter amazon product results
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
|
||||
import { classifyHttpStatus, isPathAllowedByRobots, plannedAmazonPaths } from "../src/browser.js";
|
||||
|
||||
/**
 * Test suite for the helpers imported from ../src/browser.js:
 * `plannedAmazonPaths`, `isPathAllowedByRobots` and `classifyHttpStatus`.
 *
 * Each case pins the input/output pairs listed in its body; the helpers'
 * implementations live outside this file.
 */
describe("browser compliance helpers", () => {
  // For a single ASIN the planner is expected to return exactly these three
  // paths, in this order (deepEqual from node:assert/strict is order-sensitive).
  it("plans only search and product-detail paths", () => {
    const asin = "B0TEST0001";
    const expectedPaths = ["/s", `/dp/${asin}`, `/gp/product/${asin}`];

    assert.deepEqual(plannedAmazonPaths([asin]), expectedPaths);
  });

  // A path under a Disallow prefix of the wildcard group must be rejected,
  // while a path that matches no rule stays allowed.
  it("honors robots disallow rules for planned paths", () => {
    // Fixture lines are kept flush-left so the text holds only the rule lines
    // themselves, with no leading indentation.
    const robots = `
User-agent: *
Disallow: /cart
Disallow: /product-reviews
Disallow: /dp/private
`;

    assert.equal(isPathAllowedByRobots(robots, "*", "/s"), true);
    assert.equal(isPathAllowedByRobots(robots, "*", "/product-reviews/B0TEST0001"), false);
    assert.equal(isPathAllowedByRobots(robots, "*", "/dp/private/B0TEST0001"), false);
  });

  // Rules declared for "specialbot" must apply to that agent only: the same
  // path is allowed for "*" and disallowed for "specialbot".
  it("does not leak disallow rules from other user-agent groups", () => {
    // Two groups separated by a blank line; fixture lines are flush-left.
    const robots = `
User-agent: specialbot
Disallow: /dp

User-agent: *
Disallow: /cart
`;
    const productPath = "/dp/B0TEST0001";

    assert.equal(isPathAllowedByRobots(robots, "*", productPath), true);
    assert.equal(isPathAllowedByRobots(robots, "specialbot", productPath), false);
  });

  // Status code -> expected classification label, checked one pair at a time.
  it("classifies retryable and challenge statuses", () => {
    const expectations = [
      [429, "retryable"],
      [503, "retryable"],
      [403, "challenge"],
      [200, "ok"],
    ];

    for (const [status, label] of expectations) {
      assert.equal(classifyHttpStatus(status), label);
    }
  });
});
|
||||
Reference in New Issue
Block a user