Files
stef-openclaw-skills/skills/web-automation/scripts/har-photos.js

103 lines
2.9 KiB
JavaScript

#!/usr/bin/env node
import { pathToFileURL } from "node:url";
import {
clickPhotoEntryPoint,
collectRenderedImageCandidates,
createPageSession,
dismissCommonOverlays,
fail,
gotoListing,
normalizeImageCandidates,
parseTarget,
runWithOperationTimeout,
scrollUntilSettled,
sleep,
waitForPhotoExperience,
} from "./real-estate-photo-common.js";
// Candidate labels for the listing's photo entry point, handed to
// clickPhotoEntryPoint. Each label becomes an anchored, case-insensitive
// pattern, so only the whole label matches ("Photos" but not "Photos (12)").
const HAR_LABELS = ["Show all photos", "View all photos", "All photos", "Photos"].map(
  (label) => new RegExp(`^${label}$`, "i"),
);
/**
 * Looks through the document body's text for the first "<number> photos"
 * phrase (case-insensitive) and reports that number.
 *
 * @param {object} page - Page handle exposing `evaluate(fn)`.
 *   NOTE(review): presumably a Playwright/Puppeteer-style page that runs the
 *   callback inside the browser document — confirm against createPageSession.
 * @returns {Promise<number|null>} The announced count, or `null` when the body
 *   text contains no such phrase.
 */
async function getAnnouncedPhotoCount(page) {
  // The callback is handed to page.evaluate as-is, so it must stay
  // self-contained: the pattern is declared inside it, not captured from here.
  return page.evaluate(() => {
    const bodyText = document.body?.innerText || "";
    const found = /(\d+)\s+photos/i.exec(bodyText);
    if (!found) {
      return null;
    }
    return Number(found[1]);
  });
}
/**
 * Opens a HAR listing, enters its all-photos view, and collects the image
 * URLs that survive `normalizeImageCandidates` filtering.
 *
 * The page session's context is closed before this function settles, whether
 * it succeeds or fails; the same close routine is also handed to
 * `runWithOperationTimeout` as its `onTimeout` hook.
 *
 * @param {string} rawUrl - Listing URL as supplied by the caller; it is passed
 *   through `parseTarget` before use.
 * @returns {Promise<object>} Summary with `source`, `requestedUrl`,
 *   `finalUrl`, `title`, `clickedLabel`, `expectedPhotoCount`, `complete`,
 *   `photoCount`, `imageUrls`, and `notes`. `complete` is `true` when no
 *   (non-zero) photo count was announced, otherwise it says whether at least
 *   that many photos were collected.
 * @throws {Error} Failures raised while the extraction runs are rethrown as a
 *   plain `Error` with the original message; the original thrown value is
 *   attached as `cause` so its stack and type are not lost. Errors from
 *   `parseTarget` / `createPageSession` happen before the guarded section and
 *   propagate unchanged.
 */
export async function extractHarPhotos(rawUrl) {
  const requestedUrl = parseTarget(rawUrl);
  // Headless unless the HEADLESS environment variable is exactly "false".
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  // Best-effort close: it serves both as the timeout hook and as the `finally`
  // cleanup, so it can run twice; errors from closing are deliberately ignored.
  const closeContext = async () => {
    await context.close().catch(() => {});
  };

  try {
    return await runWithOperationTimeout(
      "HAR photo extraction",
      async () => {
        await gotoListing(page, requestedUrl);
        await dismissCommonOverlays(page);

        // Read the announced count and the URL before clicking, while the
        // listing page is still the one being shown.
        const expectedPhotoCount = await getAnnouncedPhotoCount(page);
        const beforeUrl = page.url();
        const clickedLabel = await clickPhotoEntryPoint(page, HAR_LABELS);
        await waitForPhotoExperience(page, beforeUrl);

        await scrollUntilSettled(page);
        // NOTE(review): presumably milliseconds, giving late images time to
        // render after scrolling — confirm against sleep().
        await sleep(1200);

        const candidates = await collectRenderedImageCandidates(page);
        const photos = normalizeImageCandidates(candidates, {
          hostIncludes: ["pics.harstatic.com", "photos.harstatic.com"],
          minWidth: 240,
          minHeight: 180,
        });
        if (!photos.length) {
          fail("HAR photo extraction failed.", "No large image URLs were found after opening the HAR all-photos view.");
        }

        return {
          source: "har",
          requestedUrl,
          finalUrl: page.url(),
          title: await page.title(),
          clickedLabel,
          expectedPhotoCount,
          complete: expectedPhotoCount ? photos.length >= expectedPhotoCount : true,
          photoCount: photos.length,
          imageUrls: photos.map((photo) => photo.url),
          notes: ["Opened HAR all-photos flow and extracted large rendered image URLs from the photo page."],
        };
      },
      { onTimeout: closeContext },
    );
  } catch (error) {
    // Callers still receive a plain Error with the same message as before;
    // `cause` keeps the original value (and its stack) for debugging.
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(message, { cause: error });
  } finally {
    await closeContext();
  }
}
/**
 * CLI entry point: extracts photos for the URL given as the first
 * command-line argument and prints the result to stdout as pretty-printed
 * JSON followed by a newline.
 *
 * Anything thrown along the way (including while serialising or writing) is
 * reported through `fail` with the error's message, or its string form when
 * the thrown value is not an `Error`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const describeFailure = (thrown) => (thrown instanceof Error ? thrown.message : String(thrown));

  try {
    const rawUrl = process.argv[2];
    const extraction = await extractHarPhotos(rawUrl);
    const serialized = JSON.stringify(extraction, null, 2);
    process.stdout.write(`${serialized}\n`);
  } catch (error) {
    fail("HAR photo extraction failed.", describeFailure(error));
  }
}
// Run the CLI only when this module is the script Node was launched with;
// importing it from another module must not trigger an extraction.
const launchedScriptPath = process.argv[1];
if (launchedScriptPath && pathToFileURL(launchedScriptPath).href === import.meta.url) {
  main();
}