97 lines
2.7 KiB
JavaScript
97 lines
2.7 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import { pathToFileURL } from "node:url";
|
|
|
|
import {
|
|
clickPhotoEntryPoint,
|
|
collectRenderedImageCandidates,
|
|
createPageSession,
|
|
dismissCommonOverlays,
|
|
fail,
|
|
gotoListing,
|
|
normalizeImageCandidates,
|
|
parseTarget,
|
|
scrollUntilSettled,
|
|
sleep,
|
|
waitForPhotoExperience,
|
|
} from "./real-estate-photo-common.js";
|
|
|
|
/**
 * Case-insensitive, whole-string label patterns passed to
 * `clickPhotoEntryPoint` to locate the control that opens the photo view.
 */
const HAR_LABELS = [
  /^Show all photos$/i,
  /^View all photos$/i,
  /^All photos$/i,
  /^Photos$/i,
];
|
|
|
|
/**
 * Read the photo count that the page text advertises (e.g. "25 photos").
 *
 * @param {{ evaluate: Function }} page - Object exposing `evaluate(fn)`;
 *   presumably a Playwright/Puppeteer-style page — confirm against
 *   `createPageSession`.
 * @returns {Promise<number|null>} The number in front of the first
 *   "<digits> photos" occurrence in the body text, or `null` when the text
 *   contains no such phrase.
 */
async function getAnnouncedPhotoCount(page) {
  // The callback reads `document`, so it must stay self-contained: it may not
  // reference anything from this module's scope.
  const readAnnouncedCount = () => {
    const bodyText = document.body?.innerText || "";
    const found = /(\d+)\s+photos/i.exec(bodyText);
    if (!found) {
      return null;
    }
    return Number(found[1]);
  };

  return page.evaluate(readAnnouncedCount);
}
|
|
|
|
/**
 * Open a HAR listing, enter its all-photos view and collect the large
 * rendered image URLs found there.
 *
 * @param {string} rawUrl - Listing URL as supplied by the caller; it is passed
 *   through `parseTarget` before use.
 * @returns {Promise<object>} Result record with the keys `source`,
 *   `requestedUrl`, `finalUrl`, `title`, `clickedLabel`, `expectedPhotoCount`,
 *   `complete`, `photoCount`, `imageUrls` and `notes`.
 * @throws {Error} A new `Error` carrying the message of whatever failed after
 *   the page session was created; the original thrown value is attached as
 *   `cause` so its type and stack remain available to callers.
 */
export async function extractHarPhotos(rawUrl) {
  const requestedUrl = parseTarget(rawUrl);
  // Headless unless the HEADLESS environment variable is exactly "false".
  const { context, page } = await createPageSession({ headless: process.env.HEADLESS !== "false" });

  try {
    await gotoListing(page, requestedUrl);
    await dismissCommonOverlays(page);

    // Read the advertised count and the URL before clicking, so the photo
    // view can be compared against the state of the listing page.
    const expectedPhotoCount = await getAnnouncedPhotoCount(page);
    const beforeUrl = page.url();
    const clickedLabel = await clickPhotoEntryPoint(page, HAR_LABELS);
    await waitForPhotoExperience(page, beforeUrl);
    await scrollUntilSettled(page);
    await sleep(1200);

    const candidates = await collectRenderedImageCandidates(page);
    const photos = normalizeImageCandidates(candidates, {
      hostIncludes: ["pics.harstatic.com", "photos.harstatic.com"],
      minWidth: 240,
      minHeight: 180,
    });

    if (!photos.length) {
      // NOTE(review): `fail` is imported; if it terminates the process rather
      // than throwing, the context is never closed on this path — confirm.
      fail("HAR photo extraction failed.", "No large image URLs were found after opening the HAR all-photos view.");
    }

    const result = {
      source: "har",
      requestedUrl,
      finalUrl: page.url(),
      title: await page.title(),
      clickedLabel,
      expectedPhotoCount,
      // With no announced count (null or 0) there is nothing to compare
      // against, so the extraction is reported as complete.
      complete: expectedPhotoCount ? photos.length >= expectedPhotoCount : true,
      photoCount: photos.length,
      imageUrls: photos.map((photo) => photo.url),
      notes: ["Opened HAR all-photos flow and extracted large rendered image URLs from the photo page."],
    };

    await context.close();
    return result;
  } catch (error) {
    try {
      await context.close();
    } catch {
      // Ignore close errors after the primary failure.
    }
    // Normalise any thrown value to an Error with the same message, but keep
    // the original attached as `cause` instead of discarding its stack/type.
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(message, { cause: error });
  }
}
|
|
|
|
/**
 * CLI entry point: extract photos for the URL given as the first command-line
 * argument and print the result to stdout as pretty-printed JSON followed by a
 * newline. Any failure is reported through `fail` with a fixed heading and the
 * failure's message.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const [, , targetUrl] = process.argv;
    const extraction = await extractHarPhotos(targetUrl);
    const serialized = JSON.stringify(extraction, null, 2);
    process.stdout.write(`${serialized}\n`);
  } catch (problem) {
    const detail = problem instanceof Error ? problem.message : String(problem);
    fail("HAR photo extraction failed.", detail);
  }
}
|
|
|
|
// Run the CLI only when this module's URL matches the script path Node was
// launched with; when the module is imported, nothing runs.
const launchedScript = process.argv[1];
if (launchedScript && pathToFileURL(launchedScript).href === import.meta.url) {
  main();
}
|