feat(web-automation): implement milestone M2 mirror and docs
This commit is contained in:
@@ -41,8 +41,8 @@ function getCredentials(options?: {
|
||||
username?: string;
|
||||
password?: string;
|
||||
}): { username: string; password: string } | null {
|
||||
const username = options?.username || process.env.CAMOUFOX_USERNAME;
|
||||
const password = options?.password || process.env.CAMOUFOX_PASSWORD;
|
||||
const username = options?.username || process.env.CLOAKBROWSER_USERNAME;
|
||||
const password = options?.password || process.env.CLOAKBROWSER_PASSWORD;
|
||||
|
||||
if (!username || !password) {
|
||||
return null;
|
||||
@@ -450,7 +450,7 @@ export async function navigateAuthenticated(
|
||||
if (!credentials) {
|
||||
throw new Error(
|
||||
'Authentication required but no credentials provided. ' +
|
||||
'Set CAMOUFOX_USERNAME and CAMOUFOX_PASSWORD environment variables.'
|
||||
'Set CLOAKBROWSER_USERNAME and CLOAKBROWSER_PASSWORD environment variables.'
|
||||
);
|
||||
}
|
||||
|
||||
@@ -504,8 +504,8 @@ Usage:
|
||||
Options:
|
||||
-u, --url <url> URL to authenticate (required)
|
||||
-t, --type <type> Auth type: auto, form, or msal (default: auto)
|
||||
--username <user> Username/email (or set CAMOUFOX_USERNAME env var)
|
||||
--password <pass> Password (or set CAMOUFOX_PASSWORD env var)
|
||||
--username <user> Username/email (or set CLOAKBROWSER_USERNAME env var)
|
||||
--password <pass> Password (or set CLOAKBROWSER_PASSWORD env var)
|
||||
--headless <bool> Run in headless mode (default: false for auth)
|
||||
-h, --help Show this help message
|
||||
|
||||
@@ -515,8 +515,8 @@ Auth Types:
|
||||
msal Microsoft SSO (login.microsoftonline.com)
|
||||
|
||||
Environment Variables:
|
||||
CAMOUFOX_USERNAME Default username/email for authentication
|
||||
CAMOUFOX_PASSWORD Default password for authentication
|
||||
CLOAKBROWSER_USERNAME Default username/email for authentication
|
||||
CLOAKBROWSER_PASSWORD Default password for authentication
|
||||
|
||||
Examples:
|
||||
# Interactive login (no credentials, opens browser)
|
||||
@@ -527,11 +527,11 @@ Examples:
|
||||
--username "user@example.com" --password "secret"
|
||||
|
||||
# Microsoft SSO login
|
||||
CAMOUFOX_USERNAME=user@company.com CAMOUFOX_PASSWORD=secret \\
|
||||
CLOAKBROWSER_USERNAME=user@company.com CLOAKBROWSER_PASSWORD=secret \\
|
||||
npx tsx auth.ts --url "https://internal.company.com" --type msal
|
||||
|
||||
Notes:
|
||||
- Session is saved to ~/.camoufox-profile/ for persistence
|
||||
- Session is saved to ~/.cloakbrowser-profile/ for persistence
|
||||
- After successful auth, subsequent browses will be authenticated
|
||||
- Use --headless false if you need to handle MFA manually
|
||||
`);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Browser launcher using Camoufox with persistent profile
|
||||
* Browser launcher using CloakBrowser with persistent profile
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx browse.ts --url "https://example.com"
|
||||
@@ -9,14 +9,13 @@
|
||||
* npx tsx browse.ts --url "https://example.com" --headless false --wait 5000
|
||||
*/
|
||||
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { launchPersistentContext } from 'cloakbrowser';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { existsSync, mkdirSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
|
||||
// Types
|
||||
interface BrowseOptions {
|
||||
url: string;
|
||||
headless?: boolean;
|
||||
@@ -33,55 +32,54 @@ interface BrowseResult {
|
||||
screenshotPath?: string;
|
||||
}
|
||||
|
||||
// Get profile directory
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
const getProfilePath = (): string => {
|
||||
const customPath = process.env.CAMOUFOX_PROFILE_PATH;
|
||||
const customPath = process.env.CLOAKBROWSER_PROFILE_PATH;
|
||||
if (customPath) return customPath;
|
||||
|
||||
const profileDir = join(homedir(), '.camoufox-profile');
|
||||
const profileDir = join(homedir(), '.cloakbrowser-profile');
|
||||
if (!existsSync(profileDir)) {
|
||||
mkdirSync(profileDir, { recursive: true });
|
||||
}
|
||||
return profileDir;
|
||||
};
|
||||
|
||||
// Launch browser with persistent profile
|
||||
export async function launchBrowser(options: {
|
||||
headless?: boolean;
|
||||
}): Promise<BrowserContext> {
|
||||
const profilePath = getProfilePath();
|
||||
const headless =
|
||||
options.headless ??
|
||||
(process.env.CAMOUFOX_HEADLESS ? process.env.CAMOUFOX_HEADLESS === 'true' : true);
|
||||
const envHeadless = process.env.CLOAKBROWSER_HEADLESS;
|
||||
const headless = options.headless ?? (envHeadless ? envHeadless === 'true' : true);
|
||||
|
||||
console.log(`Using profile: ${profilePath}`);
|
||||
console.log(`Headless mode: ${headless}`);
|
||||
|
||||
const browser = await Camoufox({
|
||||
user_data_dir: profilePath,
|
||||
const context = await launchPersistentContext({
|
||||
userDataDir: profilePath,
|
||||
headless,
|
||||
humanize: true,
|
||||
});
|
||||
|
||||
return browser;
|
||||
return context;
|
||||
}
|
||||
|
||||
// Browse to URL and optionally take screenshot
|
||||
export async function browse(options: BrowseOptions): Promise<BrowseResult> {
|
||||
const browser = await launchBrowser({ headless: options.headless });
|
||||
const page = await browser.newPage();
|
||||
const page = browser.pages()[0] || await browser.newPage();
|
||||
|
||||
try {
|
||||
// Navigate to URL
|
||||
console.log(`Navigating to: ${options.url}`);
|
||||
await page.goto(options.url, {
|
||||
timeout: options.timeout ?? 60000,
|
||||
waitUntil: 'domcontentloaded',
|
||||
});
|
||||
|
||||
// Wait if specified
|
||||
if (options.wait) {
|
||||
console.log(`Waiting ${options.wait}ms...`);
|
||||
await page.waitForTimeout(options.wait);
|
||||
await sleep(options.wait);
|
||||
}
|
||||
|
||||
const result: BrowseResult = {
|
||||
@@ -92,7 +90,6 @@ export async function browse(options: BrowseOptions): Promise<BrowseResult> {
|
||||
console.log(`Page title: ${result.title}`);
|
||||
console.log(`Final URL: ${result.url}`);
|
||||
|
||||
// Take screenshot if requested
|
||||
if (options.screenshot) {
|
||||
const outputPath = options.output ?? 'screenshot.png';
|
||||
await page.screenshot({ path: outputPath, fullPage: true });
|
||||
@@ -100,11 +97,10 @@ export async function browse(options: BrowseOptions): Promise<BrowseResult> {
|
||||
console.log(`Screenshot saved: ${outputPath}`);
|
||||
}
|
||||
|
||||
// If interactive mode, keep browser open
|
||||
if (options.interactive) {
|
||||
console.log('\nInteractive mode - browser will stay open.');
|
||||
console.log('Press Ctrl+C to close.');
|
||||
await new Promise(() => {}); // Keep running
|
||||
await new Promise(() => {});
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -115,16 +111,14 @@ export async function browse(options: BrowseOptions): Promise<BrowseResult> {
|
||||
}
|
||||
}
|
||||
|
||||
// Export page for use in other scripts
|
||||
export async function getPage(options?: {
|
||||
headless?: boolean;
|
||||
}): Promise<{ page: Page; browser: BrowserContext }> {
|
||||
const browser = await launchBrowser({ headless: options?.headless });
|
||||
const page = await browser.newPage();
|
||||
const page = browser.pages()[0] || await browser.newPage();
|
||||
return { page, browser };
|
||||
}
|
||||
|
||||
// CLI entry point
|
||||
async function main() {
|
||||
const args = parseArgs(process.argv.slice(2), {
|
||||
string: ['url', 'output'],
|
||||
@@ -145,7 +139,7 @@ async function main() {
|
||||
|
||||
if (args.help || !args.url) {
|
||||
console.log(`
|
||||
Web Browser with Camoufox
|
||||
Web Browser with CloakBrowser
|
||||
|
||||
Usage:
|
||||
npx tsx browse.ts --url <url> [options]
|
||||
@@ -166,8 +160,8 @@ Examples:
|
||||
npx tsx browse.ts --url "https://example.com" --headless false --interactive
|
||||
|
||||
Environment Variables:
|
||||
CAMOUFOX_PROFILE_PATH Custom profile directory (default: ~/.camoufox-profile/)
|
||||
CAMOUFOX_HEADLESS Default headless mode (true/false)
|
||||
CLOAKBROWSER_PROFILE_PATH Custom profile directory (default: ~/.cloakbrowser-profile/)
|
||||
CLOAKBROWSER_HEADLESS Default headless mode (true/false)
|
||||
`);
|
||||
process.exit(args.help ? 0 : 1);
|
||||
}
|
||||
@@ -188,7 +182,6 @@ Environment Variables:
|
||||
}
|
||||
}
|
||||
|
||||
// Run if executed directly
|
||||
const isMainModule = process.argv[1]?.includes('browse.ts');
|
||||
if (isMainModule) {
|
||||
main();
|
||||
|
||||
49
skills/web-automation/claude-code/scripts/check-install.js
Normal file
49
skills/web-automation/claude-code/scripts/check-install.js
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
const referencePath = path.join(__dirname, "reference-source.json");
|
||||
|
||||
function fail(message, details) {
|
||||
const payload = { error: message };
|
||||
if (details) payload.details = details;
|
||||
process.stderr.write(`${JSON.stringify(payload)}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/**
 * Verify the skill installation in three stages:
 *   1. `cloakbrowser` and `playwright-core` can be imported,
 *   2. `browse.ts` (next to this script) imports from `cloakbrowser` and mentions
 *      `launchPersistentContext`,
 *   3. `reference-source.json` names a reference repo and commit.
 * Each failed stage reports through `fail()`; on success three `OK:` lines go to stdout.
 */
async function main() {
  try {
    // Imported one after the other so the first missing package is the one reported.
    for (const dependency of ["cloakbrowser", "playwright-core"]) {
      await import(dependency);
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    fail(
      "Missing dependency/config: web-automation requires cloakbrowser and playwright-core.",
      reason
    );
  }

  const browseSource = fs.readFileSync(path.join(__dirname, "browse.ts"), "utf8");
  const usesPersistentContext = /launchPersistentContext/.test(browseSource);
  const importsCloakBrowser = /from ['"]cloakbrowser['"]/.test(browseSource);
  if (!(usesPersistentContext && importsCloakBrowser)) {
    fail("browse.ts is not configured for CloakBrowser.");
  }

  const metadata = JSON.parse(fs.readFileSync(referencePath, "utf8"));
  const hasFrozenReference = Boolean(metadata.referenceRepo && metadata.referenceCommit);
  if (!hasFrozenReference) {
    fail("Frozen reference metadata is missing from reference-source.json.");
  }

  const okLines = [
    "OK: cloakbrowser + playwright-core installed\n",
    "OK: CloakBrowser integration detected in browse.ts\n",
    `OK: frozen reference ${metadata.referenceRepo}@${metadata.referenceCommit}\n`,
  ];
  for (const line of okLines) {
    process.stdout.write(line);
  }
}

// Anything not already reported by a stage above (e.g. an unreadable file) ends up here.
main().catch((error) => {
  const reason = error instanceof Error ? error.message : String(error);
  fail("Install check failed.", reason);
});
|
||||
188
skills/web-automation/claude-code/scripts/extract.js
Executable file
188
skills/web-automation/claude-code/scripts/extract.js
Executable file
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Settle delay after navigation when WAIT_TIME is unset or unusable.
const DEFAULT_WAIT_MS = 5 * 1000;
// Upper bound applied to any requested WAIT_TIME.
const MAX_WAIT_MS = 20 * 1000;
// Timeout handed to page.goto().
const NAV_TIMEOUT_MS = 30 * 1000;
// Additional pause taken once when a challenge page is detected.
const EXTRA_CHALLENGE_WAIT_MS = 8 * 1000;
// Maximum number of characters of body text kept in the result.
const CONTENT_LIMIT = 12 * 1000;
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
function fail(message, details) {
|
||||
const payload = { error: message };
|
||||
if (details) payload.details = details;
|
||||
process.stderr.write(`${JSON.stringify(payload)}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
/**
 * Turn the raw WAIT_TIME setting into a wait in milliseconds.
 *
 * @param {string | undefined} raw - Value as read from the environment.
 * @returns {number} The parsed integer capped at MAX_WAIT_MS, or DEFAULT_WAIT_MS
 *   when the value is missing, not a number, or negative.
 */
function parseWaitTime(raw) {
  const requested = Number.parseInt(raw || String(DEFAULT_WAIT_MS), 10);
  const usable = Number.isFinite(requested) && requested >= 0;
  return usable ? Math.min(requested, MAX_WAIT_MS) : DEFAULT_WAIT_MS;
}
|
||||
|
||||
/**
 * Validate the URL given on the command line.
 *
 * Reports through `fail()` when the argument is missing, cannot be parsed as a
 * URL, or uses a protocol other than http/https.
 *
 * @param {string | undefined} rawUrl - The first CLI argument.
 * @returns {string} The URL in its normalized string form.
 */
function parseTarget(rawUrl) {
  if (!rawUrl) {
    fail("Missing URL. Usage: node extract.js <URL>");
  }

  let target;
  try {
    target = new URL(rawUrl);
  } catch (error) {
    fail("Invalid URL.", error.message);
  }

  const isWebProtocol = target.protocol === "http:" || target.protocol === "https:";
  if (!isWebProtocol) {
    fail("Only http and https URLs are allowed.");
  }

  return target.toString();
}
|
||||
|
||||
/**
 * Create the directory that will contain `filePath`, including missing ancestors.
 * Does nothing when `filePath` is empty.
 *
 * @param {string} filePath - Path of the file about to be written.
 */
function ensureParentDir(filePath) {
  if (filePath) {
    const parentDir = path.dirname(filePath);
    fs.mkdirSync(parentDir, { recursive: true });
  }
}
|
||||
|
||||
/**
 * Promise-based delay.
 *
 * @param {number} ms - Milliseconds to wait before the promise resolves.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
async function detectChallenge(page) {
|
||||
try {
|
||||
return await page.evaluate(() => {
|
||||
const text = (document.body?.innerText || "").toLowerCase();
|
||||
return (
|
||||
text.includes("checking your browser") ||
|
||||
text.includes("just a moment") ||
|
||||
text.includes("verify you are human") ||
|
||||
text.includes("press and hold") ||
|
||||
document.querySelector('iframe[src*="challenge"]') !== null ||
|
||||
document.querySelector('iframe[src*="cloudflare"]') !== null
|
||||
);
|
||||
});
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dynamically import the `cloakbrowser` package.
 *
 * @returns The imported module namespace. When the import fails the problem is
 *   reported through `fail()` together with the underlying error message.
 */
async function loadCloakBrowser() {
  let cloakBrowserModule;
  try {
    cloakBrowserModule = await import("cloakbrowser");
  } catch (error) {
    fail(
      "CloakBrowser is not installed for this skill. Run pnpm install in this skill's scripts directory first.",
      error.message
    );
  }
  return cloakBrowserModule;
}
|
||||
|
||||
async function runWithStderrLogs(fn) {
|
||||
const originalLog = console.log;
|
||||
const originalError = console.error;
|
||||
console.log = (...args) => process.stderr.write(`${args.join(" ")}\n`);
|
||||
console.error = (...args) => process.stderr.write(`${args.join(" ")}\n`);
|
||||
try {
|
||||
return await fn();
|
||||
} finally {
|
||||
console.log = originalLog;
|
||||
console.error = originalError;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pick the file the page's HTML is dumped to.
 *
 * With a screenshot path, the HTML file sits next to it with the final
 * extension swapped for `.html`. Only the file name is touched, so a dot in a
 * directory name is left alone and an extension-less screenshot path yields
 * `<name>.html` rather than the screenshot path itself.
 * Without a screenshot path, a timestamped file next to this script is used.
 *
 * @param {string} screenshotPath - SCREENSHOT_PATH value, possibly empty.
 * @returns {string} Path of the HTML file to write.
 */
function deriveHtmlTarget(screenshotPath) {
  if (!screenshotPath) {
    return path.resolve(__dirname, `page-${Date.now()}.html`);
  }
  const { root, dir, name } = path.parse(screenshotPath);
  return path.format({ root, dir, name, ext: ".html" });
}

/**
 * CLI entry point: load one URL in CloakBrowser and print a JSON summary on stdout.
 *
 * Input:
 *   - argv[2]          target URL (http/https only)
 *   - WAIT_TIME        settle delay in ms after navigation (see parseWaitTime)
 *   - SCREENSHOT_PATH  when set, a viewport screenshot is written there
 *   - SAVE_HTML=true   additionally dump the page HTML (see deriveHtmlTarget)
 *   - HEADLESS=false   run with a visible browser; any other value means headless
 *   - USER_AGENT       optional user agent override
 *
 * Any error after the browser module is loaded closes the context (best effort)
 * and is reported through `fail("Scrape failed.", ...)`.
 */
async function main() {
  const requestedUrl = parseTarget(process.argv[2]);
  const waitTime = parseWaitTime(process.env.WAIT_TIME);
  const screenshotPath = process.env.SCREENSHOT_PATH || "";
  const saveHtml = process.env.SAVE_HTML === "true";
  const headless = process.env.HEADLESS !== "false";
  const userAgent = process.env.USER_AGENT || undefined;
  const startedAt = Date.now();
  const { ensureBinary, launchContext } = await loadCloakBrowser();

  let context;
  try {
    // Library logging is routed to stderr so stdout carries only the JSON result.
    await runWithStderrLogs(() => ensureBinary());

    context = await runWithStderrLogs(() => launchContext({
      headless,
      userAgent,
      locale: "en-US",
      viewport: { width: 1440, height: 900 },
      humanize: true,
    }));

    const page = await context.newPage();
    const response = await page.goto(requestedUrl, {
      waitUntil: "domcontentloaded",
      timeout: NAV_TIMEOUT_MS
    });

    await sleep(waitTime);

    // Give a detected challenge one extra pause, then re-check so the result
    // reports whether it was still showing.
    let challengeDetected = await detectChallenge(page);
    if (challengeDetected) {
      await sleep(EXTRA_CHALLENGE_WAIT_MS);
      challengeDetected = await detectChallenge(page);
    }

    const extracted = await page.evaluate((contentLimit) => {
      const bodyText = document.body?.innerText || "";
      return {
        finalUrl: window.location.href,
        title: document.title || "",
        content: bodyText.slice(0, contentLimit),
        metaDescription:
          document.querySelector('meta[name="description"]')?.content ||
          document.querySelector('meta[property="og:description"]')?.content ||
          ""
      };
    }, CONTENT_LIMIT);

    const result = {
      requestedUrl,
      finalUrl: extracted.finalUrl,
      title: extracted.title,
      content: extracted.content,
      metaDescription: extracted.metaDescription,
      status: response ? response.status() : null,
      challengeDetected,
      elapsedSeconds: ((Date.now() - startedAt) / 1000).toFixed(2)
    };

    if (screenshotPath) {
      ensureParentDir(screenshotPath);
      await page.screenshot({ path: screenshotPath, fullPage: false, timeout: 10000 });
      result.screenshot = screenshotPath;
    }

    if (saveHtml) {
      const htmlTarget = deriveHtmlTarget(screenshotPath);
      ensureParentDir(htmlTarget);
      fs.writeFileSync(htmlTarget, await page.content());
      result.htmlFile = htmlTarget;
    }

    process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
    await context.close();
  } catch (error) {
    if (context) {
      try {
        await context.close();
      } catch {
        // Ignore close errors after the primary failure.
      }
    }
    // Not every thrown value is an Error instance.
    fail("Scrape failed.", error instanceof Error ? error.message : String(error));
  }
}

// Report unexpected rejections in the same JSON format as every other failure.
main().catch((error) => {
  fail("Scrape failed.", error instanceof Error ? error.message : String(error));
});
|
||||
329
skills/web-automation/claude-code/scripts/flow.ts
Normal file
329
skills/web-automation/claude-code/scripts/flow.ts
Normal file
@@ -0,0 +1,329 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page } from 'playwright-core';
|
||||
import { launchBrowser } from './browse';
|
||||
|
||||
/**
 * One action of a flow, discriminated by `action`.
 * Steps come either from `--steps` JSON or from `parseInstruction()`.
 */
type Step =
  // Navigate to `url`.
  | { action: 'goto'; url: string }
  // Click a target given by CSS selector, by role + accessible name, or by visible text.
  | { action: 'click'; selector?: string; text?: string; role?: string; name?: string }
  // Fill `text` into `selector`, or into a default input when no selector is given.
  | { action: 'type'; selector?: string; text: string }
  // Press `key` on `selector`, or on the page keyboard when no selector is given.
  | { action: 'press'; key: string; selector?: string }
  // Pause for `ms` milliseconds.
  | { action: 'wait'; ms: number }
  // Save a full-page screenshot to `path`.
  | { action: 'screenshot'; path: string }
  // Print the trimmed text of elements matching `selector`, limited to the first `count`.
  | { action: 'extract'; selector: string; count?: number };
|
||||
|
||||
/**
 * Validate a navigation target and return it in normalized form.
 *
 * @param rawUrl - URL text taken from a flow step.
 * @returns The URL as serialized by the `URL` class.
 * @throws Error when the text is not a parseable URL or its protocol is not http/https.
 */
function normalizeNavigationUrl(rawUrl: string): string {
  let target: URL | undefined;
  try {
    target = new URL(rawUrl);
  } catch {
    target = undefined;
  }
  if (!target) {
    throw new Error(`Invalid navigation URL: ${rawUrl}`);
  }

  const isWebProtocol = target.protocol === 'http:' || target.protocol === 'https:';
  if (!isWebProtocol) {
    throw new Error(`Only http and https URLs are allowed in flow steps: ${rawUrl}`);
  }

  return target.href;
}
|
||||
|
||||
/**
 * Map common key spellings onto the key names used for key presses.
 *
 * @param k - Key as written by the user; matched case-insensitively.
 * @returns `Enter` for an empty value, `enter` or `return`; `Tab` for `tab`;
 *   `Escape` for `escape` or `esc`; any other value is returned unchanged.
 */
function normalizeKey(k: string): string {
  if (!k) return 'Enter';

  // A Map (not an object literal) so names like "constructor" never hit a prototype member.
  const aliases = new Map<string, string>([
    ['enter', 'Enter'],
    ['return', 'Enter'],
    ['tab', 'Tab'],
    ['escape', 'Escape'],
    ['esc', 'Escape'],
  ]);
  return aliases.get(k.toLowerCase()) ?? k;
}
|
||||
|
||||
/**
 * Split a natural-language instruction into its individual step phrases.
 *
 * Steps are separated by `;` or by the word `then` (case-insensitive).
 * Separators inside a double-quoted span are part of that step's text and do
 * not split, so `type "now; then later"` stays one step. An opening quote with
 * no closing quote is not treated as a quoted span.
 *
 * @param instruction - The full `--instruction` text.
 * @returns The trimmed, non-empty step phrases in their original order.
 */
function splitInstructions(instruction: string): string[] {
  // The quoted-span alternative comes first so it consumes separators it contains.
  const quotedOrSeparator = /"[^"]*"|\bthen\b|;/gi;
  const segments: string[] = [];
  let segmentStart = 0;

  let match: RegExpExecArray | null;
  while ((match = quotedOrSeparator.exec(instruction)) !== null) {
    if (match[0].startsWith('"')) continue; // quoted text stays inside the current segment
    segments.push(instruction.slice(segmentStart, match.index));
    segmentStart = match.index + match[0].length;
  }
  segments.push(instruction.slice(segmentStart));

  return segments.map((segment) => segment.trim()).filter(Boolean);
}
|
||||
|
||||
/**
 * Translate a natural-language instruction into executable steps.
 *
 * The instruction is split with `splitInstructions()`, and each phrase is
 * matched against an ordered list of patterns; the first pattern that matches
 * decides the step.
 *
 * @param instruction - Text such as `go to https://example.com then press enter`.
 * @returns One step per phrase, in order.
 * @throws Error naming the first phrase that matches no pattern; navigation
 *   URLs are additionally checked by `normalizeNavigationUrl()`.
 */
function parseInstruction(instruction: string): Step[] {
  // Order matters: the more specific form of each verb comes before the general one.
  const rules: ReadonlyArray<readonly [RegExp, (m: RegExpMatchArray) => Step]> = [
    // go to https://...
    [
      /^(?:go to|open|navigate to)\s+(https?:\/\/\S+)/i,
      (m) => ({ action: 'goto', url: normalizeNavigationUrl(m[1]) }),
    ],
    // click button "name" (role + accessible name)
    [
      /^click\s+(button|link|textbox|img|image|tab)\s+"([^"]+)"$/i,
      (m) => {
        const requestedRole = m[1].toLowerCase();
        return { action: 'click', role: requestedRole === 'image' ? 'img' : requestedRole, name: m[2] };
      },
    ],
    // click on "text"
    [/^click(?: on)?\s+"([^"]+)"/i, (m) => ({ action: 'click', text: m[1] })],
    // click #selector
    [/^click(?: on)?\s+(#[\w-]+|\.[\w-]+|[a-z]+\[[^\]]+\])/i, (m) => ({ action: 'click', selector: m[1] })],
    // type "text" in selector
    [/^type\s+"([^"]+)"\s+in\s+(.+)$/i, (m) => ({ action: 'type', text: m[1], selector: m[2].trim() })],
    // type "text"
    [/^type\s+"([^"]+)"$/i, (m) => ({ action: 'type', text: m[1] })],
    // press enter in selector
    [/^press\s+(\w+)\s+in\s+(.+)$/i, (m) => ({ action: 'press', key: normalizeKey(m[1]), selector: m[2].trim() })],
    // press enter
    [/^press\s+(\w+)$/i, (m) => ({ action: 'press', key: normalizeKey(m[1]) })],
    // wait 2s
    [/^wait\s+(\d+)\s*s(?:ec(?:onds?)?)?$/i, (m) => ({ action: 'wait', ms: parseInt(m[1], 10) * 1000 })],
    // wait 500ms
    [/^wait\s+(\d+)\s*ms$/i, (m) => ({ action: 'wait', ms: parseInt(m[1], 10) })],
    // screenshot path
    [/^screenshot(?: to)?\s+(.+)$/i, (m) => ({ action: 'screenshot', path: m[1].trim() })],
  ];

  return splitInstructions(instruction).map((phrase) => {
    for (const [pattern, toStep] of rules) {
      const found = phrase.match(pattern);
      if (found) return toStep(found);
    }
    throw new Error(`Could not parse step: "${phrase}"`);
  });
}
|
||||
|
||||
/**
 * Check a list of steps before any browser work starts.
 *
 * The list may come straight from `JSON.parse`, so its shape is verified at
 * runtime: it must be an array whose entries are objects carrying a string
 * `action`. `goto` steps additionally get their URL validated and normalized.
 *
 * @param steps - Parsed steps (typed as `Step[]`, but not trusted to be).
 * @returns A new array; `goto` steps are copies with a normalized `url`.
 * @throws Error when the list or an entry has the wrong shape, or when a
 *   `goto` URL is rejected by `normalizeNavigationUrl()`.
 */
function validateSteps(steps: Step[]): Step[] {
  if (!Array.isArray(steps)) {
    throw new Error('Flow steps must be an array of step objects.');
  }

  return steps.map((step, index) => {
    const candidate: unknown = step;
    const hasAction =
      typeof candidate === 'object' &&
      candidate !== null &&
      typeof (candidate as { action?: unknown }).action === 'string';
    if (!hasAction) {
      throw new Error(`Flow step ${index + 1} must be an object with a string "action".`);
    }

    return step.action === 'goto'
      ? { ...step, url: normalizeNavigationUrl(step.url) }
      : step;
  });
}
|
||||
|
||||
/**
 * Escape every regular-expression metacharacter in `value` so the result
 * matches the text literally when passed to `new RegExp(...)`.
 *
 * @param value - Arbitrary text.
 * @returns `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => '\\' + character);
}
|
||||
|
||||
/**
 * Tell whether a click target's text looks like a login control.
 *
 * @param text - Text the user asked to click.
 * @returns `true` when the text contains "login", "accedi", "sign in" (with or
 *   without whitespace between the words) or "entra", ignoring case.
 */
function isLikelyLoginText(text: string): boolean {
  const loginWords = /(login|accedi|sign\s*in|entra)/i;
  return loginWords.test(text);
}
|
||||
|
||||
/**
 * Click the first element whose name or text matches `text`.
 *
 * Two passes are made: an exact, case-insensitive match first, then a
 * substring match. Within each pass the candidates are tried in the order
 * button, link, any element with that text. A candidate that exists but cannot
 * be clicked within 8 seconds is skipped.
 *
 * @param page - Page to act on.
 * @param text - Text to look for; matched literally.
 * @returns `true` once a click succeeds, `false` when every candidate was absent or failed.
 */
async function clickByText(page: Page, text: string): Promise<boolean> {
  const literal = escapeRegExp(text);
  const passes = [new RegExp(`^${literal}$`, 'i'), new RegExp(literal, 'i')];

  for (const matcher of passes) {
    const candidates = [
      page.getByRole('button', { name: matcher }).first(),
      page.getByRole('link', { name: matcher }).first(),
      page.getByText(matcher).first(),
    ];

    for (const candidate of candidates) {
      const isPresent = (await candidate.count()) > 0;
      if (!isPresent) continue;

      try {
        await candidate.click({ timeout: 8000 });
        return true;
      } catch {
        // keep trying next candidate
      }
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Last-resort recovery when a login-like click target could not be clicked:
 * navigate directly to a login URL instead.
 *
 * Strategy:
 *   1. Collect the `href` of anchors/buttons whose text contains a login term.
 *   2. Navigate to the first such href that mentions "login" and resolves
 *      (against the current page URL) to an http/https URL. Other schemes
 *      (`javascript:`, `mailto:`, `data:`, ...) are skipped, matching the
 *      restriction `normalizeNavigationUrl()` applies to explicit goto steps.
 *   3. On corriere.it or one of its subdomains, fall back to the site's known login page.
 *
 * @param page - Page to act on.
 * @param requestedText - Text the user asked to click.
 * @returns `true` when a navigation was performed, `false` when the text is
 *   not login-like or no fallback applied.
 */
async function fallbackLoginNavigation(page: Page, requestedText: string): Promise<boolean> {
  if (!isLikelyLoginText(requestedText)) return false;

  const current = new URL(page.url());

  const candidateLinks = await page.evaluate(() => {
    const loginTerms = ['login', 'accedi', 'sign in', 'entra'];
    const anchors = Array.from(document.querySelectorAll('a[href], a[onclick], button[onclick]')) as Array<HTMLAnchorElement | HTMLButtonElement>;

    return anchors
      .map((el) => {
        const text = (el.textContent || '').trim().toLowerCase();
        const href = (el as HTMLAnchorElement).getAttribute('href') || '';
        return { text, href };
      })
      .filter((x) => x.text && loginTerms.some((t) => x.text.includes(t)))
      .map((x) => x.href)
      .filter(Boolean);
  });

  // Prefer real URLs: resolve each href and accept only http/https targets.
  const baseUrl = page.url();
  for (const href of candidateLinks) {
    if (!/login/i.test(href)) continue;

    let resolved: URL;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      continue; // malformed href; try the next candidate
    }
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') continue;

    await page.goto(resolved.toString(), { waitUntil: 'domcontentloaded', timeout: 60000 });
    return true;
  }

  // Site-specific fallback for Corriere. The match is the exact domain or a dot-separated
  // subdomain, so unrelated hosts that merely end in "corriere.it" do not qualify.
  const host = current.hostname.toLowerCase();
  if (host === 'corriere.it' || host.endsWith('.corriere.it')) {
    await page.goto('https://www.corriere.it/account/login', {
      waitUntil: 'domcontentloaded',
      timeout: 60000,
    });
    return true;
  }

  return false;
}
|
||||
|
||||
/**
 * Click a text field and fill it with `text`.
 *
 * @param page - Page to act on.
 * @param text - Value to fill in.
 * @param selector - Field to use; when omitted (or empty) the first match of a
 *   generic search/text-input/textarea selector is used instead.
 */
async function typeInBestTarget(page: Page, text: string, selector?: string) {
  const defaultFieldSelector = 'input[name="q"], input[type="search"], input[type="text"], textarea';
  const field = page.locator(selector || defaultFieldSelector).first();

  await field.click({ timeout: 10000 });
  await field.fill(text);
}
|
||||
|
||||
/**
 * Press a key, either on a specific element or on the page keyboard.
 *
 * @param page - Page to act on.
 * @param key - Key name to press.
 * @param selector - When given, the key is pressed on the first matching
 *   element; otherwise it is sent through `page.keyboard`.
 */
async function pressOnTarget(page: Page, key: string, selector?: string) {
  if (!selector) {
    await page.keyboard.press(key);
    return;
  }
  await page.locator(selector).first().press(key);
}
|
||||
|
||||
/**
 * Execute one `click` step, then give a possible navigation time to settle.
 * The target is resolved in priority order: CSS selector, role + name, visible text.
 */
async function performClick(page: Page, step: Extract<Step, { action: 'click' }>): Promise<void> {
  if (step.selector) {
    await page.locator(step.selector).first().click({ timeout: 15000 });
  } else if (step.role && step.name) {
    const namePattern = new RegExp(escapeRegExp(step.name), 'i');
    await page.getByRole(step.role as any, { name: namePattern }).first().click({ timeout: 15000 });
  } else if (step.text) {
    // Direct click first; the login-navigation fallback only runs when that fails.
    const handled = (await clickByText(page, step.text)) || (await fallbackLoginNavigation(page, step.text));
    if (!handled) {
      throw new Error(`Could not click target text: ${step.text}`);
    }
  } else {
    throw new Error('click step missing selector/text/role');
  }

  try {
    await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
  } catch {
    // no navigation is fine
  }
}

/**
 * Run the steps one after another on `page`.
 *
 * Execution stops at the first step that throws; `extract` steps print their
 * result as JSON via `console.log`.
 *
 * @param page - Page to drive.
 * @param steps - Steps to execute, in order.
 * @throws Error from the failing step, or `Unknown step` for an unrecognized action.
 */
async function runSteps(page: Page, steps: Step[]) {
  for (const step of steps) {
    switch (step.action) {
      case 'goto': {
        const destination = normalizeNavigationUrl(step.url);
        await page.goto(destination, { waitUntil: 'domcontentloaded', timeout: 60000 });
        break;
      }
      case 'click':
        await performClick(page, step);
        break;
      case 'type':
        await typeInBestTarget(page, step.text, step.selector);
        break;
      case 'press':
        await pressOnTarget(page, step.key, step.selector);
        break;
      case 'wait':
        await page.waitForTimeout(step.ms);
        break;
      case 'screenshot':
        await page.screenshot({ path: step.path, fullPage: true });
        break;
      case 'extract': {
        const texts = await page.locator(step.selector).allTextContents();
        // Limit first, then trim and drop empties, so `count` applies to matched elements.
        const limited = texts.slice(0, step.count ?? texts.length);
        const cleaned = limited.map((entry) => entry.trim()).filter(Boolean);
        console.log(JSON.stringify(cleaned, null, 2));
        break;
      }
      default:
        throw new Error('Unknown step');
    }
  }
}
|
||||
|
||||
/**
 * CLI entry point for the flow runner.
 *
 * Reads either `--instruction` (natural language) or `--steps` (JSON array),
 * validates the steps before a browser is launched, runs them, and always
 * closes the browser afterwards. With `--help`, or with neither option, the
 * usage text is printed and the process exits (0 for help, 1 otherwise).
 */
async function main() {
  const argv = process.argv.slice(2);
  const args = parseArgs(argv, {
    string: ['instruction', 'steps'],
    boolean: ['headless', 'help'],
    default: { headless: true },
    alias: { i: 'instruction', s: 'steps', h: 'help' },
  });

  if (args.help || (!args.instruction && !args.steps)) {
    console.log(`
General Web Flow Runner (CloakBrowser)

Usage:
npx tsx flow.ts --instruction "go to https://example.com then type \"hello\" then press enter"
npx tsx flow.ts --steps '[{"action":"goto","url":"https://example.com"}]'

Supported natural steps:
- go to/open/navigate to <url>
- click on "Text"
- click <css-selector>
- type "text"
- type "text" in <css-selector>
- press <key>
- press <key> in <css-selector>
- wait <N>s | wait <N>ms
- screenshot <path>
`);
    process.exit(args.help ? 0 : 1);
  }

  const steps = validateSteps(args.steps ? JSON.parse(args.steps) : parseInstruction(args.instruction));

  // The parser default makes args.headless always a boolean. Forward it only when the
  // flag was actually given, so launchBrowser can fall back to CLOAKBROWSER_HEADLESS.
  const headlessFlagGiven = argv.some((arg) => /^--(?:no-)?headless(?:=|$)/.test(arg));
  const browser = await launchBrowser({ headless: headlessFlagGiven ? args.headless : undefined });

  // Reuse the context's existing first page when there is one, as browse.ts does.
  const page = browser.pages()[0] ?? (await browser.newPage());

  try {
    await runSteps(page, steps);
    console.log('Flow complete. Final URL:', page.url());
  } finally {
    await browser.close();
  }
}

main().catch((e) => {
  console.error('Error:', e instanceof Error ? e.message : e);
  process.exit(1);
});
|
||||
@@ -1,27 +1,36 @@
|
||||
{
|
||||
"name": "web-automation-scripts",
|
||||
"version": "1.0.0",
|
||||
"description": "Web browsing and scraping scripts using Camoufox",
|
||||
"description": "Web browsing and scraping scripts using CloakBrowser",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"check-install": "node check-install.js",
|
||||
"extract": "node extract.js",
|
||||
"browse": "tsx browse.ts",
|
||||
"auth": "tsx auth.ts",
|
||||
"flow": "tsx flow.ts",
|
||||
"scrape": "tsx scrape.ts",
|
||||
"fetch-browser": "npx camoufox-js fetch"
|
||||
"typecheck": "tsc --noEmit -p tsconfig.json",
|
||||
"lint": "pnpm run typecheck && node --check check-install.js && node --check extract.js",
|
||||
"fetch-browser": "npx cloakbrowser install"
|
||||
},
|
||||
"dependencies": {
|
||||
"camoufox-js": "^0.8.5",
|
||||
"playwright-core": "^1.40.0",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"better-sqlite3": "^12.6.2",
|
||||
"cloakbrowser": "^0.3.22",
|
||||
"jsdom": "^24.0.0",
|
||||
"minimist": "^1.2.8"
|
||||
"minimist": "^1.2.8",
|
||||
"playwright-core": "^1.59.1",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.3.0",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/minimist": "^1.2.5",
|
||||
"tsx": "^4.7.0"
|
||||
}
|
||||
"@types/turndown": "^5.0.4",
|
||||
"esbuild": "0.27.0",
|
||||
"tsx": "^4.7.0",
|
||||
"typescript": "^5.3.0"
|
||||
},
|
||||
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
|
||||
}
|
||||
|
||||
690
skills/web-automation/claude-code/scripts/pnpm-lock.yaml
generated
690
skills/web-automation/claude-code/scripts/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"referenceRepo": "https://git.fiorinis.com/Home/stef-openclaw-skills",
|
||||
"referenceCommit": "b9878e938c1055e0284876aeb65157286d95f9d1",
|
||||
"importedFiles": [
|
||||
"auth.ts",
|
||||
"browse.ts",
|
||||
"check-install.js",
|
||||
"extract.js",
|
||||
"flow.ts",
|
||||
"package.json",
|
||||
"scan-local-app.ts",
|
||||
"scrape.ts",
|
||||
"test-full.ts",
|
||||
"test-minimal.ts",
|
||||
"test-profile.ts"
|
||||
],
|
||||
"excludedReferencePatterns": [
|
||||
"*discover.js",
|
||||
"*photos.js",
|
||||
"*identifiers.js",
|
||||
"*.test.mjs",
|
||||
"domain-specific helper scripts"
|
||||
]
|
||||
}
|
||||
174
skills/web-automation/claude-code/scripts/scan-local-app.ts
Normal file
174
skills/web-automation/claude-code/scripts/scan-local-app.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
import { mkdirSync, writeFileSync } from 'fs';
|
||||
import { dirname, resolve } from 'path';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
/** Outcome of one navigation attempt, as produced by `gotoWithStatus()`. */
type NavResult = {
  /** URL that was asked for. */
  requestedUrl: string;
  /** URL the page reports after the attempt. */
  url: string;
  /** HTTP status of the navigation response, or `null` when there was none or navigation failed. */
  status: number | null;
  /** Page title, or an empty string when it could not be read. */
  title: string;
  /** Stringified error; present only when the navigation threw. */
  error?: string;
};

/** Result of checking a single route. */
type RouteCheck = {
  /** The route that was checked. */
  route: string;
  /** Navigation outcome for that route. */
  result: NavResult;
  /** Heading text found on the page, or `null` — presumably read via `textOrNull()`; confirm against the caller. */
  heading: string | null;
};
|
||||
|
||||
const DEFAULT_BASE_URL = 'http://localhost:3000';
|
||||
const DEFAULT_REPORT_PATH = resolve(process.cwd(), 'scan-local-app.md');
|
||||
|
||||
/**
 * Read an environment variable, treating blank values as unset.
 *
 * @param name - Variable name.
 * @returns The value with surrounding whitespace removed, or `undefined` when
 *   the variable is missing, empty, or whitespace only.
 */
function env(name: string): string | undefined {
  const trimmed = (process.env[name] ?? '').trim();
  return trimmed === '' ? undefined : trimmed;
}
|
||||
|
||||
/**
 * Builds the list of absolute URLs to visit.
 *
 * Routes come from the comma-separated `SCAN_ROUTES` environment variable;
 * each entry is trimmed and resolved against `baseUrl`.
 *
 * @param baseUrl - Base URL that relative routes are resolved against.
 * @returns The resolved route URLs, or `[baseUrl]` when `SCAN_ROUTES` is
 *          unset or contains no usable entry.
 * @throws TypeError if an entry cannot be resolved into a valid URL.
 */
function getRoutes(baseUrl: string): string[] {
  const configured = (env('SCAN_ROUTES') ?? '')
    .split(',')
    .map((entry) => entry.trim())
    .filter(Boolean);

  // A value made only of separators/whitespace (e.g. ",") carries no route;
  // fall back to the base URL instead of scanning nothing.
  if (configured.length === 0) {
    return [baseUrl];
  }

  return configured.map((entry) => new URL(entry, baseUrl).toString());
}
|
||||
|
||||
/**
 * Navigates `page` to `url` and summarises the outcome without throwing on
 * navigation failure.
 *
 * @param page - Browser page exposing `goto`, `url` and `title`.
 * @param url - Absolute URL to open.
 * @returns The requested URL, the page's URL after the attempt, the response
 *          status (`null` when there is no response or the navigation
 *          failed), the page title (`''` when it cannot be read) and, on
 *          failure only, the stringified error.
 */
async function gotoWithStatus(page: any, url: string): Promise<NavResult> {
  let response: any;
  try {
    response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
  } catch (error: unknown) {
    // Any rejection is a failed navigation, whatever the rejection value is;
    // the caller gets it as data so the scan can carry on.
    return {
      requestedUrl: url,
      url: page.url(),
      status: null,
      title: await page.title().catch(() => ''),
      error: String(error),
    };
  }

  return {
    requestedUrl: url,
    url: page.url(),
    // `goto` may resolve without a response object, hence the null guard.
    status: response ? response.status() : null,
    title: await page.title().catch(() => ''),
  };
}
|
||||
|
||||
/**
 * Reads the text of the first element matching `selector`.
 *
 * @param page - Browser page exposing `locator`.
 * @param selector - Selector of the element to read.
 * @returns The element text with whitespace runs collapsed to single spaces
 *          and the ends trimmed, or `null` when nothing matches, the text is
 *          empty or whitespace-only, or the lookup throws.
 */
async function textOrNull(page: any, selector: string): Promise<string | null> {
  const locator = page.locator(selector).first();
  try {
    if ((await locator.count()) === 0) return null;
    const raw = await locator.textContent();
    const normalized = (raw ?? '').replace(/\s+/g, ' ').trim();
    // Whitespace-only content carries no text, so report it as absent.
    return normalized === '' ? null : normalized;
  } catch {
    // Best effort: a failed lookup is reported the same as a missing element.
    return null;
  }
}
|
||||
|
||||
/**
 * Optionally performs a form login and appends a "## Login" section to the
 * report.
 *
 * Configuration is read from the environment:
 * - `SCAN_LOGIN_PATH`: login page, resolved against `baseUrl`; login is
 *   skipped when unset.
 * - `SCAN_USERNAME` / `SCAN_PASSWORD`, falling back to
 *   `CLOAKBROWSER_USERNAME` / `CLOAKBROWSER_PASSWORD`.
 * - `SCAN_USERNAME_SELECTOR`, `SCAN_PASSWORD_SELECTOR`,
 *   `SCAN_SUBMIT_SELECTOR`: overrides for the default form selectors.
 *
 * @param page - Browser page used for the login attempt.
 * @param baseUrl - Base URL the login path is resolved against.
 * @param lines - Report lines; this function appends to it in place.
 * @throws If filling or submitting the form fails (e.g. a selector never matches).
 */
async function loginIfConfigured(page: any, baseUrl: string, lines: string[]): Promise<void> {
  lines.push('## Login');

  const loginPath = env('SCAN_LOGIN_PATH');
  if (!loginPath) {
    lines.push('- Skipped: set `SCAN_LOGIN_PATH` to enable login smoke checks.');
    lines.push('');
    return;
  }

  const loginUrl = new URL(loginPath, baseUrl).toString();
  lines.push(`- Login URL: ${loginUrl}`);

  // Record how the login page loaded; without a loaded page the form
  // selectors below cannot match, so stop here and report why.
  const navigation = await gotoWithStatus(page, loginUrl);
  lines.push(`- Login page status: ${navigation.status ?? 'ERR'}`);
  if (navigation.error) {
    const reason = navigation.error.replace(/\s+/g, ' ').trim();
    lines.push(`- Skipped: login page failed to load (${reason}).`);
    lines.push('');
    return;
  }

  const username = env('SCAN_USERNAME') ?? env('CLOAKBROWSER_USERNAME');
  const password = env('SCAN_PASSWORD') ?? env('CLOAKBROWSER_PASSWORD');
  if (!username || !password) {
    lines.push('- Skipped: set `SCAN_USERNAME`/`SCAN_PASSWORD` or `CLOAKBROWSER_USERNAME`/`CLOAKBROWSER_PASSWORD`.');
    lines.push('');
    return;
  }

  const usernameSelector = env('SCAN_USERNAME_SELECTOR') ?? 'input[type="email"], input[name="email"]';
  const passwordSelector = env('SCAN_PASSWORD_SELECTOR') ?? 'input[type="password"], input[name="password"]';
  const submitSelector = env('SCAN_SUBMIT_SELECTOR') ?? 'button[type="submit"], input[type="submit"]';

  await page.locator(usernameSelector).first().fill(username);
  await page.locator(passwordSelector).first().fill(password);
  await page.locator(submitSelector).first().click();
  // Fixed pause after submitting; presumably gives the post-login
  // redirect time to settle before the URL and cookies are sampled.
  await page.waitForTimeout(2500);

  lines.push(`- After submit URL: ${page.url()}`);
  lines.push(`- Cookie count: ${(await page.context().cookies()).length}`);
  lines.push('');
}
|
||||
|
||||
/**
 * Shortens `url` to the part after `baseUrl` for display.
 *
 * The base only counts as a prefix when it ends on a URL boundary, so a
 * trailing slash on `baseUrl` does not eat the leading `/` of the path and a
 * look-alike origin (e.g. port 30001 vs 3000) is left untouched.
 *
 * @returns `/` for the base itself, the remainder starting with `/`, `?` or
 *          `#` for URLs under the base, and `url` unchanged otherwise.
 */
function relativeToBase(url: string, baseUrl: string): string {
  const prefix = baseUrl.replace(/\/+$/, '');
  if (!url.startsWith(prefix)) return url;
  const rest = url.slice(prefix.length);
  if (rest === '') return '/';
  return /^[/?#]/.test(rest) ? rest : url;
}

/**
 * Visits every configured route and appends a "## Route Checks" section to
 * the report, one bullet per route with its status, final URL, first `h1`
 * and any navigation error.
 *
 * @param page - Browser page used for the navigations.
 * @param baseUrl - Base URL used to resolve routes and to shorten URLs in the report.
 * @param lines - Report lines; this function appends to it in place.
 */
async function checkRoutes(page: any, baseUrl: string, lines: string[]): Promise<void> {
  const routeChecks: RouteCheck[] = [];

  for (const url of getRoutes(baseUrl)) {
    const result = await gotoWithStatus(page, url);
    const heading = await textOrNull(page, 'h1');
    routeChecks.push({ route: url, result, heading });
  }

  lines.push('## Route Checks');
  for (const check of routeChecks) {
    const relativeUrl = relativeToBase(check.route, baseUrl);
    const finalPath = relativeToBase(check.result.url, baseUrl);
    const suffix = check.heading ? `, h1="${check.heading}"` : '';
    // Error text can span several lines; collapse it so the markdown bullet
    // stays on a single line.
    const errorSuffix = check.result.error
      ? `, error="${check.result.error.replace(/\s+/g, ' ').trim()}"`
      : '';
    lines.push(
      `- ${relativeUrl} → status ${check.result.status ?? 'ERR'} (final ${finalPath})${suffix}${errorSuffix}`
    );
  }
  lines.push('');
}
|
||||
|
||||
/**
 * Runs the scan: opens a browser page, performs the optional login and the
 * route checks, then writes the markdown report to disk.
 *
 * Environment variables read here: `SCAN_BASE_URL`, `SCAN_REPORT_PATH`,
 * `SCAN_HEADLESS` (falling back to `CLOAKBROWSER_HEADLESS`; headless unless
 * the value is something other than the exact string `true`).
 *
 * The browser is always closed; the report is only written when the login
 * and route checks complete without throwing.
 */
async function main() {
  const baseUrl = env('SCAN_BASE_URL') ?? DEFAULT_BASE_URL;
  const reportPath = resolve(env('SCAN_REPORT_PATH') ?? DEFAULT_REPORT_PATH);
  const headlessSetting = env('SCAN_HEADLESS') ?? env('CLOAKBROWSER_HEADLESS') ?? 'true';
  const headless = headlessSetting === 'true';

  const { page, browser } = await getPage({ headless });

  // Report header.
  const lines: string[] = [
    '# Web Automation Scan (local)',
    '',
    `- Base URL: ${baseUrl}`,
    `- Timestamp: ${new Date().toISOString()}`,
    `- Headless: ${headless}`,
    `- Report Path: ${reportPath}`,
    '',
  ];

  try {
    await loginIfConfigured(page, baseUrl, lines);
    await checkRoutes(page, baseUrl, lines);
    lines.push(
      '## Notes',
      '- This generic smoke helper records route availability and top-level headings for a local app.',
      '- Configure login and route coverage with `SCAN_*` environment variables.'
    );
  } finally {
    await browser.close();
  }

  const reportBody = `${lines.join('\n')}\n`;
  mkdirSync(dirname(reportPath), { recursive: true });
  writeFileSync(reportPath, reportBody, 'utf-8');
  console.log(`Report written to ${reportPath}`);
}
|
||||
|
||||
// Script entry point: log any failure from the scan and report it through a
// non-zero exit code without terminating the process abruptly.
main().catch((failure) => {
  console.error(failure);
  process.exitCode = 1;
});
|
||||
@@ -1,28 +1,25 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { launchPersistentContext } from 'cloakbrowser';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
async function test() {
|
||||
const profilePath = join(homedir(), '.camoufox-profile');
|
||||
const profilePath = join(homedir(), '.cloakbrowser-profile');
|
||||
if (!existsSync(profilePath)) {
|
||||
mkdirSync(profilePath, { recursive: true });
|
||||
}
|
||||
|
||||
console.log('Profile path:', profilePath);
|
||||
console.log('Launching with full options...');
|
||||
console.log('Launching CloakBrowser with full options...');
|
||||
|
||||
const browser = await Camoufox({
|
||||
const browser = await launchPersistentContext({
|
||||
headless: true,
|
||||
user_data_dir: profilePath,
|
||||
// humanize: 1.5, // Test without this first
|
||||
// geoip: true, // Test without this first
|
||||
// enable_cache: true,
|
||||
// block_webrtc: false,
|
||||
userDataDir: profilePath,
|
||||
humanize: true,
|
||||
});
|
||||
|
||||
console.log('Browser launched');
|
||||
const page = await browser.newPage();
|
||||
const page = browser.pages()[0] || await browser.newPage();
|
||||
console.log('Page created');
|
||||
|
||||
await page.goto('https://github.com', { timeout: 30000 });
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { launch } from 'cloakbrowser';
|
||||
|
||||
async function test() {
|
||||
console.log('Launching Camoufox with minimal config...');
|
||||
console.log('Launching CloakBrowser with minimal config...');
|
||||
|
||||
const browser = await Camoufox({
|
||||
const browser = await launch({
|
||||
headless: true,
|
||||
humanize: true,
|
||||
});
|
||||
|
||||
console.log('Browser launched');
|
||||
|
||||
@@ -1,24 +1,25 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { launchPersistentContext } from 'cloakbrowser';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
async function test() {
|
||||
const profilePath = join(homedir(), '.camoufox-profile');
|
||||
const profilePath = join(homedir(), '.cloakbrowser-profile');
|
||||
if (!existsSync(profilePath)) {
|
||||
mkdirSync(profilePath, { recursive: true });
|
||||
}
|
||||
|
||||
console.log('Profile path:', profilePath);
|
||||
console.log('Launching with user_data_dir...');
|
||||
console.log('Launching with persistent userDataDir...');
|
||||
|
||||
const browser = await Camoufox({
|
||||
const browser = await launchPersistentContext({
|
||||
headless: true,
|
||||
user_data_dir: profilePath,
|
||||
userDataDir: profilePath,
|
||||
humanize: true,
|
||||
});
|
||||
|
||||
console.log('Browser launched');
|
||||
const page = await browser.newPage();
|
||||
const page = browser.pages()[0] || await browser.newPage();
|
||||
console.log('Page created');
|
||||
|
||||
await page.goto('https://example.com', { timeout: 30000 });
|
||||
|
||||
Reference in New Issue
Block a user