Add web-automation skill variants and inline prerequisite checks
This commit is contained in:
29
README.md
29
README.md
@@ -7,8 +7,7 @@ This repo is organized similarly to `obra/superpowers` and is designed to scale
|
||||
## Goals
|
||||
|
||||
- Keep skills portable across coding agents.
|
||||
- Keep each skill self-contained (`skills/<skill-name>/SKILL.md` + optional resources).
|
||||
- Enforce requirement checks for skills that depend on Superpower skills.
|
||||
- Keep each skill self-contained (`skills/<skill-name>/<agent-variant>/SKILL.md` + resources).
|
||||
|
||||
## Repository Layout
|
||||
|
||||
@@ -16,12 +15,15 @@ This repo is organized similarly to `obra/superpowers` and is designed to scale
|
||||
ai-coding-skills/
|
||||
├── README.md
|
||||
├── docs/
|
||||
│ ├── repository-structure.md
|
||||
│ └── requirements-gate.md
|
||||
│ └── install.md
|
||||
├── skills/
|
||||
│ ├── _template/
|
||||
│ │ └── SKILL.md
|
||||
│ └── create-plan/
|
||||
│ ├── create-plan/
|
||||
│ │ ├── codex/
|
||||
│ │ ├── claude-code/
|
||||
│ │ └── opencode/
|
||||
│ └── web-automation/
|
||||
│ ├── codex/
|
||||
│ ├── claude-code/
|
||||
│ └── opencode/
|
||||
@@ -41,21 +43,12 @@ ai-coding-skills/
|
||||
| create-plan | codex | Structured planning with milestones + runbook-first execution workflow | Ready |
|
||||
| create-plan | claude-code | Structured planning with milestones + runbook-first execution workflow | Ready |
|
||||
| create-plan | opencode | Structured planning with milestones + runbook-first execution workflow | Ready |
|
||||
| web-automation | codex | Playwright + Camoufox browsing/scraping/auth automation | Ready |
|
||||
| web-automation | claude-code | Playwright + Camoufox browsing/scraping/auth automation | Ready |
|
||||
| web-automation | opencode | Playwright + Camoufox browsing/scraping/auth automation | Ready |
|
||||
|
||||
See install instructions: `docs/install.md`
|
||||
|
||||
## Requirement Gate (Superpowers dependency)
|
||||
|
||||
If a skill depends on Superpower skills (`https://github.com/obra/superpowers`), it must include an explicit prerequisite block in `SKILL.md` and fail fast with clear instructions when requirements are missing.
|
||||
|
||||
See: `docs/requirements-gate.md`
|
||||
|
||||
## Compatibility Policy
|
||||
|
||||
Each skill added to this repo should specify support status for:
|
||||
|
||||
- Codex
|
||||
- Claude Code
|
||||
- OpenCode
|
||||
|
||||
Use the template at `skills/_template/SKILL.md`.
|
||||
Each skill should explicitly document agent compatibility and any prerequisites directly in its own `SKILL.md`.
|
||||
|
||||
@@ -35,11 +35,50 @@ mkdir -p ~/.opencode/skills/create-plan
|
||||
cp -R skills/create-plan/opencode/* ~/.opencode/skills/create-plan/
|
||||
```
|
||||
|
||||
## 3) Verify
|
||||
## 3) Install `web-automation`
|
||||
|
||||
### Codex
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.codex/skills/web-automation
|
||||
cp -R skills/web-automation/codex/* ~/.codex/skills/web-automation/
|
||||
cd ~/.codex/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
### Claude Code
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.claude/skills/web-automation
|
||||
cp -R skills/web-automation/claude-code/* ~/.claude/skills/web-automation/
|
||||
cd ~/.claude/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
### OpenCode
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.opencode/skills/web-automation
|
||||
cp -R skills/web-automation/opencode/* ~/.opencode/skills/web-automation/
|
||||
cd ~/.opencode/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
Requirements for `web-automation`:
|
||||
- Node.js 20+
|
||||
- pnpm
|
||||
- Browser download/network access for Camoufox
|
||||
|
||||
## 4) Verify
|
||||
|
||||
- Ensure `SKILL.md` exists in the destination folder.
|
||||
- Ensure `templates/` files are present.
|
||||
- Start a new agent session and ask: "create a plan for ..." to verify trigger behavior.
|
||||
- Ensure `templates/` files (create-plan) or `scripts/` files (web-automation) are present.
|
||||
- Start a new agent session and ask:
|
||||
- "create a plan for ..." (create-plan)
|
||||
- "scrape this page ..." (web-automation)
|
||||
|
||||
## Notes
|
||||
|
||||
|
||||
@@ -7,6 +7,19 @@ description: Use when starting a new feature, project, or complex task that need
|
||||
|
||||
Create and maintain a local plan folder under `docs/plans/` (or `docs/plan/` if that repository convention is already in use).
|
||||
|
||||
## Prerequisite Check (MANDATORY)
|
||||
|
||||
This skill depends on Superpowers planning skills. Before proceeding, verify required dependencies exist.
|
||||
|
||||
Required:
|
||||
- Superpowers repo: `https://github.com/obra/superpowers`
|
||||
- `brainstorming` skill
|
||||
- `writing-plans` skill
|
||||
|
||||
If any dependency is missing, stop immediately and return:
|
||||
|
||||
"Missing dependency: Superpowers planning skills are required (`brainstorming`, `writing-plans`). Install from https://github.com/obra/superpowers, then retry."
|
||||
|
||||
## Process
|
||||
|
||||
### Phase 1: Analyze
|
||||
|
||||
@@ -7,6 +7,19 @@ description: Use when starting a new feature, project, or complex task that need
|
||||
|
||||
Create and maintain a local plan folder under `docs/plans/` (or `docs/plan/` if that repository convention is already in use).
|
||||
|
||||
## Prerequisite Check (MANDATORY)
|
||||
|
||||
This skill depends on Superpowers planning skills. Before proceeding, verify required dependencies exist.
|
||||
|
||||
Required:
|
||||
- Superpowers repo: `https://github.com/obra/superpowers`
|
||||
- `brainstorming` skill
|
||||
- `writing-plans` skill
|
||||
|
||||
If any dependency is missing, stop immediately and return:
|
||||
|
||||
"Missing dependency: Superpowers planning skills are required (`brainstorming`, `writing-plans`). Install from https://github.com/obra/superpowers, then retry."
|
||||
|
||||
## Process
|
||||
|
||||
### Phase 1: Bootstrap
|
||||
|
||||
@@ -7,6 +7,19 @@ description: Use when starting a new feature, project, or complex task that need
|
||||
|
||||
Create and maintain a local plan folder under `docs/plans/` (or `docs/plan/` if that repository convention is already in use).
|
||||
|
||||
## Prerequisite Check (MANDATORY)
|
||||
|
||||
This skill depends on Superpowers planning skills. Before proceeding, verify required dependencies exist.
|
||||
|
||||
Required:
|
||||
- Superpowers repo: `https://github.com/obra/superpowers`
|
||||
- `brainstorming` skill
|
||||
- `writing-plans` skill
|
||||
|
||||
If any dependency is missing, stop immediately and return:
|
||||
|
||||
"Missing dependency: Superpowers planning skills are required (`brainstorming`, `writing-plans`). Install from https://github.com/obra/superpowers, then retry."
|
||||
|
||||
## Process
|
||||
|
||||
### Phase 1: Analyze
|
||||
|
||||
36
skills/web-automation/claude-code/SKILL.md
Normal file
36
skills/web-automation/claude-code/SKILL.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: web-automation
|
||||
description: Browse and scrape web pages using Playwright with Camoufox anti-detection browser. Use when automating web workflows, extracting page content to markdown, handling authenticated sessions, or scraping websites with bot protection.
|
||||
---
|
||||
|
||||
# Web Automation with Camoufox (Claude Code)
|
||||
|
||||
Automated web browsing and scraping using Playwright with Camoufox anti-detection browser.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Node.js 20+
|
||||
- pnpm
|
||||
- Network access to download browser binaries
|
||||
|
||||
## First-Time Setup
|
||||
|
||||
```bash
|
||||
cd ~/.claude/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
If `pnpm install` reports native dependency build errors (e.g., for `better-sqlite3`), re-run the rebuild from the package directory named in the error output.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
- Browse page: `npx tsx browse.ts --url "https://example.com"`
|
||||
- Scrape markdown: `npx tsx scrape.ts --url "https://example.com" --mode main --output page.md`
|
||||
- Authenticate: `npx tsx auth.ts --url "https://example.com/login"`
|
||||
|
||||
## Notes
|
||||
|
||||
- Sessions persist in Camoufox profile storage.
|
||||
- Use `--wait <ms>` to pause after page load so dynamic pages can finish rendering.
|
||||
- Use `--mode selector --selector "..."` for targeted extraction.
|
||||
575
skills/web-automation/claude-code/scripts/auth.ts
Normal file
575
skills/web-automation/claude-code/scripts/auth.ts
Normal file
@@ -0,0 +1,575 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Authentication handler for web automation
|
||||
* Supports generic form login and Microsoft SSO (MSAL)
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx auth.ts --url "https://example.com/login" --type form
|
||||
* npx tsx auth.ts --url "https://example.com" --type msal
|
||||
* npx tsx auth.ts --url "https://example.com" --type auto
|
||||
*/
|
||||
|
||||
import { getPage, launchBrowser } from './browse.js';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
import { createInterface } from 'readline';
|
||||
|
||||
/**
 * Login strategy. `auto` means "inspect the page and decide"; the other two
 * force the generic form handler or the Microsoft SSO handler.
 */
type AuthType = 'auto' | 'form' | 'msal';

/** Input accepted by `authenticate`. */
interface AuthOptions {
  /** Page to open; it is also used to decide whether a session already exists. */
  url: string;
  /** Strategy to use, or `auto` to detect it from the loaded page. */
  authType: AuthType;
  /** Explicit credentials; when omitted the CAMOUFOX_* environment variables are consulted. */
  credentials?: {
    username: string;
    password: string;
  };
  /** Whether to run the browser without a window. */
  headless?: boolean;
  /** Per-step timeout in milliseconds for the login handlers. */
  timeout?: number;
}

/** Outcome reported by `authenticate`. */
interface AuthResult {
  /** True when the login flow (or an existing session) was accepted. */
  success: boolean;
  /** URL the page was on when the attempt finished. */
  finalUrl: string;
  /** Strategy that was actually used. */
  authType: AuthType;
  /** Human-readable summary of the outcome. */
  message: string;
}
|
||||
|
||||
/**
 * Resolve login credentials, preferring explicitly supplied values and falling
 * back to the CAMOUFOX_USERNAME / CAMOUFOX_PASSWORD environment variables.
 *
 * @param options Optional explicit username and/or password; empty values are
 *   treated as absent.
 * @returns The resolved pair, or `null` when either half is still missing.
 */
function getCredentials(options?: {
  username?: string;
  password?: string;
}): { username: string; password: string } | null {
  const { username: explicitUser, password: explicitPass } = options ?? {};

  // `||` (not `??`) on purpose: an empty string must also fall through to the env var.
  const username = explicitUser || process.env.CAMOUFOX_USERNAME;
  const password = explicitPass || process.env.CAMOUFOX_PASSWORD;

  return username && password ? { username, password } : null;
}
|
||||
|
||||
/**
 * Ask a question on the terminal and resolve with the line the user types.
 *
 * @param question Prompt text; it is printed exactly once.
 * @param hidden Reserved for masked input. Masking is NOT implemented here, so
 *   typed characters are still echoed; use a dedicated hidden-input library if
 *   secrets must be entered this way.
 * @returns The entered line.
 */
async function promptUser(question: string, hidden = false): Promise<string> {
  // Previously the hidden branch wrote the question to stdout and rl.question()
  // printed it again, giving a duplicated prompt with no masking.
  // The flag is kept for interface compatibility only.
  void hidden;

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    rl.question(question, (answer) => {
      // Release stdin so the process can exit once the caller is done.
      rl.close();
      resolve(answer);
    });
  });
}
|
||||
|
||||
/**
 * Guess which login strategy the currently loaded page needs.
 *
 * @param page Page that has already navigated to the site.
 * @returns `msal` when the URL contains a known Microsoft login host, `form`
 *   when the DOM has both a password-like and a username-like input, and
 *   `auto` when neither heuristic matches.
 */
async function detectAuthType(page: Page): Promise<AuthType> {
  const microsoftLoginHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];
  const currentUrl = page.url();
  if (microsoftLoginHosts.some((host) => currentUrl.includes(host))) {
    return 'msal';
  }

  // Runs in the browser: a login form needs both a password and a username/email input.
  const hasLoginForm = await page.evaluate(() => {
    const passwordSelector =
      'input[type="password"], input[name*="password"], input[id*="password"]';
    const usernameSelector =
      'input[type="email"], input[type="text"][name*="user"], input[type="text"][name*="email"], input[id*="user"], input[id*="email"]';
    return (
      document.querySelector(passwordSelector) !== null &&
      document.querySelector(usernameSelector) !== null
    );
  });

  return hasLoginForm ? 'form' : 'auto';
}
|
||||
|
||||
/** Return a handle for the first selector in `selectors` that matches an element, or `null`. */
async function findFirstMatch(page: Page, selectors: readonly string[]) {
  for (const selector of selectors) {
    const handle = await page.$(selector);
    if (handle) return handle;
  }
  return null;
}

/**
 * Fill in and submit a generic username/password login form.
 *
 * @param page Page currently showing the login form.
 * @param credentials Username (or email) and password to enter.
 * @param timeout Milliseconds to wait for the post-submit navigation.
 * @returns `false` when a required field is missing or the page shows an error
 *   after submitting; `true` when a navigation happened or no error element is
 *   visible (the site may have logged in without navigating).
 */
async function handleFormLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting form login...');

  // Selectors are ordered from most to least specific.
  const usernameField = await findFirstMatch(page, [
    'input[type="email"]',
    'input[name*="user" i]',
    'input[name*="email" i]',
    'input[id*="user" i]',
    'input[id*="email" i]',
    'input[autocomplete="username"]',
    'input[type="text"]:first-of-type',
  ]);
  if (!usernameField) {
    console.error('Could not find username/email field');
    return false;
  }
  await usernameField.fill(credentials.username);
  console.log('Filled username field');

  const passwordField = await findFirstMatch(page, [
    'input[type="password"]',
    'input[name*="password" i]',
    'input[id*="password" i]',
    'input[autocomplete="current-password"]',
  ]);
  if (!passwordField) {
    console.error('Could not find password field');
    return false;
  }
  await passwordField.fill(credentials.password);
  console.log('Filled password field');

  // Opt in to a persistent session when the form offers one.
  const rememberCheckbox = await page.$(
    'input[type="checkbox"][name*="remember" i], input[type="checkbox"][id*="remember" i]'
  );
  if (rememberCheckbox) {
    await rememberCheckbox.check();
    console.log('Checked "Remember me" checkbox');
  }

  const submitButton = await findFirstMatch(page, [
    'button[type="submit"]',
    'input[type="submit"]',
    'button:has-text("Sign in")',
    'button:has-text("Log in")',
    'button:has-text("Login")',
    'button:has-text("Submit")',
    '[role="button"]:has-text("Sign in")',
  ]);

  // Arm the navigation wait BEFORE submitting: if it is started afterwards, a
  // fast navigation can complete first and the wait then stalls for the whole
  // timeout. Both outcomes are mapped to a boolean so the promise never rejects
  // unhandled if the submit itself throws.
  const navigated = page
    .waitForNavigation({ timeout, waitUntil: 'domcontentloaded' })
    .then(
      () => true,
      () => false
    );

  if (submitButton) {
    await submitButton.click();
  } else {
    // No recognizable submit control: pressing Enter in the password field is the fallback.
    await passwordField.press('Enter');
  }
  console.log('Submitted login form');

  if (await navigated) {
    return true;
  }

  // No navigation happened: look for an error banner on the (still visible) login page.
  const errorMessages = await page.$$eval(
    '.error, .alert-danger, [role="alert"], .login-error',
    (els) => els.map((el) => el.textContent?.trim()).filter(Boolean)
  );
  if (errorMessages.length > 0) {
    console.error('Login error:', errorMessages.join(', '));
    return false;
  }

  return true; // Might have succeeded without navigation
}
|
||||
|
||||
/**
 * Drive the Microsoft SSO flow: email page, password page, optional
 * "Stay signed in?" prompt, then the redirect back to the application.
 *
 * @param page Page that is on, or about to be redirected to, a Microsoft login host.
 * @param credentials Account email and password.
 * @param timeout Milliseconds to wait for each step of the flow.
 * @returns `true` once the browser has left the Microsoft login host; `false`
 *   if any step is missing, times out, or a Conditional Access block is shown.
 */
async function handleMsalLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting Microsoft SSO login...');

  // Same host list the auth-type detection uses, so every URL classified as
  // 'msal' is also accepted here (previously only login.microsoftonline.com was).
  const microsoftLoginHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];
  const isMicrosoftLogin = (href: string): boolean =>
    microsoftLoginHosts.some((host) => href.includes(host));

  // If not already on Microsoft login, wait for redirect
  if (!isMicrosoftLogin(page.url())) {
    try {
      await page.waitForURL((url) => isMicrosoftLogin(url.href), { timeout: 10000 });
    } catch {
      console.log('Not redirected to Microsoft login');
      return false;
    }
  }

  // waitForSelector rejects on timeout; map that to null so this function
  // reports failure through its boolean result instead of throwing.
  const emailInput = await page
    .waitForSelector('input[type="email"], input[name="loginfmt"]', { timeout })
    .catch(() => null);
  if (!emailInput) {
    console.error('Could not find email input on Microsoft login');
    return false;
  }

  await emailInput.fill(credentials.username);
  console.log('Filled email field');

  const nextButton = await page.$('input[type="submit"], button[type="submit"]');
  if (nextButton) {
    await nextButton.click();
  } else {
    await emailInput.press('Enter');
  }

  // The password page loads asynchronously after the email step.
  const passwordInput = await page
    .waitForSelector('input[type="password"], input[name="passwd"]', { timeout })
    .catch(() => null);
  if (!passwordInput) {
    // Might be using passwordless auth or different flow
    console.log('Password field not found - might be using different auth flow');
    return false;
  }

  await passwordInput.fill(credentials.password);
  console.log('Filled password field');

  const signInButton = await page.$('input[type="submit"], button[type="submit"]');
  if (signInButton) {
    await signInButton.click();
  } else {
    await passwordInput.press('Enter');
  }

  // Handle "Stay signed in?" prompt; it is optional, so a timeout here is not an error.
  try {
    const staySignedInButton = await page.waitForSelector(
      'input[value="Yes"], button:has-text("Yes")',
      { timeout: 5000 }
    );
    if (staySignedInButton) {
      await staySignedInButton.click();
      console.log('Clicked "Stay signed in" button');
    }
  } catch {
    // Prompt might not appear
  }

  // Check for Conditional Access Policy error
  const caError = await page.$('text=Conditional Access policy');
  if (caError) {
    console.error('Blocked by Conditional Access Policy');
    // Take screenshot for debugging
    await page.screenshot({ path: 'ca-policy-error.png' });
    console.log('Screenshot saved: ca-policy-error.png');
    return false;
  }

  // Success means the browser has been redirected away from the Microsoft login host.
  try {
    await page.waitForURL((url) => !isMicrosoftLogin(url.href), { timeout });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Heuristic check for an existing session.
 *
 * @param page Page that has already navigated.
 * @param targetUrl URL the caller asked for.
 * @returns `true` only when the page is still under `targetUrl` (it was not
 *   redirected elsewhere) and shows none of the common login-form markers.
 */
async function isAuthenticated(page: Page, targetUrl: string): Promise<boolean> {
  // Being redirected away from the target is treated as "not logged in".
  if (!page.url().startsWith(targetUrl)) {
    return false;
  }

  // Runs in the browser: any of these elements suggests a login screen.
  const showsLoginUi = await page.evaluate(() =>
    [
      'input[type="password"]',
      'form[action*="login"]',
      'form[action*="signin"]',
      '.login-form',
      '#login',
    ].some((selector) => document.querySelector(selector) !== null)
  );

  return !showsLoginUi;
}
|
||||
|
||||
/**
 * Open `options.url` in the persistent-profile browser and make sure the
 * session is logged in.
 *
 * Flow: reuse an existing session if the page already looks authenticated;
 * otherwise log in with the supplied or environment credentials; with no
 * credentials at all, reopen the browser in headed mode and wait for the user
 * to log in by hand.
 *
 * @param options Target URL, strategy, optional credentials, headless flag
 *   (default `true`) and per-step timeout (default 30000 ms).
 * @returns The outcome. A manual login is reported as successful without being verified.
 */
export async function authenticate(options: AuthOptions): Promise<AuthResult> {
  const timeout = options.timeout ?? 30000;

  // Tracks whichever context is currently open so the `finally` below closes
  // exactly that one, and never closes the same context twice.
  let browser: BrowserContext | undefined = await launchBrowser({
    headless: options.headless ?? true,
  });

  try {
    // Inside the try so a failing newPage() does not leak the browser.
    const page = await browser.newPage();

    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if already authenticated
    if (await isAuthenticated(page, options.url)) {
      return {
        success: true,
        finalUrl: page.url(),
        authType: 'auto',
        message: 'Already authenticated (session persisted from profile)',
      };
    }

    const credentials = options.credentials ?? getCredentials();

    if (!credentials) {
      // No credentials - open interactive browser
      console.log('\nNo credentials provided. Opening browser for manual login...');
      console.log('Please complete the login process manually.');
      console.log('The session will be saved to your profile.');

      // Close the first context before launching the headed one, as the original flow did.
      await browser.close();
      browser = undefined;
      browser = await launchBrowser({ headless: false });

      const interactivePage = await browser.newPage();
      await interactivePage.goto(options.url);

      await promptUser('\nPress Enter when you have completed login...');

      // The URL is read before the `finally` closes the interactive browser.
      return {
        success: true,
        finalUrl: interactivePage.url(),
        authType: 'auto',
        message: 'Manual login completed - session saved to profile',
      };
    }

    // Detect auth type if auto
    let authType = options.authType;
    if (authType === 'auto') {
      authType = await detectAuthType(page);
      console.log(`Detected auth type: ${authType}`);
    }

    // Anything that is not Microsoft SSO (including an undetected 'auto') uses the form handler.
    let success = false;
    switch (authType) {
      case 'msal':
        success = await handleMsalLogin(page, credentials, timeout);
        break;
      case 'form':
      default:
        success = await handleFormLogin(page, credentials, timeout);
        break;
    }

    return {
      success,
      finalUrl: page.url(),
      authType,
      message: success
        ? `Authentication successful - session saved to profile`
        : 'Authentication failed',
    };
  } finally {
    await browser?.close();
  }
}
|
||||
|
||||
/**
 * Open `url` and return a live page that is logged in, performing the login
 * first when the stored session is missing or expired.
 *
 * The caller owns the returned browser and must close it. On any failure the
 * browser is closed here before the error is rethrown.
 *
 * @param url Page to open.
 * @param options Optional credentials (falls back to the CAMOUFOX_* environment
 *   variables) and headless flag (default `true`).
 * @returns The page and its browser context.
 * @throws Error when credentials are required but missing, when the login
 *   handler reports failure, or when navigation fails.
 */
export async function navigateAuthenticated(
  url: string,
  options?: {
    credentials?: { username: string; password: string };
    headless?: boolean;
  }
): Promise<{ page: Page; browser: BrowserContext }> {
  const { page, browser } = await getPage({ headless: options?.headless ?? true });

  try {
    await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if we need to authenticate
    if (!(await isAuthenticated(page, url))) {
      console.log('Session expired or not authenticated. Attempting login...');

      const credentials = options?.credentials ?? getCredentials();
      if (!credentials) {
        throw new Error(
          'Authentication required but no credentials provided. ' +
            'Set CAMOUFOX_USERNAME and CAMOUFOX_PASSWORD environment variables.'
        );
      }

      // Detect and handle auth
      const authType = await detectAuthType(page);
      const success =
        authType === 'msal'
          ? await handleMsalLogin(page, credentials, 30000)
          : await handleFormLogin(page, credentials, 30000);

      if (!success) {
        throw new Error('Authentication failed');
      }

      // Navigate back to original URL if we were redirected
      if (!page.url().startsWith(url)) {
        await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });
      }
    }

    return { page, browser };
  } catch (error) {
    // Nothing is handed back to the caller on failure, so release the browser
    // here. A failing close must not mask the original error.
    await browser.close().catch(() => undefined);
    throw error;
  }
}
|
||||
|
||||
/**
 * CLI entry point: parse flags, run `authenticate`, print the result and exit
 * with status 0 on success or 1 on failure or invalid usage.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'type', 'username', 'password'],
    boolean: ['headless', 'help'],
    default: {
      type: 'auto',
      headless: false, // Default to headed for auth so user can see/interact
    },
    alias: {
      u: 'url',
      t: 'type',
      h: 'help',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Authentication Handler

Usage:
npx tsx auth.ts --url <url> [options]

Options:
-u, --url <url> URL to authenticate (required)
-t, --type <type> Auth type: auto, form, or msal (default: auto)
--username <user> Username/email (or set CAMOUFOX_USERNAME env var)
--password <pass> Password (or set CAMOUFOX_PASSWORD env var)
--headless <bool> Run in headless mode (default: false for auth)
-h, --help Show this help message

Auth Types:
auto Auto-detect authentication type
form Generic username/password form
msal Microsoft SSO (login.microsoftonline.com)

Environment Variables:
CAMOUFOX_USERNAME Default username/email for authentication
CAMOUFOX_PASSWORD Default password for authentication

Examples:
# Interactive login (no credentials, opens browser)
npx tsx auth.ts --url "https://example.com/login"

# Form login with credentials
npx tsx auth.ts --url "https://example.com/login" --type form \\
--username "user@example.com" --password "secret"

# Microsoft SSO login
CAMOUFOX_USERNAME=user@company.com CAMOUFOX_PASSWORD=secret \\
npx tsx auth.ts --url "https://internal.company.com" --type msal

Notes:
- Session is saved to ~/.camoufox-profile/ for persistence
- After successful auth, subsequent browses will be authenticated
- Use --headless false if you need to handle MFA manually
`);
    // Asking for help is a success; a missing --url is a usage error.
    process.exit(args.help ? 0 : 1);
  }

  // Validate with a guard instead of an unchecked cast.
  const isAuthType = (value: unknown): value is AuthType =>
    value === 'auto' || value === 'form' || value === 'msal';

  const authType: unknown = args.type;
  if (!isAuthType(authType)) {
    console.error(`Invalid auth type: ${authType}. Must be auto, form, or msal.`);
    process.exit(1);
  }

  try {
    const result = await authenticate({
      url: args.url,
      authType,
      // Merge flags with the environment: previously a lone --username or
      // --password was silently dropped unless both flags were given.
      credentials:
        getCredentials({ username: args.username, password: args.password }) ?? undefined,
      headless: args.headless,
    });

    console.log(`\nAuthentication result:`);
    console.log(` Success: ${result.success}`);
    console.log(` Auth type: ${result.authType}`);
    console.log(` Final URL: ${result.finalUrl}`);
    console.log(` Message: ${result.message}`);

    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the CLI only when this file itself is the entry script. The path must END
// in a path component named exactly `auth.ts`; a plain substring test would also
// fire for files such as `oauth.ts` or `auth.ts.bak` that merely import this module.
const isMainModule = /(^|[\\/])auth\.ts$/.test(process.argv[1] ?? '');
if (isMainModule) {
  // main() handles its own errors and exits; `void` marks the promise as intentionally unawaited.
  void main();
}
|
||||
195
skills/web-automation/claude-code/scripts/browse.ts
Normal file
195
skills/web-automation/claude-code/scripts/browse.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Browser launcher using Camoufox with persistent profile
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx browse.ts --url "https://example.com"
|
||||
* npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
|
||||
* npx tsx browse.ts --url "https://example.com" --headless false --wait 5000
|
||||
*/
|
||||
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { existsSync, mkdirSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
|
||||
/** Input accepted by `browse`. */
interface BrowseOptions {
  /** Address to navigate to. */
  url: string;
  /** Run without a visible window; when unset, the launcher decides. */
  headless?: boolean;
  /** Capture a full-page screenshot after loading. */
  screenshot?: boolean;
  /** Screenshot destination path. */
  output?: string;
  /** Extra delay after page load, in milliseconds. */
  wait?: number;
  /** Navigation timeout, in milliseconds. */
  timeout?: number;
  /** Keep the browser open instead of closing it when done. */
  interactive?: boolean;
}

/** Summary returned by `browse`. */
interface BrowseResult {
  /** Document title after loading. */
  title: string;
  /** URL after any redirects. */
  url: string;
  /** Where the screenshot was written, when one was requested. */
  screenshotPath?: string;
}
|
||||
|
||||
/**
 * Locate the browser profile directory.
 *
 * @returns `CAMOUFOX_PROFILE_PATH` verbatim when it is set (that directory is
 *   not created here); otherwise `~/.camoufox-profile`, created on first use.
 */
const getProfilePath = (): string => {
  const { CAMOUFOX_PROFILE_PATH: override } = process.env;
  if (override) {
    return override;
  }

  const defaultDir = join(homedir(), '.camoufox-profile');
  const alreadyPresent = existsSync(defaultDir);
  if (!alreadyPresent) {
    mkdirSync(defaultDir, { recursive: true });
  }
  return defaultDir;
};
|
||||
|
||||
/**
 * Start Camoufox against the persistent profile directory.
 *
 * Headless resolution order: the explicit `options.headless`, then the
 * `CAMOUFOX_HEADLESS` environment variable (only the exact string `'true'`
 * enables it), then `true`.
 *
 * @param options Launch settings.
 * @returns The browser context backed by the profile.
 */
export async function launchBrowser(options: {
  headless?: boolean;
}): Promise<BrowserContext> {
  const profilePath = getProfilePath();

  const envHeadless = process.env.CAMOUFOX_HEADLESS;
  let headless: boolean;
  if (options.headless != null) {
    headless = options.headless;
  } else if (envHeadless) {
    headless = envHeadless === 'true';
  } else {
    headless = true;
  }

  console.log(`Using profile: ${profilePath}`);
  console.log(`Headless mode: ${headless}`);

  const context = await Camoufox({
    user_data_dir: profilePath,
    headless,
  });
  return context;
}
|
||||
|
||||
/**
 * Load a page, report its title and final URL, and optionally capture a
 * full-page screenshot.
 *
 * In interactive mode the function never returns: it parks on a promise that
 * never settles so the browser stays open until the process is interrupted.
 *
 * @param options See `BrowseOptions`; the navigation timeout defaults to
 *   60000 ms and the screenshot path to `screenshot.png`.
 * @returns Title, final URL and (when requested) the screenshot path.
 */
export async function browse(options: BrowseOptions): Promise<BrowseResult> {
  const { url, wait, screenshot, interactive } = options;

  const browser = await launchBrowser({ headless: options.headless });
  const page = await browser.newPage();

  try {
    console.log(`Navigating to: ${url}`);
    await page.goto(url, {
      timeout: options.timeout ?? 60000,
      waitUntil: 'domcontentloaded',
    });

    // Optional settle time for pages that keep rendering after DOMContentLoaded.
    if (wait) {
      console.log(`Waiting ${wait}ms...`);
      await page.waitForTimeout(wait);
    }

    const title = await page.title();
    const result: BrowseResult = { title, url: page.url() };

    console.log(`Page title: ${result.title}`);
    console.log(`Final URL: ${result.url}`);

    if (screenshot) {
      const outputPath = options.output ?? 'screenshot.png';
      await page.screenshot({ path: outputPath, fullPage: true });
      result.screenshotPath = outputPath;
      console.log(`Screenshot saved: ${outputPath}`);
    }

    if (interactive) {
      console.log('\nInteractive mode - browser will stay open.');
      console.log('Press Ctrl+C to close.');
      // Never settles on purpose: it holds the function here until the process is killed.
      await new Promise(() => {});
    }

    return result;
  } finally {
    // In interactive mode the browser is deliberately left open.
    if (!interactive) {
      await browser.close();
    }
  }
}
|
||||
|
||||
/**
 * Launch the profile-backed browser and open one fresh page in it.
 *
 * The caller owns both returned objects and is responsible for closing the browser.
 *
 * @param options Optional headless override, forwarded to `launchBrowser`.
 * @returns The new page together with its browser context.
 */
export async function getPage(options?: {
  headless?: boolean;
}): Promise<{ page: Page; browser: BrowserContext }> {
  const headless = options?.headless;
  const browser = await launchBrowser({ headless });
  return { page: await browser.newPage(), browser };
}
|
||||
|
||||
/**
 * CLI entry point: parse command-line flags, print usage when `--help` is
 * given or `--url` is missing, then run `browse` with the parsed options.
 *
 * Exits with status 0 after `--help`, and with status 1 when the URL is
 * missing, a numeric flag is invalid, or browsing fails.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'output'],
    boolean: ['screenshot', 'headless', 'interactive', 'help'],
    default: {
      headless: true,
      screenshot: false,
      interactive: false,
    },
    alias: {
      u: 'url',
      o: 'output',
      s: 'screenshot',
      h: 'help',
      i: 'interactive',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Browser with Camoufox

Usage:
npx tsx browse.ts --url <url> [options]

Options:
-u, --url <url> URL to navigate to (required)
-s, --screenshot Take a screenshot of the page
-o, --output <path> Output path for screenshot (default: screenshot.png)
--headless <bool> Run in headless mode (default: true)
--wait <ms> Wait time after page load in milliseconds
--timeout <ms> Navigation timeout (default: 60000)
-i, --interactive Keep browser open for manual interaction
-h, --help Show this help message

Examples:
npx tsx browse.ts --url "https://example.com"
npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
npx tsx browse.ts --url "https://example.com" --headless false --interactive

Environment Variables:
CAMOUFOX_PROFILE_PATH Custom profile directory (default: ~/.camoufox-profile/)
CAMOUFOX_HEADLESS Default headless mode (true/false)
`);
    process.exit(args.help ? 0 : 1);
  }

  // `--wait` and `--timeout` are not declared to the parser, so their values
  // may arrive as a number, a string, or `true` for a bare flag. Anything that
  // does not parse to a non-negative integer is rejected instead of being
  // forwarded as NaN.
  const parseMilliseconds = (flag: string): number | undefined => {
    const raw: unknown = args[flag];
    if (!raw) {
      return undefined;
    }
    const value = parseInt(String(raw), 10);
    if (Number.isNaN(value) || value < 0) {
      throw new Error(
        `Invalid value for --${flag}: ${String(raw)} (expected a non-negative number of milliseconds)`
      );
    }
    return value;
  };

  try {
    await browse({
      url: args.url,
      headless: args.headless,
      screenshot: args.screenshot,
      output: args.output,
      wait: parseMilliseconds('wait'),
      timeout: parseMilliseconds('timeout'),
      interactive: args.interactive,
    });
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the CLI only when the script path given to Node mentions this file,
// so importing the module from another script does not trigger a run.
const entryScript = process.argv[1];
const isMainModule = entryScript?.includes('browse.ts');
if (isMainModule) {
  void main();
}
|
||||
27
skills/web-automation/claude-code/scripts/package.json
Normal file
27
skills/web-automation/claude-code/scripts/package.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "web-automation-scripts",
|
||||
"version": "1.0.0",
|
||||
"description": "Web browsing and scraping scripts using Camoufox",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"browse": "tsx browse.ts",
|
||||
"scrape": "tsx scrape.ts",
|
||||
"fetch-browser": "npx camoufox-js fetch"
|
||||
},
|
||||
"dependencies": {
|
||||
"camoufox-js": "^0.8.5",
|
||||
"playwright-core": "^1.40.0",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"jsdom": "^24.0.0",
|
||||
"minimist": "^1.2.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.3.0",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/minimist": "^1.2.5",
|
||||
"tsx": "^4.7.0"
|
||||
}
|
||||
}
|
||||
1610
skills/web-automation/claude-code/scripts/pnpm-lock.yaml
generated
Normal file
1610
skills/web-automation/claude-code/scripts/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
351
skills/web-automation/claude-code/scripts/scrape.ts
Normal file
351
skills/web-automation/claude-code/scripts/scrape.ts
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Web scraper that extracts content to markdown
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode main
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".content"
|
||||
*/
|
||||
|
||||
import TurndownService from 'turndown';
|
||||
import * as turndownPluginGfm from 'turndown-plugin-gfm';
|
||||
import { Readability } from '@mozilla/readability';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { writeFileSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
/** How the HTML to convert is selected from the loaded page. */
type ScrapeMode = 'main' | 'full' | 'selector';

/** Options accepted by `scrape`. */
interface ScrapeOptions {
  /** Address of the page to load. */
  url: string;
  /** Content-selection strategy. */
  mode: ScrapeMode;
  /** CSS selector of the element to extract; required when `mode` is `'selector'`. */
  selector?: string;
  /** When set, the generated markdown is also written to this file path. */
  output?: string;
  /** Keep hyperlinks in the markdown (treated as `true` when omitted). */
  includeLinks?: boolean;
  /** Keep tables in the markdown (treated as `true` when omitted). */
  includeTables?: boolean;
  /** Keep images in the markdown (treated as `false` when omitted). */
  includeImages?: boolean;
  /** Run the browser headless (treated as `true` when omitted). */
  headless?: boolean;
  /** Extra delay in milliseconds after navigation, for dynamic content. */
  wait?: number;
}

/** Outcome of a scrape. */
interface ScrapeResult {
  /** Title of the page or extracted article. */
  title: string;
  /** URL of the page after navigation. */
  url: string;
  /** Generated markdown, including the metadata comment header. */
  markdown: string;
  /** Author line, when the main-content extractor reported one. */
  byline?: string;
  /** Short summary, when the main-content extractor reported one. */
  excerpt?: string;
}
|
||||
|
||||
/**
 * Build a Turndown converter configured for GitHub Flavored Markdown output.
 *
 * @param options - Content toggles:
 *   - `includeLinks`: when falsy, anchors are replaced by their text.
 *   - `includeTables`: when explicitly `false`, tables are dropped from the
 *     output (previously this flag was accepted but had no effect).
 *   - `includeImages`: when falsy, images are dropped from the output.
 * @returns The configured converter.
 */
function createTurndownService(options: {
  includeLinks?: boolean;
  includeTables?: boolean;
  includeImages?: boolean;
}): TurndownService {
  const turndown = new TurndownService({
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '*',
    strongDelimiter: '**',
    linkStyle: 'inlined',
  });

  // Add GFM support (tables, strikethrough, task lists)
  turndown.use(turndownPluginGfm.gfm);

  // Fenced code blocks: for a <pre> whose first child is <code>, take the
  // language from a `language-xxx` class on the <code> element when present.
  turndown.addRule('codeBlockWithLanguage', {
    filter: (node) => {
      return (
        node.nodeName === 'PRE' &&
        node.firstChild?.nodeName === 'CODE'
      );
    },
    replacement: (_content, node) => {
      const codeNode = node.firstChild as HTMLElement;
      const className = codeNode.getAttribute('class') || '';
      const langMatch = className.match(/language-(\w+)/);
      const lang = langMatch ? langMatch[1] : '';
      const code = codeNode.textContent || '';
      return `\n\n\`\`\`${lang}\n${code}\n\`\`\`\n\n`;
    },
  });

  // Remove images if not included
  if (!options.includeImages) {
    turndown.addRule('removeImages', {
      filter: 'img',
      replacement: () => '',
    });
  }

  // Remove links but keep text if not included
  if (!options.includeLinks) {
    turndown.addRule('removeLinks', {
      filter: 'a',
      replacement: (content) => content,
    });
  }

  // Remove tables only on an explicit opt-out, so callers that omit the flag
  // keep the previous behavior (tables rendered by the GFM plugin). The rule
  // is added after the plugin, following the same override pattern as the
  // image and link rules above.
  if (options.includeTables === false) {
    turndown.addRule('removeTables', {
      filter: 'table',
      replacement: () => '',
    });
  }

  // Remove script, style, nav, footer, aside, noscript elements
  turndown.remove(['script', 'style', 'nav', 'footer', 'aside', 'noscript']);

  return turndown;
}
|
||||
|
||||
/**
 * Run Readability over an HTML document and return the extracted article.
 *
 * @param html - Full HTML of the page.
 * @param url - Page URL, passed to JSDOM as the document URL.
 * @returns Article HTML and title, plus byline and excerpt when non-empty.
 * @throws Error when Readability cannot produce an article.
 */
function extractMainContent(html: string, url: string): {
  content: string;
  title: string;
  byline?: string;
  excerpt?: string;
} {
  const { document } = new JSDOM(html, { url }).window;
  const article = new Readability(document).parse();

  if (!article) {
    throw new Error('Could not extract main content from page');
  }

  const { content, title, byline, excerpt } = article;
  return {
    content,
    title,
    // Empty or missing values are normalised to undefined.
    byline: byline || undefined,
    excerpt: excerpt || undefined,
  };
}
|
||||
|
||||
/**
 * Load a page in the browser and convert the selected content to markdown.
 *
 * Content selection depends on `options.mode`:
 * - `'main'`: Readability extraction of the main article.
 * - `'selector'`: inner HTML of the first element matching `options.selector`.
 * - `'full'` (and any other value): the page body after removing common
 *   non-content elements.
 *
 * The markdown is prefixed with HTML-comment metadata (source URL, optional
 * author and excerpt, timestamp) and, when `options.output` is set, written to
 * that file. The browser is always closed before returning.
 *
 * @param options - Scrape settings; see `ScrapeOptions`.
 * @returns Title, final URL, markdown and any extracted byline/excerpt.
 * @throws Error when selector mode has no selector, the selector matches
 *   nothing, or main-content extraction fails.
 */
export async function scrape(options: ScrapeOptions): Promise<ScrapeResult> {
  // Fail fast: reject an unusable selector request before paying for a
  // browser launch and a page load.
  if (options.mode === 'selector' && !options.selector) {
    throw new Error('Selector mode requires --selector option');
  }

  const { page, browser } = await getPage({ headless: options.headless ?? true });

  try {
    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, {
      timeout: 60000,
      waitUntil: 'domcontentloaded',
    });

    // Wait if specified
    if (options.wait) {
      console.log(`Waiting ${options.wait}ms for dynamic content...`);
      await page.waitForTimeout(options.wait);
    }

    const pageTitle = await page.title();
    const pageUrl = page.url();

    let html: string;
    let title = pageTitle;
    let byline: string | undefined;
    let excerpt: string | undefined;

    // Get HTML based on mode
    switch (options.mode) {
      case 'main': {
        // Get full page HTML and extract with Readability
        const fullHtml = await page.content();
        const extracted = extractMainContent(fullHtml, pageUrl);
        html = extracted.content;
        title = extracted.title || pageTitle;
        byline = extracted.byline;
        excerpt = extracted.excerpt;
        break;
      }

      case 'selector': {
        // Already validated before launch; repeated here so the compiler can
        // narrow `options.selector` to a string.
        if (!options.selector) {
          throw new Error('Selector mode requires --selector option');
        }
        const element = await page.$(options.selector);
        if (!element) {
          throw new Error(`Selector not found: ${options.selector}`);
        }
        html = await element.innerHTML();
        break;
      }

      case 'full':
      default: {
        // Get body content, excluding common non-content elements.
        // Note: this mutates the live page DOM, which is discarded afterwards.
        html = await page.evaluate(() => {
          const selectorsToRemove = [
            'script', 'style', 'noscript', 'iframe',
            'nav', 'header', 'footer', '.cookie-banner',
            '.advertisement', '.ads', '#ads', '.social-share',
            '.comments', '#comments', '.sidebar'
          ];

          selectorsToRemove.forEach(selector => {
            document.querySelectorAll(selector).forEach(el => el.remove());
          });

          return document.body.innerHTML;
        });
        break;
      }
    }

    // Convert to markdown
    const turndown = createTurndownService({
      includeLinks: options.includeLinks ?? true,
      includeTables: options.includeTables ?? true,
      includeImages: options.includeImages ?? false,
    });

    let markdown = turndown.turndown(html);

    // Add title as H1 if not already present
    if (!markdown.startsWith('# ')) {
      markdown = `# ${title}\n\n${markdown}`;
    }

    // Metadata header: only the optional entries are filtered out. The blank
    // line separating the header from the document is added explicitly below
    // (it used to be an empty array entry that `.filter(Boolean)` removed).
    const metadataLines = [
      `<!-- Scraped from: ${pageUrl} -->`,
      byline ? `<!-- Author: ${byline} -->` : null,
      excerpt ? `<!-- Excerpt: ${excerpt} -->` : null,
      `<!-- Scraped at: ${new Date().toISOString()} -->`,
    ].filter((line): line is string => line !== null);

    markdown = `${metadataLines.join('\n')}\n\n${markdown}`;

    // Clean up excessive whitespace
    markdown = markdown
      .replace(/\n{4,}/g, '\n\n\n')
      .replace(/[ \t]+$/gm, '')
      .trim();

    const result: ScrapeResult = {
      title,
      url: pageUrl,
      markdown,
      byline,
      excerpt,
    };

    // Save to file if output specified
    if (options.output) {
      writeFileSync(options.output, markdown, 'utf-8');
      console.log(`Markdown saved to: ${options.output}`);
    }

    return result;
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
/**
 * CLI entry point: parse command-line flags, print usage when `--help` is
 * given or `--url` is missing, validate the mode, run `scrape`, and print a
 * summary (plus the markdown itself when no output file was requested).
 *
 * Exits with status 0 after `--help`, and with status 1 when the URL is
 * missing, the mode or `--wait` value is invalid, or scraping fails.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'mode', 'selector', 'output'],
    boolean: ['headless', 'links', 'tables', 'images', 'help'],
    default: {
      mode: 'main',
      headless: true,
      links: true,
      tables: true,
      images: false,
    },
    alias: {
      u: 'url',
      m: 'mode',
      s: 'selector',
      o: 'output',
      h: 'help',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Scraper - Extract content to Markdown

Usage:
npx tsx scrape.ts --url <url> [options]

Options:
-u, --url <url> URL to scrape (required)
-m, --mode <mode> Scrape mode: main, full, or selector (default: main)
-s, --selector <sel> CSS selector for selector mode
-o, --output <path> Output file path for markdown
--headless <bool> Run in headless mode (default: true)
--wait <ms> Wait time for dynamic content
--links Include links in output (default: true)
--tables Include tables in output (default: true)
--images Include images in output (default: false)
-h, --help Show this help message

Scrape Modes:
main Extract main article content using Readability (best for articles)
full Full page content with common elements removed
selector Extract specific element by CSS selector

Examples:
npx tsx scrape.ts --url "https://docs.example.com/guide" --mode main
npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".api-docs"
npx tsx scrape.ts --url "https://example.com" --mode main --no-links --output clean.md

Output Format:
- GitHub Flavored Markdown (tables, strikethrough, task lists)
- Proper heading hierarchy
- Code blocks with language detection
- Metadata comments at top (source URL, date)
`);
    process.exit(args.help ? 0 : 1);
  }

  const mode = args.mode as ScrapeMode;
  if (!['main', 'full', 'selector'].includes(mode)) {
    console.error(`Invalid mode: ${mode}. Must be main, full, or selector.`);
    process.exit(1);
  }

  try {
    // `--wait` is not declared to the parser, so it may arrive as a number, a
    // string, or `true` for a bare flag. Reject anything that is not a
    // non-negative integer instead of silently forwarding NaN.
    const rawWait: unknown = args.wait;
    let wait: number | undefined;
    if (rawWait) {
      wait = parseInt(String(rawWait), 10);
      if (Number.isNaN(wait) || wait < 0) {
        throw new Error(
          `Invalid value for --wait: ${String(rawWait)} (expected a non-negative number of milliseconds)`
        );
      }
    }

    const result = await scrape({
      url: args.url,
      mode,
      selector: args.selector,
      output: args.output,
      includeLinks: args.links,
      includeTables: args.tables,
      includeImages: args.images,
      headless: args.headless,
      wait,
    });

    // Print result summary
    console.log(`\nScrape complete:`);
    console.log(` Title: ${result.title}`);
    console.log(` URL: ${result.url}`);
    if (result.byline) console.log(` Author: ${result.byline}`);
    console.log(` Markdown length: ${result.markdown.length} chars`);

    // Print markdown if not saved to file
    if (!args.output) {
      console.log('\n--- Markdown Output ---\n');
      console.log(result.markdown);
    }
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the CLI only when the script path given to Node mentions this file,
// so importing `scrape` from another module does not trigger a run.
const entryScript = process.argv[1];
const isMainModule = entryScript?.includes('scrape.ts');
if (isMainModule) {
  void main();
}
|
||||
39
skills/web-automation/claude-code/scripts/test-full.ts
Normal file
39
skills/web-automation/claude-code/scripts/test-full.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox with the persistent profile directory, open
 * GitHub, and save a screenshot to /tmp/github-test.png.
 *
 * The browser is closed even when a step fails.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with full options...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
    // humanize: 1.5, // Test without this first
    // geoip: true, // Test without this first
    // enable_cache: true,
    // block_webrtc: false,
  });

  console.log('Browser launched');

  try {
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://github.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());

    await page.screenshot({ path: '/tmp/github-test.png' });
    console.log('Screenshot saved');
  } finally {
    // Always release the browser so a failed step does not leave it running.
    await browser.close();
  }

  console.log('Done');
}

// Report the failure and signal it through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
22
skills/web-automation/claude-code/scripts/test-minimal.ts
Normal file
22
skills/web-automation/claude-code/scripts/test-minimal.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox with only `headless` set and open example.com.
 *
 * The browser is closed even when a step fails.
 */
async function test() {
  console.log('Launching Camoufox with minimal config...');

  const browser = await Camoufox({
    headless: true,
  });

  console.log('Browser launched');

  try {
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    // Always release the browser so a failed step does not leave it running.
    await browser.close();
  }

  console.log('Done');
}

// Report the failure and signal it through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
32
skills/web-automation/claude-code/scripts/test-profile.ts
Normal file
32
skills/web-automation/claude-code/scripts/test-profile.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox with the persistent profile directory
 * (`user_data_dir`) and open example.com.
 *
 * The browser is closed even when a step fails.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with user_data_dir...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
  });

  console.log('Browser launched');

  try {
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    // Always release the browser so a failed step does not leave it running.
    await browser.close();
  }

  console.log('Done');
}

// Report the failure and signal it through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
16
skills/web-automation/claude-code/scripts/tsconfig.json
Normal file
16
skills/web-automation/claude-code/scripts/tsconfig.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"resolveJsonModule": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["*.ts"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
8
skills/web-automation/claude-code/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
8
skills/web-automation/claude-code/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Minimal ambient typings for `turndown-plugin-gfm`, which ships without its
 * own declarations. Each export is a Turndown plugin intended to be passed to
 * `TurndownService#use`.
 */
declare module 'turndown-plugin-gfm' {
  import TurndownService from 'turndown';

  /** Applies all of the plugins below in one call. */
  export function gfm(turndownService: TurndownService): void;
  /** Converts highlighted code blocks (declared here for completeness). */
  export function highlightedCodeBlock(turndownService: TurndownService): void;
  /** Converts strikethrough markup. */
  export function strikethrough(turndownService: TurndownService): void;
  /** Converts HTML tables to GFM tables. */
  export function tables(turndownService: TurndownService): void;
  /** Converts checkbox list items to GFM task list items. */
  export function taskListItems(turndownService: TurndownService): void;
}
|
||||
36
skills/web-automation/codex/SKILL.md
Normal file
36
skills/web-automation/codex/SKILL.md
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: web-automation
|
||||
description: Browse and scrape web pages using Playwright with Camoufox anti-detection browser. Use when automating web workflows, extracting page content to markdown, handling authenticated sessions, or scraping websites with bot protection.
|
||||
---
|
||||
|
||||
# Web Automation with Camoufox (Codex)
|
||||
|
||||
Automated web browsing and scraping using Playwright with Camoufox anti-detection browser.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Node.js 20+
|
||||
- pnpm
|
||||
- Network access to download browser binaries
|
||||
|
||||
## First-Time Setup
|
||||
|
||||
```bash
|
||||
cd ~/.codex/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
If `pnpm install` fails while building a native dependency (e.g., `better-sqlite3`), rebuild that package from the directory named in the error output, then re-run the setup commands.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
- Browse page: `npx tsx browse.ts --url "https://example.com"`
|
||||
- Scrape markdown: `npx tsx scrape.ts --url "https://example.com" --mode main --output page.md`
|
||||
- Authenticate: `npx tsx auth.ts --url "https://example.com/login"`
|
||||
|
||||
## Notes
|
||||
|
||||
- Sessions persist in Camoufox profile storage.
|
||||
- Use `--wait` for dynamic pages.
|
||||
- Use `--mode selector --selector "..."` for targeted extraction.
|
||||
575
skills/web-automation/codex/scripts/auth.ts
Normal file
575
skills/web-automation/codex/scripts/auth.ts
Normal file
@@ -0,0 +1,575 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Authentication handler for web automation
|
||||
* Supports generic form login and Microsoft SSO (MSAL)
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx auth.ts --url "https://example.com/login" --type form
|
||||
* npx tsx auth.ts --url "https://example.com" --type msal
|
||||
* npx tsx auth.ts --url "https://example.com" --type auto
|
||||
*/
|
||||
|
||||
import { getPage, launchBrowser } from './browse.js';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
import { createInterface } from 'readline';
|
||||
|
||||
/** Login flow to use: a generic HTML form, Microsoft SSO, or auto-detect. */
type AuthType = 'auto' | 'form' | 'msal';

/** Options accepted by `authenticate`. */
interface AuthOptions {
  /** Address to open; either the login page or a protected page. */
  url: string;
  /** Requested login flow; `'auto'` asks for detection from the loaded page. */
  authType: AuthType;
  /**
   * Explicit credentials. When omitted, the CAMOUFOX_USERNAME and
   * CAMOUFOX_PASSWORD environment variables are consulted instead.
   */
  credentials?: {
    username: string;
    password: string;
  };
  /** Run the browser headless (treated as `true` when omitted). */
  headless?: boolean;
  /** Per-step timeout in milliseconds (treated as 30000 when omitted). */
  timeout?: number;
}

/** Outcome of an authentication attempt. */
interface AuthResult {
  /** Whether the attempt is considered successful. */
  success: boolean;
  /** URL of the page when the attempt finished. */
  finalUrl: string;
  /** Login flow that was used or reported. */
  authType: AuthType;
  /** Human-readable description of the outcome. */
  message: string;
}
|
||||
|
||||
/**
 * Resolve login credentials from explicit options, falling back per field to
 * the CAMOUFOX_USERNAME / CAMOUFOX_PASSWORD environment variables.
 *
 * @param options - Optional explicit username and/or password; empty values
 *   fall through to the environment.
 * @returns Both values when each could be resolved, otherwise `null`.
 */
function getCredentials(options?: {
  username?: string;
  password?: string;
}): { username: string; password: string } | null {
  const { CAMOUFOX_USERNAME, CAMOUFOX_PASSWORD } = process.env;

  const username = options?.username || CAMOUFOX_USERNAME;
  const password = options?.password || CAMOUFOX_PASSWORD;

  return username && password ? { username, password } : null;
}
|
||||
|
||||
/**
 * Ask a question on the terminal and resolve with the line the user types.
 *
 * @param question - Prompt text, printed once.
 * @param hidden - Accepted for API compatibility. Input masking is NOT
 *   implemented: typed characters are echoed regardless of this flag. The
 *   earlier implementation only printed the prompt a second time when this
 *   was true, which has been removed.
 * @returns The entered line, without the trailing newline.
 */
async function promptUser(question: string, hidden = false): Promise<string> {
  // TODO: implement real masking for `hidden` with a dedicated hidden-input
  // helper; until then the flag is intentionally a no-op.
  void hidden;

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    rl.question(question, (answer) => {
      // Release stdin so the process is not kept alive by the interface.
      rl.close();
      resolve(answer);
    });
  });
}
|
||||
|
||||
/**
 * Guess which login flow the currently loaded page uses.
 *
 * @param page - Page that has already navigated to the target.
 * @returns `'msal'` when the URL contains a Microsoft login host, `'form'`
 *   when the document has both a password-like and a username-like input,
 *   otherwise `'auto'` (nothing recognised).
 */
async function detectAuthType(page: Page): Promise<AuthType> {
  const currentUrl = page.url();

  // Check for Microsoft login
  const microsoftHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];
  if (microsoftHosts.some((host) => currentUrl.includes(host))) {
    return 'msal';
  }

  // Check for common form login patterns (evaluated inside the page)
  const hasLoginForm = await page.evaluate(() => {
    const password = document.querySelector(
      'input[type="password"], input[name*="password"], input[id*="password"]'
    );
    const username = document.querySelector(
      'input[type="email"], input[type="text"][name*="user"], input[type="text"][name*="email"], input[id*="user"], input[id*="email"]'
    );
    return password !== null && username !== null;
  });

  return hasLoginForm ? 'form' : 'auto';
}
|
||||
|
||||
/**
 * Fill in and submit a generic username/password login form.
 *
 * Fields and the submit control are located by trying a prioritised list of
 * selectors. A "remember me" checkbox is ticked when one is found. When no
 * submit control matches, Enter is pressed in the password field instead.
 *
 * @param page - Page currently showing the login form.
 * @param credentials - Username and password to enter.
 * @param timeout - Maximum time in milliseconds to wait for the post-submit
 *   navigation.
 * @returns `false` when a field could not be found, or when no navigation
 *   happened and the page shows an error message; `true` otherwise (including
 *   the case where no navigation happened and no error is visible).
 */
async function handleFormLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting form login...');

  // Return the first element matched by any selector, in priority order.
  const findFirst = async (selectors: readonly string[]) => {
    for (const selector of selectors) {
      const element = await page.$(selector);
      if (element) return element;
    }
    return null;
  };

  // Find and fill username/email field
  const usernameField = await findFirst([
    'input[type="email"]',
    'input[name*="user" i]',
    'input[name*="email" i]',
    'input[id*="user" i]',
    'input[id*="email" i]',
    'input[autocomplete="username"]',
    'input[type="text"]:first-of-type',
  ]);

  if (!usernameField) {
    console.error('Could not find username/email field');
    return false;
  }

  await usernameField.fill(credentials.username);
  console.log('Filled username field');

  // Find and fill password field
  const passwordField = await findFirst([
    'input[type="password"]',
    'input[name*="password" i]',
    'input[id*="password" i]',
    'input[autocomplete="current-password"]',
  ]);

  if (!passwordField) {
    console.error('Could not find password field');
    return false;
  }

  await passwordField.fill(credentials.password);
  console.log('Filled password field');

  // Check for "Remember me" checkbox and check it
  const rememberCheckbox = await page.$(
    'input[type="checkbox"][name*="remember" i], input[type="checkbox"][id*="remember" i]'
  );
  if (rememberCheckbox) {
    await rememberCheckbox.check();
    console.log('Checked "Remember me" checkbox');
  }

  // Find submit button
  const submitButton = await findFirst([
    'button[type="submit"]',
    'input[type="submit"]',
    'button:has-text("Sign in")',
    'button:has-text("Log in")',
    'button:has-text("Login")',
    'button:has-text("Submit")',
    '[role="button"]:has-text("Sign in")',
  ]);

  // Start listening for the navigation BEFORE submitting. Waiting only after
  // the click can miss a fast navigation and then stall for the whole timeout.
  // The outcome is folded into a boolean so a timeout never surfaces as an
  // unhandled rejection.
  const navigated = page
    .waitForNavigation({ timeout, waitUntil: 'domcontentloaded' })
    .then(
      () => true,
      () => false
    );

  if (submitButton) {
    await submitButton.click();
  } else {
    // Try pressing Enter as fallback
    await passwordField.press('Enter');
  }

  console.log('Submitted login form');

  if (await navigated) {
    return true;
  }

  // No navigation: check if we're still on the login page with an error
  const errorMessages = await page.$$eval(
    '.error, .alert-danger, [role="alert"], .login-error',
    (els) => els.map((el) => el.textContent?.trim()).filter(Boolean)
  );

  if (errorMessages.length > 0) {
    console.error('Login error:', errorMessages.join(', '));
    return false;
  }

  return true; // Might have succeeded without navigation
}
|
||||
|
||||
/**
 * Drive the Microsoft SSO login flow: email step, password step, optional
 * "Stay signed in?" prompt, then wait to leave the Microsoft login pages.
 *
 * A URL counts as a Microsoft login page when it contains one of
 * `login.microsoftonline.com`, `login.live.com` or `login.windows.net`.
 *
 * @param page - Page that is on, or about to be redirected to, a Microsoft
 *   login page.
 * @param credentials - Account email (username) and password.
 * @param timeout - Maximum time in milliseconds for each wait, except the
 *   initial redirect wait (10 seconds) and the stay-signed-in prompt
 *   (5 seconds).
 * @returns `true` once the browser has left the Microsoft login pages;
 *   `false` when a step could not be completed or a Conditional Access
 *   policy message is shown (a screenshot is then saved as
 *   `ca-policy-error.png`).
 */
async function handleMsalLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting Microsoft SSO login...');

  const microsoftLoginHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];
  const isMicrosoftLogin = (url: string): boolean =>
    microsoftLoginHosts.some((host) => url.includes(host));

  // If not already on Microsoft login, wait for redirect
  if (!isMicrosoftLogin(page.url())) {
    try {
      await page.waitForURL((url) => isMicrosoftLogin(url.href), { timeout: 10000 });
    } catch {
      console.log('Not redirected to Microsoft login');
      return false;
    }
  }

  // Wait for email input. The wait rejects on timeout, so the rejection is
  // mapped to null to honour this function's boolean contract.
  const emailInput = await page
    .waitForSelector('input[type="email"], input[name="loginfmt"]', { timeout })
    .catch(() => null);

  if (!emailInput) {
    console.error('Could not find email input on Microsoft login');
    return false;
  }

  // Fill email and submit
  await emailInput.fill(credentials.username);
  console.log('Filled email field');

  const nextButton = await page.$('input[type="submit"], button[type="submit"]');
  if (nextButton) {
    await nextButton.click();
  } else {
    await emailInput.press('Enter');
  }

  // Wait for password page
  try {
    await page.waitForSelector(
      'input[type="password"], input[name="passwd"]',
      { timeout }
    );
  } catch {
    // Might be using passwordless auth or different flow
    console.log('Password field not found - might be using different auth flow');
    return false;
  }

  // Fill password
  const passwordInput = await page.$('input[type="password"], input[name="passwd"]');
  if (!passwordInput) {
    console.error('Could not find password input');
    return false;
  }

  await passwordInput.fill(credentials.password);
  console.log('Filled password field');

  // Submit
  const signInButton = await page.$('input[type="submit"], button[type="submit"]');
  if (signInButton) {
    await signInButton.click();
  } else {
    await passwordInput.press('Enter');
  }

  // Handle "Stay signed in?" prompt
  try {
    const staySignedInButton = await page.waitForSelector(
      'input[value="Yes"], button:has-text("Yes")',
      { timeout: 5000 }
    );
    if (staySignedInButton) {
      await staySignedInButton.click();
      console.log('Clicked "Stay signed in" button');
    }
  } catch {
    // Prompt might not appear
  }

  // Check for Conditional Access Policy error
  const caError = await page.$('text=Conditional Access policy');
  if (caError) {
    console.error('Blocked by Conditional Access Policy');
    // Take screenshot for debugging
    await page.screenshot({ path: 'ca-policy-error.png' });
    console.log('Screenshot saved: ca-policy-error.png');
    return false;
  }

  // Wait for redirect away from Microsoft login
  try {
    await page.waitForURL((url) => !isMicrosoftLogin(url.href), { timeout });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Heuristically decide whether the loaded page is the requested target rather
 * than a login screen.
 *
 * @param page - Page that has already navigated.
 * @param targetUrl - URL that was requested.
 * @returns `true` only when the current URL starts with `targetUrl` and the
 *   document contains none of the common login-form markers.
 */
async function isAuthenticated(page: Page, targetUrl: string): Promise<boolean> {
  // Off the target URL (e.g. redirected elsewhere): treat as not authenticated.
  if (!page.url().startsWith(targetUrl)) {
    return false;
  }

  // On the target URL: look for common login page indicators inside the page.
  const showsLoginUi = await page.evaluate(() =>
    [
      'input[type="password"]',
      'form[action*="login"]',
      'form[action*="signin"]',
      '.login-form',
      '#login',
    ].some((selector) => document.querySelector(selector) !== null)
  );

  return !showsLoginUi;
}
|
||||
|
||||
/**
 * Main authentication function.
 *
 * Flow, in order:
 *  1. Open `options.url`; if `isAuthenticated` already reports a session, return success.
 *  2. If no credentials are available (neither `options.credentials` nor
 *     `getCredentials()`), reopen the browser headed and wait for the user to
 *     log in manually.
 *  3. Otherwise resolve the auth type (detecting it when `'auto'`) and run the
 *     matching login handler.
 *
 * Every browser opened here is closed before the function returns or throws.
 *
 * @param options - URL, optional credentials, auth type, headless flag and timeout.
 * @returns Outcome with the final URL, the auth type used and a human-readable message.
 */
export async function authenticate(options: AuthOptions): Promise<AuthResult> {
  const timeout = options.timeout ?? 30000;
  const browser = await launchBrowser({ headless: options.headless ?? true });
  // Tracks whether the primary browser was already closed on the manual-login
  // path, so the outer `finally` does not close it a second time.
  let primaryClosed = false;

  try {
    // Created inside the try so a failure here still closes the browser.
    const page = await browser.newPage();

    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if already authenticated
    if (await isAuthenticated(page, options.url)) {
      return {
        success: true,
        finalUrl: page.url(),
        authType: 'auto',
        message: 'Already authenticated (session persisted from profile)',
      };
    }

    // Get credentials
    const credentials = options.credentials ?? getCredentials();

    if (!credentials) {
      // No credentials - open interactive browser
      console.log('\nNo credentials provided. Opening browser for manual login...');
      console.log('Please complete the login process manually.');
      console.log('The session will be saved to your profile.');

      // Switch to headed mode for manual login
      await browser.close();
      primaryClosed = true;

      const interactiveBrowser = await launchBrowser({ headless: false });
      try {
        const interactivePage = await interactiveBrowser.newPage();
        await interactivePage.goto(options.url);

        await promptUser('\nPress Enter when you have completed login...');

        return {
          success: true,
          finalUrl: interactivePage.url(),
          authType: 'auto',
          message: 'Manual login completed - session saved to profile',
        };
      } finally {
        // Close the headed browser even if navigation or the prompt failed.
        await interactiveBrowser.close();
      }
    }

    // Detect auth type if auto
    let authType = options.authType;
    if (authType === 'auto') {
      authType = await detectAuthType(page);
      console.log(`Detected auth type: ${authType}`);
    }

    // Handle authentication based on type
    let success = false;
    switch (authType) {
      case 'msal':
        success = await handleMsalLogin(page, credentials, timeout);
        break;
      case 'form':
      default:
        success = await handleFormLogin(page, credentials, timeout);
        break;
    }

    const finalUrl = page.url();

    return {
      success,
      finalUrl,
      authType,
      message: success
        ? `Authentication successful - session saved to profile`
        : 'Authentication failed',
    };
  } finally {
    if (!primaryClosed) {
      await browser.close();
    }
  }
}
|
||||
|
||||
/**
 * Navigate to an authenticated page, logging in first when needed.
 *
 * On success the caller receives the open page and browser and is responsible
 * for closing the browser. On any failure the browser opened here is closed
 * before the error propagates.
 *
 * @param url - Page to open.
 * @param options - Optional credentials (falls back to `getCredentials()`) and headless flag.
 * @returns The page positioned on `url` and the browser that owns it.
 * @throws Error when authentication is required but no credentials are
 *   available, or when the login handler reports failure.
 */
export async function navigateAuthenticated(
  url: string,
  options?: {
    credentials?: { username: string; password: string };
    headless?: boolean;
  }
): Promise<{ page: Page; browser: BrowserContext }> {
  const { page, browser } = await getPage({ headless: options?.headless ?? true });

  try {
    await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if we need to authenticate
    if (!(await isAuthenticated(page, url))) {
      console.log('Session expired or not authenticated. Attempting login...');

      // Get credentials
      const credentials = options?.credentials ?? getCredentials();

      if (!credentials) {
        throw new Error(
          'Authentication required but no credentials provided. ' +
            'Set CAMOUFOX_USERNAME and CAMOUFOX_PASSWORD environment variables.'
        );
      }

      // Detect and handle auth
      const authType = await detectAuthType(page);
      const success =
        authType === 'msal'
          ? await handleMsalLogin(page, credentials, 30000)
          : await handleFormLogin(page, credentials, 30000);

      if (!success) {
        throw new Error('Authentication failed');
      }

      // Navigate back to original URL if we were redirected
      if (!page.url().startsWith(url)) {
        await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });
      }
    }

    return { page, browser };
  } catch (error) {
    // The caller never receives the browser on failure, so release it here.
    // A failure while closing must not mask the original error.
    await browser.close().catch(() => undefined);
    throw error;
  }
}
|
||||
|
||||
/**
 * CLI entry point: parses flags, prints usage when `--help` is given or
 * `--url` is missing, validates `--type`, runs `authenticate` and exits with
 * status 0 on success or 1 on failure.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'type', 'username', 'password'],
    boolean: ['headless', 'help'],
    default: {
      type: 'auto',
      headless: false, // Default to headed for auth so user can see/interact
    },
    alias: {
      u: 'url',
      t: 'type',
      h: 'help',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Authentication Handler

Usage:
npx tsx auth.ts --url <url> [options]

Options:
-u, --url <url>         URL to authenticate (required)
-t, --type <type>       Auth type: auto, form, or msal (default: auto)
--username <user>       Username/email (or set CAMOUFOX_USERNAME env var)
--password <pass>       Password (or set CAMOUFOX_PASSWORD env var)
--headless <bool>       Run in headless mode (default: false for auth)
-h, --help              Show this help message

Auth Types:
auto    Auto-detect authentication type
form    Generic username/password form
msal    Microsoft SSO (login.microsoftonline.com)

Environment Variables:
CAMOUFOX_USERNAME    Default username/email for authentication
CAMOUFOX_PASSWORD    Default password for authentication

Examples:
# Interactive login (no credentials, opens browser)
npx tsx auth.ts --url "https://example.com/login"

# Form login with credentials
npx tsx auth.ts --url "https://example.com/login" --type form \\
--username "user@example.com" --password "secret"

# Microsoft SSO login
CAMOUFOX_USERNAME=user@company.com CAMOUFOX_PASSWORD=secret \\
npx tsx auth.ts --url "https://internal.company.com" --type msal

Notes:
- Session is saved to ~/.camoufox-profile/ for persistence
- After successful auth, subsequent browses will be authenticated
- Use --headless false if you need to handle MFA manually
`);
    process.exit(args.help ? 0 : 1);
  }

  const authType = args.type as AuthType;
  if (!['auto', 'form', 'msal'].includes(authType)) {
    console.error(`Invalid auth type: ${authType}. Must be auto, form, or msal.`);
    process.exit(1);
  }

  // CLI credentials are only used as a pair. Say so when just one half was
  // given instead of discarding it silently.
  const hasUsername = Boolean(args.username);
  const hasPassword = Boolean(args.password);
  if (hasUsername !== hasPassword) {
    console.warn(
      'Warning: --username and --password must be supplied together; ignoring the one provided.'
    );
  }

  try {
    const result = await authenticate({
      url: args.url,
      authType,
      credentials:
        hasUsername && hasPassword
          ? { username: args.username, password: args.password }
          : undefined,
      headless: args.headless,
    });

    console.log(`\nAuthentication result:`);
    console.log(`  Success: ${result.success}`);
    console.log(`  Auth type: ${result.authType}`);
    console.log(`  Final URL: ${result.finalUrl}`);
    console.log(`  Message: ${result.message}`);

    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Run if executed directly.
// Match the script path's final segment exactly. A plain substring test would
// also fire for an importing script whose path merely contains "auth.ts"
// (e.g. "my-auth.ts"), running the CLI and exiting the importer's process.
const isMainModule = /(?:^|[\\\/])auth\.ts$/.test(process.argv[1] ?? '');
if (isMainModule) {
  void main();
}
|
||||
195
skills/web-automation/codex/scripts/browse.ts
Normal file
195
skills/web-automation/codex/scripts/browse.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Browser launcher using Camoufox with persistent profile
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx browse.ts --url "https://example.com"
|
||||
* npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
|
||||
* npx tsx browse.ts --url "https://example.com" --headless false --wait 5000
|
||||
*/
|
||||
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { existsSync, mkdirSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
|
||||
// Types

/** Options accepted by `browse`. */
interface BrowseOptions {
  /** Address to navigate to. */
  url: string;

  /** Forwarded to `launchBrowser`. */
  headless?: boolean;
  /** When set, the browser is kept open after the page has been processed. */
  interactive?: boolean;

  /** Navigation timeout in milliseconds; `browse` falls back to 60000. */
  timeout?: number;
  /** Milliseconds to pause after navigation before reading the page. */
  wait?: number;

  /** Capture a full-page screenshot. */
  screenshot?: boolean;
  /** Screenshot destination; `browse` falls back to `screenshot.png`. */
  output?: string;
}

/** Summary returned by `browse`. */
interface BrowseResult {
  /** Document title after navigation. */
  title: string;
  /** URL the page ended up on. */
  url: string;
  /** Path of the saved screenshot; present only when one was taken. */
  screenshotPath?: string;
}
|
||||
|
||||
/**
 * Resolve the persistent browser profile directory and make sure it exists.
 *
 * Uses `CAMOUFOX_PROFILE_PATH` when it is set to a non-empty value, otherwise
 * `~/.camoufox-profile`. The directory is created in both cases; a recursive
 * `mkdirSync` is a no-op when it already exists.
 *
 * @returns Path of the profile directory.
 */
const getProfilePath = (): string => {
  const profileDir = process.env.CAMOUFOX_PROFILE_PATH || join(homedir(), '.camoufox-profile');
  mkdirSync(profileDir, { recursive: true });
  return profileDir;
};
|
||||
|
||||
/**
 * Launch Camoufox against the persistent profile directory.
 *
 * Headless resolution order: the explicit `options.headless`, then the
 * `CAMOUFOX_HEADLESS` environment variable (only the exact string `'true'`
 * means headless), then `true`.
 *
 * @param options - Optional explicit headless flag.
 * @returns The value produced by `Camoufox` for the profile.
 */
export async function launchBrowser(options: {
  headless?: boolean;
}): Promise<BrowserContext> {
  const envHeadless = process.env.CAMOUFOX_HEADLESS;
  const fallbackHeadless = envHeadless ? envHeadless === 'true' : true;
  const headless = options.headless ?? fallbackHeadless;
  const profilePath = getProfilePath();

  console.log(`Using profile: ${profilePath}`);
  console.log(`Headless mode: ${headless}`);

  return Camoufox({
    user_data_dir: profilePath,
    headless,
  });
}
|
||||
|
||||
/**
 * Browse to a URL and optionally take a screenshot.
 *
 * In interactive mode the function never returns after a successful load: it
 * waits on a promise that is never settled so the browser stays open until
 * the process is interrupted. In every other case, including errors raised in
 * interactive mode before that point, the browser is closed.
 *
 * @param options - See `BrowseOptions`.
 * @returns Title and final URL, plus the screenshot path when one was taken.
 */
export async function browse(options: BrowseOptions): Promise<BrowseResult> {
  const browser = await launchBrowser({ headless: options.headless });

  try {
    // Created inside the try so a failure here still closes the browser.
    const page = await browser.newPage();

    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, {
      timeout: options.timeout ?? 60000,
      waitUntil: 'domcontentloaded',
    });

    // Wait if specified
    if (options.wait) {
      console.log(`Waiting ${options.wait}ms...`);
      await page.waitForTimeout(options.wait);
    }

    const result: BrowseResult = {
      title: await page.title(),
      url: page.url(),
    };

    console.log(`Page title: ${result.title}`);
    console.log(`Final URL: ${result.url}`);

    // Take screenshot if requested
    if (options.screenshot) {
      const outputPath = options.output ?? 'screenshot.png';
      await page.screenshot({ path: outputPath, fullPage: true });
      result.screenshotPath = outputPath;
      console.log(`Screenshot saved: ${outputPath}`);
    }

    // If interactive mode, keep browser open
    if (options.interactive) {
      console.log('\nInteractive mode - browser will stay open.');
      console.log('Press Ctrl+C to close.');
      await new Promise<never>(() => {}); // Keep running
    }

    return result;
  } finally {
    // A successful interactive run never gets here (it is parked above), so
    // closing unconditionally only affects completed or failed runs.
    await browser.close();
  }
}
|
||||
|
||||
/**
 * Launch a browser and open a fresh page, for use in other scripts.
 *
 * The caller owns the returned browser and must close it. If the page cannot
 * be created, the browser is closed here before the error propagates.
 *
 * @param options - Optional headless flag forwarded to `launchBrowser`.
 * @returns The new page and the browser that owns it.
 */
export async function getPage(options?: {
  headless?: boolean;
}): Promise<{ page: Page; browser: BrowserContext }> {
  const browser = await launchBrowser({ headless: options?.headless });
  try {
    const page = await browser.newPage();
    return { page, browser };
  } catch (error) {
    // A failure while closing must not mask the original error.
    await browser.close().catch(() => undefined);
    throw error;
  }
}
|
||||
|
||||
/**
 * CLI entry point: parses flags, prints usage when `--help` is given or
 * `--url` is missing, then runs `browse`. Exits with status 1 on invalid
 * input or when `browse` throws.
 */
async function main() {
  const rawArgs = process.argv.slice(2);
  const args = parseArgs(rawArgs, {
    string: ['url', 'output'],
    boolean: ['screenshot', 'headless', 'interactive', 'help'],
    default: {
      headless: true,
      screenshot: false,
      interactive: false,
    },
    alias: {
      u: 'url',
      o: 'output',
      s: 'screenshot',
      h: 'help',
      i: 'interactive',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Browser with Camoufox

Usage:
npx tsx browse.ts --url <url> [options]

Options:
-u, --url <url>        URL to navigate to (required)
-s, --screenshot       Take a screenshot of the page
-o, --output <path>    Output path for screenshot (default: screenshot.png)
--headless <bool>      Run in headless mode (default: true)
--wait <ms>            Wait time after page load in milliseconds
--timeout <ms>         Navigation timeout (default: 60000)
-i, --interactive      Keep browser open for manual interaction
-h, --help             Show this help message

Examples:
npx tsx browse.ts --url "https://example.com"
npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
npx tsx browse.ts --url "https://example.com" --headless false --interactive

Environment Variables:
CAMOUFOX_PROFILE_PATH    Custom profile directory (default: ~/.camoufox-profile/)
CAMOUFOX_HEADLESS        Default headless mode (true/false)
`);
    process.exit(args.help ? 0 : 1);
  }

  // Parse an optional millisecond flag. A missing or zero value means "not
  // set"; anything that does not parse to a non-negative integer is reported
  // instead of being passed on as NaN.
  const parseMilliseconds = (flag: string, raw: unknown): number | undefined => {
    if (!raw) return undefined;
    const value = typeof raw === 'number' ? Math.trunc(raw) : parseInt(String(raw), 10);
    if (Number.isNaN(value) || value < 0) {
      console.error(
        `Invalid --${flag} value: ${String(raw)}. Must be a non-negative number of milliseconds.`
      );
      process.exit(1);
    }
    return value;
  };

  // The parser default makes args.headless always a boolean, which would hide
  // the documented CAMOUFOX_HEADLESS fallback in launchBrowser. Forward the
  // flag only when it was actually typed on the command line.
  const headlessFlagGiven = rawArgs.some((arg) => /^--(no-)?headless(=.*)?$/.test(arg));

  try {
    await browse({
      url: args.url,
      headless: headlessFlagGiven ? args.headless : undefined,
      screenshot: args.screenshot,
      output: args.output,
      wait: parseMilliseconds('wait', args.wait),
      timeout: parseMilliseconds('timeout', args.timeout),
      interactive: args.interactive,
    });
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Run if executed directly.
// Match the script path's final segment exactly. A plain substring test would
// also fire for an importing script whose path merely contains "browse.ts"
// (e.g. "batch-browse.ts"), running the CLI and exiting the importer's process.
const isMainModule = /(?:^|[\\\/])browse\.ts$/.test(process.argv[1] ?? '');
if (isMainModule) {
  void main();
}
|
||||
29
skills/web-automation/codex/scripts/package.json
Normal file
29
skills/web-automation/codex/scripts/package.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "web-automation-scripts",
|
||||
"version": "1.0.0",
|
||||
"description": "Web browsing and scraping scripts using Camoufox",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"browse": "tsx browse.ts",
|
||||
"scrape": "tsx scrape.ts",
|
||||
"fetch-browser": "npx camoufox-js fetch"
|
||||
},
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"camoufox-js": "^0.8.5",
|
||||
"jsdom": "^24.0.0",
|
||||
"minimist": "^1.2.8",
|
||||
"playwright-core": "^1.40.0",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/minimist": "^1.2.5",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"esbuild": "0.27.0",
|
||||
"tsx": "^4.7.0",
|
||||
"typescript": "^5.3.0"
|
||||
},
|
||||
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
|
||||
}
|
||||
1613
skills/web-automation/codex/scripts/pnpm-lock.yaml
generated
Normal file
1613
skills/web-automation/codex/scripts/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
212
skills/web-automation/codex/scripts/scan-local-app.ts
Normal file
212
skills/web-automation/codex/scripts/scan-local-app.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
import { writeFileSync } from 'fs';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
// Scan configuration. Each value can be overridden through the environment;
// the defaults are the values this script previously hard-coded.

/** Origin of the application under test (override: SCAN_BASE_URL). */
const baseUrl = process.env.SCAN_BASE_URL?.trim() || 'http://localhost:3000';

/** Login e-mail (override: CAMOUFOX_USERNAME, matching the password variable). */
const username = process.env.CAMOUFOX_USERNAME?.trim() || 'analyst@fhb.local';

/** Login password; empty when CAMOUFOX_PASSWORD is not set. */
const password = process.env.CAMOUFOX_PASSWORD ?? '';

/**
 * Destination of the markdown report (override: SCAN_REPORT_PATH).
 * NOTE(review): the default is an absolute path on one developer's machine, so
 * set SCAN_REPORT_PATH when running anywhere else.
 */
const reportPath =
  process.env.SCAN_REPORT_PATH?.trim() ||
  '/Users/stefano.fiorini/Documents/projects/fhb-loan-spreading-pilot-a/docs/plans/2026-01-24-financials-analysis-redesign/web-automation-scan.md';
|
||||
|
||||
/** Outcome of one navigation attempt. */
type NavResult = {
  /** URL that was asked for. */
  requestedUrl: string;
  /** URL the page reports after the attempt. */
  url: string;
  /** HTTP status of the response, or null when there was none or navigation failed. */
  status: number | null;
  /** Document title, or '' when it could not be read. */
  title: string;
  /** Stringified navigation error; present only on failure. */
  error?: string;
};

/** The slice of the page API that `gotoWithStatus` relies on. */
type NavigablePage = {
  goto(
    url: string,
    options: { waitUntil: 'domcontentloaded'; timeout: number }
  ): Promise<{ status(): number } | null>;
  url(): string;
  title(): Promise<string>;
};

/**
 * Navigate to `url` and describe what happened without ever throwing for a
 * navigation failure.
 *
 * @param page - Page to drive.
 * @param url - Address to open (waits for `domcontentloaded`, 60 s timeout).
 * @returns A `NavResult`; on failure `status` is null and `error` holds the
 *   stringified exception.
 */
async function gotoWithStatus(page: NavigablePage, url: string): Promise<NavResult> {
  let response: { status(): number } | null;
  try {
    response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
  } catch (error) {
    return {
      requestedUrl: url,
      url: page.url(),
      status: null,
      title: await page.title().catch(() => ''),
      error: String(error),
    };
  }

  return {
    requestedUrl: url,
    url: page.url(),
    // goto may resolve to null, in which case there is no status to report.
    status: response ? response.status() : null,
    title: await page.title().catch(() => ''),
  };
}
|
||||
|
||||
/** The slice of the page API that `textOrNull` relies on. */
type TextQueryPage = {
  locator(selector: string): {
    first(): {
      count(): Promise<number>;
      textContent(): Promise<string | null>;
    };
  };
};

/**
 * Read the text of the first element matching `selector`.
 *
 * @param page - Page to query.
 * @param selector - Selector passed to `page.locator`.
 * @returns The text, trimmed and with whitespace runs collapsed to single
 *   spaces; null when nothing matches, the text is empty, or the lookup throws.
 */
async function textOrNull(page: TextQueryPage, selector: string): Promise<string | null> {
  const target = page.locator(selector).first();
  try {
    if ((await target.count()) === 0) {
      return null;
    }
    const text = await target.textContent();
    return text ? text.trim().replace(/\s+/g, ' ') : null;
  } catch {
    // Best-effort read: any lookup failure is reported as "no text".
    return null;
  }
}
|
||||
|
||||
/** Page type as returned by `getPage`, derived so no extra import is needed. */
type ScanPage = Awaited<ReturnType<typeof getPage>>['page'];

/** Captures the 36-character id segment of a `/cases/<id>` path. */
const CASE_ID_PATTERN = /\/cases\/([0-9a-f-]{36})/i;

/**
 * Pick a case id from the links on the current page: the first preferred demo
 * title whose link carries an id, otherwise the first link that carries one.
 */
async function findCaseId(page: ScanPage): Promise<string | null> {
  const caseLinks = await page.$$eval('a[href^="/cases/"]', (anchors) =>
    anchors
      .map((a) => ({
        href: a.getAttribute('href') || '',
        text: (a.textContent || '').trim(),
      }))
      .filter((link) => link.href.includes('/cases/'))
  );

  const preferredTitles = ['Demo - Strong Borrower', 'Demo - Weak Borrower', 'Demo - Incomplete'];
  for (const title of preferredTitles) {
    const match = caseLinks.find((link) => link.text.includes(title));
    const id = match?.href.match(CASE_ID_PATTERN)?.[1];
    if (id) return id;
  }

  for (const link of caseLinks) {
    const id = link.href.match(CASE_ID_PATTERN)?.[1];
    if (id) return id;
  }
  return null;
}

/**
 * Open the analysis configure page, click "Run Analysis" when it is enabled,
 * and append what happened (results, error callout, or timeout) to `lines`.
 */
async function recordAnalysisRun(page: ScanPage, caseBase: string, lines: string[]): Promise<void> {
  await gotoWithStatus(page, `${caseBase}/analysis/configure`);

  const runButton = page.locator('button:has-text("Run Analysis")').first();
  // A button that cannot be inspected is treated as disabled.
  const disabled = await runButton.isDisabled().catch(() => true);
  lines.push(`- Run button disabled: ${disabled}`);

  if (disabled) {
    lines.push('- Skipped running analysis because Run button was disabled.');
    return;
  }

  await runButton.click();

  const resultsWait = page
    .waitForURL('**/journey/analysis/results**', { timeout: 180000 })
    .then(() => 'results' as const);
  const errorWait = page
    .locator('[role="alert"]')
    .filter({ hasText: 'Error' })
    .first()
    .waitFor({ timeout: 180000 })
    .then(() => 'error' as const);

  // Whichever settles first decides the outcome; a rejection means neither
  // the results page nor an error alert showed up in time.
  const outcome = await Promise.race([resultsWait, errorWait]).catch(() => 'timeout' as const);

  if (outcome === 'results') {
    await page.waitForTimeout(1500);
    lines.push(`- Results URL: ${page.url().replace(baseUrl, '')}`);

    const downloadHref = await page
      .locator('a[href*="/journey/analysis/download"]')
      .first()
      .getAttribute('href')
      .catch(() => null);

    if (downloadHref) {
      const dlUrl = downloadHref.startsWith('http') ? downloadHref : `${baseUrl}${downloadHref}`;
      const dlResp = await page.goto(dlUrl, { waitUntil: 'commit', timeout: 60000 }).catch(() => null);
      lines.push(
        `- Download route status: ${dlResp?.status() ?? 'ERR'} (Content-Type: ${dlResp?.headers()?.['content-type'] ?? 'n/a'})`
      );
    } else {
      lines.push('- Download link not found on results page');
    }
  } else if (outcome === 'error') {
    // Reuses the shared helper so the alert text is trimmed and its
    // whitespace collapsed the same way as every other text read.
    const errorText = await textOrNull(page, '[role="alert"]');
    lines.push(`- Stayed on configure page; saw error callout: ${errorText ?? '(unable to read)'}`);
    lines.push('- Skipping download check because analysis did not complete.');
  } else {
    lines.push('- Timed out waiting for results or error after clicking Run Analysis.');
  }
}

/**
 * Log in to the local app, pick a demo case, probe a fixed list of routes,
 * try one analysis run and write the findings to `reportPath` as markdown.
 * The browser is always closed, including on failure.
 */
async function main(): Promise<void> {
  const { page, browser } = await getPage({ headless: true });
  const lines: string[] = [
    '# Web Automation Scan (local)',
    '',
    `- Base URL: ${baseUrl}`,
    `- Timestamp: ${new Date().toISOString()}`,
    '',
  ];
  const writeReport = (): void => writeFileSync(reportPath, lines.join('\n') + '\n', 'utf-8');

  try {
    lines.push('## Login');
    await gotoWithStatus(page, `${baseUrl}/login`);
    await page.locator('input[name="email"]').fill(username);
    await page.locator('input[name="password"]').fill(password);
    await page.locator('button[type="submit"]').click();
    await page.waitForTimeout(2500);

    const cookies = await page.context().cookies();
    const sessionCookie = cookies.find((c) => c.name === 'fhb_session');
    lines.push(`- After submit URL: ${page.url()}`);
    lines.push(`- Has session cookie (fhb_session): ${Boolean(sessionCookie)}`);
    lines.push('');

    lines.push('## Demo Case');
    const casesNav = await gotoWithStatus(page, `${baseUrl}/cases`);
    lines.push(`- GET /cases → status ${casesNav.status ?? 'ERR'}, final ${casesNav.url}`);

    // An explicit SCAN_CASE_ID wins; otherwise look for a case on the page.
    const selectedCaseId = process.env.SCAN_CASE_ID?.trim() || (await findCaseId(page));
    lines.push(`- Selected caseId: ${selectedCaseId ?? '(none found)'}`);

    if (!selectedCaseId) {
      lines.push('');
      lines.push('⚠️ Could not find a demo case link on /cases.');
      writeReport();
      return;
    }

    const caseBase = `${baseUrl}/cases/${selectedCaseId}/journey`;

    lines.push('');
    lines.push('## Route Checks');

    const routesToCheck = [
      `${caseBase}`,
      `${caseBase}/financials`,
      `${caseBase}/financials/income`,
      `${caseBase}/analysis`,
      `${caseBase}/analysis/configure`,
      `${caseBase}/analysis/ai`,
      `${caseBase}/analysis/ai/detail`,
      `${caseBase}/spreads`,
    ];

    for (const url of routesToCheck) {
      const r = await gotoWithStatus(page, url);
      const h1 = await textOrNull(page, 'h1');
      const finalPath = r.url.startsWith(baseUrl) ? r.url.slice(baseUrl.length) : r.url;
      lines.push(
        `- ${url.slice(baseUrl.length)} → status ${r.status ?? 'ERR'} (final ${finalPath})${h1 ? `, h1="${h1}"` : ''}`
      );
    }

    lines.push('');
    lines.push('## Spreadsheet Analysis (UI)');
    await recordAnalysisRun(page, caseBase, lines);

    lines.push('');
    lines.push('## Notes');
    lines.push('- This scan avoids scraping financial values; it records route availability and basic headings.');

    writeReport();
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Start the scan. A failure is logged and turned into a non-zero exit status
// without forcing an immediate exit.
void main().then(undefined, (failure) => {
  console.error(failure);
  process.exitCode = 1;
});
|
||||
351
skills/web-automation/codex/scripts/scrape.ts
Normal file
351
skills/web-automation/codex/scripts/scrape.ts
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Web scraper that extracts content to markdown
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode main
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".content"
|
||||
*/
|
||||
|
||||
import TurndownService from 'turndown';
|
||||
import * as turndownPluginGfm from 'turndown-plugin-gfm';
|
||||
import { Readability } from '@mozilla/readability';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { writeFileSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
// Types

/**
 * How `scrape` picks the HTML to convert: `main` uses Readability, `full`
 * uses the page body with common non-content elements removed, `selector`
 * uses the first element matching `ScrapeOptions.selector`.
 */
type ScrapeMode = 'main' | 'full' | 'selector';

/** Options accepted by `scrape`. */
interface ScrapeOptions {
  /** Address to scrape. */
  url: string;
  /** Extraction strategy. */
  mode: ScrapeMode;
  /** Required when `mode` is `'selector'`. */
  selector?: string;

  /** Forwarded to `getPage`; `scrape` falls back to `true`. */
  headless?: boolean;
  /** Milliseconds to pause after navigation so dynamic content can render. */
  wait?: number;

  /** Keep hyperlinks in the markdown; `scrape` falls back to `true`. */
  includeLinks?: boolean;
  /** `scrape` falls back to `true`. */
  includeTables?: boolean;
  /** Keep images in the markdown; `scrape` falls back to `false`. */
  includeImages?: boolean;

  /** When set, the markdown is also written to this file. */
  output?: string;
}

/** Value returned by `scrape`. */
interface ScrapeResult {
  /** Title used for the document. */
  title: string;
  /** URL the page ended up on. */
  url: string;
  /** Converted markdown, including the metadata comment header. */
  markdown: string;
  /** Author line; only ever set in `main` mode. */
  byline?: string;
  /** Short summary; only ever set in `main` mode. */
  excerpt?: string;
}
|
||||
|
||||
/**
 * Build the Turndown converter used for markdown output.
 *
 * Applies the GFM plugin, emits fenced code blocks tagged with the language
 * taken from a `language-*` class on the `<code>` element, and drops
 * script/style/nav/footer/aside/noscript elements.
 *
 * @param options.includeLinks - Falsy: anchors are replaced by their text.
 * @param options.includeImages - Falsy: images are dropped.
 * @param options.includeTables - Exactly `false`: tables are dropped. Omitting
 *   it keeps tables, as before this option was honoured.
 * @returns The configured Turndown instance.
 */
function createTurndownService(options: {
  includeLinks?: boolean;
  includeTables?: boolean;
  includeImages?: boolean;
}): TurndownService {
  const turndown = new TurndownService({
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '*',
    strongDelimiter: '**',
    linkStyle: 'inlined',
  });

  // Add GFM support (tables, strikethrough, task lists)
  turndown.use(turndownPluginGfm.gfm);

  // Custom rule for code blocks with language detection
  turndown.addRule('codeBlockWithLanguage', {
    filter: (node) => node.nodeName === 'PRE' && node.firstChild?.nodeName === 'CODE',
    replacement: (_content, node) => {
      const codeNode = node.firstChild as HTMLElement;
      const className = codeNode.getAttribute('class') || '';
      const lang = className.match(/language-(\w+)/)?.[1] ?? '';
      const code = codeNode.textContent || '';
      return `\n\n\`\`\`${lang}\n${code}\n\`\`\`\n\n`;
    },
  });

  // Remove images if not included
  if (!options.includeImages) {
    turndown.addRule('removeImages', {
      filter: 'img',
      replacement: () => '',
    });
  }

  // Remove links but keep text if not included
  if (!options.includeLinks) {
    turndown.addRule('removeLinks', {
      filter: 'a',
      replacement: (content) => content,
    });
  }

  // Remove tables when explicitly excluded. This has to be an added rule
  // registered after the GFM plugin: turndown.remove() ranks below plugin
  // rules, so the plugin's table rule would otherwise still win.
  if (options.includeTables === false) {
    turndown.addRule('removeTables', {
      filter: 'table',
      replacement: () => '',
    });
  }

  // Remove script, style, nav, footer, aside elements
  turndown.remove(['script', 'style', 'nav', 'footer', 'aside', 'noscript']);

  return turndown;
}
|
||||
|
||||
/**
 * Run Readability over an HTML document to isolate its main content.
 *
 * @param html - Full page markup.
 * @param url - Page address, handed to JSDOM as the document URL.
 * @returns The extracted content and title, plus byline and excerpt when
 *   Readability reports non-empty values for them.
 * @throws Error when Readability cannot produce an article.
 */
function extractMainContent(html: string, url: string): {
  content: string;
  title: string;
  byline?: string;
  excerpt?: string;
} {
  const { document } = new JSDOM(html, { url }).window;
  const article = new Readability(document).parse();

  if (!article) {
    throw new Error('Could not extract main content from page');
  }

  const { content, title, byline, excerpt } = article;
  return {
    content,
    title,
    // Empty or null values are reported as absent.
    byline: byline || undefined,
    excerpt: excerpt || undefined,
  };
}
|
||||
|
||||
/**
 * Scrape a URL and return its content as markdown.
 *
 * The HTML is chosen according to `options.mode`, converted with Turndown,
 * prefixed with an `# <title>` heading when the markdown does not already
 * start with one, and then prefixed with an HTML-comment metadata header
 * (source URL, optional author and excerpt, timestamp) followed by a blank
 * line. The result is also written to `options.output` when that is set.
 * The browser is always closed before returning or throwing.
 *
 * @param options - See `ScrapeOptions`.
 * @returns Title, final URL, markdown and any byline/excerpt found.
 * @throws Error in selector mode when no selector is given or nothing matches
 *   it; also propagates extraction and navigation errors.
 */
export async function scrape(options: ScrapeOptions): Promise<ScrapeResult> {
  const { page, browser } = await getPage({ headless: options.headless ?? true });

  try {
    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, {
      timeout: 60000,
      waitUntil: 'domcontentloaded',
    });

    // Wait if specified
    if (options.wait) {
      console.log(`Waiting ${options.wait}ms for dynamic content...`);
      await page.waitForTimeout(options.wait);
    }

    const pageTitle = await page.title();
    const pageUrl = page.url();

    let html: string;
    let title = pageTitle;
    let byline: string | undefined;
    let excerpt: string | undefined;

    // Get HTML based on mode
    switch (options.mode) {
      case 'main': {
        // Get full page HTML and extract with Readability
        const fullHtml = await page.content();
        const extracted = extractMainContent(fullHtml, pageUrl);
        html = extracted.content;
        title = extracted.title || pageTitle;
        byline = extracted.byline;
        excerpt = extracted.excerpt;
        break;
      }

      case 'selector': {
        if (!options.selector) {
          throw new Error('Selector mode requires --selector option');
        }
        const element = await page.$(options.selector);
        if (!element) {
          throw new Error(`Selector not found: ${options.selector}`);
        }
        html = await element.innerHTML();
        break;
      }

      case 'full':
      default: {
        // Get body content, excluding common non-content elements.
        // This edits the live document, which is discarded when the browser
        // closes below.
        html = await page.evaluate(() => {
          const selectorsToRemove = [
            'script', 'style', 'noscript', 'iframe',
            'nav', 'header', 'footer', '.cookie-banner',
            '.advertisement', '.ads', '#ads', '.social-share',
            '.comments', '#comments', '.sidebar'
          ];

          selectorsToRemove.forEach(selector => {
            document.querySelectorAll(selector).forEach(el => el.remove());
          });

          return document.body.innerHTML;
        });
        break;
      }
    }

    // Convert to markdown
    const turndown = createTurndownService({
      includeLinks: options.includeLinks ?? true,
      includeTables: options.includeTables ?? true,
      includeImages: options.includeImages ?? false,
    });

    let markdown = turndown.turndown(html);

    // Add title as H1 if not already present
    if (!markdown.startsWith('# ')) {
      markdown = `# ${title}\n\n${markdown}`;
    }

    // Add metadata header. Optional entries are spread in conditionally so no
    // falsy-filter is needed; such a filter would also strip the blank
    // separator that belongs between the header and the content.
    const metadataLines = [
      `<!-- Scraped from: ${pageUrl} -->`,
      ...(byline ? [`<!-- Author: ${byline} -->`] : []),
      ...(excerpt ? [`<!-- Excerpt: ${excerpt} -->`] : []),
      `<!-- Scraped at: ${new Date().toISOString()} -->`,
    ];

    markdown = `${metadataLines.join('\n')}\n\n${markdown}`;

    // Clean up excessive whitespace
    markdown = markdown
      .replace(/\n{4,}/g, '\n\n\n')
      .replace(/[ \t]+$/gm, '')
      .trim();

    const result: ScrapeResult = {
      title,
      url: pageUrl,
      markdown,
      byline,
      excerpt,
    };

    // Save to file if output specified
    if (options.output) {
      writeFileSync(options.output, markdown, 'utf-8');
      console.log(`Markdown saved to: ${options.output}`);
    }

    return result;
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
/**
 * CLI entry point: parses flags, prints usage when `--help` is given or
 * `--url` is missing, validates the mode and its arguments, runs `scrape`,
 * and prints a summary (plus the markdown itself when no output file was
 * requested). Exits with status 1 on invalid input or when `scrape` throws.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'mode', 'selector', 'output'],
    boolean: ['headless', 'links', 'tables', 'images', 'help'],
    default: {
      mode: 'main',
      headless: true,
      links: true,
      tables: true,
      images: false,
    },
    alias: {
      u: 'url',
      m: 'mode',
      s: 'selector',
      o: 'output',
      h: 'help',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Scraper - Extract content to Markdown

Usage:
npx tsx scrape.ts --url <url> [options]

Options:
-u, --url <url>           URL to scrape (required)
-m, --mode <mode>         Scrape mode: main, full, or selector (default: main)
-s, --selector <sel>      CSS selector for selector mode
-o, --output <path>       Output file path for markdown
--headless <bool>         Run in headless mode (default: true)
--wait <ms>               Wait time for dynamic content
--links                   Include links in output (default: true)
--tables                  Include tables in output (default: true)
--images                  Include images in output (default: false)
-h, --help                Show this help message

Scrape Modes:
main      Extract main article content using Readability (best for articles)
full      Full page content with common elements removed
selector  Extract specific element by CSS selector

Examples:
npx tsx scrape.ts --url "https://docs.example.com/guide" --mode main
npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".api-docs"
npx tsx scrape.ts --url "https://example.com" --mode main --no-links --output clean.md

Output Format:
- GitHub Flavored Markdown (tables, strikethrough, task lists)
- Proper heading hierarchy
- Code blocks with language detection
- Metadata comments at top (source URL, date)
`);
    process.exit(args.help ? 0 : 1);
  }

  const mode = args.mode as ScrapeMode;
  if (!['main', 'full', 'selector'].includes(mode)) {
    console.error(`Invalid mode: ${mode}. Must be main, full, or selector.`);
    process.exit(1);
  }

  // Report a missing selector before a browser is launched and the page is
  // loaded; the message matches the one scrape() would raise.
  if (mode === 'selector' && !args.selector) {
    console.error('Error:', 'Selector mode requires --selector option');
    process.exit(1);
  }

  // --wait: a missing or zero value means "no wait"; anything that does not
  // parse to a non-negative integer is reported instead of being ignored.
  let wait: number | undefined;
  if (args.wait) {
    wait = typeof args.wait === 'number' ? Math.trunc(args.wait) : parseInt(String(args.wait), 10);
    if (Number.isNaN(wait) || wait < 0) {
      console.error(
        `Invalid --wait value: ${String(args.wait)}. Must be a non-negative number of milliseconds.`
      );
      process.exit(1);
    }
  }

  try {
    const result = await scrape({
      url: args.url,
      mode,
      selector: args.selector,
      output: args.output,
      includeLinks: args.links,
      includeTables: args.tables,
      includeImages: args.images,
      headless: args.headless,
      wait,
    });

    // Print result summary
    console.log(`\nScrape complete:`);
    console.log(`  Title: ${result.title}`);
    console.log(`  URL: ${result.url}`);
    if (result.byline) console.log(`  Author: ${result.byline}`);
    console.log(`  Markdown length: ${result.markdown.length} chars`);

    // Print markdown if not saved to file
    if (!args.output) {
      console.log('\n--- Markdown Output ---\n');
      console.log(result.markdown);
    }
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the CLI only when the script path passed to Node names this file,
// so importing the module elsewhere does not trigger a scrape.
const entryScript = process.argv[1];
const isMainModule = entryScript?.includes('scrape.ts');

if (isMainModule) {
  main();
}
|
||||
39
skills/web-automation/codex/scripts/test-full.ts
Normal file
39
skills/web-automation/codex/scripts/test-full.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launches headless Camoufox against the persistent profile in
 * `~/.camoufox-profile`, opens github.com, logs the final URL and title, and
 * saves a screenshot to `/tmp/github-test.png`.
 *
 * The browser is closed in a `finally` block so a failed navigation or
 * screenshot does not leave a browser process running.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with full options...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
    // humanize: 1.5, // Test without this first
    // geoip: true, // Test without this first
    // enable_cache: true,
    // block_webrtc: false,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://github.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());

    await page.screenshot({ path: '/tmp/github-test.png' });
    console.log('Screenshot saved');
  } finally {
    await browser.close();
  }

  console.log('Done');
}

// Report the failure AND signal it through the exit code so callers can detect it.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
22
skills/web-automation/codex/scripts/test-minimal.ts
Normal file
22
skills/web-automation/codex/scripts/test-minimal.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
|
||||
/**
 * Smoke test: launches headless Camoufox with no other options, opens
 * example.com, and logs the final URL and page title.
 *
 * The browser is closed in a `finally` block so a failed step does not leave
 * a browser process running.
 */
async function test() {
  console.log('Launching Camoufox with minimal config...');

  const browser = await Camoufox({
    headless: true,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    await browser.close();
  }

  console.log('Done');
}

// Report the failure AND signal it through the exit code so callers can detect it.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
32
skills/web-automation/codex/scripts/test-profile.ts
Normal file
32
skills/web-automation/codex/scripts/test-profile.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launches headless Camoufox with `user_data_dir` pointing at
 * `~/.camoufox-profile`, opens example.com, and logs the final URL and title.
 *
 * The browser is closed in a `finally` block so a failed step does not leave
 * a browser process running.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with user_data_dir...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    await browser.close();
  }

  console.log('Done');
}

// Report the failure AND signal it through the exit code so callers can detect it.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -0,0 +1,78 @@
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
/** JSON document written to stdout by this script. */
interface Extracted {
  /** `document.title` of the inspected page. */
  title: string;
  /** `location.href` of the inspected page. */
  url: string;
  /** `[name, value]` pairs of root-level CSS custom properties whose value looks like a color. */
  colorVars: Array<[string, string]>;
  /** Computed colors per sampled element; `null` when no element matched the selector. */
  samples: Record<
    string,
    null | {
      background: string;
      color: string;
      border: string;
    }
  >;
}
|
||||
|
||||
/**
 * Heuristically decides whether a CSS value contains a color.
 *
 * Recognizes hex colors with 3, 4, 6 or 8 digits and the functional notations
 * `rgb()/rgba()`, `hsl()/hsla()`, `hwb()`, `lab()`, `lch()`, `oklab()` and
 * `oklch()`. Named colors (e.g. `red`) are not detected.
 *
 * @param value - Raw CSS property value.
 * @returns `true` when the value contains a recognized color token.
 */
function isColorValue(value: string): boolean {
  const hexColor = /#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})\b/i;
  // `a?` covers the legacy alpha aliases rgba()/hsla(), which a plain
  // `rgb\(` / `hsl\(` pattern does not match.
  const colorFunction = /\b(?:rgba?|hsla?|hwb|lab|lch|oklab|oklch)\(/i;
  return hexColor.test(value) || colorFunction.test(value);
}
|
||||
|
||||
/**
 * Loads a page (first CLI argument, or the built-in default URL), collects the
 * CSS custom properties declared on the root element plus computed colors of a
 * few representative elements, and prints the result as pretty-printed JSON.
 *
 * The browser is always closed, even when navigation or evaluation fails.
 */
async function main() {
  const targetUrl = process.argv[2] ?? 'https://www.firsthorizon.com';
  const { page, browser } = await getPage({ headless: true });

  try {
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // Fixed pause so late-running scripts and stylesheets can settle.
    await page.waitForTimeout(5000);

    // Sent as a string so it is evaluated verbatim in the page context.
    const pageScript = `(() => {
      const rootStyles = getComputedStyle(document.documentElement);
      const vars = {};
      for (let i = 0; i < rootStyles.length; i++) {
        const prop = rootStyles[i];
        if (prop && prop.startsWith('--')) {
          vars[prop] = rootStyles.getPropertyValue(prop).trim();
        }
      }

      const pick = (selector) => {
        const el = document.querySelector(selector);
        if (!el) return null;
        const cs = getComputedStyle(el);
        return {
          background: cs.backgroundColor,
          color: cs.color,
          border: cs.borderColor,
        };
      };

      return {
        title: document.title,
        url: location.href,
        vars,
        samples: {
          body: pick('body'),
          header: pick('header'),
          nav: pick('nav'),
          primaryButton: pick('button, [role="button"], a[role="button"], a.button, .button'),
          link: pick('a'),
        },
      };
    })()`;
    const data = await page.evaluate(pageScript);

    // Keep only variables whose value looks like a color, sorted by name.
    const colorVars = (Object.entries(data.vars) as Array<[string, string]>)
      .filter(([, value]) => value && isColorValue(value))
      .sort(([nameA], [nameB]) => nameA.localeCompare(nameB));

    const out: Extracted = {
      title: data.title,
      url: data.url,
      colorVars,
      samples: data.samples,
    };

    process.stdout.write(JSON.stringify(out, null, 2));
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: any failure is printed and turned into exit code 1.
const reportFatal = (error: unknown) => {
  console.error(error);
  process.exit(1);
};

main().catch(reportFatal);
|
||||
16
skills/web-automation/codex/scripts/tsconfig.json
Normal file
16
skills/web-automation/codex/scripts/tsconfig.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"resolveJsonModule": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["*.ts"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
8
skills/web-automation/codex/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
8
skills/web-automation/codex/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Minimal ambient typings for `turndown-plugin-gfm`.
 * Each export is a Turndown plugin: a function that receives the service
 * instance and registers rules on it.
 */
declare module 'turndown-plugin-gfm' {
  import TurndownService from 'turndown';

  /** Bundle plugin that applies the individual GFM plugins below. */
  export function gfm(turndownService: TurndownService): void;

  /** Highlighted (language-annotated) code block support. */
  export function highlightedCodeBlock(turndownService: TurndownService): void;

  /** Strikethrough support. */
  export function strikethrough(turndownService: TurndownService): void;

  /** Table support. */
  export function tables(turndownService: TurndownService): void;

  /** Task list item (checkbox) support. */
  export function taskListItems(turndownService: TurndownService): void;
}
|
||||
37
skills/web-automation/opencode/SKILL.md
Normal file
37
skills/web-automation/opencode/SKILL.md
Normal file
@@ -0,0 +1,37 @@
|
||||
---
|
||||
name: web-automation
|
||||
description: Browse and scrape web pages using Playwright with Camoufox anti-detection browser. Use when automating web workflows, extracting page content to markdown, handling authenticated sessions, or scraping websites with bot protection.
|
||||
---
|
||||
|
||||
# Web Automation with Camoufox (OpenCode)
|
||||
|
||||
Automated web browsing and scraping using Playwright with Camoufox anti-detection browser.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Node.js 20+
|
||||
- pnpm
|
||||
- Network access to download browser binaries
|
||||
|
||||
## First-Time Setup
|
||||
|
||||
```bash
|
||||
# Use your OpenCode skills path
|
||||
cd ~/.opencode/skills/web-automation/scripts
|
||||
pnpm install
|
||||
npx camoufox-js fetch
|
||||
```
|
||||
|
||||
If installation fails with native dependency build errors (e.g., for `better-sqlite3`), rebuild that dependency from the package directory named in the error output, then re-run `pnpm install`.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
- Browse page: `npx tsx browse.ts --url "https://example.com"`
|
||||
- Scrape markdown: `npx tsx scrape.ts --url "https://example.com" --mode main --output page.md`
|
||||
- Authenticate: `npx tsx auth.ts --url "https://example.com/login"`
|
||||
|
||||
## Notes
|
||||
|
||||
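- Sessions persist in the Camoufox profile directory (`~/.camoufox-profile/` by default; override with the `CAMOUFOX_PROFILE_PATH` environment variable).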
|
||||
- Use `--wait <ms>` to give dynamic (JavaScript-rendered) pages time to load before content is captured.
|
||||
- Use `--mode selector --selector "..."` for targeted extraction.
|
||||
575
skills/web-automation/opencode/scripts/auth.ts
Normal file
575
skills/web-automation/opencode/scripts/auth.ts
Normal file
@@ -0,0 +1,575 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Authentication handler for web automation
|
||||
* Supports generic form login and Microsoft SSO (MSAL)
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx auth.ts --url "https://example.com/login" --type form
|
||||
* npx tsx auth.ts --url "https://example.com" --type msal
|
||||
* npx tsx auth.ts --url "https://example.com" --type auto
|
||||
*/
|
||||
|
||||
import { getPage, launchBrowser } from './browse.js';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
import { createInterface } from 'readline';
|
||||
|
||||
// Types

/** Login strategy; `auto` means the strategy is chosen by inspecting the page. */
type AuthType = 'auto' | 'form' | 'msal';

/** Input accepted by `authenticate`. */
type AuthOptions = {
  /** Page to open; login is attempted from here. */
  url: string;
  /** Requested login strategy. */
  authType: AuthType;
  /** Explicit credentials; when omitted, environment variables are consulted. */
  credentials?: {
    username: string;
    password: string;
  };
  /** Whether to run the browser headless. */
  headless?: boolean;
  /** Timeout in milliseconds applied to the individual login steps. */
  timeout?: number;
};

/** Outcome reported by `authenticate`. */
type AuthResult = {
  success: boolean;
  /** URL the page was on when the attempt finished. */
  finalUrl: string;
  /** Strategy that was used (or detected). */
  authType: AuthType;
  /** Human-readable summary of the outcome. */
  message: string;
};
|
||||
|
||||
/**
 * Resolves login credentials, preferring explicitly supplied values and
 * falling back per field to `CAMOUFOX_USERNAME` / `CAMOUFOX_PASSWORD`.
 * Empty strings count as "not supplied".
 *
 * @param options - Optional explicit username and/or password.
 * @returns The resolved pair, or `null` when either part is still missing.
 */
function getCredentials(options?: {
  username?: string;
  password?: string;
}): { username: string; password: string } | null {
  const { username: givenUsername, password: givenPassword } = options ?? {};

  const username = givenUsername || process.env.CAMOUFOX_USERNAME;
  const password = givenPassword || process.env.CAMOUFOX_PASSWORD;

  if (username && password) {
    return { username, password };
  }
  return null;
}
|
||||
|
||||
/**
 * Asks a question on the terminal and resolves with the line the user types.
 *
 * @param question - Prompt text; printed exactly once.
 * @param hidden - Reserved for masked input. NOTE: masking is not implemented;
 *   the typed answer is echoed as usual. Use a dedicated hidden-input library
 *   if secrets must not appear on screen.
 * @returns The entered line.
 */
async function promptUser(question: string, hidden = false): Promise<string> {
  // `hidden` is accepted for interface compatibility only (see note above).
  void hidden;

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    // rl.question() writes the prompt itself, so it must not be written separately
    // (doing both printed the question twice when `hidden` was set).
    rl.question(question, (answer) => {
      rl.close();
      resolve(answer);
    });
  });
}
|
||||
|
||||
/**
 * Guesses which login flow the current page requires.
 *
 * @param page - Page that has already been navigated.
 * @returns `'msal'` when the page is hosted on a Microsoft login domain,
 *   `'form'` when the DOM contains both a password-like and a username-like
 *   input, otherwise `'auto'` (nothing recognized).
 */
async function detectAuthType(page: Page): Promise<AuthType> {
  const microsoftLoginHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];

  // Compare against the parsed hostname rather than the whole URL string, so a
  // Microsoft host that merely appears in a path or query parameter (e.g. a
  // redirect target) is not mistaken for an actual Microsoft login page.
  let hostname = '';
  try {
    hostname = new URL(page.url()).hostname.toLowerCase();
  } catch {
    // Unparseable URL: fall through to the DOM-based checks.
  }

  const onMicrosoftLogin = microsoftLoginHosts.some(
    (host) => hostname === host || hostname.endsWith(`.${host}`)
  );
  if (onMicrosoftLogin) {
    return 'msal';
  }

  // Check for common form login patterns
  const hasLoginForm = await page.evaluate(() => {
    const passwordField = document.querySelector(
      'input[type="password"], input[name*="password"], input[id*="password"]'
    );
    const usernameField = document.querySelector(
      'input[type="email"], input[type="text"][name*="user"], input[type="text"][name*="email"], input[id*="user"], input[id*="email"]'
    );
    return !!(passwordField && usernameField);
  });

  if (hasLoginForm) {
    return 'form';
  }

  return 'auto';
}
|
||||
|
||||
/**
 * Fills in and submits a generic username/password login form.
 *
 * Fields and the submit control are located by trying a list of common
 * selectors in priority order. A "remember me" checkbox is ticked when found.
 *
 * @param page - Page currently showing the login form.
 * @param credentials - Username and password to enter.
 * @param timeout - Milliseconds to wait for the post-submit navigation.
 * @returns `false` when a required field is missing or the page shows an error
 *   message after submitting; `true` when a navigation happened, or when none
 *   happened but no error message is visible (the login may have completed
 *   without a navigation).
 */
async function handleFormLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting form login...');

  // Returns the first element matched by any selector, probing in list order.
  const findFirst = async (selectors: readonly string[]) => {
    for (const selector of selectors) {
      const element = await page.$(selector);
      if (element) return element;
    }
    return null;
  };

  // Find and fill username/email field
  const usernameField = await findFirst([
    'input[type="email"]',
    'input[name*="user" i]',
    'input[name*="email" i]',
    'input[id*="user" i]',
    'input[id*="email" i]',
    'input[autocomplete="username"]',
    'input[type="text"]:first-of-type',
  ]);

  if (!usernameField) {
    console.error('Could not find username/email field');
    return false;
  }

  await usernameField.fill(credentials.username);
  console.log('Filled username field');

  // Find and fill password field
  const passwordField = await findFirst([
    'input[type="password"]',
    'input[name*="password" i]',
    'input[id*="password" i]',
    'input[autocomplete="current-password"]',
  ]);

  if (!passwordField) {
    console.error('Could not find password field');
    return false;
  }

  await passwordField.fill(credentials.password);
  console.log('Filled password field');

  // Check for "Remember me" checkbox and check it
  const rememberCheckbox = await page.$(
    'input[type="checkbox"][name*="remember" i], input[type="checkbox"][id*="remember" i]'
  );
  if (rememberCheckbox) {
    await rememberCheckbox.check();
    console.log('Checked "Remember me" checkbox');
  }

  const submitButton = await findFirst([
    'button[type="submit"]',
    'input[type="submit"]',
    'button:has-text("Sign in")',
    'button:has-text("Log in")',
    'button:has-text("Login")',
    'button:has-text("Submit")',
    '[role="button"]:has-text("Sign in")',
  ]);

  // Begin waiting BEFORE submitting: if the wait only starts after the click,
  // a fast navigation has already finished and the wait runs into its timeout.
  // Both outcomes are mapped to a boolean so the promise never rejects unhandled.
  const navigated = page
    .waitForNavigation({ timeout, waitUntil: 'domcontentloaded' })
    .then(
      () => true,
      () => false
    );

  if (submitButton) {
    await submitButton.click();
  } else {
    // Try pressing Enter as fallback
    await passwordField.press('Enter');
  }

  console.log('Submitted login form');

  if (await navigated) {
    return true;
  }

  // No navigation: check whether the page is reporting a login error.
  const errorMessages = await page.$$eval(
    '.error, .alert-danger, [role="alert"], .login-error',
    (els) => els.map((el) => el.textContent?.trim()).filter(Boolean)
  );

  if (errorMessages.length > 0) {
    console.error('Login error:', errorMessages.join(', '));
    return false;
  }

  return true; // Might have succeeded without navigation
}
|
||||
|
||||
/**
 * Drives the Microsoft SSO login pages: email step, password step, optional
 * "Stay signed in?" prompt, then waits for the redirect back to the application.
 *
 * @param page - Page that is on, or about to be redirected to, a Microsoft login host.
 * @param credentials - Account email and password.
 * @param timeout - Milliseconds to wait for each step.
 * @returns `true` once the page has left the Microsoft login host; `false` when
 *   no redirect to Microsoft occurs, an expected input never appears, a
 *   Conditional Access policy blocks the sign-in, or the final redirect times out.
 */
async function handleMsalLogin(
  page: Page,
  credentials: { username: string; password: string },
  timeout: number
): Promise<boolean> {
  console.log('Attempting Microsoft SSO login...');

  // The hosts that detectAuthType classifies as Microsoft login pages.
  const microsoftLoginHosts = [
    'login.microsoftonline.com',
    'login.live.com',
    'login.windows.net',
  ];
  const isMicrosoftLogin = (url: URL | string): boolean => {
    try {
      const hostname = (typeof url === 'string' ? new URL(url) : url).hostname.toLowerCase();
      return microsoftLoginHosts.some(
        (host) => hostname === host || hostname.endsWith(`.${host}`)
      );
    } catch {
      return false;
    }
  };

  // If not already on Microsoft login, wait for redirect
  if (!isMicrosoftLogin(page.url())) {
    try {
      await page.waitForURL((url) => isMicrosoftLogin(url), { timeout: 10000 });
    } catch {
      console.log('Not redirected to Microsoft login');
      return false;
    }
  }

  // waitForSelector rejects on timeout; map that to null so this function
  // reports failure through its boolean result instead of throwing.
  const emailInput = await page
    .waitForSelector('input[type="email"], input[name="loginfmt"]', { timeout })
    .catch(() => null);

  if (!emailInput) {
    console.error('Could not find email input on Microsoft login');
    return false;
  }

  // Fill email and submit
  await emailInput.fill(credentials.username);
  console.log('Filled email field');

  const nextButton = await page.$('input[type="submit"], button[type="submit"]');
  if (nextButton) {
    await nextButton.click();
  } else {
    await emailInput.press('Enter');
  }

  // Wait for password page
  try {
    await page.waitForSelector('input[type="password"], input[name="passwd"]', { timeout });
  } catch {
    // Might be using passwordless auth or different flow
    console.log('Password field not found - might be using different auth flow');
    return false;
  }

  // Fill password
  const passwordInput = await page.$('input[type="password"], input[name="passwd"]');
  if (!passwordInput) {
    console.error('Could not find password input');
    return false;
  }

  await passwordInput.fill(credentials.password);
  console.log('Filled password field');

  // Submit
  const signInButton = await page.$('input[type="submit"], button[type="submit"]');
  if (signInButton) {
    await signInButton.click();
  } else {
    await passwordInput.press('Enter');
  }

  // Handle "Stay signed in?" prompt
  try {
    const staySignedInButton = await page.waitForSelector(
      'input[value="Yes"], button:has-text("Yes")',
      { timeout: 5000 }
    );
    if (staySignedInButton) {
      await staySignedInButton.click();
      console.log('Clicked "Stay signed in" button');
    }
  } catch {
    // Prompt might not appear
  }

  // Check for Conditional Access Policy error
  const caError = await page.$('text=Conditional Access policy');
  if (caError) {
    console.error('Blocked by Conditional Access Policy');
    // Take screenshot for debugging
    await page.screenshot({ path: 'ca-policy-error.png' });
    console.log('Screenshot saved: ca-policy-error.png');
    return false;
  }

  // Wait for redirect away from Microsoft login
  try {
    await page.waitForURL((url) => !isMicrosoftLogin(url), { timeout });
    return true;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Heuristic check for an existing session.
 *
 * @param page - Page that has already been navigated.
 * @param targetUrl - URL the caller intended to reach.
 * @returns `true` only when the current URL starts with `targetUrl` and the
 *   DOM contains none of the common login-page markers; `false` otherwise
 *   (including when the page was redirected elsewhere).
 */
async function isAuthenticated(page: Page, targetUrl: string): Promise<boolean> {
  // Redirected away from the target: treat as not authenticated.
  if (!page.url().startsWith(targetUrl)) {
    return false;
  }

  // Still on the target URL: make sure it is not itself a login page.
  const showsLoginUi = await page.evaluate(() =>
    [
      'input[type="password"]',
      'form[action*="login"]',
      'form[action*="signin"]',
      '.login-form',
      '#login',
    ].some((selector) => document.querySelector(selector) !== null)
  );

  return !showsLoginUi;
}
|
||||
|
||||
/**
 * Opens `options.url` and makes sure a logged-in session exists.
 *
 * Flow:
 * 1. If the page already looks authenticated, report success immediately.
 * 2. If no credentials are available (neither in `options` nor in the
 *    `CAMOUFOX_USERNAME` / `CAMOUFOX_PASSWORD` environment variables), relaunch
 *    the browser in headed mode and wait for the user to log in manually.
 * 3. Otherwise run the Microsoft SSO or generic form handler, detecting the
 *    type first when `authType` is `'auto'`.
 *
 * Whichever browser is open when the function finishes is closed, on
 * success and on error alike.
 *
 * @param options - Target URL, auth type, and optional credentials, headless flag and timeout.
 * @returns Outcome of the attempt, including the final URL and the auth type used.
 */
export async function authenticate(options: AuthOptions): Promise<AuthResult> {
  // Reassigned when relaunching in headed mode, so the single `finally` below
  // always closes the browser that is actually open.
  let browser = await launchBrowser({ headless: options.headless ?? true });
  const timeout = options.timeout ?? 30000;

  try {
    const page = await browser.newPage();

    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if already authenticated
    if (await isAuthenticated(page, options.url)) {
      return {
        success: true,
        finalUrl: page.url(),
        authType: 'auto',
        message: 'Already authenticated (session persisted from profile)',
      };
    }

    // Get credentials
    const credentials = options.credentials ?? getCredentials();

    if (!credentials) {
      // No credentials - open interactive browser
      console.log('\nNo credentials provided. Opening browser for manual login...');
      console.log('Please complete the login process manually.');
      console.log('The session will be saved to your profile.');

      // Switch to headed mode for manual login. The first browser is closed
      // before the second is launched.
      await browser.close();
      browser = await launchBrowser({ headless: false });
      const interactivePage = await browser.newPage();
      await interactivePage.goto(options.url);

      await promptUser('\nPress Enter when you have completed login...');

      return {
        success: true,
        finalUrl: interactivePage.url(),
        authType: 'auto',
        message: 'Manual login completed - session saved to profile',
      };
    }

    // Detect auth type if auto
    let authType = options.authType;
    if (authType === 'auto') {
      authType = await detectAuthType(page);
      console.log(`Detected auth type: ${authType}`);
    }

    // Handle authentication based on type; anything that is not MSAL
    // (including an undetected 'auto') is treated as a generic form.
    let success = false;
    switch (authType) {
      case 'msal':
        success = await handleMsalLogin(page, credentials, timeout);
        break;
      case 'form':
      default:
        success = await handleFormLogin(page, credentials, timeout);
        break;
    }

    return {
      success,
      finalUrl: page.url(),
      authType,
      message: success
        ? `Authentication successful - session saved to profile`
        : 'Authentication failed',
    };
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
/**
 * Opens `url` and, when the page does not look authenticated, logs in with the
 * supplied or environment-provided credentials before returning.
 *
 * On success the caller owns the returned browser and must close it. On any
 * failure the browser is closed here before the error is rethrown.
 *
 * @param url - Page to open.
 * @param options - Optional credentials and headless flag (defaults to headless).
 * @returns The page (on `url` after login) and its browser context.
 * @throws Error when credentials are required but unavailable, when the login
 *   attempt fails, or when navigation fails.
 */
export async function navigateAuthenticated(
  url: string,
  options?: {
    credentials?: { username: string; password: string };
    headless?: boolean;
  }
): Promise<{ page: Page; browser: BrowserContext }> {
  const { page, browser } = await getPage({ headless: options?.headless ?? true });

  try {
    await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });

    // Check if we need to authenticate
    if (!(await isAuthenticated(page, url))) {
      console.log('Session expired or not authenticated. Attempting login...');

      // Get credentials
      const credentials = options?.credentials ?? getCredentials();

      if (!credentials) {
        throw new Error(
          'Authentication required but no credentials provided. ' +
            'Set CAMOUFOX_USERNAME and CAMOUFOX_PASSWORD environment variables.'
        );
      }

      // Detect and handle auth
      const authType = await detectAuthType(page);
      const success =
        authType === 'msal'
          ? await handleMsalLogin(page, credentials, 30000)
          : await handleFormLogin(page, credentials, 30000);

      if (!success) {
        throw new Error('Authentication failed');
      }

      // Navigate back to original URL if we were redirected
      if (!page.url().startsWith(url)) {
        await page.goto(url, { timeout: 60000, waitUntil: 'domcontentloaded' });
      }
    }

    return { page, browser };
  } catch (error) {
    // The caller never receives the browser on failure, so release it here.
    // A failure while closing is ignored so the original error is the one reported.
    await browser.close().catch(() => undefined);
    throw error;
  }
}
|
||||
|
||||
/**
 * CLI entry point: parses arguments, prints usage when `--help` is given or
 * `--url` is missing, validates the auth type, runs `authenticate`, prints the
 * result and exits with 0 on success or 1 on failure.
 *
 * `--username` / `--password` are resolved per field against the
 * `CAMOUFOX_USERNAME` / `CAMOUFOX_PASSWORD` environment variables, so a flag
 * for one and an environment variable for the other can be combined.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'type', 'username', 'password'],
    boolean: ['headless', 'help'],
    default: {
      type: 'auto',
      headless: false, // Default to headed for auth so user can see/interact
    },
    alias: {
      u: 'url',
      t: 'type',
      h: 'help',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Authentication Handler

Usage:
  npx tsx auth.ts --url <url> [options]

Options:
  -u, --url <url>        URL to authenticate (required)
  -t, --type <type>      Auth type: auto, form, or msal (default: auto)
  --username <user>      Username/email (or set CAMOUFOX_USERNAME env var)
  --password <pass>      Password (or set CAMOUFOX_PASSWORD env var)
  --headless <bool>      Run in headless mode (default: false for auth)
  -h, --help             Show this help message

Auth Types:
  auto      Auto-detect authentication type
  form      Generic username/password form
  msal      Microsoft SSO (login.microsoftonline.com)

Environment Variables:
  CAMOUFOX_USERNAME      Default username/email for authentication
  CAMOUFOX_PASSWORD      Default password for authentication

Examples:
  # Interactive login (no credentials, opens browser)
  npx tsx auth.ts --url "https://example.com/login"

  # Form login with credentials
  npx tsx auth.ts --url "https://example.com/login" --type form \\
    --username "user@example.com" --password "secret"

  # Microsoft SSO login
  CAMOUFOX_USERNAME=user@company.com CAMOUFOX_PASSWORD=secret \\
    npx tsx auth.ts --url "https://internal.company.com" --type msal

Notes:
  - Session is saved to ~/.camoufox-profile/ for persistence
  - After successful auth, subsequent browses will be authenticated
  - Use --headless false if you need to handle MFA manually
`);
    process.exit(args.help ? 0 : 1);
  }

  const authType = args.type as AuthType;
  if (!['auto', 'form', 'msal'].includes(authType)) {
    console.error(`Invalid auth type: ${authType}. Must be auto, form, or msal.`);
    process.exit(1);
  }

  try {
    const result = await authenticate({
      url: args.url,
      authType,
      // Merge CLI flags with the environment field by field; previously a lone
      // --username (or --password) was discarded unless both flags were given.
      credentials:
        getCredentials({ username: args.username, password: args.password }) ?? undefined,
      headless: args.headless,
    });

    console.log(`\nAuthentication result:`);
    console.log(`  Success: ${result.success}`);
    console.log(`  Auth type: ${result.authType}`);
    console.log(`  Final URL: ${result.finalUrl}`);
    console.log(`  Message: ${result.message}`);

    process.exit(result.success ? 0 : 1);
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the CLI only when the script path passed to Node names this file,
// so importing `authenticate` / `navigateAuthenticated` has no side effects.
const entryScript = process.argv[1];
const isMainModule = entryScript?.includes('auth.ts');

if (isMainModule) {
  main();
}
|
||||
195
skills/web-automation/opencode/scripts/browse.ts
Normal file
195
skills/web-automation/opencode/scripts/browse.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env npx tsx
|
||||
|
||||
/**
|
||||
* Browser launcher using Camoufox with persistent profile
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx browse.ts --url "https://example.com"
|
||||
* npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
|
||||
* npx tsx browse.ts --url "https://example.com" --headless false --wait 5000
|
||||
*/
|
||||
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { existsSync, mkdirSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import type { Page, BrowserContext } from 'playwright-core';
|
||||
|
||||
// Types

/** Options accepted by `browse`. */
type BrowseOptions = {
  /** Page to open. */
  url: string;
  /** Whether to run the browser headless. */
  headless?: boolean;
  /** Capture a full-page screenshot after loading. */
  screenshot?: boolean;
  /** Screenshot destination path. */
  output?: string;
  /** Extra delay in milliseconds after the page has loaded. */
  wait?: number;
  /** Navigation timeout in milliseconds. */
  timeout?: number;
  /** Keep the browser open instead of closing it when done. */
  interactive?: boolean;
};

/** Summary returned by `browse`. */
type BrowseResult = {
  title: string;
  /** URL after navigation (may differ from the requested one). */
  url: string;
  /** Set only when a screenshot was taken. */
  screenshotPath?: string;
};
|
||||
|
||||
/**
 * Resolves the browser profile directory and makes sure it exists.
 *
 * Uses `CAMOUFOX_PROFILE_PATH` when set to a non-empty value, otherwise
 * `~/.camoufox-profile`. The directory (including missing parents) is created
 * in both cases, so a custom path behaves the same as the default one.
 *
 * @returns Path of the profile directory.
 */
const getProfilePath = (): string => {
  const profileDir =
    process.env.CAMOUFOX_PROFILE_PATH || join(homedir(), '.camoufox-profile');

  // With `recursive: true` this is a no-op when the directory already exists.
  mkdirSync(profileDir, { recursive: true });

  return profileDir;
};
|
||||
|
||||
/**
 * Starts Camoufox on the persistent profile directory.
 *
 * Headless mode is taken from `options.headless` when provided; otherwise from
 * the `CAMOUFOX_HEADLESS` environment variable (headless only when it equals
 * `'true'`); and defaults to headless when neither is set.
 *
 * @param options - Optional headless override.
 * @returns The launched browser context.
 */
export async function launchBrowser(options: {
  headless?: boolean;
}): Promise<BrowserContext> {
  const profilePath = getProfilePath();

  let headless = options.headless;
  if (headless == null) {
    const envSetting = process.env.CAMOUFOX_HEADLESS;
    headless = envSetting ? envSetting === 'true' : true;
  }

  console.log(`Using profile: ${profilePath}`);
  console.log(`Headless mode: ${headless}`);

  const context = await Camoufox({
    user_data_dir: profilePath,
    headless,
  });
  return context;
}
|
||||
|
||||
/**
 * Opens a URL, optionally pauses and takes a full-page screenshot, and reports
 * the page title and final URL.
 *
 * In interactive mode the function never returns: it idles on a promise that
 * is never settled, and the browser is left open until the process is stopped.
 * Otherwise the browser is closed before returning, even on failure.
 *
 * @param options - Target URL plus optional headless/screenshot/wait/timeout/interactive settings.
 * @returns Title, final URL, and the screenshot path when one was taken.
 */
export async function browse(options: BrowseOptions): Promise<BrowseResult> {
  const { url, wait, screenshot, interactive } = options;

  const browser = await launchBrowser({ headless: options.headless });
  const page = await browser.newPage();

  try {
    console.log(`Navigating to: ${url}`);
    await page.goto(url, {
      timeout: options.timeout ?? 60000,
      waitUntil: 'domcontentloaded',
    });

    // Optional settle time for dynamic content.
    if (wait) {
      console.log(`Waiting ${wait}ms...`);
      await page.waitForTimeout(wait);
    }

    const title = await page.title();
    const result: BrowseResult = { title, url: page.url() };

    console.log(`Page title: ${result.title}`);
    console.log(`Final URL: ${result.url}`);

    if (screenshot) {
      const outputPath = options.output ?? 'screenshot.png';
      await page.screenshot({ path: outputPath, fullPage: true });
      result.screenshotPath = outputPath;
      console.log(`Screenshot saved: ${outputPath}`);
    }

    if (interactive) {
      console.log('\nInteractive mode - browser will stay open.');
      console.log('Press Ctrl+C to close.');
      // Never settles: keeps this call pending.
      await new Promise(() => {});
    }

    return result;
  } finally {
    // Interactive sessions keep their browser; everything else is cleaned up.
    if (!interactive) {
      await browser.close();
    }
  }
}
|
||||
|
||||
/**
 * Launch the browser and open one page for use by other scripts.
 *
 * The caller owns the returned `browser` and is responsible for closing it.
 *
 * @param options - Optional launch settings; `headless` is forwarded to
 *   `launchBrowser`.
 * @returns The new page together with the browser it belongs to.
 * @throws Whatever `launchBrowser` or `newPage` throws; in the latter case
 *   the browser is closed first.
 */
export async function getPage(options?: {
  headless?: boolean;
}): Promise<{ page: Page; browser: BrowserContext }> {
  const browser = await launchBrowser({ headless: options?.headless });

  try {
    const page = await browser.newPage();
    return { page, browser };
  } catch (error) {
    // The caller never receives the browser handle on this path, so it has
    // to be released here. A failure while closing must not mask the
    // original error, hence the best-effort catch.
    await browser.close().catch(() => {});
    throw error;
  }
}
|
||||
|
||||
/**
 * CLI entry point: parse the command-line flags, print the usage text when
 * `--help` is given or `--url` is missing, otherwise run `browse` and exit
 * with status 1 if it fails.
 */
async function main() {
  const argv = process.argv.slice(2);
  const args = parseArgs(argv, {
    string: ['url', 'output'],
    boolean: ['screenshot', 'headless', 'interactive', 'help'],
    default: {
      headless: true,
      screenshot: false,
      interactive: false,
    },
    alias: {
      u: 'url',
      o: 'output',
      s: 'screenshot',
      h: 'help',
      i: 'interactive',
    },
  });

  if (args.help || !args.url) {
    console.log(`
Web Browser with Camoufox

Usage:
npx tsx browse.ts --url <url> [options]

Options:
-u, --url <url> URL to navigate to (required)
-s, --screenshot Take a screenshot of the page
-o, --output <path> Output path for screenshot (default: screenshot.png)
--headless <bool> Run in headless mode (default: true)
--wait <ms> Wait time after page load in milliseconds
--timeout <ms> Navigation timeout (default: 60000)
-i, --interactive Keep browser open for manual interaction
-h, --help Show this help message

Examples:
npx tsx browse.ts --url "https://example.com"
npx tsx browse.ts --url "https://example.com" --screenshot --output page.png
npx tsx browse.ts --url "https://example.com" --headless false --interactive

Environment Variables:
CAMOUFOX_PROFILE_PATH Custom profile directory (default: ~/.camoufox-profile/)
CAMOUFOX_HEADLESS Default headless mode (true/false)
`);
    process.exit(args.help ? 0 : 1);
  }

  // The parser fills in `headless: true` whenever the flag is absent, which
  // would always override the CAMOUFOX_HEADLESS default advertised above.
  // Forward the flag only when it was actually typed, so that
  // `launchBrowser` can fall back to the environment variable.
  const headlessGiven = argv.some(
    (token) =>
      token === '--headless' ||
      token === '--no-headless' ||
      token.startsWith('--headless='),
  );

  try {
    await browse({
      url: args.url,
      headless: headlessGiven ? args.headless : undefined,
      screenshot: args.screenshot,
      output: args.output,
      wait: args.wait ? parseInt(args.wait, 10) : undefined,
      timeout: args.timeout ? parseInt(args.timeout, 10) : undefined,
      interactive: args.interactive,
    });
  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the CLI only when this file is the script being executed. The check
// compares the final path segment of argv[1] (either separator style) rather
// than searching for a substring, so importing this module from a script
// whose name merely contains "browse.ts" does not start the CLI.
const isMainModule = /(?:^|[\\/])browse\.ts$/.test(process.argv[1] ?? '');
if (isMainModule) {
  // Report anything that escapes main() instead of leaving a floating promise.
  main().catch((error: unknown) => {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  });
}
|
||||
29
skills/web-automation/opencode/scripts/package.json
Normal file
29
skills/web-automation/opencode/scripts/package.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"name": "web-automation-scripts",
|
||||
"version": "1.0.0",
|
||||
"description": "Web browsing and scraping scripts using Camoufox",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"browse": "tsx browse.ts",
|
||||
"scrape": "tsx scrape.ts",
|
||||
"fetch-browser": "npx camoufox-js fetch"
|
||||
},
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.5.0",
|
||||
"camoufox-js": "^0.8.5",
|
||||
"jsdom": "^24.0.0",
|
||||
"minimist": "^1.2.8",
|
||||
"playwright-core": "^1.40.0",
|
||||
"turndown": "^7.1.2",
|
||||
"turndown-plugin-gfm": "^1.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jsdom": "^21.1.6",
|
||||
"@types/minimist": "^1.2.5",
|
||||
"@types/turndown": "^5.0.4",
|
||||
"esbuild": "0.27.0",
|
||||
"tsx": "^4.7.0",
|
||||
"typescript": "^5.3.0"
|
||||
},
|
||||
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
|
||||
}
|
||||
1613
skills/web-automation/opencode/scripts/pnpm-lock.yaml
generated
Normal file
1613
skills/web-automation/opencode/scripts/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
212
skills/web-automation/opencode/scripts/scan-local-app.ts
Normal file
212
skills/web-automation/opencode/scripts/scan-local-app.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
import { writeFileSync } from 'fs';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
// Scan target and credentials. Each value can be overridden through the
// environment so the script is not tied to one machine; when a variable is
// unset or blank the previous hard-coded value is used.
//   SCAN_BASE_URL     - origin of the app under test (trailing slashes removed)
//   SCAN_USERNAME     - login e-mail
//   CAMOUFOX_PASSWORD - login password (empty when unset)
//   SCAN_REPORT_PATH  - where the markdown report is written
const baseUrl = (process.env.SCAN_BASE_URL?.trim() || 'http://localhost:3000').replace(/\/+$/, '');
const username = process.env.SCAN_USERNAME?.trim() || 'analyst@fhb.local';
const password = process.env.CAMOUFOX_PASSWORD ?? '';

const reportPath =
  process.env.SCAN_REPORT_PATH?.trim() ||
  '/Users/stefano.fiorini/Documents/projects/fhb-loan-spreading-pilot-a/docs/plans/2026-01-24-financials-analysis-redesign/web-automation-scan.md';
|
||||
|
||||
/** Outcome of a single navigation attempt. */
type NavResult = {
  /** URL that was passed to `goto`. */
  requestedUrl: string;
  /** URL reported by the page after the attempt. */
  url: string;
  /** Response status, or null when there was no response or navigation failed. */
  status: number | null;
  /** Page title; empty string when it could not be read. */
  title: string;
  /** Stringified navigation error; present only on failure. */
  error?: string;
};

/**
 * Navigate `page` to `url` and summarise the outcome without throwing on a
 * navigation failure.
 *
 * @param page - Object exposing `goto`, `url` and `title` (a Playwright-style page).
 * @param url - Address to load.
 * @returns A `NavResult`; `error` is set and `status` is null when `goto` rejected.
 */
async function gotoWithStatus(page: any, url: string): Promise<NavResult> {
  // A rejection is turned into a marker object so both paths share one shape.
  const outcome = await page
    .goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 })
    .catch((e: unknown) => ({ error: e }));

  const failure = outcome?.error;
  const finalUrl = page.url();

  if (failure) {
    const failedTitle = await page.title().catch(() => '');
    return {
      requestedUrl: url,
      url: finalUrl,
      status: null,
      title: failedTitle,
      error: String(failure),
    };
  }

  const status = outcome ? outcome.status() : null;
  const title = await page.title().catch(() => '');
  return { requestedUrl: url, url: finalUrl, status, title };
}
|
||||
|
||||
/**
 * Read the text of the first element matching `selector`, trimmed and with
 * every whitespace run collapsed to one space.
 *
 * @param page - Object exposing `locator(selector).first()` (a Playwright-style page).
 * @param selector - Selector to look up.
 * @returns The normalised text, or null when nothing matches, the text is
 *   empty, or reading it throws.
 */
async function textOrNull(page: any, selector: string): Promise<string | null> {
  const target = page.locator(selector).first();

  try {
    const matches = await target.count();
    if (matches === 0) {
      return null;
    }

    const raw = await target.textContent();
    if (!raw) {
      return null;
    }
    return raw.trim().replace(/\s+/g, ' ');
  } catch {
    // Lookup failures are treated the same as "no such element".
    return null;
  }
}
|
||||
|
||||
/**
 * Log in to the local app, pick a case, probe its journey routes, try the
 * "Run Analysis" flow, and write the findings to `reportPath` as markdown.
 *
 * The report is written when the scan runs to completion or when no case can
 * be selected. The browser is closed on every path.
 */
async function main() {
  const { page, browser } = await getPage({ headless: true });
  const lines: string[] = [];

  lines.push('# Web Automation Scan (local)');
  lines.push('');
  lines.push(`- Base URL: ${baseUrl}`);
  lines.push(`- Timestamp: ${new Date().toISOString()}`);
  lines.push('');

  try {
    lines.push('## Login');
    await gotoWithStatus(page, `${baseUrl}/login`);
    await page.locator('input[name="email"]').fill(username);
    await page.locator('input[name="password"]').fill(password);
    await page.locator('button[type="submit"]').click();
    // Fixed pause to let the post-login redirect settle before reading cookies.
    await page.waitForTimeout(2500);

    const cookies = await page.context().cookies();
    const sessionCookie = cookies.find((c: any) => c.name === 'fhb_session');
    lines.push(`- After submit URL: ${page.url()}`);
    lines.push(`- Has session cookie (fhb_session): ${Boolean(sessionCookie)}`);
    lines.push('');

    lines.push('## Demo Case');
    const casesNav = await gotoWithStatus(page, `${baseUrl}/cases`);
    lines.push(`- GET /cases → status ${casesNav.status ?? 'ERR'}, final ${casesNav.url}`);

    // A case id is a 36-character hex-and-dash segment following /cases/.
    const caseIdPattern = /\/cases\/([0-9a-f-]{36})/i;

    // SCAN_CASE_ID, when set, bypasses link discovery entirely.
    const envCaseId = process.env.SCAN_CASE_ID?.trim() || null;
    let selectedCaseId: string | null = envCaseId;

    if (!selectedCaseId) {
      const caseLinks = await page.$$eval('a[href^="/cases/"]', (anchors) =>
        anchors
          .map((a) => ({
            href: (a as HTMLAnchorElement).getAttribute('href') || '',
            text: (a.textContent || '').trim(),
          }))
          .filter((x) => x.href.includes('/cases/'))
      );

      // First choice: a link whose text names one of the demo cases, in this order.
      const preferredTitles = ['Demo - Strong Borrower', 'Demo - Weak Borrower', 'Demo - Incomplete'];
      for (const title of preferredTitles) {
        const match = caseLinks.find((l) => l.text.includes(title));
        const id = match?.href.match(caseIdPattern)?.[1];
        if (id) {
          selectedCaseId = id;
          break;
        }
      }

      // Fallback: the first link that carries any case id.
      if (!selectedCaseId) {
        for (const link of caseLinks) {
          const id = link.href.match(caseIdPattern)?.[1];
          if (id) {
            selectedCaseId = id;
            break;
          }
        }
      }
    }

    lines.push(`- Selected caseId: ${selectedCaseId ?? '(none found)'}`);

    if (!selectedCaseId) {
      lines.push('');
      lines.push('⚠️ Could not find a demo case link on /cases.');
      writeFileSync(reportPath, lines.join('\n') + '\n', 'utf-8');
      return;
    }

    const caseBase = `${baseUrl}/cases/${selectedCaseId}/journey`;

    lines.push('');
    lines.push('## Route Checks');

    const routesToCheck = [
      `${caseBase}`,
      `${caseBase}/financials`,
      `${caseBase}/financials/income`,
      `${caseBase}/analysis`,
      `${caseBase}/analysis/configure`,
      `${caseBase}/analysis/ai`,
      `${caseBase}/analysis/ai/detail`,
      `${caseBase}/spreads`,
    ];

    // Visited one at a time because every check navigates the same page.
    for (const url of routesToCheck) {
      const r = await gotoWithStatus(page, url);
      const h1 = await textOrNull(page, 'h1');
      const finalPath = r.url.startsWith(baseUrl) ? r.url.slice(baseUrl.length) : r.url;
      lines.push(`- ${url.slice(baseUrl.length)} → status ${r.status ?? 'ERR'} (final ${finalPath})${h1 ? `, h1="${h1}"` : ''}`);
    }

    lines.push('');
    lines.push('## Spreadsheet Analysis (UI)');
    await gotoWithStatus(page, `${caseBase}/analysis/configure`);

    const runButton = page.locator('button:has-text("Run Analysis")').first();
    // A button that cannot be inspected is treated as disabled.
    const disabled = await runButton.isDisabled().catch(() => true);
    lines.push(`- Run button disabled: ${disabled}`);

    if (!disabled) {
      await runButton.click();

      // Whichever happens first decides the outcome: redirect to the results
      // page, or an error alert. If the first promise to settle rejects, the
      // outcome is reported as a timeout.
      const resultsWait = page
        .waitForURL('**/journey/analysis/results**', { timeout: 180000 })
        .then(() => 'results' as const);
      const errorWait = page
        .locator('[role="alert"]')
        .filter({ hasText: 'Error' })
        .first()
        .waitFor({ timeout: 180000 })
        .then(() => 'error' as const);

      const outcome = await Promise.race([resultsWait, errorWait]).catch(() => 'timeout' as const);

      if (outcome === 'results') {
        await page.waitForTimeout(1500);
        lines.push(`- Results URL: ${page.url().replace(baseUrl, '')}`);

        const downloadHref = await page
          .locator('a[href*="/journey/analysis/download"]')
          .first()
          .getAttribute('href')
          .catch(() => null);

        if (downloadHref) {
          const dlUrl = downloadHref.startsWith('http') ? downloadHref : `${baseUrl}${downloadHref}`;
          const dlResp = await page.goto(dlUrl, { waitUntil: 'commit', timeout: 60000 }).catch(() => null);
          lines.push(
            `- Download route status: ${dlResp?.status() ?? 'ERR'} (Content-Type: ${dlResp?.headers()?.['content-type'] ?? 'n/a'})`
          );
        } else {
          lines.push('- Download link not found on results page');
        }
      } else if (outcome === 'error') {
        // textOrNull collapses whitespace with /\s+/. The inline version used
        // here previously had the backslash doubled, so it matched a literal
        // backslash followed by "s" and never normalised the message.
        const errorText = await textOrNull(page, '[role="alert"]');
        lines.push(`- Stayed on configure page; saw error callout: ${errorText ?? '(unable to read)'}`);
        lines.push('- Skipping download check because analysis did not complete.');
      } else {
        lines.push('- Timed out waiting for results or error after clicking Run Analysis.');
      }
    } else {
      lines.push('- Skipped running analysis because Run button was disabled.');
    }

    lines.push('');
    lines.push('## Notes');
    lines.push('- This scan avoids scraping financial values; it records route availability and basic headings.');

    writeFileSync(reportPath, lines.join('\n') + '\n', 'utf-8');
  } finally {
    await browser.close();
  }
}

// Log the failure and set a failing exit status without cutting off pending output.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
351
skills/web-automation/opencode/scripts/scrape.ts
Normal file
351
skills/web-automation/opencode/scripts/scrape.ts
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env -S npx tsx
|
||||
|
||||
/**
|
||||
* Web scraper that extracts content to markdown
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode main
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
|
||||
* npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".content"
|
||||
*/
|
||||
|
||||
import TurndownService from 'turndown';
|
||||
import * as turndownPluginGfm from 'turndown-plugin-gfm';
|
||||
import { Readability } from '@mozilla/readability';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import { writeFileSync } from 'fs';
|
||||
import parseArgs from 'minimist';
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
/**
 * How the HTML to convert is chosen:
 * `main` runs Readability over the page, `full` takes the body after removing
 * common non-content elements, `selector` takes one element's inner HTML.
 */
type ScrapeMode = 'main' | 'full' | 'selector';

/** Input accepted by `scrape`. */
interface ScrapeOptions {
  /** Address to navigate to. */
  url: string;
  /** Content selection strategy. */
  mode: ScrapeMode;
  /** CSS selector; required when `mode` is `selector`. */
  selector?: string;
  /** When set, the markdown is also written to this path. */
  output?: string;
  /** Keep hyperlinks in the markdown (`scrape` defaults this to true). */
  includeLinks?: boolean;
  /** Forwarded to the Turndown configuration (`scrape` defaults this to true). */
  includeTables?: boolean;
  /** Keep images in the markdown (`scrape` defaults this to false). */
  includeImages?: boolean;
  /** Run the browser headless (`scrape` defaults this to true). */
  headless?: boolean;
  /** Milliseconds to pause after navigation before reading the page. */
  wait?: number;
}

/** Value returned by `scrape`. */
interface ScrapeResult {
  /** Title used for the document. */
  title: string;
  /** URL reported by the page after navigation. */
  url: string;
  /** Final markdown, including the metadata comments. */
  markdown: string;
  /** Author line from Readability; only set in `main` mode. */
  byline?: string;
  /** Excerpt from Readability; only set in `main` mode. */
  excerpt?: string;
}
|
||||
|
||||
/**
 * Build the Turndown converter used to turn scraped HTML into markdown.
 *
 * @param options.includeLinks - Keep `<a>` elements as markdown links; when
 *   falsy only the link text is kept.
 * @param options.includeTables - Convert tables with the GFM table plugin.
 *   Only an explicit `false` disables this; tables are then dropped.
 * @param options.includeImages - Keep `<img>` elements; when falsy they are
 *   removed.
 * @returns A configured `TurndownService`.
 */
function createTurndownService(options: {
  includeLinks?: boolean;
  includeTables?: boolean;
  includeImages?: boolean;
}): TurndownService {
  const turndown = new TurndownService({
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '*',
    strongDelimiter: '**',
    linkStyle: 'inlined',
  });

  if (options.includeTables === false) {
    // Honour the flag: load the GFM pieces other than tables, and drop
    // <table> elements so their cells do not spill into the text.
    turndown.use([turndownPluginGfm.strikethrough, turndownPluginGfm.taskListItems]);
    turndown.remove('table');
  } else {
    // Add GFM support (tables, strikethrough, task lists)
    turndown.use(turndownPluginGfm.gfm);
  }

  // Custom rule for code blocks with language detection: a <pre> whose first
  // child is <code class="language-xyz"> becomes a fenced block tagged "xyz".
  turndown.addRule('codeBlockWithLanguage', {
    filter: (node) => {
      return (
        node.nodeName === 'PRE' &&
        node.firstChild?.nodeName === 'CODE'
      );
    },
    replacement: (_content, node) => {
      const codeNode = node.firstChild as HTMLElement;
      const className = codeNode.getAttribute('class') || '';
      const langMatch = className.match(/language-(\w+)/);
      const lang = langMatch ? langMatch[1] : '';
      // Use the raw text rather than the already-converted content.
      const code = codeNode.textContent || '';
      return `\n\n\`\`\`${lang}\n${code}\n\`\`\`\n\n`;
    },
  });

  // Remove images if not included
  if (!options.includeImages) {
    turndown.addRule('removeImages', {
      filter: 'img',
      replacement: () => '',
    });
  }

  // Remove links but keep text if not included
  if (!options.includeLinks) {
    turndown.addRule('removeLinks', {
      filter: 'a',
      replacement: (content) => content,
    });
  }

  // Remove script, style, nav, footer, aside and noscript elements
  turndown.remove(['script', 'style', 'nav', 'footer', 'aside', 'noscript']);

  return turndown;
}
|
||||
|
||||
/**
 * Run Readability over a page's HTML and return the extracted article.
 *
 * @param html - Full page markup.
 * @param url - Page address, handed to JSDOM as the document URL.
 * @returns The article HTML and title, plus byline and excerpt when
 *   Readability found them (empty values become `undefined`).
 * @throws Error when Readability cannot produce an article.
 */
function extractMainContent(html: string, url: string): {
  content: string;
  title: string;
  byline?: string;
  excerpt?: string;
} {
  const parsedDocument = new JSDOM(html, { url }).window.document;
  const article = new Readability(parsedDocument).parse();

  if (!article) {
    throw new Error('Could not extract main content from page');
  }

  const { content, title, byline, excerpt } = article;
  return {
    content,
    title,
    byline: byline || undefined,
    excerpt: excerpt || undefined,
  };
}
|
||||
|
||||
/**
 * Load a URL in the browser and convert the selected content to markdown.
 *
 * The output starts with HTML comments recording the source URL, optional
 * author and excerpt, and the scrape time, followed by a blank line and the
 * document. A `# title` heading is prepended when the converted markdown does
 * not already start with a level-1 heading.
 *
 * @param options - See `ScrapeOptions`.
 * @returns Title, final URL, markdown, and (in `main` mode) byline/excerpt.
 * @throws Error when `selector` mode has no selector, the selector matches
 *   nothing, or Readability cannot extract an article.
 */
export async function scrape(options: ScrapeOptions): Promise<ScrapeResult> {
  // Fail fast: there is no point launching a browser for a request that
  // cannot succeed.
  if (options.mode === 'selector' && !options.selector) {
    throw new Error('Selector mode requires --selector option');
  }

  const { page, browser } = await getPage({ headless: options.headless ?? true });

  try {
    // Navigate to URL
    console.log(`Navigating to: ${options.url}`);
    await page.goto(options.url, {
      timeout: 60000,
      waitUntil: 'domcontentloaded',
    });

    // Wait if specified
    if (options.wait) {
      console.log(`Waiting ${options.wait}ms for dynamic content...`);
      await page.waitForTimeout(options.wait);
    }

    const pageTitle = await page.title();
    const pageUrl = page.url();

    let html: string;
    let title = pageTitle;
    let byline: string | undefined;
    let excerpt: string | undefined;

    // Get HTML based on mode
    switch (options.mode) {
      case 'main': {
        // Get full page HTML and extract with Readability
        const fullHtml = await page.content();
        const extracted = extractMainContent(fullHtml, pageUrl);
        html = extracted.content;
        title = extracted.title || pageTitle;
        byline = extracted.byline;
        excerpt = extracted.excerpt;
        break;
      }

      case 'selector': {
        const selector = options.selector;
        if (!selector) {
          // Unreachable after the up-front check; kept so the type is narrowed.
          throw new Error('Selector mode requires --selector option');
        }
        const element = await page.$(selector);
        if (!element) {
          throw new Error(`Selector not found: ${selector}`);
        }
        html = await element.innerHTML();
        break;
      }

      case 'full':
      default: {
        // Get body content, excluding common non-content elements. This
        // edits the live DOM of the page, which is discarded afterwards.
        html = await page.evaluate(() => {
          const selectorsToRemove = [
            'script', 'style', 'noscript', 'iframe',
            'nav', 'header', 'footer', '.cookie-banner',
            '.advertisement', '.ads', '#ads', '.social-share',
            '.comments', '#comments', '.sidebar'
          ];

          selectorsToRemove.forEach(selector => {
            document.querySelectorAll(selector).forEach(el => el.remove());
          });

          return document.body.innerHTML;
        });
        break;
      }
    }

    // Convert to markdown
    const turndown = createTurndownService({
      includeLinks: options.includeLinks ?? true,
      includeTables: options.includeTables ?? true,
      includeImages: options.includeImages ?? false,
    });

    let markdown = turndown.turndown(html);

    // Add title as H1 if not already present
    if (!markdown.startsWith('# ')) {
      markdown = `# ${title}\n\n${markdown}`;
    }

    // Page-supplied text must not be able to close the comment early.
    const commentSafe = (text: string): string => text.replace(/-->/g, '--&gt;');

    // Add metadata header. Optional entries are spliced in rather than
    // filtered afterwards: a truthiness filter would also discard the empty
    // string meant to separate the header from the document.
    const metadataLines = [
      `<!-- Scraped from: ${pageUrl} -->`,
      ...(byline ? [`<!-- Author: ${commentSafe(byline)} -->`] : []),
      ...(excerpt ? [`<!-- Excerpt: ${commentSafe(excerpt)} -->`] : []),
      `<!-- Scraped at: ${new Date().toISOString()} -->`,
    ];

    markdown = `${metadataLines.join('\n')}\n\n${markdown}`;

    // Clean up excessive whitespace
    markdown = markdown
      .replace(/\n{4,}/g, '\n\n\n')
      .replace(/[ \t]+$/gm, '')
      .trim();

    const result: ScrapeResult = {
      title,
      url: pageUrl,
      markdown,
      byline,
      excerpt,
    };

    // Save to file if output specified
    if (options.output) {
      writeFileSync(options.output, markdown, 'utf-8');
      console.log(`Markdown saved to: ${options.output}`);
    }

    return result;
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
/**
 * CLI entry point: parse flags, print the usage text when `--help` is given
 * or `--url` is missing, validate `--mode`, then run `scrape` and print a
 * summary (plus the markdown itself when no `--output` was given). Exits with
 * status 1 on an invalid mode or a failed scrape.
 */
async function main() {
  const usage = `
Web Scraper - Extract content to Markdown

Usage:
npx tsx scrape.ts --url <url> [options]

Options:
-u, --url <url> URL to scrape (required)
-m, --mode <mode> Scrape mode: main, full, or selector (default: main)
-s, --selector <sel> CSS selector for selector mode
-o, --output <path> Output file path for markdown
--headless <bool> Run in headless mode (default: true)
--wait <ms> Wait time for dynamic content
--links Include links in output (default: true)
--tables Include tables in output (default: true)
--images Include images in output (default: false)
-h, --help Show this help message

Scrape Modes:
main Extract main article content using Readability (best for articles)
full Full page content with common elements removed
selector Extract specific element by CSS selector

Examples:
npx tsx scrape.ts --url "https://docs.example.com/guide" --mode main
npx tsx scrape.ts --url "https://example.com" --mode full --output page.md
npx tsx scrape.ts --url "https://example.com" --mode selector --selector ".api-docs"
npx tsx scrape.ts --url "https://example.com" --mode main --no-links --output clean.md

Output Format:
- GitHub Flavored Markdown (tables, strikethrough, task lists)
- Proper heading hierarchy
- Code blocks with language detection
- Metadata comments at top (source URL, date)
`;

  const args = parseArgs(process.argv.slice(2), {
    string: ['url', 'mode', 'selector', 'output'],
    boolean: ['headless', 'links', 'tables', 'images', 'help'],
    default: {
      mode: 'main',
      headless: true,
      links: true,
      tables: true,
      images: false,
    },
    alias: {
      u: 'url',
      m: 'mode',
      s: 'selector',
      o: 'output',
      h: 'help',
    },
  });

  // Asking for help is a success; a missing URL is a usage error.
  if (args.help) {
    console.log(usage);
    process.exit(0);
  }
  if (!args.url) {
    console.log(usage);
    process.exit(1);
  }

  const mode = args.mode as ScrapeMode;
  const knownModes = ['main', 'full', 'selector'];
  if (!knownModes.includes(mode)) {
    console.error(`Invalid mode: ${mode}. Must be main, full, or selector.`);
    process.exit(1);
  }

  const waitMs = args.wait ? parseInt(args.wait, 10) : undefined;

  try {
    const result = await scrape({
      url: args.url,
      mode,
      selector: args.selector,
      output: args.output,
      includeLinks: args.links,
      includeTables: args.tables,
      includeImages: args.images,
      headless: args.headless,
      wait: waitMs,
    });

    // Print result summary
    console.log(`\nScrape complete:`);
    console.log(` Title: ${result.title}`);
    console.log(` URL: ${result.url}`);
    if (result.byline) {
      console.log(` Author: ${result.byline}`);
    }
    console.log(` Markdown length: ${result.markdown.length} chars`);

    // Print markdown if not saved to file
    if (!args.output) {
      console.log('\n--- Markdown Output ---\n');
      console.log(result.markdown);
    }
  } catch (error) {
    const detail = error instanceof Error ? error.message : error;
    console.error('Error:', detail);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the CLI only when this file is the script being executed. The check
// compares the final path segment of argv[1] (either separator style) rather
// than searching for a substring, so importing this module from a script
// whose name merely contains "scrape.ts" does not start the CLI.
const isMainModule = /(?:^|[\\/])scrape\.ts$/.test(process.argv[1] ?? '');
if (isMainModule) {
  // Report anything that escapes main() instead of leaving a floating promise.
  main().catch((error: unknown) => {
    console.error('Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  });
}
|
||||
39
skills/web-automation/opencode/scripts/test-full.ts
Normal file
39
skills/web-automation/opencode/scripts/test-full.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox headless with the persistent profile, load
 * github.com, print the final URL and title, and save a screenshot to
 * /tmp/github-test.png.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with full options...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
    // humanize: 1.5, // Test without this first
    // geoip: true, // Test without this first
    // enable_cache: true,
    // block_webrtc: false,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://github.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());

    await page.screenshot({ path: '/tmp/github-test.png' });
    console.log('Screenshot saved');
  } finally {
    // Close the browser even when navigation or the screenshot fails.
    await browser.close();
  }

  console.log('Done');
}

// A failed run must be visible to the caller through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
22
skills/web-automation/opencode/scripts/test-minimal.ts
Normal file
22
skills/web-automation/opencode/scripts/test-minimal.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox headless with no other options, load
 * example.com, and print the final URL and title.
 */
async function test() {
  console.log('Launching Camoufox with minimal config...');

  const browser = await Camoufox({
    headless: true,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    // Close the browser even when navigation fails.
    await browser.close();
  }

  console.log('Done');
}

// A failed run must be visible to the caller through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
32
skills/web-automation/opencode/scripts/test-profile.ts
Normal file
32
skills/web-automation/opencode/scripts/test-profile.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { Camoufox } from 'camoufox-js';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { mkdirSync, existsSync } from 'fs';
|
||||
|
||||
/**
 * Smoke test: launch Camoufox headless with the persistent profile
 * directory, load example.com, and print the final URL and title.
 */
async function test() {
  const profilePath = join(homedir(), '.camoufox-profile');
  if (!existsSync(profilePath)) {
    mkdirSync(profilePath, { recursive: true });
  }

  console.log('Profile path:', profilePath);
  console.log('Launching with user_data_dir...');

  const browser = await Camoufox({
    headless: true,
    user_data_dir: profilePath,
  });

  try {
    console.log('Browser launched');
    const page = await browser.newPage();
    console.log('Page created');

    await page.goto('https://example.com', { timeout: 30000 });
    console.log('Navigated to:', page.url());
    console.log('Title:', await page.title());
  } finally {
    // Close the browser even when navigation fails.
    await browser.close();
  }

  console.log('Done');
}

// A failed run must be visible to the caller through the exit status.
test().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -0,0 +1,78 @@
|
||||
import { getPage } from './browse.js';
|
||||
|
||||
/** Computed colours read from one sampled element. */
type SampledColors = { background: string; color: string; border: string };

/** Shape of the JSON document this script prints. */
type Extracted = {
  /** Document title reported by the page. */
  title: string;
  /** Address reported by the page. */
  url: string;
  /** `[custom property name, value]` pairs whose value looks like a colour. */
  colorVars: Array<[string, string]>;
  /** Colours per sampled element name; null when the element was not found. */
  samples: Record<string, SampledColors | null>;
};
|
||||
|
||||
// "#" followed by exactly 3, 4, 6 or 8 hex digits.
const HEX_COLOR_PATTERN = /#(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})\b/i;
// CSS colour functions. The legacy alpha spellings rgba()/hsla() are covered
// explicitly: "rgb(" does not occur inside "rgba(".
const COLOR_FUNCTION_PATTERN = /\b(?:rgba?|hsla?|hwb|lab|lch|oklab|oklch)\(/i;

/**
 * Decide whether a CSS value contains a colour literal.
 *
 * @param value - Raw property value, e.g. the text of a custom property.
 * @returns true when the value contains a hex colour or a call to one of the
 *   recognised colour functions; named colours are not detected.
 */
function isColorValue(value: string): boolean {
  return HEX_COLOR_PATTERN.test(value) || COLOR_FUNCTION_PATTERN.test(value);
}
|
||||
|
||||
/**
 * Load a page (first CLI argument, default https://www.firsthorizon.com),
 * collect the custom properties on the root element and the computed colours
 * of a few representative elements, and print the result to stdout as JSON.
 */
async function main() {
  const targetUrl = process.argv[2] ?? 'https://www.firsthorizon.com';

  // Evaluated inside the page; passed as a string, so it is sent verbatim.
  const extractionScript = `(() => {
const rootStyles = getComputedStyle(document.documentElement);
const vars = {};
for (let i = 0; i < rootStyles.length; i++) {
const prop = rootStyles[i];
if (prop && prop.startsWith('--')) {
vars[prop] = rootStyles.getPropertyValue(prop).trim();
}
}

const pick = (selector) => {
const el = document.querySelector(selector);
if (!el) return null;
const cs = getComputedStyle(el);
return {
background: cs.backgroundColor,
color: cs.color,
border: cs.borderColor,
};
};

return {
title: document.title,
url: location.href,
vars,
samples: {
body: pick('body'),
header: pick('header'),
nav: pick('nav'),
primaryButton: pick('button, [role="button"], a[role="button"], a.button, .button'),
link: pick('a'),
},
};
})()`;

  const { page, browser } = await getPage({ headless: true });
  try {
    await page.goto(targetUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // Fixed pause after the DOM is ready, before styles are read.
    await page.waitForTimeout(5000);

    const data = await page.evaluate(extractionScript);

    // Keep only the custom properties that hold a colour, ordered by name.
    const allVars = Object.entries(data.vars) as Array<[string, string]>;
    const colorVars = allVars.filter(([, v]) => v && isColorValue(v));
    colorVars.sort((a, b) => a[0].localeCompare(b[0]));

    const out: Extracted = {
      title: data.title,
      url: data.url,
      colorVars,
      samples: data.samples,
    };

    process.stdout.write(JSON.stringify(out, null, 2));
  } finally {
    await browser.close();
  }
}

// Print the failure and stop with a non-zero status.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|
||||
16
skills/web-automation/opencode/scripts/tsconfig.json
Normal file
16
skills/web-automation/opencode/scripts/tsconfig.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"resolveJsonModule": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["*.ts"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
8
skills/web-automation/opencode/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
8
skills/web-automation/opencode/scripts/turndown-plugin-gfm.d.ts
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Ambient typings for the `turndown-plugin-gfm` package.
 *
 * Every export is a Turndown plugin: a function that receives the service
 * and registers rules on it. Pass one (or an array of them) to
 * `TurndownService#use`.
 */
declare module 'turndown-plugin-gfm' {
  import TurndownService from 'turndown';

  /** Applies all of the plugins below in one call. */
  export function gfm(turndownService: TurndownService): void;
  /** Converts highlighted code blocks to fenced code blocks. */
  export function highlightedCodeBlock(turndownService: TurndownService): void;
  /** Converts struck-through text to GFM strikethrough. */
  export function strikethrough(turndownService: TurndownService): void;
  /** Converts HTML tables to GFM tables. */
  export function tables(turndownService: TurndownService): void;
  /** Converts checkbox list items to GFM task list items. */
  export function taskListItems(turndownService: TurndownService): void;
}
|
||||
Reference in New Issue
Block a user