From a6dffe0091f381ae9811ac4a48963b1a07b75020 Mon Sep 17 00:00:00 2001
From: Luke
Date: Wed, 11 Feb 2026 22:01:38 +0000
Subject: [PATCH] Add general flow runner and document natural-language usage

- Add flow.ts for go/click/type/press/wait/screenshot flows
- Update web-automation docs with natural-language examples
- Update SKILL.md quick reference for flow.ts
- Remove temp script files
---
 docs/web-automation.md                        |  23 ++
 skills/web-automation/SKILL.md                |  11 +
 skills/web-automation/scripts/flow.ts         | 287 ++++++++++++++++++
 .../tmp-extract-firsthorizon-colors.ts        |  78 -------
 4 files changed, 321 insertions(+), 78 deletions(-)
 create mode 100644 skills/web-automation/scripts/flow.ts
 delete mode 100644 skills/web-automation/scripts/tmp-extract-firsthorizon-colors.ts

diff --git a/docs/web-automation.md b/docs/web-automation.md
index ce887a4..91587d5 100644
--- a/docs/web-automation.md
+++ b/docs/web-automation.md
@@ -40,4 +40,27 @@ npx tsx scrape.ts --url "https://example.com" --mode main --output page.md
 
 # Authenticate flow
 npx tsx auth.ts --url "https://example.com/login"
+
+# General natural-language browser flow
+npx tsx flow.ts --instruction 'go to https://search.fiorinis.com then type "pippo" then press enter then wait 2s'
+```
+
+## Natural-language flow runner (`flow.ts`)
+
+Use `flow.ts` when you want a general command style like:
+
+- "go to this site"
+- "find this button and click it"
+- "type this and press enter"
+
+### Example
+
+```bash
+npx tsx flow.ts --instruction 'go to https://example.com then click on "Sign in" then type "stef@example.com" in #email then press enter'
+```
+
+You can also use JSON steps for deterministic runs:
+
+```bash
+npx tsx flow.ts --steps '[{"action":"goto","url":"https://example.com"},{"action":"click","text":"Sign in"}]'
 ```
diff --git a/skills/web-automation/SKILL.md b/skills/web-automation/SKILL.md
index dbb5bdf..f85fb55 100644
--- a/skills/web-automation/SKILL.md
+++ b/skills/web-automation/SKILL.md
@@ -40,6 +40,17 @@ If any check fails, stop and return:
 - Browse page: `npx tsx browse.ts --url "https://example.com"`
 - Scrape markdown: `npx tsx scrape.ts --url "https://example.com" --mode main --output page.md`
 - Authenticate: `npx tsx auth.ts --url "https://example.com/login"`
+- Natural-language flow: `npx tsx flow.ts --instruction 'go to https://example.com then click on "Login" then type "user@example.com" in #email then press enter'`
+
+## General flow runner
+
+Use `flow.ts` for multi-step commands in plain language (go/click/type/press/wait/screenshot).
+
+Example:
+
+```bash
+npx tsx flow.ts --instruction 'go to https://search.fiorinis.com then type "pippo" then press enter then wait 2s'
+```
 
 ## Notes
 
diff --git a/skills/web-automation/scripts/flow.ts b/skills/web-automation/scripts/flow.ts
new file mode 100644
index 0000000..90dc3ac
--- /dev/null
+++ b/skills/web-automation/scripts/flow.ts
@@ -0,0 +1,287 @@
+#!/usr/bin/env npx tsx
+
+import parseArgs from 'minimist';
+import type { Page } from 'playwright-core';
+import { launchBrowser } from './browse';
+
+/** One step of a browser flow, parsed from --instruction text or supplied as --steps JSON. */
+type Step =
+  | { action: 'goto'; url: string }
+  | { action: 'click'; selector?: string; text?: string }
+  | { action: 'type'; selector?: string; text: string }
+  | { action: 'press'; key: string; selector?: string }
+  | { action: 'wait'; ms: number }
+  | { action: 'screenshot'; path: string }
+  | { action: 'extract'; selector: string; count?: number };
+
+/** Lower-cased key names as written in instructions, mapped to the names passed to Playwright. */
+const KEY_ALIASES = new Map<string, string>([
+  ['enter', 'Enter'],
+  ['return', 'Enter'],
+  ['tab', 'Tab'],
+  ['escape', 'Escape'],
+  ['esc', 'Escape'],
+  ['space', 'Space'],
+  ['backspace', 'Backspace'],
+  ['delete', 'Delete'],
+  ['up', 'ArrowUp'],
+  ['down', 'ArrowDown'],
+  ['left', 'ArrowLeft'],
+  ['right', 'ArrowRight'],
+]);
+
+/**
+ * Maps a key name from an instruction to the name handed to Playwright.
+ * Known aliases match case-insensitively, an empty name means Enter, and
+ * anything unrecognised is returned unchanged.
+ */
+function normalizeKey(k: string): string {
+  if (!k) return 'Enter';
+  return KEY_ALIASES.get(k.toLowerCase()) ?? k;
+}
+
+/**
+ * Splits an instruction into step phrases on the word "then" or on ";".
+ * Separators inside double-quoted text are ignored, so `type "now and then"`
+ * stays a single step.
+ */
+function splitInstructions(instruction: string): string[] {
+  const parts: string[] = [];
+  let start = 0;
+  for (const m of instruction.matchAll(/"[^"]*"|\bthen\b|;/gi)) {
+    // A quoted run is payload (text to type or click), never a separator.
+    if (m[0].startsWith('"')) continue;
+    const at = m.index ?? 0;
+    parts.push(instruction.slice(start, at));
+    start = at + m[0].length;
+  }
+  parts.push(instruction.slice(start));
+  return parts.map((s) => s.trim()).filter(Boolean);
+}
+
+/**
+ * Converts a natural-language instruction into executable steps.
+ *
+ * @throws Error when a phrase matches none of the supported step forms.
+ */
+function parseInstruction(instruction: string): Step[] {
+  const parts = splitInstructions(instruction);
+  const steps: Step[] = [];
+
+  for (const p of parts) {
+    // go to https://...
+    const goto = p.match(/^(?:go to|open|navigate to)\s+(https?:\/\/\S+)/i);
+    if (goto) {
+      steps.push({ action: 'goto', url: goto[1] });
+      continue;
+    }
+
+    // click on "text" or click #selector
+    const clickText = p.match(/^click(?: on)?\s+"([^"]+)"/i);
+    if (clickText) {
+      steps.push({ action: 'click', text: clickText[1] });
+      continue;
+    }
+    const clickSelector = p.match(/^click(?: on)?\s+(#[\w-]+|\.[\w-]+|[a-z]+\[[^\]]+\])/i);
+    if (clickSelector) {
+      steps.push({ action: 'click', selector: clickSelector[1] });
+      continue;
+    }
+
+    // type "text" [in selector]
+    const typeInto = p.match(/^type\s+"([^"]+)"\s+in\s+(.+)$/i);
+    if (typeInto) {
+      steps.push({ action: 'type', text: typeInto[1], selector: typeInto[2].trim() });
+      continue;
+    }
+    const typeOnly = p.match(/^type\s+"([^"]+)"$/i);
+    if (typeOnly) {
+      steps.push({ action: 'type', text: typeOnly[1] });
+      continue;
+    }
+
+    // press enter [in selector]
+    const pressIn = p.match(/^press\s+(\w+)\s+in\s+(.+)$/i);
+    if (pressIn) {
+      steps.push({ action: 'press', key: normalizeKey(pressIn[1]), selector: pressIn[2].trim() });
+      continue;
+    }
+    const pressOnly = p.match(/^press\s+(\w+)$/i);
+    if (pressOnly) {
+      steps.push({ action: 'press', key: normalizeKey(pressOnly[1]) });
+      continue;
+    }
+
+    // wait 2s / wait 500ms
+    const waitS = p.match(/^wait\s+(\d+)\s*s(?:ec(?:onds?)?)?$/i);
+    if (waitS) {
+      steps.push({ action: 'wait', ms: parseInt(waitS[1], 10) * 1000 });
+      continue;
+    }
+    const waitMs = p.match(/^wait\s+(\d+)\s*ms$/i);
+    if (waitMs) {
+      steps.push({ action: 'wait', ms: parseInt(waitMs[1], 10) });
+      continue;
+    }
+
+    // screenshot path
+    const shot = p.match(/^screenshot(?: to)?\s+(.+)$/i);
+    if (shot) {
+      steps.push({ action: 'screenshot', path: shot[1].trim() });
+      continue;
+    }
+
+    throw new Error(`Could not parse step: "${p}"`);
+  }
+
+  return steps;
+}
+
+/**
+ * Parses the --steps JSON payload and checks its outer shape.
+ * Only the array and each entry's string "action" are checked here; an
+ * unsupported action is rejected later by runSteps.
+ *
+ * @throws Error when the payload is not valid JSON or not an array of step objects.
+ */
+function parseJsonSteps(raw: string): Step[] {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (e) {
+    throw new Error(`--steps is not valid JSON: ${e instanceof Error ? e.message : String(e)}`);
+  }
+  if (!Array.isArray(parsed)) {
+    throw new Error('--steps must be a JSON array of step objects');
+  }
+  parsed.forEach((entry: unknown, i: number) => {
+    const action =
+      typeof entry === 'object' && entry !== null ? (entry as { action?: unknown }).action : undefined;
+    if (typeof action !== 'string') {
+      throw new Error(`--steps[${i}] must be an object with a string "action"`);
+    }
+  });
+  return parsed as Step[];
+}
+
+/** Clicks the first match among buttons and links named `text` and elements containing `text`. */
+async function clickByText(page: Page, text: string) {
+  const loc = page.getByRole('button', { name: text }).or(page.getByRole('link', { name: text })).or(page.getByText(text));
+  await loc.first().click({ timeout: 15000 });
+}
+
+/**
+ * Fills `text` into the first element matching `selector`. Without a selector it
+ * targets the first match of a fixed list: a "q" input, search/text inputs, or a textarea.
+ */
+async function typeInBestTarget(page: Page, text: string, selector?: string) {
+  if (selector) {
+    await page.locator(selector).first().click({ timeout: 10000 });
+    await page.locator(selector).first().fill(text);
+    return;
+  }
+  const loc = page.locator('input[name="q"], input[type="search"], input[type="text"], textarea').first();
+  await loc.click({ timeout: 10000 });
+  await loc.fill(text);
+}
+
+/** Presses `key` on the first element matching `selector`, or on the page keyboard when none is given. */
+async function pressOnTarget(page: Page, key: string, selector?: string) {
+  if (selector) {
+    await page.locator(selector).first().press(key);
+    return;
+  }
+  await page.keyboard.press(key);
+}
+
+/**
+ * Runs the steps in order against `page`; the first failing step aborts the run.
+ *
+ * @throws Error for a click step without selector/text or a step with an unsupported action.
+ */
+async function runSteps(page: Page, steps: Step[]) {
+  for (const step of steps) {
+    switch (step.action) {
+      case 'goto':
+        await page.goto(step.url, { waitUntil: 'domcontentloaded', timeout: 60000 });
+        break;
+      case 'click':
+        if (step.selector) await page.locator(step.selector).first().click({ timeout: 15000 });
+        else if (step.text) await clickByText(page, step.text);
+        else throw new Error('click step missing selector/text');
+        break;
+      case 'type':
+        await typeInBestTarget(page, step.text, step.selector);
+        break;
+      case 'press':
+        await pressOnTarget(page, step.key, step.selector);
+        break;
+      case 'wait':
+        await page.waitForTimeout(step.ms);
+        break;
+      case 'screenshot':
+        await page.screenshot({ path: step.path, fullPage: true });
+        break;
+      case 'extract': {
+        const items = await page.locator(step.selector).allTextContents();
+        const out = items.slice(0, step.count ?? items.length).map((t) => t.trim()).filter(Boolean);
+        console.log(JSON.stringify(out, null, 2));
+        break;
+      }
+      default: {
+        // Reachable only with --steps JSON, whose actions are not checked at parse time.
+        const unsupported: never = step;
+        throw new Error(`Unknown step: ${JSON.stringify(unsupported)}`);
+      }
+    }
+  }
+}
+
+/** CLI entry point: parses flags, builds the step list, then runs it in a fresh browser page. */
+async function main() {
+  const args = parseArgs(process.argv.slice(2), {
+    string: ['instruction', 'steps'],
+    boolean: ['headless', 'help'],
+    default: { headless: true },
+    alias: { i: 'instruction', s: 'steps', h: 'help' },
+  });
+
+  if (args.help || (!args.instruction && !args.steps)) {
+    console.log(`
+General Web Flow Runner (Camoufox)
+
+Usage:
+  npx tsx flow.ts --instruction 'go to https://example.com then type "hello" then press enter'
+  npx tsx flow.ts --steps '[{"action":"goto","url":"https://example.com"}]'
+
+Supported natural steps:
+  - go to/open/navigate to <url>
+  - click on "Text"
+  - click <selector>
+  - type "text"
+  - type "text" in <selector>
+  - press <key>
+  - press <key> in <selector>
+  - wait <n>s | wait <n>ms
+  - screenshot <path>
+`);
+    process.exit(args.help ? 0 : 1);
+  }
+
+  // Parse before launching so a malformed flow fails without starting a browser.
+  const steps: Step[] = args.steps ? parseJsonSteps(args.steps) : parseInstruction(args.instruction);
+
+  const browser = await launchBrowser({ headless: args.headless });
+  try {
+    const page = await browser.newPage();
+    await runSteps(page, steps);
+    console.log('Flow complete. Final URL:', page.url());
+  } finally {
+    await browser.close();
+  }
+}
+
+main().catch((e) => {
+  console.error('Error:', e instanceof Error ? e.message : e);
+  process.exit(1);
+});
diff --git a/skills/web-automation/scripts/tmp-extract-firsthorizon-colors.ts b/skills/web-automation/scripts/tmp-extract-firsthorizon-colors.ts
deleted file mode 100644
index 414dfff..0000000
--- a/skills/web-automation/scripts/tmp-extract-firsthorizon-colors.ts
+++ /dev/null
@@ -1,78 +0,0 @@
-import { getPage } from './browse.js';
-
-type Extracted = {
-  title: string;
-  url: string;
-  colorVars: Array<[string, string]>;
-  samples: Record;
-};
-
-function isColorValue(value: string) {
-  return /#([0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})\b/i.test(value) || /\brgb\(|\bhsl\(/i.test(value);
-}
-
-async function main() {
-  const url = process.argv[2] ?? 'https://www.firsthorizon.com';
-
-  const { page, browser } = await getPage({ headless: true });
-  try {
-    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
-    await page.waitForTimeout(5000);
-
-    const data = await page.evaluate(`(() => {
-      const rootStyles = getComputedStyle(document.documentElement);
-      const vars = {};
-      for (let i = 0; i < rootStyles.length; i++) {
-        const prop = rootStyles[i];
-        if (prop && prop.startsWith('--')) {
-          vars[prop] = rootStyles.getPropertyValue(prop).trim();
-        }
-      }
-
-      const pick = (selector) => {
-        const el = document.querySelector(selector);
-        if (!el) return null;
-        const cs = getComputedStyle(el);
-        return {
-          background: cs.backgroundColor,
-          color: cs.color,
-          border: cs.borderColor,
-        };
-      };
-
-      return {
-        title: document.title,
-        url: location.href,
-        vars,
-        samples: {
-          body: pick('body'),
-          header: pick('header'),
-          nav: pick('nav'),
-          primaryButton: pick('button, [role="button"], a[role="button"], a.button, .button'),
-          link: pick('a'),
-        },
-      };
-    })()`);
-
-    const entries = Object.entries(data.vars) as Array<[string, string]>;
-    const colorVars = entries
-      .filter(([, v]) => v && isColorValue(v))
-      .sort((a, b) => a[0].localeCompare(b[0]));
-
-    const out: Extracted = {
-      title: data.title,
-      url: data.url,
-      colorVars,
-      samples: data.samples,
-    };
-
-    process.stdout.write(JSON.stringify(out, null, 2));
-  } finally {
-    await browser.close();
-  }
-}
-
-main().catch((error) => {
-  console.error(error);
-  process.exit(1);
-});