From 291b7298948dc094027ac4cfeb501e35794bc738 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 00:45:06 -0500 Subject: [PATCH 01/14] feat: scaffold us-cpa skill --- docs/us-cpa.md | 56 ++++++++ skills/us-cpa/SKILL.md | 52 +++++++ skills/us-cpa/pyproject.toml | 19 +++ skills/us-cpa/scripts/us-cpa | 8 ++ skills/us-cpa/src/us_cpa/__init__.py | 2 + skills/us-cpa/src/us_cpa/cli.py | 130 ++++++++++++++++++ .../us-cpa/tests/fixtures/documents/.gitkeep | 1 + skills/us-cpa/tests/fixtures/facts/.gitkeep | 1 + skills/us-cpa/tests/fixtures/irs/.gitkeep | 1 + skills/us-cpa/tests/fixtures/returns/.gitkeep | 1 + skills/us-cpa/tests/test_cli.py | 71 ++++++++++ 11 files changed, 342 insertions(+) create mode 100644 docs/us-cpa.md create mode 100644 skills/us-cpa/SKILL.md create mode 100644 skills/us-cpa/pyproject.toml create mode 100755 skills/us-cpa/scripts/us-cpa create mode 100644 skills/us-cpa/src/us_cpa/__init__.py create mode 100644 skills/us-cpa/src/us_cpa/cli.py create mode 100644 skills/us-cpa/tests/fixtures/documents/.gitkeep create mode 100644 skills/us-cpa/tests/fixtures/facts/.gitkeep create mode 100644 skills/us-cpa/tests/fixtures/irs/.gitkeep create mode 100644 skills/us-cpa/tests/fixtures/returns/.gitkeep create mode 100644 skills/us-cpa/tests/test_cli.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md new file mode 100644 index 0000000..c8b4111 --- /dev/null +++ b/docs/us-cpa.md @@ -0,0 +1,56 @@ +# us-cpa + +`us-cpa` is a Python CLI plus OpenClaw skill wrapper for U.S. federal individual tax work. + +## Current Milestone + +Milestone 1 provides the initial package, CLI surface, skill wrapper, and test harness. Tax logic, IRS corpus download, case workflows, rendering, and review logic are not implemented yet. + +## CLI Surface + +```bash +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" 
--tax-year 2025 +skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 +skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa render-forms --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +``` + +## Interaction Model + +- `question` + - stateless by default + - optional case context +- `prepare` + - requires a case directory + - if none exists, OpenClaw should ask whether to create one and where +- `review` + - requires a case directory + - can operate on an existing or newly-created review case + +## Planned Case Layout + +```text +/ + input/ + extracted/ + return/ + output/ + reports/ + issues/ + sources/ +``` + +## Output Contract + +- JSON by default +- markdown available with `--format markdown` +- current milestone responses are scaffold payloads with `status: "not_implemented"` + +## Scope Rules + +- U.S. federal individual returns only in v1 +- official IRS artifacts are the target output for compiled forms +- conflicting facts must stop the workflow for user resolution diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md new file mode 100644 index 0000000..d3fccc3 --- /dev/null +++ b/skills/us-cpa/SKILL.md @@ -0,0 +1,52 @@ +--- +name: us-cpa +description: Use when answering U.S. federal individual tax questions, preparing a federal Form 1040 return package, or reviewing a draft/completed federal individual return. +--- + +# US CPA + +`us-cpa` is a Python-first federal individual tax workflow skill. The CLI is the canonical engine. Use the skill to classify the request, gather missing inputs, and invoke the CLI. 
+ +## Modes + +- `question` + - one-off federal tax question + - case folder optional +- `prepare` + - new or existing return-preparation case + - case folder required +- `review` + - new or existing return-review case + - case folder required + +## Agent Workflow + +1. Determine whether the request is: + - question-only + - a new preparation/review case + - work on an existing case +2. If the request is `prepare` or `review` and no case folder is supplied: + - ask whether to create a new case + - ask where to store it +3. Use the bundled CLI: + +```bash +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +``` + +## Rules + +- federal individual returns only in v1 +- IRS materials first; escalate to primary law only when needed +- stop on conflicting facts and ask the user to resolve the issue before continuing +- official IRS PDFs are the target compiled-form artifacts +- overlay-rendered forms must be flagged for human review + +## Output + +- JSON by default +- markdown output available with `--format markdown` + +For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/pyproject.toml b/skills/us-cpa/pyproject.toml new file mode 100644 index 0000000..eb9a667 --- /dev/null +++ b/skills/us-cpa/pyproject.toml @@ -0,0 +1,19 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "us-cpa" +version = "0.1.0" +description = "US federal individual tax workflow CLI for questions, preparation, and review." 
+requires-python = ">=3.9" +dependencies = [] + +[project.scripts] +us-cpa = "us_cpa.cli:main" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/skills/us-cpa/scripts/us-cpa b/skills/us-cpa/scripts/us-cpa new file mode 100755 index 0000000..fbcef77 --- /dev/null +++ b/skills/us-cpa/scripts/us-cpa @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +export PYTHONPATH="${SKILL_DIR}/src${PYTHONPATH:+:${PYTHONPATH}}" + +exec python3 -m us_cpa.cli "$@" diff --git a/skills/us-cpa/src/us_cpa/__init__.py b/skills/us-cpa/src/us_cpa/__init__.py new file mode 100644 index 0000000..8014833 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/__init__.py @@ -0,0 +1,2 @@ +"""us-cpa package.""" + diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py new file mode 100644 index 0000000..4ef2046 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any + + +COMMANDS = ( + "question", + "prepare", + "review", + "fetch-year", + "extract-docs", + "render-forms", + "export-efile-ready", +) + + +def _add_common_arguments( + parser: argparse.ArgumentParser, *, include_tax_year: bool = True +) -> None: + if include_tax_year: + parser.add_argument("--tax-year", type=int, default=None) + parser.add_argument("--case-dir", default=None) + parser.add_argument("--format", choices=("json", "markdown"), default="json") + + +def _emit(payload: dict[str, Any], output_format: str) -> int: + if output_format == "markdown": + lines = [f"# {payload['command']}"] + for key, value in payload.items(): + if key == "command": + continue + lines.append(f"- **{key}**: {value}") + print("\n".join(lines)) + else: + print(json.dumps(payload, indent=2)) + return 0 + + +def 
_require_case_dir(args: argparse.Namespace) -> Path: + if not args.case_dir: + raise SystemExit("A case directory is required for this command.") + return Path(args.case_dir).expanduser().resolve() + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="us-cpa", + description="US federal individual tax workflow CLI.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + question = subparsers.add_parser("question", help="Answer a tax question.") + _add_common_arguments(question) + question.add_argument("--question", required=True) + + prepare = subparsers.add_parser("prepare", help="Prepare a return case.") + _add_common_arguments(prepare) + + review = subparsers.add_parser("review", help="Review a return case.") + _add_common_arguments(review) + + fetch_year = subparsers.add_parser( + "fetch-year", help="Fetch tax-year forms and instructions." + ) + _add_common_arguments(fetch_year, include_tax_year=False) + fetch_year.add_argument("--tax-year", type=int, required=True) + + extract_docs = subparsers.add_parser( + "extract-docs", help="Extract facts from case documents." + ) + _add_common_arguments(extract_docs) + + render_forms = subparsers.add_parser( + "render-forms", help="Render compiled IRS forms." + ) + _add_common_arguments(render_forms) + + export_efile = subparsers.add_parser( + "export-efile-ready", help="Export an e-file-ready payload." 
+ ) + _add_common_arguments(export_efile) + + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + + if args.command == "question": + payload = { + "command": "question", + "format": args.format, + "taxYear": args.tax_year, + "caseDir": args.case_dir, + "question": args.question, + "status": "not_implemented", + } + return _emit(payload, args.format) + + if args.command in {"prepare", "review", "extract-docs", "render-forms", "export-efile-ready"}: + case_dir = _require_case_dir(args) + payload = { + "command": args.command, + "format": args.format, + "taxYear": args.tax_year, + "caseDir": str(case_dir), + "status": "not_implemented", + } + return _emit(payload, args.format) + + if args.command == "fetch-year": + payload = { + "command": "fetch-year", + "format": args.format, + "taxYear": args.tax_year, + "status": "not_implemented", + } + return _emit(payload, args.format) + + parser.error(f"Unsupported command: {args.command}") + return 2 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/skills/us-cpa/tests/fixtures/documents/.gitkeep b/skills/us-cpa/tests/fixtures/documents/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/facts/.gitkeep b/skills/us-cpa/tests/fixtures/facts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/irs/.gitkeep b/skills/us-cpa/tests/fixtures/irs/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/irs/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/returns/.gitkeep b/skills/us-cpa/tests/fixtures/returns/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/returns/.gitkeep @@ -0,0 +1 @@ 
+ diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py new file mode 100644 index 0000000..db4d765 --- /dev/null +++ b/skills/us-cpa/tests/test_cli.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import unittest +from pathlib import Path + + +SKILL_DIR = Path(__file__).resolve().parents[1] +SRC_DIR = SKILL_DIR / "src" + + +class UsCpaCliSmokeTests(unittest.TestCase): + def test_skill_scaffold_files_exist(self) -> None: + self.assertTrue((SKILL_DIR / "SKILL.md").exists()) + self.assertTrue((SKILL_DIR / "pyproject.toml").exists()) + self.assertTrue((SKILL_DIR / "scripts" / "us-cpa").exists()) + self.assertTrue( + (SKILL_DIR.parent.parent / "docs" / "us-cpa.md").exists() + ) + + def test_fixture_directories_exist(self) -> None: + fixtures_dir = SKILL_DIR / "tests" / "fixtures" + for name in ("irs", "facts", "documents", "returns"): + self.assertTrue((fixtures_dir / name).exists()) + + def run_cli(self, *args: str) -> subprocess.CompletedProcess[str]: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + return subprocess.run( + [sys.executable, "-m", "us_cpa.cli", *args], + text=True, + capture_output=True, + env=env, + ) + + def test_help_lists_all_commands(self) -> None: + result = self.run_cli("--help") + + self.assertEqual(result.returncode, 0, result.stderr) + for command in ( + "question", + "prepare", + "review", + "fetch-year", + "extract-docs", + "render-forms", + "export-efile-ready", + ): + self.assertIn(command, result.stdout) + + def test_question_command_emits_json_by_default(self) -> None: + result = self.run_cli("question", "--question", "What is the standard deduction?") + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["command"], "question") + self.assertEqual(payload["format"], "json") + self.assertEqual(payload["question"], "What is the standard deduction?") + + def 
test_prepare_requires_case_dir(self) -> None: + result = self.run_cli("prepare", "--tax-year", "2025") + + self.assertNotEqual(result.returncode, 0) + self.assertIn("case directory", result.stderr.lower()) + + +if __name__ == "__main__": + unittest.main() From 0c2e34f2f06b107398d94e0e41c6932e353c7472 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 00:53:18 -0500 Subject: [PATCH 02/14] feat: add us-cpa tax-year source corpus --- docs/us-cpa.md | 47 +++++++- skills/us-cpa/SKILL.md | 2 + skills/us-cpa/src/us_cpa/cli.py | 7 +- skills/us-cpa/src/us_cpa/sources.py | 178 ++++++++++++++++++++++++++++ skills/us-cpa/tests/test_sources.py | 97 +++++++++++++++ 5 files changed, 328 insertions(+), 3 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/sources.py create mode 100644 skills/us-cpa/tests/test_sources.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index c8b4111..e76feac 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -4,7 +4,14 @@ ## Current Milestone -Milestone 1 provides the initial package, CLI surface, skill wrapper, and test harness. Tax logic, IRS corpus download, case workflows, rendering, and review logic are not implemented yet. +Milestone 2 now adds the first tax-year corpus layer: + +- deterministic cache layout under `~/.cache/us-cpa` by default +- `fetch-year` download flow for the bootstrap IRS corpus +- source manifest with URL, hash, authority rank, and local path traceability +- authority ranking hooks for IRS materials and future primary-law escalation + +Tax logic, case workflows, rendering, and review logic are still pending. 
## CLI Surface @@ -18,6 +25,27 @@ skills/us-cpa/scripts/us-cpa render-forms --tax-year 2025 --case-dir ~/tax-cases skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe ``` +## Tax-Year Cache + +Default cache root: + +```text +~/.cache/us-cpa +``` + +Override for isolated runs: + +```bash +US_CPA_CACHE_DIR=/tmp/us-cpa-cache skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 +``` + +Current `fetch-year` bootstrap corpus for tax year `2025` is verified against live IRS `irs-prior` PDFs for: + +- Form 1040 +- Schedules 1, 2, 3, A, B, C, D, SE, and 8812 +- Form 8949 +- General Form 1040 instructions and selected schedule/form instructions + ## Interaction Model - `question` @@ -47,10 +75,25 @@ skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax - JSON by default - markdown available with `--format markdown` -- current milestone responses are scaffold payloads with `status: "not_implemented"` +- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` +- `fetch-year` emits a downloaded manifest location and source count ## Scope Rules - U.S. federal individual returns only in v1 - official IRS artifacts are the target output for compiled forms - conflicting facts must stop the workflow for user resolution + +## Authority Ranking + +Current authority classes are ranked to preserve source hierarchy: + +- IRS forms +- IRS instructions +- IRS publications +- IRS FAQs +- Internal Revenue Code +- Treasury regulations +- other primary authority + +Later research and review flows should consume this ranking rather than inventing their own. 
diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index d3fccc3..c212fe0 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -48,5 +48,7 @@ skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025- - JSON by default - markdown output available with `--format markdown` +- `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default +- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 4ef2046..3fc8e1c 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -6,6 +6,7 @@ import sys from pathlib import Path from typing import Any +from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog COMMANDS = ( "question", @@ -114,11 +115,15 @@ def main(argv: list[str] | None = None) -> int: return _emit(payload, args.format) if args.command == "fetch-year": + corpus = TaxYearCorpus() + manifest = corpus.download_catalog(args.tax_year, bootstrap_irs_catalog(args.tax_year)) payload = { "command": "fetch-year", "format": args.format, "taxYear": args.tax_year, - "status": "not_implemented", + "status": "downloaded", + "sourceCount": manifest["sourceCount"], + "manifestPath": corpus.paths_for_year(args.tax_year).manifest_path.as_posix(), } return _emit(payload, args.format) diff --git a/skills/us-cpa/src/us_cpa/sources.py b/skills/us-cpa/src/us_cpa/sources.py new file mode 100644 index 0000000..1f4190a --- /dev/null +++ b/skills/us-cpa/src/us_cpa/sources.py @@ -0,0 +1,178 @@ +from __future__ import annotations + +import hashlib +import json +import os +from dataclasses import dataclass +from datetime import datetime, timezone +from enum import IntEnum +from pathlib import Path +from typing import Callable +from urllib.request import urlopen + 
+ +class AuthorityRank(IntEnum): + IRS_FORM = 10 + IRS_INSTRUCTIONS = 20 + IRS_PUBLICATION = 30 + IRS_FAQ = 40 + INTERNAL_REVENUE_CODE = 100 + TREASURY_REGULATION = 110 + OTHER_PRIMARY_AUTHORITY = 120 + + +AUTHORITY_RANKS: dict[str, AuthorityRank] = { + "irs_form": AuthorityRank.IRS_FORM, + "irs_instructions": AuthorityRank.IRS_INSTRUCTIONS, + "irs_publication": AuthorityRank.IRS_PUBLICATION, + "irs_faq": AuthorityRank.IRS_FAQ, + "internal_revenue_code": AuthorityRank.INTERNAL_REVENUE_CODE, + "treasury_regulation": AuthorityRank.TREASURY_REGULATION, + "other_primary_authority": AuthorityRank.OTHER_PRIMARY_AUTHORITY, +} + + +def authority_rank_for(source_class: str) -> AuthorityRank: + return AUTHORITY_RANKS[source_class] + + +@dataclass(frozen=True) +class SourceDescriptor: + slug: str + title: str + source_class: str + media_type: str + url: str + + +@dataclass(frozen=True) +class TaxYearPaths: + year_dir: Path + irs_dir: Path + manifest_path: Path + + +def default_cache_root() -> Path: + override = os.getenv("US_CPA_CACHE_DIR") + if override: + return Path(override).expanduser().resolve() + return (Path.home() / ".cache" / "us-cpa").resolve() + + +def build_irs_prior_pdf_url(slug: str, tax_year: int) -> str: + return f"https://www.irs.gov/pub/irs-prior/{slug}--{tax_year}.pdf" + + +def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]: + entries = [ + ("f1040", "Form 1040", "irs_form"), + ("f1040s1", "Schedule 1 (Form 1040)", "irs_form"), + ("f1040s2", "Schedule 2 (Form 1040)", "irs_form"), + ("f1040s3", "Schedule 3 (Form 1040)", "irs_form"), + ("f1040sa", "Schedule A (Form 1040)", "irs_form"), + ("f1040sb", "Schedule B (Form 1040)", "irs_form"), + ("f1040sc", "Schedule C (Form 1040)", "irs_form"), + ("f1040sd", "Schedule D (Form 1040)", "irs_form"), + ("f1040se", "Schedule SE (Form 1040)", "irs_form"), + ("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"), + ("f8949", "Form 8949", "irs_form"), + ("i1040gi", "Instructions for Form 1040 and Schedules 
1-3", "irs_instructions"), + ("i1040sca", "Instructions for Schedule A", "irs_instructions"), + ("i1040sc", "Instructions for Schedule C", "irs_instructions"), + ("i1040sd", "Instructions for Schedule D", "irs_instructions"), + ("i1040se", "Instructions for Schedule SE", "irs_instructions"), + ("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"), + ("i8949", "Instructions for Form 8949", "irs_instructions"), + ] + return [ + SourceDescriptor( + slug=slug, + title=title, + source_class=source_class, + media_type="application/pdf", + url=build_irs_prior_pdf_url(slug, tax_year), + ) + for slug, title, source_class in entries + ] + + +def _sha256_bytes(payload: bytes) -> str: + return hashlib.sha256(payload).hexdigest() + + +def _http_fetch(url: str) -> bytes: + with urlopen(url) as response: + return response.read() + + +class TaxYearCorpus: + def __init__(self, cache_root: Path | None = None) -> None: + self.cache_root = cache_root or default_cache_root() + + def paths_for_year(self, tax_year: int) -> TaxYearPaths: + year_dir = self.cache_root / "tax-years" / str(tax_year) + return TaxYearPaths( + year_dir=year_dir, + irs_dir=year_dir / "irs", + manifest_path=year_dir / "manifest.json", + ) + + def download_catalog( + self, + tax_year: int, + catalog: list[SourceDescriptor], + *, + fetcher: Callable[[str], bytes] = _http_fetch, + ) -> dict: + paths = self.paths_for_year(tax_year) + paths.irs_dir.mkdir(parents=True, exist_ok=True) + + fetched_at = datetime.now(timezone.utc).isoformat() + sources: list[dict] = [] + for descriptor in catalog: + payload = fetcher(descriptor.url) + destination = paths.irs_dir / f"{descriptor.slug}.pdf" + destination.write_bytes(payload) + sources.append( + { + "slug": descriptor.slug, + "title": descriptor.title, + "sourceClass": descriptor.source_class, + "mediaType": descriptor.media_type, + "url": descriptor.url, + "localPath": str(destination), + "sha256": _sha256_bytes(payload), + "fetchedAt": fetched_at, + 
"authorityRank": int(authority_rank_for(descriptor.source_class)), + } + ) + + manifest = { + "taxYear": tax_year, + "fetchedAt": fetched_at, + "cacheRoot": str(self.cache_root), + "sourceCount": len(sources), + "sources": sources, + "indexes": self.index_manifest(sources), + "primaryLawHooks": [ + { + "sourceClass": "internal_revenue_code", + "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE), + }, + { + "sourceClass": "treasury_regulation", + "authorityRank": int(AuthorityRank.TREASURY_REGULATION), + }, + ], + } + paths.manifest_path.write_text(json.dumps(manifest, indent=2)) + return manifest + + @staticmethod + def index_manifest(sources: list[dict]) -> dict[str, dict[str, list[str]]]: + by_class: dict[str, list[str]] = {} + by_slug: dict[str, list[str]] = {} + for source in sources: + by_class.setdefault(source["sourceClass"], []).append(source["slug"]) + by_slug.setdefault(source["slug"], []).append(source["localPath"]) + return {"bySourceClass": by_class, "bySlug": by_slug} diff --git a/skills/us-cpa/tests/test_sources.py b/skills/us-cpa/tests/test_sources.py new file mode 100644 index 0000000..206e4d0 --- /dev/null +++ b/skills/us-cpa/tests/test_sources.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import Path + +from us_cpa.sources import ( + AuthorityRank, + SourceDescriptor, + TaxYearCorpus, + authority_rank_for, + bootstrap_irs_catalog, + build_irs_prior_pdf_url, +) + + +class SourceCatalogTests(unittest.TestCase): + def test_build_irs_prior_pdf_url_uses_expected_pattern(self) -> None: + self.assertEqual( + build_irs_prior_pdf_url("f1040", 2025), + "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf", + ) + self.assertEqual( + build_irs_prior_pdf_url("i1040gi", 2025), + "https://www.irs.gov/pub/irs-prior/i1040gi--2025.pdf", + ) + + def test_authority_ranking_orders_irs_before_primary_law(self) -> None: + self.assertEqual(authority_rank_for("irs_form"), AuthorityRank.IRS_FORM) 
+ self.assertEqual( + authority_rank_for("treasury_regulation"), + AuthorityRank.TREASURY_REGULATION, + ) + self.assertLess( + authority_rank_for("irs_form"), authority_rank_for("internal_revenue_code") + ) + + def test_bootstrap_catalog_builds_tax_year_specific_urls(self) -> None: + catalog = bootstrap_irs_catalog(2025) + + self.assertGreaterEqual(len(catalog), 5) + self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf") + self.assertTrue(any(item.slug == "i1040gi" for item in catalog)) + + +class TaxYearCorpusTests(unittest.TestCase): + def test_tax_year_layout_is_deterministic(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + corpus = TaxYearCorpus(cache_root=Path(temp_dir)) + paths = corpus.paths_for_year(2025) + + self.assertEqual(paths.year_dir, Path(temp_dir) / "tax-years" / "2025") + self.assertEqual(paths.irs_dir, paths.year_dir / "irs") + self.assertEqual(paths.manifest_path, paths.year_dir / "manifest.json") + + def test_download_catalog_writes_files_and_manifest(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + corpus = TaxYearCorpus(cache_root=Path(temp_dir)) + catalog = [ + SourceDescriptor( + slug="f1040", + title="Form 1040", + source_class="irs_form", + media_type="application/pdf", + url=build_irs_prior_pdf_url("f1040", 2025), + ), + SourceDescriptor( + slug="i1040gi", + title="Instructions for Form 1040", + source_class="irs_instructions", + media_type="application/pdf", + url=build_irs_prior_pdf_url("i1040gi", 2025), + ), + ] + + def fake_fetch(url: str) -> bytes: + return f"downloaded:{url}".encode() + + manifest = corpus.download_catalog(2025, catalog, fetcher=fake_fetch) + + self.assertEqual(manifest["taxYear"], 2025) + self.assertEqual(manifest["sourceCount"], 2) + self.assertTrue(corpus.paths_for_year(2025).manifest_path.exists()) + + first = manifest["sources"][0] + self.assertEqual(first["slug"], "f1040") + self.assertEqual(first["authorityRank"], int(AuthorityRank.IRS_FORM)) 
+ self.assertTrue(Path(first["localPath"]).exists()) + + saved = json.loads(corpus.paths_for_year(2025).manifest_path.read_text()) + self.assertEqual(saved["sourceCount"], 2) + self.assertEqual(saved["sources"][1]["slug"], "i1040gi") + + +if __name__ == "__main__": + unittest.main() From faff555757edaabe267bfdcb9a03ce772422c200 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 00:56:07 -0500 Subject: [PATCH 03/14] feat: add us-cpa case intake workflow --- docs/us-cpa.md | 25 ++++- skills/us-cpa/SKILL.md | 2 + skills/us-cpa/src/us_cpa/cases.py | 157 ++++++++++++++++++++++++++++++ skills/us-cpa/src/us_cpa/cli.py | 41 +++++++- skills/us-cpa/tests/test_cases.py | 80 +++++++++++++++ skills/us-cpa/tests/test_cli.py | 62 ++++++++++++ 6 files changed, 365 insertions(+), 2 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/cases.py create mode 100644 skills/us-cpa/tests/test_cases.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index e76feac..e50b598 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -20,7 +20,7 @@ skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 -skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json skills/us-cpa/scripts/us-cpa render-forms --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe ``` @@ -71,6 +71,29 @@ Current `fetch-year` bootstrap corpus for tax year `2025` is verified against li sources/ ``` +Current implementation writes: + +- `case-manifest.json` +- 
`extracted/facts.json` +- `issues/open-issues.json` + +## Intake Flow + +Current `extract-docs` supports: + +- `--create-case` +- `--case-label` +- `--facts-json ` +- repeated `--input-file ` + +Behavior: + +- creates the full case directory layout when `--create-case` is used +- copies input documents into `input/` +- stores normalized user-statement facts in `extracted/facts.json` +- appends document registry entries to `case-manifest.json` +- stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact + ## Output Contract - JSON by default diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index c212fe0..6b9e841 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json ``` ## Rules @@ -50,5 +51,6 @@ skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025- - markdown output available with `--format markdown` - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation +- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. 
diff --git a/skills/us-cpa/src/us_cpa/cases.py b/skills/us-cpa/src/us_cpa/cases.py new file mode 100644 index 0000000..9f8827d --- /dev/null +++ b/skills/us-cpa/src/us_cpa/cases.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import hashlib +import json +import shutil +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + + +CASE_SUBDIRECTORIES = ( + "input", + "extracted", + "return", + "output", + "reports", + "issues", + "sources", +) + + +def _timestamp() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _sha256_path(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(65536), b""): + digest.update(chunk) + return digest.hexdigest() + + +class CaseConflictError(Exception): + def __init__(self, issue: dict[str, Any]) -> None: + super().__init__(issue["message"]) + self.issue = issue + + +@dataclass +class CaseManager: + case_dir: Path + + def __post_init__(self) -> None: + self.case_dir = self.case_dir.expanduser().resolve() + + @property + def manifest_path(self) -> Path: + return self.case_dir / "case-manifest.json" + + @property + def facts_path(self) -> Path: + return self.case_dir / "extracted" / "facts.json" + + @property + def issues_path(self) -> Path: + return self.case_dir / "issues" / "open-issues.json" + + def create_case(self, *, case_label: str, tax_year: int) -> dict[str, Any]: + self.case_dir.mkdir(parents=True, exist_ok=True) + for name in CASE_SUBDIRECTORIES: + (self.case_dir / name).mkdir(exist_ok=True) + + manifest = { + "caseLabel": case_label, + "taxYear": tax_year, + "createdAt": _timestamp(), + "updatedAt": _timestamp(), + "status": "open", + "documents": [], + } + self.manifest_path.write_text(json.dumps(manifest, indent=2)) + if not self.facts_path.exists(): + self.facts_path.write_text(json.dumps({"facts": {}}, indent=2)) + if not self.issues_path.exists(): + 
self.issues_path.write_text(json.dumps({"issues": []}, indent=2)) + return manifest + + def load_manifest(self) -> dict[str, Any]: + return json.loads(self.manifest_path.read_text()) + + def _load_facts(self) -> dict[str, Any]: + return json.loads(self.facts_path.read_text()) + + def _write_manifest(self, manifest: dict[str, Any]) -> None: + manifest["updatedAt"] = _timestamp() + self.manifest_path.write_text(json.dumps(manifest, indent=2)) + + def _write_facts(self, facts: dict[str, Any]) -> None: + self.facts_path.write_text(json.dumps(facts, indent=2)) + + def _write_issue(self, issue: dict[str, Any]) -> None: + current = json.loads(self.issues_path.read_text()) + current["issues"].append(issue) + self.issues_path.write_text(json.dumps(current, indent=2)) + + def intake( + self, + *, + tax_year: int, + user_facts: dict[str, Any], + document_paths: list[Path], + ) -> dict[str, Any]: + manifest = self.load_manifest() + if manifest["taxYear"] != tax_year: + raise ValueError( + f"Case tax year {manifest['taxYear']} does not match requested tax year {tax_year}." + ) + + registered_documents = [] + for source_path in document_paths: + source_path = source_path.expanduser().resolve() + destination = self.case_dir / "input" / source_path.name + shutil.copy2(source_path, destination) + document_entry = { + "name": source_path.name, + "sourcePath": str(source_path), + "storedPath": str(destination), + "sha256": _sha256_path(destination), + "registeredAt": _timestamp(), + } + manifest["documents"].append(document_entry) + registered_documents.append(document_entry) + + facts_payload = self._load_facts() + for field, value in user_facts.items(): + existing = facts_payload["facts"].get(field) + if existing and existing["value"] != value: + issue = { + "status": "needs_resolution", + "issueType": "fact_conflict", + "field": field, + "existingValue": existing["value"], + "newValue": value, + "message": f"Conflicting values for {field}. 
Resolve before continuing.", + "createdAt": _timestamp(), + "taxYear": tax_year, + } + self._write_issue(issue) + raise CaseConflictError(issue) + + facts_payload["facts"][field] = { + "value": value, + "sourceType": "user_statement", + "capturedAt": _timestamp(), + } + + self._write_manifest(manifest) + self._write_facts(facts_payload) + return { + "status": "accepted", + "caseDir": str(self.case_dir), + "taxYear": tax_year, + "registeredDocuments": registered_documents, + "factCount": len(facts_payload["facts"]), + } diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 3fc8e1c..6d5b46f 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -6,6 +6,7 @@ import sys from pathlib import Path from typing import Any +from us_cpa.cases import CaseConflictError, CaseManager from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog COMMANDS = ( @@ -47,6 +48,12 @@ def _require_case_dir(args: argparse.Namespace) -> Path: return Path(args.case_dir).expanduser().resolve() +def _load_json_file(path_value: str | None) -> dict[str, Any]: + if not path_value: + return {} + return json.loads(Path(path_value).expanduser().resolve().read_text()) + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="us-cpa", @@ -74,6 +81,10 @@ def build_parser() -> argparse.ArgumentParser: "extract-docs", help="Extract facts from case documents." ) _add_common_arguments(extract_docs) + extract_docs.add_argument("--create-case", action="store_true") + extract_docs.add_argument("--case-label") + extract_docs.add_argument("--facts-json") + extract_docs.add_argument("--input-file", action="append", default=[]) render_forms = subparsers.add_parser( "render-forms", help="Render compiled IRS forms." 
@@ -103,7 +114,35 @@ def main(argv: list[str] | None = None) -> int: } return _emit(payload, args.format) - if args.command in {"prepare", "review", "extract-docs", "render-forms", "export-efile-ready"}: + if args.command == "extract-docs": + case_dir = _require_case_dir(args) + manager = CaseManager(case_dir) + if args.create_case: + if not args.case_label: + raise SystemExit("--case-label is required when --create-case is used.") + manager.create_case(case_label=args.case_label, tax_year=args.tax_year) + elif not manager.manifest_path.exists(): + raise SystemExit("Case manifest not found. Use --create-case for a new case.") + + try: + result = manager.intake( + tax_year=args.tax_year, + user_facts=_load_json_file(args.facts_json), + document_paths=[ + Path(path_value).expanduser().resolve() for path_value in args.input_file + ], + ) + except CaseConflictError as exc: + print(json.dumps(exc.issue, indent=2)) + return 1 + payload = { + "command": args.command, + "format": args.format, + **result, + } + return _emit(payload, args.format) + + if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}: case_dir = _require_case_dir(args) payload = { "command": args.command, diff --git a/skills/us-cpa/tests/test_cases.py b/skills/us-cpa/tests/test_cases.py new file mode 100644 index 0000000..71fa379 --- /dev/null +++ b/skills/us-cpa/tests/test_cases.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import Path + +from us_cpa.cases import CaseConflictError, CaseManager + + +class CaseManagerTests(unittest.TestCase): + def test_create_case_builds_expected_layout(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + + manifest = manager.create_case(case_label="Jane Doe", tax_year=2025) + + self.assertEqual(manifest["caseLabel"], "Jane Doe") + self.assertEqual(manifest["taxYear"], 2025) + for 
name in ( + "input", + "extracted", + "return", + "output", + "reports", + "issues", + "sources", + ): + self.assertTrue((case_dir / name).is_dir()) + self.assertTrue((case_dir / "case-manifest.json").exists()) + + def test_intake_registers_documents_and_user_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + case_dir = root / "2025-jane-doe" + document = root / "w2.txt" + document.write_text("sample w2") + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + + result = manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single", "taxpayer.ssnLast4": "1234"}, + document_paths=[document], + ) + + self.assertEqual(result["status"], "accepted") + self.assertEqual(len(result["registeredDocuments"]), 1) + self.assertTrue((case_dir / "input" / "w2.txt").exists()) + facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) + self.assertEqual(facts["facts"]["filingStatus"]["value"], "single") + + def test_conflicting_facts_raise_structured_issue(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single"}, + document_paths=[], + ) + + with self.assertRaises(CaseConflictError) as context: + manager.intake( + tax_year=2025, + user_facts={"filingStatus": "married_filing_jointly"}, + document_paths=[], + ) + + issue = context.exception.issue + self.assertEqual(issue["status"], "needs_resolution") + self.assertEqual(issue["issueType"], "fact_conflict") + self.assertEqual(issue["field"], "filingStatus") + self.assertTrue((case_dir / "issues" / "open-issues.json").exists()) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index db4d765..942f027 100644 --- 
a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -4,6 +4,7 @@ import json import os import subprocess import sys +import tempfile import unittest from pathlib import Path @@ -66,6 +67,67 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertNotEqual(result.returncode, 0) self.assertIn("case directory", result.stderr.lower()) + def test_extract_docs_can_create_case_and_register_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + facts_path = Path(temp_dir) / "facts.json" + facts_path.write_text(json.dumps({"filingStatus": "single"})) + + result = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(facts_path), + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "accepted") + self.assertEqual(payload["factCount"], 1) + self.assertTrue((case_dir / "case-manifest.json").exists()) + + def test_extract_docs_stops_on_conflicts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + first_facts = Path(temp_dir) / "facts-1.json" + second_facts = Path(temp_dir) / "facts-2.json" + first_facts.write_text(json.dumps({"filingStatus": "single"})) + second_facts.write_text(json.dumps({"filingStatus": "married_filing_jointly"})) + + first = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(first_facts), + ) + self.assertEqual(first.returncode, 0, first.stderr) + + second = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--facts-json", + str(second_facts), + ) + self.assertNotEqual(second.returncode, 0) + payload = json.loads(second.stdout) + 
self.assertEqual(payload["status"], "needs_resolution") + self.assertEqual(payload["issueType"], "fact_conflict") + if __name__ == "__main__": unittest.main() From 8f797b3a5188bce6882aa12f18c376efb96ae241 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 01:17:14 -0500 Subject: [PATCH 04/14] feat: add us-cpa question engine --- docs/us-cpa.md | 21 ++++ skills/us-cpa/SKILL.md | 2 + skills/us-cpa/src/us_cpa/cli.py | 27 +++- skills/us-cpa/src/us_cpa/questions.py | 172 ++++++++++++++++++++++++++ skills/us-cpa/tests/test_cli.py | 65 +++++++++- skills/us-cpa/tests/test_questions.py | 75 +++++++++++ 6 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/questions.py create mode 100644 skills/us-cpa/tests/test_questions.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index e50b598..d500436 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -17,6 +17,7 @@ Tax logic, case workflows, rendering, and review logic are still pending. ```bash skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" 
--tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 @@ -98,9 +99,29 @@ Behavior: - JSON by default - markdown available with `--format markdown` +- `question` supports: + - `--style conversation` + - `--style memo` - `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` - `fetch-year` emits a downloaded manifest location and source count +## Question Engine + +Current `question` implementation: + +- loads the cached tax-year corpus +- searches a small IRS-first topical rule set +- returns one canonical analysis object +- renders that analysis as: + - conversational output + - memo output +- marks questions outside the current topical rule set as requiring primary-law escalation + +Current implemented topics: + +- standard deduction +- Schedule C / sole proprietorship reporting trigger + ## Scope Rules - U.S. federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 6b9e841..f14247a 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -32,6 +32,7 @@ description: Use when answering U.S. federal individual tax questions, preparing ```bash skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" 
--tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json @@ -49,6 +50,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - JSON by default - markdown output available with `--format markdown` +- `question` supports `--style conversation|memo` - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 6d5b46f..1249c51 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any from us_cpa.cases import CaseConflictError, CaseManager +from us_cpa.questions import QuestionEngine, render_analysis, render_memo from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog COMMANDS = ( @@ -64,6 +65,7 @@ def build_parser() -> argparse.ArgumentParser: question = subparsers.add_parser("question", help="Answer a tax question.") _add_common_arguments(question) question.add_argument("--question", required=True) + question.add_argument("--style", choices=("conversation", "memo"), default="conversation") prepare = subparsers.add_parser("prepare", help="Prepare a return case.") _add_common_arguments(prepare) @@ -104,14 +106,37 @@ def main(argv: list[str] | None = None) -> int: args = parser.parse_args(argv) if args.command == "question": + corpus = TaxYearCorpus() + engine = 
QuestionEngine(corpus=corpus) + case_facts: dict[str, Any] = {} + if args.case_dir: + manager = CaseManager(Path(args.case_dir)) + if manager.facts_path.exists(): + case_facts = { + key: value["value"] + for key, value in json.loads(manager.facts_path.read_text())["facts"].items() + } + analysis = engine.answer( + question=args.question, + tax_year=args.tax_year, + case_facts=case_facts, + ) payload = { "command": "question", "format": args.format, + "style": args.style, "taxYear": args.tax_year, "caseDir": args.case_dir, "question": args.question, - "status": "not_implemented", + "status": "answered", + "analysis": analysis, } + payload["rendered"] = ( + render_memo(analysis) if args.style == "memo" else render_analysis(analysis) + ) + if args.format == "markdown": + print(payload["rendered"]) + return 0 return _emit(payload, args.format) if args.command == "extract-docs": diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py new file mode 100644 index 0000000..be4b103 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/questions.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from us_cpa.sources import TaxYearCorpus + + +TOPIC_RULES = [ + { + "issue": "standard_deduction", + "keywords": ("standard deduction",), + "authority_slugs": ("i1040gi",), + "answer_by_status": { + "single": "$15,000", + "married_filing_jointly": "$30,000", + "head_of_household": "$22,500", + }, + "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.", + "confidence": "high", + }, + { + "issue": "schedule_c_required", + "keywords": ("schedule c", "sole proprietor", "self-employment"), + "authority_slugs": ("f1040sc", "i1040sc"), + "answer": "Schedule C is generally required when a taxpayer reports sole proprietorship business income or expenses.", + "summary": "Business income and expenses from a 
@dataclass
class QuestionEngine:
    """Answer tax questions from the topical rule set and the cached corpus manifest."""

    corpus: TaxYearCorpus

    @staticmethod
    def _rule_matches(normalized_question: str, keywords: tuple[str, ...]) -> bool:
        """Return True when the question mentions at least one rule keyword.

        The keywords of a rule are alternative phrasings of the same topic
        (e.g. "schedule c" / "sole proprietor"), so a single hit is enough.
        """
        return any(keyword in normalized_question for keyword in keywords)

    def _manifest(self, tax_year: int) -> dict[str, Any]:
        """Load the corpus manifest for ``tax_year``.

        Raises:
            FileNotFoundError: If the manifest file for that year does not exist.
        """
        path = self.corpus.paths_for_year(tax_year).manifest_path
        if not path.exists():
            raise FileNotFoundError(
                f"Tax year {tax_year} corpus not found at {path}. Run fetch-year first."
            )
        return json.loads(path.read_text())

    def _authorities_for(self, manifest: dict[str, Any], slugs: tuple[str, ...]) -> list[dict[str, Any]]:
        """Return citation records for the requested slugs that exist in the manifest.

        Slugs missing from ``manifest["sources"]`` are skipped; the result keeps
        the order of ``slugs``.
        """
        found = []
        sources = {item["slug"]: item for item in manifest["sources"]}
        for slug in slugs:
            if slug in sources:
                source = sources[slug]
                found.append(
                    {
                        "slug": source["slug"],
                        "title": source["title"],
                        "sourceClass": source["sourceClass"],
                        "url": source["url"],
                        "localPath": source["localPath"],
                        "authorityRank": source["authorityRank"],
                    }
                )
        return found

    def answer(self, *, question: str, tax_year: int, case_facts: dict[str, Any]) -> dict[str, Any]:
        """Build the analysis object for ``question``.

        The first rule in ``TOPIC_RULES`` with a keyword present in the
        question wins. When no rule matches, the result is flagged as
        requiring primary-law escalation.

        Raises:
            FileNotFoundError: If the corpus for ``tax_year`` is not cached.
        """
        manifest = self._manifest(tax_year)
        normalized = _normalize_question(question)
        facts_used = [{"field": key, "value": value} for key, value in sorted(case_facts.items())]

        for rule in TOPIC_RULES:
            if self._rule_matches(normalized, rule["keywords"]):
                authorities = self._authorities_for(manifest, rule["authority_slugs"])
                if rule["issue"] == "standard_deduction":
                    # NOTE: a missing or unrecognized filing status falls back to the
                    # "single" amount.
                    filing_status = case_facts.get("filingStatus", "single")
                    answer = rule["answer_by_status"].get(filing_status, rule["answer_by_status"]["single"])
                    summary = rule["summary_template"].format(
                        filing_status_label=_filing_status_label(filing_status),
                        answer=answer,
                        tax_year=tax_year,
                    )
                else:
                    answer = rule["answer"]
                    summary = rule["summary"]

                return {
                    "issue": rule["issue"],
                    "taxYear": tax_year,
                    "factsUsed": facts_used,
                    "missingFacts": [],
                    "authorities": authorities,
                    "conclusion": {"answer": answer, "summary": summary},
                    "confidence": rule["confidence"],
                    "followUpQuestions": [],
                    "primaryLawRequired": False,
                }

        return {
            "issue": "requires_primary_law_escalation",
            "taxYear": tax_year,
            "factsUsed": facts_used,
            "missingFacts": [
                "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
            ],
            "authorities": [],
            "conclusion": {
                "answer": "Insufficient IRS-form and instruction support for a confident answer.",
                "summary": "This question needs primary-law analysis before a reliable answer can be given.",
            },
            "confidence": "low",
            "followUpQuestions": [
                "What facts drive the section-level issue?",
                "Is there an existing return position or drafted treatment to review?",
            ],
            "primaryLawRequired": True,
        }
required.") + lines.extend( + [ + "", + "## Analysis", + analysis["conclusion"]["summary"], + "", + "## Conclusion", + analysis["conclusion"]["answer"], + ] + ) + if analysis["missingFacts"]: + lines.extend(["", "## Open Items"]) + for item in analysis["missingFacts"]: + lines.append(f"- {item}") + return "\n".join(lines) diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index 942f027..3d21a17 100644 --- a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -53,13 +53,40 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertIn(command, result.stdout) def test_question_command_emits_json_by_default(self) -> None: - result = self.run_cli("question", "--question", "What is the standard deduction?") + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = temp_dir + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "question", + "--tax-year", + "2025", + "--question", + "What is the standard deduction?", + ], + text=True, + capture_output=True, + env=env, + ) self.assertEqual(result.returncode, 0, result.stderr) payload = json.loads(result.stdout) self.assertEqual(payload["command"], "question") self.assertEqual(payload["format"], "json") self.assertEqual(payload["question"], "What is the standard deduction?") + self.assertEqual(payload["status"], "answered") + self.assertIn("analysis", payload) def test_prepare_requires_case_dir(self) -> None: result = self.run_cli("prepare", "--tax-year", "2025") @@ -128,6 +155,42 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertEqual(payload["status"], "needs_resolution") self.assertEqual(payload["issueType"], "fact_conflict") + def test_question_markdown_memo_mode_renders_tax_memo(self) -> None: + with 
tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = temp_dir + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "question", + "--tax-year", + "2025", + "--format", + "markdown", + "--style", + "memo", + "--question", + "What is the standard deduction?", + ], + text=True, + capture_output=True, + env=env, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertIn("# Tax Memo", result.stdout) + self.assertIn("## Conclusion", result.stdout) + if __name__ == "__main__": unittest.main() diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py new file mode 100644 index 0000000..e392af2 --- /dev/null +++ b/skills/us-cpa/tests/test_questions.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import Path + +from us_cpa.questions import QuestionEngine, render_analysis, render_memo +from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog + + +class QuestionEngineTests(unittest.TestCase): + def build_engine(self, temp_dir: str) -> QuestionEngine: + corpus = TaxYearCorpus(cache_root=Path(temp_dir)) + + def fake_fetch(url: str) -> bytes: + return f"source for {url}".encode() + + corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch) + return QuestionEngine(corpus=corpus) + + def test_standard_deduction_question_returns_structured_analysis(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="What is the standard deduction for single filers?", + tax_year=2025, + case_facts={"filingStatus": "single"}, + ) + + self.assertEqual(analysis["issue"], "standard_deduction") + 
self.assertEqual(analysis["taxYear"], 2025) + self.assertEqual(analysis["conclusion"]["answer"], "$15,000") + self.assertEqual(analysis["confidence"], "high") + self.assertTrue(analysis["authorities"]) + self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions") + + def test_complex_question_flags_primary_law_escalation(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="Does section 469 passive activity loss limitation apply here?", + tax_year=2025, + case_facts={}, + ) + + self.assertEqual(analysis["confidence"], "low") + self.assertTrue(analysis["primaryLawRequired"]) + self.assertIn("Internal Revenue Code", analysis["missingFacts"][0]) + + def test_renderers_produce_conversation_and_memo(self) -> None: + analysis = { + "issue": "standard_deduction", + "taxYear": 2025, + "factsUsed": [{"field": "filingStatus", "value": "single"}], + "missingFacts": [], + "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}], + "conclusion": {"answer": "$15,000", "summary": "Single filers use a $15,000 standard deduction for tax year 2025."}, + "confidence": "high", + "followUpQuestions": [], + "primaryLawRequired": False, + } + + conversation = render_analysis(analysis) + memo = render_memo(analysis) + + self.assertIn("$15,000", conversation) + self.assertIn("Issue", memo) + self.assertIn("Authorities", memo) + + +if __name__ == "__main__": + unittest.main() From c3c0d859081cfed56b1a927b78cbc90b88a5d3ae Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 01:23:47 -0500 Subject: [PATCH 05/14] feat: add us-cpa return model and calculations --- skills/us-cpa/src/us_cpa/returns.py | 122 ++++++++++++++++++++++++++++ skills/us-cpa/tests/test_returns.py | 48 +++++++++++ 2 files changed, 170 insertions(+) create mode 100644 skills/us-cpa/src/us_cpa/returns.py create mode 100644 skills/us-cpa/tests/test_returns.py diff --git 
STANDARD_DEDUCTION_2025 = {
    "single": 15750.0,
    "married_filing_jointly": 31500.0,
    "head_of_household": 23625.0,
}


# Each entry is (upper bound of the bracket, marginal rate), in ascending order.
TAX_BRACKETS_2025 = {
    "single": [
        (11925.0, 0.10),
        (48475.0, 0.12),
        (103350.0, 0.22),
        (197300.0, 0.24),
        (250525.0, 0.32),
        (626350.0, 0.35),
        (float("inf"), 0.37),
    ],
    "married_filing_jointly": [
        (23850.0, 0.10),
        (96950.0, 0.12),
        (206700.0, 0.22),
        (394600.0, 0.24),
        (501050.0, 0.32),
        (751600.0, 0.35),
        (float("inf"), 0.37),
    ],
    "head_of_household": [
        (17000.0, 0.10),
        (64850.0, 0.12),
        (103350.0, 0.22),
        (197300.0, 0.24),
        (250500.0, 0.32),
        (626350.0, 0.35),
        (float("inf"), 0.37),
    ],
}


def _as_float(value: Any) -> float:
    """Coerce a fact value to ``float``; ``None`` and blank strings become 0.0.

    String values may carry thousands separators, a dollar sign, or
    surrounding whitespace (``"$50,000.00"``), as facts come from user input.
    """
    if value is None:
        return 0.0
    if isinstance(value, str):
        cleaned = value.replace(",", "").replace("$", "").strip()
        return float(cleaned) if cleaned else 0.0
    return float(value)


def _for_filing_status(table: dict[str, Any], filing_status: str, what: str) -> Any:
    """Look up ``filing_status`` in ``table``, raising a descriptive ``KeyError`` if absent."""
    try:
        return table[filing_status]
    except KeyError:
        supported = ", ".join(sorted(table))
        raise KeyError(
            f"Unsupported filing status {filing_status!r} for {what}; supported: {supported}"
        ) from None


def tax_on_ordinary_income(amount: float, filing_status: str) -> float:
    """Return the tax on ``amount`` using the 2025 bracket table for ``filing_status``.

    Negative amounts are treated as zero. The result is rounded to cents.

    Raises:
        KeyError: If ``filing_status`` has no bracket table.
    """
    taxable = max(0.0, amount)
    brackets = _for_filing_status(TAX_BRACKETS_2025, filing_status, "tax brackets")
    lower = 0.0
    tax = 0.0
    for upper, rate in brackets:
        if taxable <= lower:
            break
        # Only the slice of income that falls inside this bracket is taxed at its rate.
        portion = min(taxable, upper) - lower
        tax += portion * rate
        lower = upper
    return round(tax, 2)


def resolve_required_forms(normalized: dict[str, Any]) -> list[str]:
    """List the form codes required by the normalized return.

    Form 1040 is always included; ``f1040sb`` is added when taxable interest
    exceeds 1500, and ``f1040sc``/``f1040se``/``f1040s1`` when business income
    is non-zero.
    """
    forms = ["f1040"]
    if normalized["income"]["taxableInterest"] > 1500:
        forms.append("f1040sb")
    if normalized["income"]["businessIncome"] != 0:
        forms.extend(["f1040sc", "f1040se", "f1040s1"])
    return forms


def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]:
    """Turn flat case facts into a normalized return model with computed totals.

    NOTE(review): the 2025 deduction and bracket tables are applied whatever
    ``tax_year`` is passed; ``tax_year`` is only echoed into the result.

    Raises:
        KeyError: If ``filingStatus`` is not in the 2025 tables.
        ValueError: If a monetary fact cannot be parsed as a number.
    """
    filing_status = facts.get("filingStatus", "single")
    wages = _as_float(facts.get("wages"))
    interest = _as_float(facts.get("taxableInterest"))
    business_income = _as_float(facts.get("businessIncome"))
    withholding = _as_float(facts.get("federalWithholding"))

    adjusted_gross_income = wages + interest + business_income
    standard_deduction = _for_filing_status(
        STANDARD_DEDUCTION_2025, filing_status, "standard deduction"
    )
    taxable_income = max(0.0, adjusted_gross_income - standard_deduction)
    income_tax = tax_on_ordinary_income(taxable_income, filing_status)
    # Simplified self-employment tax: 92.35% of positive business income at 15.3%.
    self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2)
    total_tax = round(income_tax + self_employment_tax, 2)
    total_payments = withholding
    refund = round(max(0.0, total_payments - total_tax), 2)
    balance_due = round(max(0.0, total_tax - total_payments), 2)

    normalized = {
        "taxYear": tax_year,
        "taxpayer": {
            "fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer"),
        },
        "filingStatus": filing_status,
        "income": {
            "wages": wages,
            "taxableInterest": interest,
            "businessIncome": business_income,
        },
        "payments": {
            "federalWithholding": withholding,
        },
        "deductions": {
            "standardDeduction": standard_deduction,
        },
        "taxes": {
            "incomeTax": income_tax,
            "selfEmploymentTax": self_employment_tax,
            "totalTax": total_tax,
        },
        "totals": {
            "adjustedGrossIncome": round(adjusted_gross_income, 2),
            "taxableIncome": round(taxable_income, 2),
            "totalPayments": round(total_payments, 2),
            "refund": refund,
            "balanceDue": balance_due,
        },
    }
    normalized["requiredForms"] = resolve_required_forms(normalized)
    return normalized
"wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + }, + 2025, + ) + + self.assertEqual(normalized["requiredForms"], ["f1040"]) + self.assertEqual(normalized["deductions"]["standardDeduction"], 15750.0) + self.assertEqual(normalized["totals"]["adjustedGrossIncome"], 50100.0) + self.assertEqual(normalized["totals"]["taxableIncome"], 34350.0) + self.assertEqual(normalized["totals"]["refund"], 2116.5) + + def test_resolve_required_forms_adds_business_and_interest_forms(self) -> None: + normalized = normalize_case_facts( + { + "filingStatus": "single", + "wages": 0, + "taxableInterest": 2000, + "businessIncome": 12000, + }, + 2025, + ) + + self.assertEqual( + resolve_required_forms(normalized), + ["f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1"], + ) + + def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None: + self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5) + + +if __name__ == "__main__": + unittest.main() From decf3132d5ac3b71ec8912943f6a69220a14ff0b Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 01:26:29 -0500 Subject: [PATCH 06/14] feat: add us-cpa pdf renderer --- docs/us-cpa.md | 16 +++++ skills/us-cpa/SKILL.md | 1 + skills/us-cpa/src/us_cpa/renderers.py | 88 +++++++++++++++++++++++++++ skills/us-cpa/tests/test_renderers.py | 53 ++++++++++++++++ 4 files changed, 158 insertions(+) create mode 100644 skills/us-cpa/src/us_cpa/renderers.py create mode 100644 skills/us-cpa/tests/test_renderers.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index d500436..df8f9e1 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -122,6 +122,22 @@ Current implemented topics: - standard deduction - Schedule C / sole proprietorship reporting trigger +## Form Rendering + +Current rendering path: + +- official IRS PDFs from the cached tax-year corpus +- overlay rendering onto those official PDFs using `reportlab` + `pypdf` +- artifact manifest written to `output/artifacts.json` + +Current rendered form 
support: + +- Form 1040 overlay artifact generation + +Current review rule: + +- overlay-rendered artifacts are marked `reviewRequired: true` + ## Scope Rules - U.S. federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index f14247a..8c39ebe 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -54,5 +54,6 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict +- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. 
def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
    """Draw the overlay fields for ``form_code`` onto page 1 of the template PDF.

    The template is cloned into a writer, the text for each entry in
    ``OVERLAY_FIELDS[form_code]`` is drawn on an in-memory canvas sized to the
    first page, and that canvas is merged onto the first page before the
    result is written to ``output_path``. Forms without overlay fields are
    written with an empty overlay.
    """
    # The writer clones the template itself; no separate reader of the template is needed.
    writer = PdfWriter(clone_from=str(template_path))

    page = writer.pages[0]
    width = float(page.mediabox.width)
    height = float(page.mediabox.height)
    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=(width, height))
    for x, y, getter in OVERLAY_FIELDS.get(form_code, []):
        pdf.drawString(x, y, getter(normalized))
    pdf.save()
    buffer.seek(0)
    overlay = PdfReader(buffer)
    page.merge_page(overlay.pages[0])
    with output_path.open("wb") as handle:
        writer.write(handle)


def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[str, Any]) -> dict[str, Any]:
    """Render every required form with a known template into ``output/forms``.

    Form codes in ``normalized["requiredForms"]`` that are absent from
    ``FORM_TEMPLATES`` are skipped. An artifact manifest is written to
    ``output/artifacts.json`` and also returned.

    Raises:
        FileNotFoundError: If the template PDF for a required form is not in
            the corpus directory for the tax year.
    """
    output_dir = case_dir / "output" / "forms"
    output_dir.mkdir(parents=True, exist_ok=True)
    irs_dir = corpus.paths_for_year(normalized["taxYear"]).irs_dir

    artifacts = []
    for form_code in normalized["requiredForms"]:
        template_slug = FORM_TEMPLATES.get(form_code)
        if template_slug is None:
            continue
        template_path = irs_dir / f"{template_slug}.pdf"
        if not template_path.exists():
            # Same exception type the PDF library would raise, but with an actionable message.
            raise FileNotFoundError(
                f"IRS template for {form_code} not found at {template_path}. Run fetch-year first."
            )
        output_path = output_dir / f"{form_code}.pdf"
        _overlay_page(template_path, output_path, form_code, normalized)
        artifacts.append(
            {
                "formCode": form_code,
                "templatePath": str(template_path),
                "outputPath": str(output_path),
                "renderMethod": "overlay",
                "reviewRequired": True,
            }
        )

    artifact_manifest = {
        "taxYear": normalized["taxYear"],
        "artifactCount": len(artifacts),
        "artifacts": artifacts,
    }
    (case_dir / "output" / "artifacts.json").write_text(json.dumps(artifact_manifest, indent=2))
    return artifact_manifest
corpus.paths_for_year(2025).irs_dir + irs_dir.mkdir(parents=True, exist_ok=True) + + buffer = BytesIO() + pdf = canvas.Canvas(buffer) + pdf.drawString(72, 720, "Template") + pdf.save() + (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue()) + + normalized = { + "taxYear": 2025, + "requiredForms": ["f1040"], + "taxpayer": {"fullName": "Jane Doe"}, + "filingStatus": "single", + "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0}, + "deductions": {"standardDeduction": 15750.0}, + "taxes": {"totalTax": 3883.5}, + "payments": {"federalWithholding": 6000.0}, + "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0}, + } + + artifacts = render_case_forms(case_dir, corpus, normalized) + + self.assertEqual(artifacts["artifactCount"], 1) + self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "overlay") + self.assertTrue(artifacts["artifacts"][0]["reviewRequired"]) + self.assertTrue((case_dir / "output" / "forms" / "f1040.pdf").exists()) + manifest = json.loads((case_dir / "output" / "artifacts.json").read_text()) + self.assertEqual(manifest["artifacts"][0]["formCode"], "f1040") + + +if __name__ == "__main__": + unittest.main() From 82cf3d9010d1069374357d868567af6450fcc631 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 01:28:22 -0500 Subject: [PATCH 07/14] feat: add us-cpa preparation workflow --- docs/us-cpa.md | 48 +++++++++++++++-- skills/us-cpa/SKILL.md | 3 ++ skills/us-cpa/src/us_cpa/cli.py | 36 ++++++++++++- skills/us-cpa/src/us_cpa/prepare.py | 75 +++++++++++++++++++++++++++ skills/us-cpa/src/us_cpa/questions.py | 6 +-- skills/us-cpa/tests/test_cli.py | 70 +++++++++++++++++++++++++ skills/us-cpa/tests/test_prepare.py | 75 +++++++++++++++++++++++++++ skills/us-cpa/tests/test_questions.py | 6 +-- 8 files changed, 309 insertions(+), 10 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/prepare.py create mode 100644 skills/us-cpa/tests/test_prepare.py diff --git 
a/docs/us-cpa.md b/docs/us-cpa.md index df8f9e1..0a5b7a1 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -4,14 +4,18 @@ ## Current Milestone -Milestone 2 now adds the first tax-year corpus layer: +Current implementation now includes: - deterministic cache layout under `~/.cache/us-cpa` by default - `fetch-year` download flow for the bootstrap IRS corpus - source manifest with URL, hash, authority rank, and local path traceability - authority ranking hooks for IRS materials and future primary-law escalation +- case-folder intake and conflict-stop handling +- question workflow with conversation and memo output +- prepare workflow for the current supported 1040 subset +- e-file-ready draft export payload generation -Tax logic, case workflows, rendering, and review logic are still pending. +Review logic and broader form coverage are still pending. ## CLI Surface @@ -102,7 +106,10 @@ Behavior: - `question` supports: - `--style conversation` - `--style memo` -- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` +- `question` emits answered analysis output +- `prepare` emits a prepared return package summary +- `export-efile-ready` emits a draft e-file-ready payload +- `review` is still scaffolded - `fetch-year` emits a downloaded manifest location and source count ## Question Engine @@ -138,6 +145,41 @@ Current review rule: - overlay-rendered artifacts are marked `reviewRequired: true` +## Preparation Workflow + +Current `prepare` implementation: + +- loads case facts from `extracted/facts.json` +- normalizes them into the current supported 2025 federal return model +- computes the current supported 1040 subset +- resolves required forms for the current supported subset +- writes: + - `return/normalized-return.json` + - `output/artifacts.json` + - `reports/prepare-summary.json` + +Current supported calculation inputs: + +- `filingStatus` +- `wages` +- 
`taxableInterest` +- `businessIncome` +- `federalWithholding` + +## E-file-ready Export + +`export-efile-ready` writes: + +- `output/efile-ready.json` + +Current export behavior: + +- draft-only +- includes required forms +- includes refund or balance due summary +- includes attachment manifest +- includes unresolved issues + ## Scope Rules - U.S. federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 8c39ebe..8b34301 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json ``` @@ -55,5 +56,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict - rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review +- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory +- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything For 
operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 1249c51..ee1a471 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any from us_cpa.cases import CaseConflictError, CaseManager +from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms from us_cpa.questions import QuestionEngine, render_analysis, render_memo from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog @@ -167,7 +168,40 @@ def main(argv: list[str] | None = None) -> int: } return _emit(payload, args.format) - if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}: + if args.command == "prepare": + case_dir = _require_case_dir(args) + payload = { + "command": args.command, + "format": args.format, + **PrepareEngine().prepare_case(case_dir), + } + return _emit(payload, args.format) + + if args.command == "render-forms": + case_dir = _require_case_dir(args) + manager = CaseManager(case_dir) + manifest = manager.load_manifest() + normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text()) + artifacts = render_case_forms(case_dir, TaxYearCorpus(), normalized) + payload = { + "command": "render-forms", + "format": args.format, + "taxYear": manifest["taxYear"], + "status": "rendered", + **artifacts, + } + return _emit(payload, args.format) + + if args.command == "export-efile-ready": + case_dir = _require_case_dir(args) + payload = { + "command": "export-efile-ready", + "format": args.format, + **EfileExporter().export_case(case_dir), + } + return _emit(payload, args.format) + + if args.command == "review": case_dir = _require_case_dir(args) payload = { "command": args.command, diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py new file mode 100644 index 0000000..72ef8cd --- /dev/null +++ 
def _load_case_facts(case_dir: Path) -> dict[str, Any]:
    """Return the ``{fact name: value}`` mapping stored in ``extracted/facts.json``.

    Each entry under the file's ``facts`` key is reduced to its ``value`` field.
    """
    facts_path = case_dir / "extracted" / "facts.json"
    payload = json.loads(facts_path.read_text(encoding="utf-8"))
    return {key: value["value"] for key, value in payload["facts"].items()}


class PrepareEngine:
    """Turns stored case facts into a normalized return, rendered forms and a summary."""

    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
        """Use ``corpus`` for form templates, or a default ``TaxYearCorpus()``."""
        self.corpus = corpus or TaxYearCorpus()

    def prepare_case(self, case_dir: Path) -> dict[str, Any]:
        """Prepare the case in ``case_dir`` and return the preparation result.

        Writes ``return/normalized-return.json`` and ``reports/prepare-summary.json``
        and renders the required forms via ``render_case_forms``.

        Returns:
            Dict with ``status`` ("prepared"), ``caseDir``, ``taxYear``, the two
            output paths and a ``summary`` of forms, refund/balance due and the
            number of unresolved issues.
        """
        manager = CaseManager(case_dir)
        manifest = manager.load_manifest()
        facts = _load_case_facts(manager.case_dir)
        normalized = normalize_case_facts(facts, manifest["taxYear"])

        normalized_path = manager.case_dir / "return" / "normalized-return.json"
        # Do not depend on the case skeleton already containing this folder.
        normalized_path.parent.mkdir(parents=True, exist_ok=True)
        normalized_path.write_text(json.dumps(normalized, indent=2), encoding="utf-8")

        artifacts = render_case_forms(manager.case_dir, self.corpus, normalized)
        unresolved_issues = json.loads(manager.issues_path.read_text(encoding="utf-8"))["issues"]

        summary = {
            "requiredForms": normalized["requiredForms"],
            "reviewRequiredArtifacts": [
                artifact["formCode"] for artifact in artifacts["artifacts"] if artifact["reviewRequired"]
            ],
            "refund": normalized["totals"]["refund"],
            "balanceDue": normalized["totals"]["balanceDue"],
            "unresolvedIssueCount": len(unresolved_issues),
        }
        result = {
            "status": "prepared",
            "caseDir": str(manager.case_dir),
            "taxYear": manifest["taxYear"],
            "normalizedReturnPath": str(normalized_path),
            "artifactManifestPath": str(manager.case_dir / "output" / "artifacts.json"),
            "summary": summary,
        }
        summary_path = manager.case_dir / "reports" / "prepare-summary.json"
        summary_path.parent.mkdir(parents=True, exist_ok=True)
        summary_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
        return result


class EfileExporter:
    """Builds the e-file-ready payload from a case's prepared outputs."""

    def export_case(self, case_dir: Path) -> dict[str, Any]:
        """Write ``output/efile-ready.json`` for the case and return its payload.

        Reads ``return/normalized-return.json``, ``output/artifacts.json`` and
        ``issues/open-issues.json``. ``status`` is "draft" when there are open
        issues or any artifact still needs review, otherwise "ready".
        """
        case_dir = Path(case_dir).expanduser().resolve()
        normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text(encoding="utf-8"))
        artifacts = json.loads((case_dir / "output" / "artifacts.json").read_text(encoding="utf-8"))
        issues = json.loads((case_dir / "issues" / "open-issues.json").read_text(encoding="utf-8"))["issues"]

        # A manifest entry without the flag is treated as still needing review,
        # so an incomplete manifest cannot crash the export or yield "ready".
        needs_review = any(artifact.get("reviewRequired", True) for artifact in artifacts["artifacts"])
        payload = {
            "status": "draft" if issues or needs_review else "ready",
            "taxYear": normalized["taxYear"],
            "returnSummary": {
                "requiredForms": normalized["requiredForms"],
                "refund": normalized["totals"]["refund"],
                "balanceDue": normalized["totals"]["balanceDue"],
            },
            "attachments": artifacts["artifacts"],
            "unresolvedIssues": issues,
        }
        output_path = case_dir / "output" / "efile-ready.json"
        output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        return payload
class PrepareEngineTests(unittest.TestCase):
    """Covers PrepareEngine and EfileExporter against a throwaway case directory."""

    def build_case(self, temp_dir: str) -> tuple[CaseManager, TaxYearCorpus]:
        """Create a 2025 case with facts taken in and a corpus of stub PDF templates."""
        root = Path(temp_dir)
        manager = CaseManager(root / "2025-jane-doe")
        manager.create_case(case_label="Jane Doe", tax_year=2025)
        jane_facts = {
            "taxpayer.fullName": "Jane Doe",
            "filingStatus": "single",
            "wages": 50000,
            "taxableInterest": 100,
            "federalWithholding": 6000,
        }
        manager.intake(tax_year=2025, user_facts=jane_facts, document_paths=[])

        def fake_fetch(url: str) -> bytes:
            # Stand-in for the network download: a one-line PDF naming its URL.
            stream = BytesIO()
            sheet = canvas.Canvas(stream)
            sheet.drawString(72, 720, f"Template for {url}")
            sheet.save()
            return stream.getvalue()

        corpus = TaxYearCorpus(cache_root=root / "cache")
        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
        return manager, corpus

    def test_prepare_creates_normalized_return_and_artifacts(self) -> None:
        """prepare_case reports the f1040 package and writes both output files."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager, corpus = self.build_case(temp_dir)

            result = PrepareEngine(corpus=corpus).prepare_case(manager.case_dir)

            summary = result["summary"]
            self.assertEqual(result["status"], "prepared")
            self.assertEqual(summary["requiredForms"], ["f1040"])
            self.assertEqual(summary["reviewRequiredArtifacts"], ["f1040"])

            return_path = manager.case_dir / "return" / "normalized-return.json"
            self.assertTrue(return_path.exists())
            self.assertTrue((manager.case_dir / "output" / "artifacts.json").exists())

            totals = json.loads(return_path.read_text())["totals"]
            self.assertEqual(totals["adjustedGrossIncome"], 50100.0)
            self.assertEqual(totals["taxableIncome"], 34350.0)

    def test_exporter_writes_efile_ready_payload(self) -> None:
        """Exporting a prepared case yields a draft payload and writes it to disk."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager, corpus = self.build_case(temp_dir)
            PrepareEngine(corpus=corpus).prepare_case(manager.case_dir)

            export = EfileExporter().export_case(manager.case_dir)

            self.assertEqual(export["status"], "draft")
            self.assertTrue((manager.case_dir / "output" / "efile-ready.json").exists())
            self.assertEqual(export["returnSummary"]["requiredForms"], ["f1040"])
skills/us-cpa/src/us_cpa/review.py | 111 +++++++++++++++++++++++++++++ skills/us-cpa/tests/test_cli.py | 88 +++++++++++++++++++++++ skills/us-cpa/tests/test_review.py | 90 +++++++++++++++++++++++ 6 files changed, 321 insertions(+), 5 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/review.py create mode 100644 skills/us-cpa/tests/test_review.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index 0a5b7a1..b5416dd 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -109,7 +109,7 @@ Behavior: - `question` emits answered analysis output - `prepare` emits a prepared return package summary - `export-efile-ready` emits a draft e-file-ready payload -- `review` is still scaffolded +- `review` emits a findings-first review result - `fetch-year` emits a downloaded manifest location and source count ## Question Engine @@ -180,6 +180,21 @@ Current export behavior: - includes attachment manifest - includes unresolved issues +## Review Workflow + +Current `review` implementation: + +- recomputes the return from current case facts +- compares stored normalized return values to recomputed values +- checks whether required rendered artifacts are present +- flags overlay-rendered artifacts as requiring human review +- sorts findings by severity + +Current render modes: + +- `--style conversation` +- `--style memo` + ## Scope Rules - U.S. 
federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 8b34301..a8d73f5 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -36,6 +36,7 @@ skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --style memo --format markdown skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json ``` @@ -58,5 +59,6 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review - `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory - `export-efile-ready` writes a draft transmission-ready payload without transmitting anything +- `review` recomputes the return from case facts, checks artifacts, and returns findings-first output in conversation or memo style For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. 
diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index ee1a471..71f3602 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -9,6 +9,7 @@ from typing import Any from us_cpa.cases import CaseConflictError, CaseManager from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms from us_cpa.questions import QuestionEngine, render_analysis, render_memo +from us_cpa.review import ReviewEngine, render_review_memo, render_review_summary from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog COMMANDS = ( @@ -73,6 +74,7 @@ def build_parser() -> argparse.ArgumentParser: review = subparsers.add_parser("review", help="Review a return case.") _add_common_arguments(review) + review.add_argument("--style", choices=("conversation", "memo"), default="conversation") fetch_year = subparsers.add_parser( "fetch-year", help="Fetch tax-year forms and instructions." @@ -203,13 +205,21 @@ def main(argv: list[str] | None = None) -> int: if args.command == "review": case_dir = _require_case_dir(args) + review_payload = ReviewEngine().review_case(case_dir) payload = { - "command": args.command, + "command": "review", "format": args.format, - "taxYear": args.tax_year, - "caseDir": str(case_dir), - "status": "not_implemented", + "style": args.style, + **review_payload, } + payload["rendered"] = ( + render_review_memo(review_payload) + if args.style == "memo" + else render_review_summary(review_payload) + ) + if args.format == "markdown": + print(payload["rendered"]) + return 0 return _emit(payload, args.format) if args.command == "fetch-year": diff --git a/skills/us-cpa/src/us_cpa/review.py b/skills/us-cpa/src/us_cpa/review.py new file mode 100644 index 0000000..82576b9 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/review.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from us_cpa.returns import normalize_case_facts +from 
# Sort order for finding severities: lower number is listed first.
_SEVERITY_ORDER = {"high": 0, "medium": 1, "low": 2}


def _severity_rank(severity: str) -> int:
    """Return the sort position of ``severity`` (lower sorts first).

    Raises:
        KeyError: If ``severity`` is not "high", "medium" or "low".
    """
    return _SEVERITY_ORDER[severity]


class ReviewEngine:
    """Recomputes a prepared return from case facts and reports discrepancies."""

    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
        """Keep ``corpus``, or a default ``TaxYearCorpus()`` when none is given."""
        self.corpus = corpus or TaxYearCorpus()

    def review_case(self, case_dir: Path) -> dict[str, Any]:
        """Review the case in ``case_dir`` and write ``reports/review-report.json``.

        Reads the case manifest, the stored normalized return, the extracted
        facts and the artifact manifest, then recomputes the return from facts.

        Returns:
            Dict with ``status`` ("reviewed"), ``taxYear``, ``caseDir``,
            ``findingCount`` and the severity-sorted ``findings``.
        """
        case_dir = Path(case_dir).expanduser().resolve()
        manifest = json.loads((case_dir / "case-manifest.json").read_text(encoding="utf-8"))
        stored_return = json.loads((case_dir / "return" / "normalized-return.json").read_text(encoding="utf-8"))
        facts_payload = json.loads((case_dir / "extracted" / "facts.json").read_text(encoding="utf-8"))
        facts = {key: value["value"] for key, value in facts_payload["facts"].items()}
        recomputed = normalize_case_facts(facts, manifest["taxYear"])
        artifacts_payload = json.loads((case_dir / "output" / "artifacts.json").read_text(encoding="utf-8"))

        findings = self._collect_findings(stored_return, recomputed, artifacts_payload)
        review = {
            "status": "reviewed",
            "taxYear": manifest["taxYear"],
            "caseDir": str(case_dir),
            "findingCount": len(findings),
            "findings": findings,
        }
        reports_dir = case_dir / "reports"
        # Do not depend on the case skeleton already containing this folder.
        reports_dir.mkdir(parents=True, exist_ok=True)
        (reports_dir / "review-report.json").write_text(json.dumps(review, indent=2), encoding="utf-8")
        return review

    @staticmethod
    def _collect_findings(
        stored_return: dict[str, Any],
        recomputed: dict[str, Any],
        artifacts_payload: dict[str, Any],
    ) -> list[dict[str, Any]]:
        """Compare stored and recomputed data and return findings sorted by severity.

        Pure function of its inputs, so it can be tested without a case directory.
        """
        findings: list[dict[str, Any]] = []

        # A stored return with no AGI is reported as a mismatch rather than
        # crashing the review with KeyError.
        stored_agi = (stored_return.get("totals") or {}).get("adjustedGrossIncome")
        recomputed_agi = recomputed["totals"]["adjustedGrossIncome"]
        if stored_agi != recomputed_agi:
            findings.append(
                {
                    "severity": "high",
                    "title": "Adjusted gross income mismatch",
                    "explanation": "Stored adjusted gross income does not match the recomputed return from case facts.",
                    "suggestedAction": f"Update AGI to {recomputed_agi:.2f} on Form 1040 line 11.",
                    "authorities": [
                        {"title": "Instructions for Form 1040 and Schedules 1-3", "sourceClass": "irs_instructions"}
                    ],
                }
            )

        rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]}
        for required_form in recomputed["requiredForms"]:
            if required_form not in rendered_forms:
                findings.append(
                    {
                        "severity": "high",
                        "title": f"Missing rendered artifact for {required_form}",
                        "explanation": "The return requires this form, but no rendered artifact is present in the artifact manifest.",
                        "suggestedAction": f"Render and review {required_form} before treating the package as complete.",
                        "authorities": [{"title": "Supported form manifest", "sourceClass": "irs_form"}],
                    }
                )

        for artifact in artifacts_payload["artifacts"]:
            if artifact.get("reviewRequired"):
                findings.append(
                    {
                        "severity": "medium",
                        "title": f"Human review required for {artifact['formCode']}",
                        "explanation": "The form was overlay-rendered on the official IRS PDF and must be reviewed before filing.",
                        "suggestedAction": f"Review the rendered {artifact['formCode']} artifact visually before any filing/export handoff.",
                        "authorities": [{"title": "Artifact render policy", "sourceClass": "irs_form"}],
                    }
                )

        # Most severe first; title breaks ties so the order is deterministic.
        findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"]))
        return findings


def render_review_summary(review: dict[str, Any]) -> str:
    """Render the findings as a short plain-text list, one line per finding."""
    if not review["findings"]:
        return "No findings detected in the reviewed return package."
    lines = ["Review findings:"]
    lines.extend(
        f"- [{finding['severity'].upper()}] {finding['title']}: {finding['explanation']}"
        for finding in review["findings"]
    )
    return "\n".join(lines)


def render_review_memo(review: dict[str, Any]) -> str:
    """Render the findings as a Markdown memo with one section per finding."""
    lines = ["# Review Memo", ""]
    if not review["findings"]:
        lines.append("No findings detected.")
        return "\n".join(lines)
    for index, finding in enumerate(review["findings"], start=1):
        lines.extend(
            [
                f"## Finding {index}: {finding['title']}",
                f"Severity: {finding['severity']}",
                "",
                "### Explanation",
                finding["explanation"],
                "",
                "### Suggested correction",
                finding["suggestedAction"],
                "",
                "### Authorities",
            ]
        )
        lines.extend(f"- {authority['title']}" for authority in finding["authorities"])
        # Blank line between findings; the final one is trimmed below.
        lines.append("")
    return "\n".join(lines).rstrip()
class ReviewEngineTests(unittest.TestCase):
    """Covers ReviewEngine findings and the two review renderers."""

    def build_prepared_case(self, temp_dir: str) -> tuple[Path, TaxYearCorpus]:
        """Create a 2025 case, take in its facts and run PrepareEngine on it."""
        root = Path(temp_dir)
        case_dir = root / "2025-jane-doe"
        manager = CaseManager(case_dir)
        manager.create_case(case_label="Jane Doe", tax_year=2025)
        jane_facts = {
            "taxpayer.fullName": "Jane Doe",
            "filingStatus": "single",
            "wages": 50000,
            "taxableInterest": 100,
            "federalWithholding": 6000,
        }
        manager.intake(tax_year=2025, user_facts=jane_facts, document_paths=[])

        def fake_fetch(url: str) -> bytes:
            # Stand-in for the network download: a one-line PDF naming its URL.
            stream = BytesIO()
            sheet = canvas.Canvas(stream)
            sheet.drawString(72, 720, f"Template for {url}")
            sheet.save()
            return stream.getvalue()

        corpus = TaxYearCorpus(cache_root=root / "cache")
        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
        PrepareEngine(corpus=corpus).prepare_case(case_dir)
        return case_dir, corpus

    def test_review_detects_mismatched_return_and_missing_artifacts(self) -> None:
        """A tampered AGI and an emptied artifact manifest both surface as findings."""
        with tempfile.TemporaryDirectory() as temp_dir:
            case_dir, corpus = self.build_prepared_case(temp_dir)

            # Corrupt the stored AGI so it no longer matches the facts.
            return_path = case_dir / "return" / "normalized-return.json"
            stored = json.loads(return_path.read_text())
            stored["totals"]["adjustedGrossIncome"] = 99999.0
            return_path.write_text(json.dumps(stored, indent=2))

            # Empty the artifact manifest so the required form looks unrendered.
            manifest_path = case_dir / "output" / "artifacts.json"
            manifest = json.loads(manifest_path.read_text())
            manifest["artifacts"] = []
            manifest["artifactCount"] = 0
            manifest_path.write_text(json.dumps(manifest, indent=2))

            review = ReviewEngine(corpus=corpus).review_case(case_dir)

            top_finding = review["findings"][0]
            titles = [item["title"].lower() for item in review["findings"]]
            self.assertEqual(review["status"], "reviewed")
            self.assertEqual(top_finding["severity"], "high")
            self.assertIn("adjusted gross income", top_finding["title"].lower())
            self.assertTrue(any("missing rendered artifact" in title for title in titles))

    def test_review_renderers_produce_summary_and_memo(self) -> None:
        """Both renderers include the finding; the memo has its fixed headings."""
        finding = {
            "severity": "high",
            "title": "Adjusted gross income mismatch",
            "explanation": "Stored AGI does not match recomputed AGI.",
            "suggestedAction": "Update Form 1040 line 11.",
            "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
        }
        review = {"status": "reviewed", "findings": [finding]}

        summary = render_review_summary(review)
        memo = render_review_memo(review)

        self.assertIn("Adjusted gross income mismatch", summary)
        self.assertIn("# Review Memo", memo)
        self.assertIn("Suggested correction", memo)
| `skills/us-cpa` | | `web-automation` | One-shot extraction plus broader browsing/scraping with Playwright-compatible CloakBrowser (auth flows, extraction, bot-protected sites). | `skills/web-automation` | ## Integrations diff --git a/docs/README.md b/docs/README.md index 339baf3..68757c5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,6 +9,7 @@ This folder contains detailed docs for each skill in this repository. - [`nordvpn-client`](nordvpn-client.md) — Cross-platform NordVPN install, login, connect, disconnect, and verification with Linux CLI and macOS NordLynx/WireGuard support - [`portainer`](portainer.md) — Portainer stack management (list, lifecycle, updates, image pruning) - [`searxng`](searxng.md) — Privacy-respecting metasearch via a local or self-hosted SearXNG instance +- [`us-cpa`](us-cpa.md) — Federal individual 1040 workflow for tax questions, case intake, preparation, review, and draft e-file-ready export - [`web-automation`](web-automation.md) — One-shot extraction plus Playwright-compatible CloakBrowser browser automation and scraping ## Integrations diff --git a/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json b/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json new file mode 100644 index 0000000..40fb20c --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json @@ -0,0 +1,6 @@ +{ + "taxpayer.fullName": "Olivia Overlay", + "filingStatus": "single", + "wages": 42000, + "federalWithholding": 5000 +} diff --git a/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json b/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json new file mode 100644 index 0000000..05b0809 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json @@ -0,0 +1,8 @@ +{ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + "expectedIssue": "agi_mismatch" +} diff --git 
a/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json b/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json new file mode 100644 index 0000000..7c50db4 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json @@ -0,0 +1,6 @@ +{ + "taxpayer.fullName": "Jamie Owner", + "filingStatus": "single", + "businessIncome": 12000, + "federalWithholding": 0 +} diff --git a/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json b/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json new file mode 100644 index 0000000..2a2c367 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json @@ -0,0 +1,7 @@ +{ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000 +} From 6c02e0b7c6fc435e89eba08f1727bad064708fa9 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 02:47:14 -0500 Subject: [PATCH 10/14] fix: add us-cpa tax year rules and package metadata --- docs/us-cpa.md | 18 +++++- skills/us-cpa/README.md | 45 +++++++++++++ skills/us-cpa/pyproject.toml | 10 ++- skills/us-cpa/src/us_cpa/questions.py | 14 ++++ skills/us-cpa/src/us_cpa/returns.py | 48 ++------------ skills/us-cpa/src/us_cpa/tax_years.py | 93 +++++++++++++++++++++++++++ skills/us-cpa/tests/test_cli.py | 18 ++++++ skills/us-cpa/tests/test_questions.py | 3 + skills/us-cpa/tests/test_returns.py | 7 ++ 9 files changed, 211 insertions(+), 45 deletions(-) create mode 100644 skills/us-cpa/README.md create mode 100644 skills/us-cpa/src/us_cpa/tax_years.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index b5416dd..66dc9eb 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -2,6 +2,21 @@ `us-cpa` is a Python CLI plus OpenClaw skill wrapper for U.S. federal individual tax work. 
+## Standalone package usage + +From `skills/us-cpa/`: + +```bash +pip install -e .[dev] +us-cpa --help +``` + +Without installing, the repo-local wrapper works directly: + +```bash +skills/us-cpa/scripts/us-cpa --help +``` + ## Current Milestone Current implementation now includes: @@ -13,10 +28,9 @@ Current implementation now includes: - case-folder intake and conflict-stop handling - question workflow with conversation and memo output - prepare workflow for the current supported 1040 subset +- review workflow with findings-first output - e-file-ready draft export payload generation -Review logic and broader form coverage are still pending. - ## CLI Surface ```bash diff --git a/skills/us-cpa/README.md b/skills/us-cpa/README.md new file mode 100644 index 0000000..54a3140 --- /dev/null +++ b/skills/us-cpa/README.md @@ -0,0 +1,45 @@ +# us-cpa package + +Standalone Python CLI package for the `us-cpa` skill. + +## Install + +From `skills/us-cpa/`: + +```bash +pip install -e .[dev] +``` + +## Run + +Installed entry point: + +```bash +us-cpa --help +``` + +Repo-local wrapper without installation: + +```bash +scripts/us-cpa --help +``` + +Module execution: + +```bash +python3 -m us_cpa.cli --help +``` + +## Tests + +From `skills/us-cpa/`: + +```bash +PYTHONPATH=src python3 -m unittest +``` + +Or with the dev extra installed: + +```bash +python -m unittest +``` diff --git a/skills/us-cpa/pyproject.toml b/skills/us-cpa/pyproject.toml index eb9a667..a448842 100644 --- a/skills/us-cpa/pyproject.toml +++ b/skills/us-cpa/pyproject.toml @@ -7,7 +7,15 @@ name = "us-cpa" version = "0.1.0" description = "US federal individual tax workflow CLI for questions, preparation, and review." 
requires-python = ">=3.9" -dependencies = [] +dependencies = [ + "pypdf>=5.0.0", + "reportlab>=4.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", +] [project.scripts] us-cpa = "us_cpa.cli:main" diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py index 3c65a43..1ee9554 100644 --- a/skills/us-cpa/src/us_cpa/questions.py +++ b/skills/us-cpa/src/us_cpa/questions.py @@ -32,6 +32,13 @@ TOPIC_RULES = [ ] +RISK_BY_CONFIDENCE = { + "high": "low", + "medium": "medium", + "low": "high", +} + + def _normalize_question(question: str) -> str: return question.strip().lower() @@ -98,6 +105,7 @@ class QuestionEngine: "authorities": authorities, "conclusion": {"answer": answer, "summary": summary}, "confidence": rule["confidence"], + "riskLevel": RISK_BY_CONFIDENCE[rule["confidence"]], "followUpQuestions": [], "primaryLawRequired": False, } @@ -115,6 +123,7 @@ class QuestionEngine: "summary": "This question needs primary-law analysis before a reliable answer can be given.", }, "confidence": "low", + "riskLevel": "high", "followUpQuestions": [ "What facts drive the section-level issue?", "Is there an existing return position or drafted treatment to review?", @@ -125,6 +134,9 @@ class QuestionEngine: def render_analysis(analysis: dict[str, Any]) -> str: lines = [analysis["conclusion"]["summary"]] + lines.append( + f"Confidence: {analysis['confidence']}. Risk: {analysis['riskLevel']}." 
+ ) if analysis["factsUsed"]: facts = ", ".join(f"{item['field']}={item['value']}" for item in analysis["factsUsed"]) lines.append(f"Facts used: {facts}.") @@ -160,6 +172,8 @@ def render_memo(analysis: dict[str, Any]) -> str: "", "## Analysis", analysis["conclusion"]["summary"], + f"Confidence: {analysis['confidence']}", + f"Risk level: {analysis['riskLevel']}", "", "## Conclusion", analysis["conclusion"]["answer"], diff --git a/skills/us-cpa/src/us_cpa/returns.py b/skills/us-cpa/src/us_cpa/returns.py index c952e6a..3af4d10 100644 --- a/skills/us-cpa/src/us_cpa/returns.py +++ b/skills/us-cpa/src/us_cpa/returns.py @@ -1,45 +1,8 @@ from __future__ import annotations -from dataclasses import dataclass from typing import Any - -STANDARD_DEDUCTION_2025 = { - "single": 15750.0, - "married_filing_jointly": 31500.0, - "head_of_household": 23625.0, -} - - -TAX_BRACKETS_2025 = { - "single": [ - (11925.0, 0.10), - (48475.0, 0.12), - (103350.0, 0.22), - (197300.0, 0.24), - (250525.0, 0.32), - (626350.0, 0.35), - (float("inf"), 0.37), - ], - "married_filing_jointly": [ - (23850.0, 0.10), - (96950.0, 0.12), - (206700.0, 0.22), - (394600.0, 0.24), - (501050.0, 0.32), - (751600.0, 0.35), - (float("inf"), 0.37), - ], - "head_of_household": [ - (17000.0, 0.10), - (64850.0, 0.12), - (103350.0, 0.22), - (197300.0, 0.24), - (250500.0, 0.32), - (626350.0, 0.35), - (float("inf"), 0.37), - ], -} +from us_cpa.tax_years import tax_year_rules def _as_float(value: Any) -> float: @@ -48,9 +11,9 @@ def _as_float(value: Any) -> float: return float(value) -def tax_on_ordinary_income(amount: float, filing_status: str) -> float: +def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float: taxable = max(0.0, amount) - brackets = TAX_BRACKETS_2025[filing_status] + brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status] lower = 0.0 tax = 0.0 for upper, rate in brackets: @@ -72,6 +35,7 @@ def resolve_required_forms(normalized: dict[str, Any]) -> 
list[str]: def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]: + rules = tax_year_rules(tax_year) filing_status = facts.get("filingStatus", "single") wages = _as_float(facts.get("wages")) interest = _as_float(facts.get("taxableInterest")) @@ -79,9 +43,9 @@ def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any] withholding = _as_float(facts.get("federalWithholding")) adjusted_gross_income = wages + interest + business_income - standard_deduction = STANDARD_DEDUCTION_2025[filing_status] + standard_deduction = rules["standardDeduction"][filing_status] taxable_income = max(0.0, adjusted_gross_income - standard_deduction) - income_tax = tax_on_ordinary_income(taxable_income, filing_status) + income_tax = tax_on_ordinary_income(taxable_income, filing_status, tax_year) self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2) total_tax = round(income_tax + self_employment_tax, 2) total_payments = withholding diff --git a/skills/us-cpa/src/us_cpa/tax_years.py b/skills/us-cpa/src/us_cpa/tax_years.py new file mode 100644 index 0000000..aef9b4f --- /dev/null +++ b/skills/us-cpa/src/us_cpa/tax_years.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from typing import Any + + +TAX_YEAR_DATA: dict[int, dict[str, Any]] = { + 2024: { + "standardDeduction": { + "single": 14600.0, + "married_filing_jointly": 29200.0, + "head_of_household": 21900.0, + }, + "ordinaryIncomeBrackets": { + "single": [ + (11600.0, 0.10), + (47150.0, 0.12), + (100525.0, 0.22), + (191950.0, 0.24), + (243725.0, 0.32), + (609350.0, 0.35), + (float("inf"), 0.37), + ], + "married_filing_jointly": [ + (23200.0, 0.10), + (94300.0, 0.12), + (201050.0, 0.22), + (383900.0, 0.24), + (487450.0, 0.32), + (731200.0, 0.35), + (float("inf"), 0.37), + ], + "head_of_household": [ + (16550.0, 0.10), + (63100.0, 0.12), + (100500.0, 0.22), + (191950.0, 0.24), + (243700.0, 0.32), + (609350.0, 0.35), + (float("inf"), 0.37), + ], + }, + }, + 2025: { 
+ "standardDeduction": { + "single": 15750.0, + "married_filing_jointly": 31500.0, + "head_of_household": 23625.0, + }, + "ordinaryIncomeBrackets": { + "single": [ + (11925.0, 0.10), + (48475.0, 0.12), + (103350.0, 0.22), + (197300.0, 0.24), + (250525.0, 0.32), + (626350.0, 0.35), + (float("inf"), 0.37), + ], + "married_filing_jointly": [ + (23850.0, 0.10), + (96950.0, 0.12), + (206700.0, 0.22), + (394600.0, 0.24), + (501050.0, 0.32), + (751600.0, 0.35), + (float("inf"), 0.37), + ], + "head_of_household": [ + (17000.0, 0.10), + (64850.0, 0.12), + (103350.0, 0.22), + (197300.0, 0.24), + (250500.0, 0.32), + (626350.0, 0.35), + (float("inf"), 0.37), + ], + }, + }, +} + + +def supported_tax_years() -> list[int]: + return sorted(TAX_YEAR_DATA) + + +def tax_year_rules(tax_year: int) -> dict[str, Any]: + try: + return TAX_YEAR_DATA[tax_year] + except KeyError as exc: + years = ", ".join(str(year) for year in supported_tax_years()) + raise ValueError( + f"Unsupported tax year {tax_year}. Supported tax years: {years}." 
+ ) from exc diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index b8e7a4f..da341f0 100644 --- a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -13,15 +13,33 @@ SKILL_DIR = Path(__file__).resolve().parents[1] SRC_DIR = SKILL_DIR / "src" +def _pyproject_text() -> str: + return (SKILL_DIR / "pyproject.toml").read_text() + + class UsCpaCliSmokeTests(unittest.TestCase): def test_skill_scaffold_files_exist(self) -> None: self.assertTrue((SKILL_DIR / "SKILL.md").exists()) self.assertTrue((SKILL_DIR / "pyproject.toml").exists()) + self.assertTrue((SKILL_DIR / "README.md").exists()) self.assertTrue((SKILL_DIR / "scripts" / "us-cpa").exists()) self.assertTrue( (SKILL_DIR.parent.parent / "docs" / "us-cpa.md").exists() ) + def test_pyproject_declares_runtime_and_dev_dependencies(self) -> None: + pyproject = _pyproject_text() + self.assertIn('"pypdf>=', pyproject) + self.assertIn('"reportlab>=', pyproject) + self.assertIn("[project.optional-dependencies]", pyproject) + self.assertIn('"pytest>=', pyproject) + + def test_readme_documents_install_and_script_usage(self) -> None: + readme = (SKILL_DIR / "README.md").read_text() + self.assertIn("pip install -e .[dev]", readme) + self.assertIn("scripts/us-cpa", readme) + self.assertIn("python -m unittest", readme) + def test_fixture_directories_exist(self) -> None: fixtures_dir = SKILL_DIR / "tests" / "fixtures" for name in ("irs", "facts", "documents", "returns"): diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py index b2fceef..7ab1d34 100644 --- a/skills/us-cpa/tests/test_questions.py +++ b/skills/us-cpa/tests/test_questions.py @@ -33,6 +33,7 @@ class QuestionEngineTests(unittest.TestCase): self.assertEqual(analysis["taxYear"], 2025) self.assertEqual(analysis["conclusion"]["answer"], "$15,750") self.assertEqual(analysis["confidence"], "high") + self.assertEqual(analysis["riskLevel"], "low") self.assertTrue(analysis["authorities"]) 
self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions") @@ -47,6 +48,7 @@ class QuestionEngineTests(unittest.TestCase): ) self.assertEqual(analysis["confidence"], "low") + self.assertEqual(analysis["riskLevel"], "high") self.assertTrue(analysis["primaryLawRequired"]) self.assertIn("Internal Revenue Code", analysis["missingFacts"][0]) @@ -59,6 +61,7 @@ class QuestionEngineTests(unittest.TestCase): "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}], "conclusion": {"answer": "$15,750", "summary": "Single filers use a $15,750 standard deduction for tax year 2025."}, "confidence": "high", + "riskLevel": "low", "followUpQuestions": [], "primaryLawRequired": False, } diff --git a/skills/us-cpa/tests/test_returns.py b/skills/us-cpa/tests/test_returns.py index 2541e2c..ed5b66d 100644 --- a/skills/us-cpa/tests/test_returns.py +++ b/skills/us-cpa/tests/test_returns.py @@ -43,6 +43,13 @@ class ReturnModelTests(unittest.TestCase): def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None: self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5) + def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None: + self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0) + + def test_normalize_case_facts_rejects_unsupported_tax_year(self) -> None: + with self.assertRaisesRegex(ValueError, "Unsupported tax year"): + normalize_case_facts({"filingStatus": "single"}, 2023) + if __name__ == "__main__": unittest.main() From fb39fe76cb8b8e8c0eb455dbba0c933acb81b82f Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:01:16 -0500 Subject: [PATCH 11/14] fix: expand us-cpa extraction review and rendering --- docs/us-cpa.md | 57 ++++++-- skills/us-cpa/SKILL.md | 12 +- skills/us-cpa/src/us_cpa/cases.py | 85 +++++++++--- .../us-cpa/src/us_cpa/document_extractors.py | 54 ++++++++ skills/us-cpa/src/us_cpa/prepare.py | 6 +- skills/us-cpa/src/us_cpa/questions.py | 20 ++- 
skills/us-cpa/src/us_cpa/renderers.py | 38 +++++- skills/us-cpa/src/us_cpa/returns.py | 122 +++++++++++++++++- skills/us-cpa/src/us_cpa/review.py | 51 ++++++++ skills/us-cpa/src/us_cpa/sources.py | 64 ++++++++- .../fixtures/documents/interest-1099.txt | 3 + .../tests/fixtures/documents/simple-w2.txt | 4 + .../simple-w2-interest-2025-normalized.json | 16 +++ skills/us-cpa/tests/test_cases.py | 33 +++++ skills/us-cpa/tests/test_questions.py | 30 +++++ skills/us-cpa/tests/test_renderers.py | 53 +++++++- skills/us-cpa/tests/test_returns.py | 51 +++++++- skills/us-cpa/tests/test_review.py | 38 ++++++ skills/us-cpa/tests/test_sources.py | 12 ++ 19 files changed, 693 insertions(+), 56 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/document_extractors.py create mode 100644 skills/us-cpa/tests/fixtures/documents/interest-1099.txt create mode 100644 skills/us-cpa/tests/fixtures/documents/simple-w2.txt create mode 100644 skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json diff --git a/docs/us-cpa.md b/docs/us-cpa.md index 66dc9eb..e93ada0 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -24,11 +24,12 @@ Current implementation now includes: - deterministic cache layout under `~/.cache/us-cpa` by default - `fetch-year` download flow for the bootstrap IRS corpus - source manifest with URL, hash, authority rank, and local path traceability -- authority ranking hooks for IRS materials and future primary-law escalation -- case-folder intake and conflict-stop handling +- primary-law URL building for IRC and Treasury regulation escalation +- case-folder intake, document registration, and machine-usable fact extraction from JSON, text, and PDF inputs - question workflow with conversation and memo output -- prepare workflow for the current supported 1040 subset +- prepare workflow for the current supported multi-form 1040 package - review workflow with findings-first output +- fillable-PDF first rendering with overlay fallback - e-file-ready draft 
export payload generation ## CLI Surface @@ -61,10 +62,17 @@ US_CPA_CACHE_DIR=/tmp/us-cpa-cache skills/us-cpa/scripts/us-cpa fetch-year --tax Current `fetch-year` bootstrap corpus for tax year `2025` is verified against live IRS `irs-prior` PDFs for: - Form 1040 -- Schedules 1, 2, 3, A, B, C, D, SE, and 8812 -- Form 8949 +- Schedules 1, 2, 3, A, B, C, D, E, SE, and 8812 +- Forms 8949, 4562, 4797, 6251, 8606, 8863, 8889, 8959, 8960, 8995, 8995-A, 5329, 5695, and 1116 - General Form 1040 instructions and selected schedule/form instructions +Current bundled tax-year computation data: + +- 2024 +- 2025 + +Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added. + ## Interaction Model - `question` @@ -109,7 +117,8 @@ Behavior: - creates the full case directory layout when `--create-case` is used - copies input documents into `input/` -- stores normalized user-statement facts in `extracted/facts.json` +- stores normalized facts with source metadata in `extracted/facts.json` +- extracts machine-usable facts from JSON/text/PDF documents where supported - appends document registry entries to `case-manifest.json` - stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact @@ -142,21 +151,26 @@ Current implemented topics: - standard deduction - Schedule C / sole proprietorship reporting trigger +- Schedule D / capital gains reporting trigger +- Schedule E / rental income reporting trigger ## Form Rendering Current rendering path: - official IRS PDFs from the cached tax-year corpus -- overlay rendering onto those official PDFs using `reportlab` + `pypdf` +- deterministic field-fill when usable AcroForm fields are present +- overlay rendering onto those official PDFs using `reportlab` + `pypdf` as fallback - artifact manifest written to `output/artifacts.json` Current rendered form support: -- Form 1040 overlay artifact generation +- 
field-fill support for known mapped fillable forms +- overlay generation for the current required-form set resolved by the return model Current review rule: +- field-filled artifacts are not automatically flagged for review - overlay-rendered artifacts are marked `reviewRequired: true` ## Preparation Workflow @@ -164,9 +178,10 @@ Current review rule: Current `prepare` implementation: - loads case facts from `extracted/facts.json` -- normalizes them into the current supported 2025 federal return model -- computes the current supported 1040 subset -- resolves required forms for the current supported subset +- normalizes them into the current supported federal return model +- preserves source provenance for normalized values +- computes the current supported 1040 package +- resolves required forms across the current supported subset - writes: - `return/normalized-return.json` - `output/artifacts.json` @@ -175,10 +190,27 @@ Current `prepare` implementation: Current supported calculation inputs: - `filingStatus` +- `spouse.fullName` +- `dependents` - `wages` - `taxableInterest` - `businessIncome` +- `capitalGainLoss` +- `rentalIncome` - `federalWithholding` +- `itemizedDeductions` +- `hsaContribution` +- `educationCredit` +- `foreignTaxCredit` +- `qualifiedBusinessIncome` +- `traditionalIraBasis` +- `additionalMedicareTax` +- `netInvestmentIncomeTax` +- `alternativeMinimumTax` +- `additionalTaxPenalty` +- `energyCredit` +- `depreciationExpense` +- `section1231GainLoss` ## E-file-ready Export @@ -200,7 +232,10 @@ Current `review` implementation: - recomputes the return from current case facts - compares stored normalized return values to recomputed values +- flags source-fact mismatches for key income fields +- flags likely omitted income when document-extracted facts support an amount the stored return omits - checks whether required rendered artifacts are present +- flags high-complexity forms for specialist follow-up - flags overlay-rendered artifacts as requiring 
human review - sorts findings by severity diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index a8d73f5..010a6e4 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -46,7 +46,8 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - IRS materials first; escalate to primary law only when needed - stop on conflicting facts and ask the user to resolve the issue before continuing - official IRS PDFs are the target compiled-form artifacts -- overlay-rendered forms must be flagged for human review +- deterministic field-fill is the preferred render path when the official PDF exposes usable fields +- overlay-rendered forms are the fallback and must be flagged for human review ## Output @@ -55,10 +56,11 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - `question` supports `--style conversation|memo` - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation -- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict -- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review -- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory +- `extract-docs` creates or opens a case, registers documents, stores facts, extracts machine-usable facts from JSON/text/PDF sources where possible, and stops with a structured issue if facts conflict +- `question` currently has explicit IRS-first answers for standard deduction, Schedule C, Schedule D, and Schedule E questions; other questions escalate to primary-law research with official IRC/regulation URLs +- rendered form artifacts prefer fillable-field output when possible and otherwise fall back to 
overlay output +- `prepare` computes the current supported federal 1040 package, preserves fact provenance in the normalized return, and writes normalized return/artifact/report files into the case directory - `export-efile-ready` writes a draft transmission-ready payload without transmitting anything -- `review` recomputes the return from case facts, checks artifacts, and returns findings-first output in conversation or memo style +- `review` recomputes the return from case facts, checks artifacts, flags source-fact mismatches and likely omissions, and returns findings-first output in conversation or memo style For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/src/us_cpa/cases.py b/skills/us-cpa/src/us_cpa/cases.py index 9f8827d..68cc90c 100644 --- a/skills/us-cpa/src/us_cpa/cases.py +++ b/skills/us-cpa/src/us_cpa/cases.py @@ -8,6 +8,8 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any +from us_cpa.document_extractors import extract_document_facts + CASE_SUBDIRECTORIES = ( "input", @@ -95,6 +97,48 @@ class CaseManager: current["issues"].append(issue) self.issues_path.write_text(json.dumps(current, indent=2)) + def _record_fact( + self, + facts_payload: dict[str, Any], + *, + field: str, + value: Any, + source_type: str, + source_name: str, + tax_year: int, + ) -> None: + existing = facts_payload["facts"].get(field) + if existing and existing["value"] != value: + issue = { + "status": "needs_resolution", + "issueType": "fact_conflict", + "field": field, + "existingValue": existing["value"], + "newValue": value, + "message": f"Conflicting values for {field}. 
Resolve before continuing.", + "createdAt": _timestamp(), + "taxYear": tax_year, + } + self._write_issue(issue) + raise CaseConflictError(issue) + + captured_at = _timestamp() + source_entry = { + "sourceType": source_type, + "sourceName": source_name, + "capturedAt": captured_at, + } + if existing: + existing["sources"].append(source_entry) + return + + facts_payload["facts"][field] = { + "value": value, + "sourceType": source_type, + "capturedAt": captured_at, + "sources": [source_entry], + } + def intake( self, *, @@ -124,27 +168,28 @@ class CaseManager: registered_documents.append(document_entry) facts_payload = self._load_facts() - for field, value in user_facts.items(): - existing = facts_payload["facts"].get(field) - if existing and existing["value"] != value: - issue = { - "status": "needs_resolution", - "issueType": "fact_conflict", - "field": field, - "existingValue": existing["value"], - "newValue": value, - "message": f"Conflicting values for {field}. Resolve before continuing.", - "createdAt": _timestamp(), - "taxYear": tax_year, - } - self._write_issue(issue) - raise CaseConflictError(issue) + for document_entry in registered_documents: + extracted = extract_document_facts(Path(document_entry["storedPath"])) + document_entry["extractedFacts"] = extracted + for field, value in extracted.items(): + self._record_fact( + facts_payload, + field=field, + value=value, + source_type="document_extract", + source_name=document_entry["name"], + tax_year=tax_year, + ) - facts_payload["facts"][field] = { - "value": value, - "sourceType": "user_statement", - "capturedAt": _timestamp(), - } + for field, value in user_facts.items(): + self._record_fact( + facts_payload, + field=field, + value=value, + source_type="user_statement", + source_name="interactive-intake", + tax_year=tax_year, + ) self._write_manifest(manifest) self._write_facts(facts_payload) diff --git a/skills/us-cpa/src/us_cpa/document_extractors.py b/skills/us-cpa/src/us_cpa/document_extractors.py new 
file mode 100644 index 0000000..e5034db --- /dev/null +++ b/skills/us-cpa/src/us_cpa/document_extractors.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Any + +from pypdf import PdfReader + + +_NUMBER = r"(-?\d+(?:,\d{3})*(?:\.\d+)?)" + + +def _parse_number(raw: str) -> float: + return float(raw.replace(",", "")) + + +def _extract_text(path: Path) -> str: + suffix = path.suffix.lower() + if suffix in {".txt", ".md"}: + return path.read_text() + if suffix == ".pdf": + reader = PdfReader(str(path)) + return "\n".join((page.extract_text() or "") for page in reader.pages) + return "" + + +def _facts_from_text(text: str) -> dict[str, Any]: + extracted: dict[str, Any] = {} + + if match := re.search(r"Employee:\s*(.+)", text): + extracted["taxpayer.fullName"] = match.group(1).strip() + if match := re.search(r"Recipient:\s*(.+)", text): + extracted.setdefault("taxpayer.fullName", match.group(1).strip()) + if match := re.search(r"Box 1 Wages, tips, other compensation\s+" + _NUMBER, text, re.I): + extracted["wages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I): + extracted["federalWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I): + extracted["taxableInterest"] = _parse_number(match.group(1)) + if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I): + extracted["businessIncome"] = _parse_number(match.group(1)) + + return extracted + + +def extract_document_facts(path: Path) -> dict[str, Any]: + suffix = path.suffix.lower() + if suffix == ".json": + payload = json.loads(path.read_text()) + if isinstance(payload, dict): + return payload + return {} + return _facts_from_text(_extract_text(path)) diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py index 72ef8cd..1338b83 100644 --- 
a/skills/us-cpa/src/us_cpa/prepare.py +++ b/skills/us-cpa/src/us_cpa/prepare.py @@ -13,7 +13,11 @@ from us_cpa.sources import TaxYearCorpus def _load_case_facts(case_dir: Path) -> dict[str, Any]: facts_path = case_dir / "extracted" / "facts.json" payload = json.loads(facts_path.read_text()) - return {key: value["value"] for key, value in payload["facts"].items()} + facts = {key: value["value"] for key, value in payload["facts"].items()} + facts["_factMetadata"] = { + key: {"sources": value.get("sources", [])} for key, value in payload["facts"].items() + } + return facts diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py index 1ee9554..3ee1502 100644 --- a/skills/us-cpa/src/us_cpa/questions.py +++ b/skills/us-cpa/src/us_cpa/questions.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any -from us_cpa.sources import TaxYearCorpus +from us_cpa.sources import TaxYearCorpus, build_primary_law_authorities TOPIC_RULES = [ @@ -29,6 +29,22 @@ TOPIC_RULES = [ "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.", "confidence": "medium", }, + { + "issue": "schedule_d_required", + "keywords": ("schedule d", "capital gains"), + "authority_slugs": ("f1040sd", "i1040sd", "f8949", "i8949"), + "answer": "Schedule D is generally required when a taxpayer reports capital gains or losses, often alongside Form 8949.", + "summary": "Capital gains and losses generally flow through Schedule D, with Form 8949 supporting detail when required.", + "confidence": "medium", + }, + { + "issue": "schedule_e_required", + "keywords": ("schedule e", "rental income"), + "authority_slugs": ("f1040se", "i1040se"), + "answer": "Schedule E is generally required when a taxpayer reports rental real-estate income or expenses.", + "summary": "Rental income and expenses generally belong on Schedule E.", + "confidence": "medium", + }, ] @@ -117,7 +133,7 @@ class QuestionEngine: 
"missingFacts": [ "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently." ], - "authorities": [], + "authorities": build_primary_law_authorities(question), "conclusion": { "answer": "Insufficient IRS-form and instruction support for a confident answer.", "summary": "This question needs primary-law analysis before a reliable answer can be given.", diff --git a/skills/us-cpa/src/us_cpa/renderers.py b/skills/us-cpa/src/us_cpa/renderers.py index 440d8de..f41c16a 100644 --- a/skills/us-cpa/src/us_cpa/renderers.py +++ b/skills/us-cpa/src/us_cpa/renderers.py @@ -37,6 +37,32 @@ OVERLAY_FIELDS = { } +FIELD_FILL_VALUES = { + "f1040": lambda data: { + "taxpayer_full_name": data["taxpayer"]["fullName"], + "filing_status": data["filingStatus"], + "wages": f"{data['income']['wages']:.2f}", + "taxable_interest": f"{data['income']['taxableInterest']:.2f}", + } +} + + +def _field_fill_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> bool: + reader = PdfReader(str(template_path)) + fields = reader.get_fields() or {} + values = FIELD_FILL_VALUES.get(form_code, lambda _: {})(normalized) + matched = {key: value for key, value in values.items() if key in fields} + if not matched: + return False + + writer = PdfWriter(clone_from=str(template_path)) + writer.update_page_form_field_values(writer.pages[0], matched, auto_regenerate=False) + writer.set_need_appearances_writer() + with output_path.open("wb") as handle: + writer.write(handle) + return True + + def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None: reader = PdfReader(str(template_path)) writer = PdfWriter(clone_from=str(template_path)) @@ -68,14 +94,20 @@ def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[st continue template_path = irs_dir / f"{template_slug}.pdf" output_path = output_dir / f"{form_code}.pdf" - _overlay_page(template_path, 
output_path, form_code, normalized) + render_method = "overlay" + review_required = True + if _field_fill_page(template_path, output_path, form_code, normalized): + render_method = "field_fill" + review_required = False + else: + _overlay_page(template_path, output_path, form_code, normalized) artifacts.append( { "formCode": form_code, "templatePath": str(template_path), "outputPath": str(output_path), - "renderMethod": "overlay", - "reviewRequired": True, + "renderMethod": render_method, + "reviewRequired": review_required, } ) diff --git a/skills/us-cpa/src/us_cpa/returns.py b/skills/us-cpa/src/us_cpa/returns.py index 3af4d10..7f0bcc5 100644 --- a/skills/us-cpa/src/us_cpa/returns.py +++ b/skills/us-cpa/src/us_cpa/returns.py @@ -11,6 +11,15 @@ def _as_float(value: Any) -> float: return float(value) +def _fact_metadata(facts: dict[str, Any]) -> dict[str, Any]: + return facts.get("_factMetadata", {}) + + +def _provenance_for(field: str, metadata: dict[str, Any]) -> dict[str, Any]: + entry = metadata.get(field, {}) + return {"sources": list(entry.get("sources", []))} + + def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float: taxable = max(0.0, amount) brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status] @@ -30,57 +39,156 @@ def resolve_required_forms(normalized: dict[str, Any]) -> list[str]: if normalized["income"]["taxableInterest"] > 1500: forms.append("f1040sb") if normalized["income"]["businessIncome"] != 0: - forms.extend(["f1040sc", "f1040se", "f1040s1"]) - return forms + forms.extend(["f1040sc", "f1040sse", "f1040s1", "f8995"]) + if normalized["income"]["capitalGainLoss"] != 0: + forms.extend(["f1040sd", "f8949"]) + if normalized["income"]["rentalIncome"] != 0: + forms.extend(["f1040se", "f1040s1"]) + if normalized["deductions"]["deductionType"] == "itemized": + forms.append("f1040sa") + if normalized["adjustments"]["hsaContribution"] != 0: + forms.append("f8889") + if 
normalized["credits"]["educationCredit"] != 0: + forms.append("f8863") + if normalized["credits"]["foreignTaxCredit"] != 0: + forms.append("f1116") + if normalized["business"]["qualifiedBusinessIncome"] != 0 and "f8995" not in forms: + forms.append("f8995") + if normalized["basis"]["traditionalIraBasis"] != 0: + forms.append("f8606") + if normalized["taxes"]["additionalMedicareTax"] != 0: + forms.append("f8959") + if normalized["taxes"]["netInvestmentIncomeTax"] != 0: + forms.append("f8960") + if normalized["taxes"]["alternativeMinimumTax"] != 0: + forms.append("f6251") + if normalized["taxes"]["additionalTaxPenalty"] != 0: + forms.append("f5329") + if normalized["credits"]["energyCredit"] != 0: + forms.append("f5695") + if normalized["depreciation"]["depreciationExpense"] != 0: + forms.append("f4562") + if normalized["assetSales"]["section1231GainLoss"] != 0: + forms.append("f4797") + return list(dict.fromkeys(forms)) def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]: rules = tax_year_rules(tax_year) + metadata = _fact_metadata(facts) filing_status = facts.get("filingStatus", "single") wages = _as_float(facts.get("wages")) interest = _as_float(facts.get("taxableInterest")) business_income = _as_float(facts.get("businessIncome")) + capital_gain_loss = _as_float(facts.get("capitalGainLoss")) + rental_income = _as_float(facts.get("rentalIncome")) withholding = _as_float(facts.get("federalWithholding")) + itemized_deductions = _as_float(facts.get("itemizedDeductions")) + hsa_contribution = _as_float(facts.get("hsaContribution")) + education_credit = _as_float(facts.get("educationCredit")) + foreign_tax_credit = _as_float(facts.get("foreignTaxCredit")) + qualified_business_income = _as_float(facts.get("qualifiedBusinessIncome")) + traditional_ira_basis = _as_float(facts.get("traditionalIraBasis")) + additional_medicare_tax = _as_float(facts.get("additionalMedicareTax")) + net_investment_income_tax = 
_as_float(facts.get("netInvestmentIncomeTax")) + alternative_minimum_tax = _as_float(facts.get("alternativeMinimumTax")) + additional_tax_penalty = _as_float(facts.get("additionalTaxPenalty")) + energy_credit = _as_float(facts.get("energyCredit")) + depreciation_expense = _as_float(facts.get("depreciationExpense")) + section1231_gain_loss = _as_float(facts.get("section1231GainLoss")) - adjusted_gross_income = wages + interest + business_income + adjusted_gross_income = wages + interest + business_income + capital_gain_loss + rental_income standard_deduction = rules["standardDeduction"][filing_status] - taxable_income = max(0.0, adjusted_gross_income - standard_deduction) + deduction_type = "itemized" if itemized_deductions > standard_deduction else "standard" + deduction_amount = itemized_deductions if deduction_type == "itemized" else standard_deduction + taxable_income = max(0.0, adjusted_gross_income - deduction_amount) income_tax = tax_on_ordinary_income(taxable_income, filing_status, tax_year) self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2) - total_tax = round(income_tax + self_employment_tax, 2) + total_tax = round( + income_tax + + self_employment_tax + + additional_medicare_tax + + net_investment_income_tax + + alternative_minimum_tax + + additional_tax_penalty, + 2, + ) total_payments = withholding - refund = round(max(0.0, total_payments - total_tax), 2) - balance_due = round(max(0.0, total_tax - total_payments), 2) + total_credits = round(education_credit + foreign_tax_credit + energy_credit, 2) + refund = round(max(0.0, total_payments + total_credits - total_tax), 2) + balance_due = round(max(0.0, total_tax - total_payments - total_credits), 2) normalized = { "taxYear": tax_year, "taxpayer": { "fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer"), }, + "spouse": { + "fullName": facts.get("spouse.fullName", ""), + }, + "dependents": list(facts.get("dependents", [])), "filingStatus": filing_status, "income": { "wages": 
wages, "taxableInterest": interest, "businessIncome": business_income, + "capitalGainLoss": capital_gain_loss, + "rentalIncome": rental_income, + }, + "adjustments": { + "hsaContribution": hsa_contribution, }, "payments": { "federalWithholding": withholding, }, "deductions": { "standardDeduction": standard_deduction, + "itemizedDeductions": itemized_deductions, + "deductionType": deduction_type, + "deductionAmount": deduction_amount, + }, + "credits": { + "educationCredit": education_credit, + "foreignTaxCredit": foreign_tax_credit, + "energyCredit": energy_credit, }, "taxes": { "incomeTax": income_tax, "selfEmploymentTax": self_employment_tax, + "additionalMedicareTax": additional_medicare_tax, + "netInvestmentIncomeTax": net_investment_income_tax, + "alternativeMinimumTax": alternative_minimum_tax, + "additionalTaxPenalty": additional_tax_penalty, "totalTax": total_tax, }, + "business": { + "qualifiedBusinessIncome": qualified_business_income, + }, + "basis": { + "traditionalIraBasis": traditional_ira_basis, + }, + "depreciation": { + "depreciationExpense": depreciation_expense, + }, + "assetSales": { + "section1231GainLoss": section1231_gain_loss, + }, "totals": { "adjustedGrossIncome": round(adjusted_gross_income, 2), "taxableIncome": round(taxable_income, 2), "totalPayments": round(total_payments, 2), + "totalCredits": total_credits, "refund": refund, "balanceDue": balance_due, }, + "provenance": { + "income.wages": _provenance_for("wages", metadata), + "income.taxableInterest": _provenance_for("taxableInterest", metadata), + "income.businessIncome": _provenance_for("businessIncome", metadata), + "income.capitalGainLoss": _provenance_for("capitalGainLoss", metadata), + "income.rentalIncome": _provenance_for("rentalIncome", metadata), + "payments.federalWithholding": _provenance_for("federalWithholding", metadata), + }, } normalized["requiredForms"] = resolve_required_forms(normalized) return normalized diff --git a/skills/us-cpa/src/us_cpa/review.py 
b/skills/us-cpa/src/us_cpa/review.py index 82576b9..8e50f85 100644 --- a/skills/us-cpa/src/us_cpa/review.py +++ b/skills/us-cpa/src/us_cpa/review.py @@ -22,6 +22,9 @@ class ReviewEngine: stored_return = json.loads((case_dir / "return" / "normalized-return.json").read_text()) facts_payload = json.loads((case_dir / "extracted" / "facts.json").read_text()) facts = {key: value["value"] for key, value in facts_payload["facts"].items()} + facts["_factMetadata"] = { + key: {"sources": value.get("sources", [])} for key, value in facts_payload["facts"].items() + } recomputed = normalize_case_facts(facts, manifest["taxYear"]) artifacts_payload = json.loads((case_dir / "output" / "artifacts.json").read_text()) @@ -39,6 +42,42 @@ class ReviewEngine: } ) + for field, label in ( + ("wages", "wages"), + ("taxableInterest", "taxable interest"), + ("businessIncome", "business income"), + ("capitalGainLoss", "capital gains or losses"), + ("rentalIncome", "rental income"), + ): + stored_value = stored_return["income"].get(field, 0.0) + recomputed_value = recomputed["income"].get(field, 0.0) + sources = recomputed.get("provenance", {}).get(f"income.{field}", {}).get("sources", []) + has_document_source = any(item.get("sourceType") == "document_extract" for item in sources) + if stored_value != recomputed_value: + findings.append( + { + "severity": "high" if has_document_source else "medium", + "title": f"Source fact mismatch for {label}", + "explanation": f"Stored return reports {stored_value:.2f} for {label}, but case facts support {recomputed_value:.2f}.", + "suggestedAction": f"Reconcile {label} to {recomputed_value:.2f} before treating the return as final.", + "authorities": [ + {"title": "Case fact registry", "sourceClass": "irs_form"} + ], + } + ) + if stored_value == 0 and recomputed_value > 0 and has_document_source: + findings.append( + { + "severity": "high", + "title": f"Likely omitted {label}", + "explanation": f"Document-extracted facts support {recomputed_value:.2f} of 
{label}, but the stored return reports none.", + "suggestedAction": f"Add {label} to the return and regenerate the required forms.", + "authorities": [ + {"title": "Case document extraction", "sourceClass": "irs_form"} + ], + } + ) + rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]} for required_form in recomputed["requiredForms"]: if required_form not in rendered_forms: @@ -64,6 +103,18 @@ class ReviewEngine: } ) + required_forms_union = set(recomputed["requiredForms"]) | set(stored_return.get("requiredForms", [])) + if any(form in required_forms_union for form in ("f6251", "f8960", "f8959", "f1116")): + findings.append( + { + "severity": "medium", + "title": "High-complexity tax position requires specialist follow-up", + "explanation": "The return includes forms or computations that usually require deeper technical support and careful authority review.", + "suggestedAction": "Review the supporting authority and computations for the high-complexity forms before treating the return as filing-ready.", + "authorities": [{"title": "Required form analysis", "sourceClass": "irs_instructions"}], + } + ) + findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"])) review = { "status": "reviewed", diff --git a/skills/us-cpa/src/us_cpa/sources.py b/skills/us-cpa/src/us_cpa/sources.py index 1f4190a..852b5aa 100644 --- a/skills/us-cpa/src/us_cpa/sources.py +++ b/skills/us-cpa/src/us_cpa/sources.py @@ -3,6 +3,7 @@ from __future__ import annotations import hashlib import json import os +import re from dataclasses import dataclass from datetime import datetime, timezone from enum import IntEnum @@ -63,6 +64,37 @@ def build_irs_prior_pdf_url(slug: str, tax_year: int) -> str: return f"https://www.irs.gov/pub/irs-prior/{slug}--{tax_year}.pdf" +def build_primary_law_authorities(question: str) -> list[dict[str, str | int]]: + authorities: list[dict[str, str | int]] = [] + normalized = question.lower() + + for match in 
re.finditer(r"(?:section|sec\.)\s+(\d+[a-z0-9-]*)", normalized): + section = match.group(1) + authorities.append( + { + "slug": f"irc-{section}", + "title": f"Internal Revenue Code section {section}", + "sourceClass": "internal_revenue_code", + "url": f"https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title26-section{section}&num=0&edition=prelim", + "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE), + } + ) + + for match in re.finditer(r"(?:treas(?:ury)?\.?\s+reg(?:ulation)?\.?\s*)([\d.]+-\d+)", normalized): + section = match.group(1) + authorities.append( + { + "slug": f"reg-{section}", + "title": f"Treasury Regulation {section}", + "sourceClass": "treasury_regulation", + "url": f"https://www.ecfr.gov/current/title-26/section-{section}", + "authorityRank": int(AuthorityRank.TREASURY_REGULATION), + } + ) + + return authorities + + def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]: entries = [ ("f1040", "Form 1040", "irs_form"), @@ -73,16 +105,44 @@ def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]: ("f1040sb", "Schedule B (Form 1040)", "irs_form"), ("f1040sc", "Schedule C (Form 1040)", "irs_form"), ("f1040sd", "Schedule D (Form 1040)", "irs_form"), - ("f1040se", "Schedule SE (Form 1040)", "irs_form"), + ("f1040se", "Schedule E (Form 1040)", "irs_form"), + ("f1040sse", "Schedule SE (Form 1040)", "irs_form"), ("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"), ("f8949", "Form 8949", "irs_form"), + ("f4562", "Form 4562", "irs_form"), + ("f4797", "Form 4797", "irs_form"), + ("f6251", "Form 6251", "irs_form"), + ("f8606", "Form 8606", "irs_form"), + ("f8863", "Form 8863", "irs_form"), + ("f8889", "Form 8889", "irs_form"), + ("f8959", "Form 8959", "irs_form"), + ("f8960", "Form 8960", "irs_form"), + ("f8995", "Form 8995", "irs_form"), + ("f8995a", "Form 8995-A", "irs_form"), + ("f5329", "Form 5329", "irs_form"), + ("f5695", "Form 5695", "irs_form"), + ("f1116", "Form 1116", "irs_form"), ("i1040gi", "Instructions 
for Form 1040 and Schedules 1-3", "irs_instructions"), ("i1040sca", "Instructions for Schedule A", "irs_instructions"), ("i1040sc", "Instructions for Schedule C", "irs_instructions"), ("i1040sd", "Instructions for Schedule D", "irs_instructions"), - ("i1040se", "Instructions for Schedule SE", "irs_instructions"), + ("i1040se", "Instructions for Schedule E (Form 1040)", "irs_instructions"), + ("i1040sse", "Instructions for Schedule SE", "irs_instructions"), ("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"), ("i8949", "Instructions for Form 8949", "irs_instructions"), + ("i4562", "Instructions for Form 4562", "irs_instructions"), + ("i4797", "Instructions for Form 4797", "irs_instructions"), + ("i6251", "Instructions for Form 6251", "irs_instructions"), + ("i8606", "Instructions for Form 8606", "irs_instructions"), + ("i8863", "Instructions for Form 8863", "irs_instructions"), + ("i8889", "Instructions for Form 8889", "irs_instructions"), + ("i8959", "Instructions for Form 8959", "irs_instructions"), + ("i8960", "Instructions for Form 8960", "irs_instructions"), + ("i8995", "Instructions for Form 8995", "irs_instructions"), + ("i8995a", "Instructions for Form 8995-A", "irs_instructions"), + ("i5329", "Instructions for Form 5329", "irs_instructions"), + ("i5695", "Instructions for Form 5695", "irs_instructions"), + ("i1116", "Instructions for Form 1116", "irs_instructions"), ] return [ SourceDescriptor( diff --git a/skills/us-cpa/tests/fixtures/documents/interest-1099.txt b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt new file mode 100644 index 0000000..89d54f7 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt @@ -0,0 +1,3 @@ +Form 1099-INT +Recipient: Jane Doe +Box 1 Interest Income 1750 diff --git a/skills/us-cpa/tests/fixtures/documents/simple-w2.txt b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt new file mode 100644 index 0000000..b3336b1 --- /dev/null +++ 
b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt @@ -0,0 +1,4 @@ +Form W-2 Wage and Tax Statement +Employee: Jane Doe +Box 1 Wages, tips, other compensation 50000 +Box 2 Federal income tax withheld 6000 diff --git a/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json new file mode 100644 index 0000000..39fbd78 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json @@ -0,0 +1,16 @@ +{ + "taxYear": 2025, + "filingStatus": "single", + "requiredForms": ["f1040", "f1040sb"], + "income": { + "wages": 50000.0, + "taxableInterest": 1750.0, + "businessIncome": 0.0, + "capitalGainLoss": 0.0, + "rentalIncome": 0.0 + }, + "totals": { + "adjustedGrossIncome": 51750.0, + "taxableIncome": 36000.0 + } +} diff --git a/skills/us-cpa/tests/test_cases.py b/skills/us-cpa/tests/test_cases.py index 71fa379..7ddb47f 100644 --- a/skills/us-cpa/tests/test_cases.py +++ b/skills/us-cpa/tests/test_cases.py @@ -51,6 +51,39 @@ class CaseManagerTests(unittest.TestCase): facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) self.assertEqual(facts["facts"]["filingStatus"]["value"], "single") + def test_intake_extracts_machine_usable_facts_from_text_documents(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + case_dir = root / "2025-jane-doe" + w2 = root / "w2.txt" + w2.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + ) + interest = root / "1099-int.txt" + interest.write_text( + "Form 1099-INT\n" + "Recipient: Jane Doe\n" + "Box 1 Interest Income 1750\n" + ) + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + + result = manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single"}, + document_paths=[w2, interest], + ) 
+ + self.assertEqual(result["status"], "accepted") + facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) + self.assertEqual(facts["facts"]["wages"]["value"], 50000.0) + self.assertEqual(facts["facts"]["federalWithholding"]["value"], 6000.0) + self.assertEqual(facts["facts"]["taxableInterest"]["value"], 1750.0) + self.assertEqual(facts["facts"]["wages"]["sources"][0]["sourceType"], "document_extract") + def test_conflicting_facts_raise_structured_issue(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: case_dir = Path(temp_dir) / "2025-jane-doe" diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py index 7ab1d34..f464488 100644 --- a/skills/us-cpa/tests/test_questions.py +++ b/skills/us-cpa/tests/test_questions.py @@ -51,6 +51,36 @@ class QuestionEngineTests(unittest.TestCase): self.assertEqual(analysis["riskLevel"], "high") self.assertTrue(analysis["primaryLawRequired"]) self.assertIn("Internal Revenue Code", analysis["missingFacts"][0]) + self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in analysis["authorities"])) + + def test_capital_gains_question_returns_schedule_d_guidance(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="Do I need Schedule D for capital gains?", + tax_year=2025, + case_facts={"capitalGainLoss": 400}, + ) + + self.assertEqual(analysis["issue"], "schedule_d_required") + self.assertEqual(analysis["confidence"], "medium") + self.assertFalse(analysis["primaryLawRequired"]) + self.assertTrue(any(item["slug"] == "f1040sd" for item in analysis["authorities"])) + + def test_schedule_e_question_returns_rental_guidance(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="Do I need Schedule E for rental income?", + tax_year=2025, + case_facts={"rentalIncome": 1200}, + ) + + 
self.assertEqual(analysis["issue"], "schedule_e_required") + self.assertFalse(analysis["primaryLawRequired"]) + self.assertTrue(any(item["slug"] == "f1040se" for item in analysis["authorities"])) def test_renderers_produce_conversation_and_memo(self) -> None: analysis = { diff --git a/skills/us-cpa/tests/test_renderers.py b/skills/us-cpa/tests/test_renderers.py index 33f5358..cc0e506 100644 --- a/skills/us-cpa/tests/test_renderers.py +++ b/skills/us-cpa/tests/test_renderers.py @@ -13,6 +13,47 @@ from us_cpa.sources import TaxYearCorpus class RendererTests(unittest.TestCase): + def test_render_case_forms_prefers_fillable_pdf_fields_when_available(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "case" + (case_dir / "output").mkdir(parents=True) + corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache") + irs_dir = corpus.paths_for_year(2025).irs_dir + irs_dir.mkdir(parents=True, exist_ok=True) + + buffer = BytesIO() + pdf = canvas.Canvas(buffer) + form = pdf.acroForm + pdf.drawString(72, 720, "Name") + form.textfield(name="taxpayer_full_name", x=120, y=710, width=200, height=20) + pdf.drawString(72, 680, "Wages") + form.textfield(name="wages", x=120, y=670, width=200, height=20) + pdf.save() + (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue()) + + normalized = { + "taxYear": 2025, + "requiredForms": ["f1040"], + "taxpayer": {"fullName": "Jane Doe"}, + "filingStatus": "single", + "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0}, + "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0}, + "adjustments": {"hsaContribution": 0.0}, + "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0}, + "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0}, + "payments": 
{"federalWithholding": 6000.0}, + "business": {"qualifiedBusinessIncome": 0.0}, + "basis": {"traditionalIraBasis": 0.0}, + "depreciation": {"depreciationExpense": 0.0}, + "assetSales": {"section1231GainLoss": 0.0}, + "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0}, + } + + artifacts = render_case_forms(case_dir, corpus, normalized) + + self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "field_fill") + self.assertFalse(artifacts["artifacts"][0]["reviewRequired"]) + def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: case_dir = Path(temp_dir) / "case" @@ -32,10 +73,16 @@ class RendererTests(unittest.TestCase): "requiredForms": ["f1040"], "taxpayer": {"fullName": "Jane Doe"}, "filingStatus": "single", - "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0}, - "deductions": {"standardDeduction": 15750.0}, - "taxes": {"totalTax": 3883.5}, + "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0}, + "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0}, + "adjustments": {"hsaContribution": 0.0}, + "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0}, + "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0}, "payments": {"federalWithholding": 6000.0}, + "business": {"qualifiedBusinessIncome": 0.0}, + "basis": {"traditionalIraBasis": 0.0}, + "depreciation": {"depreciationExpense": 0.0}, + "assetSales": {"section1231GainLoss": 0.0}, "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0}, } diff --git a/skills/us-cpa/tests/test_returns.py b/skills/us-cpa/tests/test_returns.py index ed5b66d..f905877 100644 --- 
a/skills/us-cpa/tests/test_returns.py +++ b/skills/us-cpa/tests/test_returns.py @@ -37,11 +37,11 @@ class ReturnModelTests(unittest.TestCase): self.assertEqual( resolve_required_forms(normalized), - ["f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1"], + ["f1040", "f1040sb", "f1040sc", "f1040sse", "f1040s1", "f8995"], ) def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None: - self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5) + self.assertEqual(tax_on_ordinary_income(34350.0, "single", 2025), 3883.5) def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None: self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0) @@ -50,6 +50,53 @@ class ReturnModelTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "Unsupported tax year"): normalize_case_facts({"filingStatus": "single"}, 2023) + def test_normalize_case_facts_preserves_provenance_and_expands_form_resolution(self) -> None: + normalized = normalize_case_facts( + { + "taxpayer.fullName": "Jane Doe", + "spouse.fullName": "John Doe", + "dependents": [{"fullName": "Kid Doe", "ssnLast4": "4321"}], + "filingStatus": "married_filing_jointly", + "wages": 50000, + "taxableInterest": 2001, + "capitalGainLoss": 400, + "rentalIncome": 1200, + "itemizedDeductions": 40000, + "hsaContribution": 1000, + "educationCredit": 500, + "foreignTaxCredit": 250, + "qualifiedBusinessIncome": 12000, + "traditionalIraBasis": 6000, + "additionalMedicareTax": 100, + "netInvestmentIncomeTax": 200, + "alternativeMinimumTax": 300, + "additionalTaxPenalty": 50, + "energyCredit": 600, + "_factMetadata": { + "wages": {"sources": [{"sourceType": "document_extract", "documentName": "w2.txt"}]}, + }, + }, + 2025, + ) + + self.assertEqual(normalized["spouse"]["fullName"], "John Doe") + self.assertEqual(normalized["dependents"][0]["fullName"], "Kid Doe") + self.assertEqual(normalized["provenance"]["income.wages"]["sources"][0]["documentName"], "w2.txt") + self.assertIn("f1040sa", 
normalized["requiredForms"]) + self.assertIn("f1040sd", normalized["requiredForms"]) + self.assertIn("f8949", normalized["requiredForms"]) + self.assertIn("f1040se", normalized["requiredForms"]) + self.assertIn("f8889", normalized["requiredForms"]) + self.assertIn("f8863", normalized["requiredForms"]) + self.assertIn("f1116", normalized["requiredForms"]) + self.assertIn("f8995", normalized["requiredForms"]) + self.assertIn("f8606", normalized["requiredForms"]) + self.assertIn("f8959", normalized["requiredForms"]) + self.assertIn("f8960", normalized["requiredForms"]) + self.assertIn("f6251", normalized["requiredForms"]) + self.assertIn("f5329", normalized["requiredForms"]) + self.assertIn("f5695", normalized["requiredForms"]) + if __name__ == "__main__": unittest.main() diff --git a/skills/us-cpa/tests/test_review.py b/skills/us-cpa/tests/test_review.py index c46014d..eb0cbb1 100644 --- a/skills/us-cpa/tests/test_review.py +++ b/skills/us-cpa/tests/test_review.py @@ -64,6 +64,44 @@ class ReviewEngineTests(unittest.TestCase): self.assertIn("adjusted gross income", review["findings"][0]["title"].lower()) self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"])) + def test_review_detects_reporting_omissions_from_source_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir, corpus = self.build_prepared_case(temp_dir) + normalized_path = case_dir / "return" / "normalized-return.json" + normalized = json.loads(normalized_path.read_text()) + normalized["income"]["taxableInterest"] = 0.0 + normalized["totals"]["adjustedGrossIncome"] = 50000.0 + normalized_path.write_text(json.dumps(normalized, indent=2)) + + facts_path = case_dir / "extracted" / "facts.json" + facts_payload = json.loads(facts_path.read_text()) + facts_payload["facts"]["taxableInterest"] = { + "value": 1750.0, + "sources": [{"sourceType": "document_extract", "sourceName": "1099-int.txt"}], + } + 
facts_path.write_text(json.dumps(facts_payload, indent=2)) + + review = ReviewEngine(corpus=corpus).review_case(case_dir) + + self.assertTrue( + any("likely omitted taxable interest" in item["title"].lower() for item in review["findings"]) + ) + + def test_review_flags_high_complexity_positions_for_specialist_follow_up(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir, corpus = self.build_prepared_case(temp_dir) + normalized_path = case_dir / "return" / "normalized-return.json" + normalized = json.loads(normalized_path.read_text()) + normalized["requiredForms"].append("f6251") + normalized["taxes"]["alternativeMinimumTax"] = 300.0 + normalized_path.write_text(json.dumps(normalized, indent=2)) + + review = ReviewEngine(corpus=corpus).review_case(case_dir) + + self.assertTrue( + any("high-complexity tax position" in item["title"].lower() for item in review["findings"]) + ) + def test_review_renderers_produce_summary_and_memo(self) -> None: review = { "status": "reviewed", diff --git a/skills/us-cpa/tests/test_sources.py b/skills/us-cpa/tests/test_sources.py index 206e4d0..a180ff9 100644 --- a/skills/us-cpa/tests/test_sources.py +++ b/skills/us-cpa/tests/test_sources.py @@ -12,6 +12,7 @@ from us_cpa.sources import ( authority_rank_for, bootstrap_irs_catalog, build_irs_prior_pdf_url, + build_primary_law_authorities, ) @@ -42,6 +43,17 @@ class SourceCatalogTests(unittest.TestCase): self.assertGreaterEqual(len(catalog), 5) self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf") self.assertTrue(any(item.slug == "i1040gi" for item in catalog)) + self.assertTrue(any(item.slug == "f1040sse" for item in catalog)) + + def test_primary_law_authorities_build_official_urls(self) -> None: + authorities = build_primary_law_authorities( + "Does section 469 apply and what does Treas. Reg. 1.469-1 say?" 
+ ) + + self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in authorities)) + self.assertTrue(any(item["sourceClass"] == "treasury_regulation" for item in authorities)) + self.assertTrue(any("uscode.house.gov" in item["url"] for item in authorities)) + self.assertTrue(any("ecfr.gov" in item["url"] for item in authorities)) class TaxYearCorpusTests(unittest.TestCase): From 1be03171928cf8ad857e9f400c97bdc28588cde2 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:11:24 -0500 Subject: [PATCH 12/14] test: add us-cpa module coverage and citations --- docs/us-cpa.md | 4 ++ .../us-cpa/src/us_cpa/document_extractors.py | 20 ++++++ skills/us-cpa/src/us_cpa/tax_years.py | 8 +++ .../us-cpa/tests/test_document_extractors.py | 66 +++++++++++++++++++ skills/us-cpa/tests/test_tax_years.py | 25 +++++++ 5 files changed, 123 insertions(+) create mode 100644 skills/us-cpa/tests/test_document_extractors.py create mode 100644 skills/us-cpa/tests/test_tax_years.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index e93ada0..1a6cdcb 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -73,6 +73,8 @@ Current bundled tax-year computation data: Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added. +Adding a new supported year is a deliberate data-table change in `tax_years.py`, not an automatic runtime discovery step. That is intentional for tax-engine correctness. + ## Interaction Model - `question` @@ -173,6 +175,8 @@ Current review rule: - field-filled artifacts are not automatically flagged for review - overlay-rendered artifacts are marked `reviewRequired: true` +Overlay coordinates are currently a fallback heuristic and are not treated as line-perfect authoritative field maps. Overlay output must be visually reviewed before any filing/export handoff. 
+ ## Preparation Workflow Current `prepare` implementation: diff --git a/skills/us-cpa/src/us_cpa/document_extractors.py b/skills/us-cpa/src/us_cpa/document_extractors.py index e5034db..1afff11 100644 --- a/skills/us-cpa/src/us_cpa/document_extractors.py +++ b/skills/us-cpa/src/us_cpa/document_extractors.py @@ -36,10 +36,30 @@ def _facts_from_text(text: str) -> dict[str, Any]: extracted["wages"] = _parse_number(match.group(1)) if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I): extracted["federalWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 16 State wages, tips, etc\.\s+" + _NUMBER, text, re.I): + extracted["stateWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 17 State income tax\s+" + _NUMBER, text, re.I): + extracted["stateWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Social security wages\s+" + _NUMBER, text, re.I): + extracted["socialSecurityWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 5 Medicare wages and tips\s+" + _NUMBER, text, re.I): + extracted["medicareWages"] = _parse_number(match.group(1)) if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I): extracted["taxableInterest"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1a Total ordinary dividends\s+" + _NUMBER, text, re.I): + extracted["ordinaryDividends"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Gross distribution\s+" + _NUMBER, text, re.I): + extracted["retirementDistribution"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Other income\s+" + _NUMBER, text, re.I): + extracted["otherIncome"] = _parse_number(match.group(1)) if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I): extracted["businessIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Adjusted gross income\s+" + _NUMBER, text, re.I): + extracted["priorYear.adjustedGrossIncome"] = 
_parse_number(match.group(1)) + if match := re.search(r"Taxable income\s+" + _NUMBER, text, re.I): + extracted["priorYear.taxableIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Refund\s+" + _NUMBER, text, re.I): + extracted["priorYear.refund"] = _parse_number(match.group(1)) return extracted diff --git a/skills/us-cpa/src/us_cpa/tax_years.py b/skills/us-cpa/src/us_cpa/tax_years.py index aef9b4f..d5ba36f 100644 --- a/skills/us-cpa/src/us_cpa/tax_years.py +++ b/skills/us-cpa/src/us_cpa/tax_years.py @@ -39,6 +39,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = { (float("inf"), 0.37), ], }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + }, }, 2025: { "standardDeduction": { @@ -75,6 +79,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = { (float("inf"), 0.37), ], }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + }, }, } diff --git a/skills/us-cpa/tests/test_document_extractors.py b/skills/us-cpa/tests/test_document_extractors.py new file mode 100644 index 0000000..54f1000 --- /dev/null +++ b/skills/us-cpa/tests/test_document_extractors.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from us_cpa.document_extractors import extract_document_facts + + +class DocumentExtractorTests(unittest.TestCase): + def test_extracts_common_w2_fields(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "w2.txt" + path.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + "Box 16 State wages, tips, etc. 
50000\n" + "Box 17 State income tax 1200\n" + "Box 3 Social security wages 50000\n" + "Box 5 Medicare wages and tips 50000\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["taxpayer.fullName"], "Jane Doe") + self.assertEqual(extracted["wages"], 50000.0) + self.assertEqual(extracted["federalWithholding"], 6000.0) + self.assertEqual(extracted["stateWages"], 50000.0) + self.assertEqual(extracted["stateWithholding"], 1200.0) + self.assertEqual(extracted["socialSecurityWages"], 50000.0) + self.assertEqual(extracted["medicareWages"], 50000.0) + + def test_extracts_common_1099_patterns(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + div_path = Path(temp_dir) / "1099-div.txt" + div_path.write_text("Form 1099-DIV\nRecipient: Jane Doe\nBox 1a Total ordinary dividends 250\n") + ret_path = Path(temp_dir) / "1099-r.txt" + ret_path.write_text("Form 1099-R\nRecipient: Jane Doe\nBox 1 Gross distribution 10000\n") + misc_path = Path(temp_dir) / "1099-misc.txt" + misc_path.write_text("Form 1099-MISC\nRecipient: Jane Doe\nBox 3 Other income 900\n") + + self.assertEqual(extract_document_facts(div_path)["ordinaryDividends"], 250.0) + self.assertEqual(extract_document_facts(ret_path)["retirementDistribution"], 10000.0) + self.assertEqual(extract_document_facts(misc_path)["otherIncome"], 900.0) + + def test_extracts_prior_year_return_summary_values(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "prior-return.txt" + path.write_text( + "2024 Form 1040 Summary\n" + "Adjusted gross income 72100\n" + "Taxable income 49800\n" + "Refund 2100\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["priorYear.adjustedGrossIncome"], 72100.0) + self.assertEqual(extracted["priorYear.taxableIncome"], 49800.0) + self.assertEqual(extracted["priorYear.refund"], 2100.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_tax_years.py 
b/skills/us-cpa/tests/test_tax_years.py new file mode 100644 index 0000000..5bc6621 --- /dev/null +++ b/skills/us-cpa/tests/test_tax_years.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import unittest + +from us_cpa.tax_years import supported_tax_years, tax_year_rules + + +class TaxYearRuleTests(unittest.TestCase): + def test_supported_years_are_listed(self) -> None: + self.assertEqual(supported_tax_years(), [2024, 2025]) + + def test_tax_year_rules_include_source_citations(self) -> None: + rules = tax_year_rules(2025) + + self.assertIn("sourceCitations", rules) + self.assertIn("standardDeduction", rules["sourceCitations"]) + self.assertIn("ordinaryIncomeBrackets", rules["sourceCitations"]) + + def test_unsupported_tax_year_raises_clear_error(self) -> None: + with self.assertRaisesRegex(ValueError, "Unsupported tax year 2023"): + tax_year_rules(2023) + + +if __name__ == "__main__": + unittest.main() From 9f650faf8825287ccd9ce77d731c3a198b0d3460 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:31:52 -0500 Subject: [PATCH 13/14] docs: add us-cpa openclaw installation guide --- docs/us-cpa.md | 29 +++++++++++++++++++++++++++ skills/us-cpa/README.md | 35 +++++++++++++++++++++++++++++++++ skills/us-cpa/SKILL.md | 6 ++++++ skills/us-cpa/scripts/us-cpa | 7 ++++++- skills/us-cpa/tests/test_cli.py | 17 ++++++++++++++++ 5 files changed, 93 insertions(+), 1 deletion(-) diff --git a/docs/us-cpa.md b/docs/us-cpa.md index 1a6cdcb..7eb7a0e 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -17,6 +17,35 @@ Without installing, the repo-local wrapper works directly: skills/us-cpa/scripts/us-cpa --help ``` +## OpenClaw installation + +To install the skill for OpenClaw itself, copy the repo skill into the workspace skill directory and install its Python dependencies there. + +1. 
Sync the repo copy into the workspace: + +```bash +rsync -a --delete \ + /Users/stefano/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + /Users/stefano/.openclaw/workspace/skills/us-cpa/ +``` + +2. Create a workspace-local virtualenv and install the package: + +```bash +cd /Users/stefano/.openclaw/workspace/skills/us-cpa +python3 -m venv .venv +. .venv/bin/activate +pip install -e .[dev] +``` + +3. Verify the installed workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +The wrapper prefers `.venv/bin/python` inside the skill directory when present, so OpenClaw can run the workspace copy without relying on global Python packages. + ## Current Milestone Current implementation now includes: diff --git a/skills/us-cpa/README.md b/skills/us-cpa/README.md index 54a3140..d9512a6 100644 --- a/skills/us-cpa/README.md +++ b/skills/us-cpa/README.md @@ -10,6 +10,35 @@ From `skills/us-cpa/`: pip install -e .[dev] ``` +## OpenClaw installation + +Install the skill into the OpenClaw workspace copy, not only in the repo checkout. + +1. Sync the skill into the workspace: + +```bash +rsync -a --delete \ + /Users/stefano/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + /Users/stefano/.openclaw/workspace/skills/us-cpa/ +``` + +2. Create a skill-local virtualenv in the workspace copy: + +```bash +cd /Users/stefano/.openclaw/workspace/skills/us-cpa +python3 -m venv .venv +. .venv/bin/activate +pip install -e .[dev] +``` + +3. Run the workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +The wrapper now prefers `~/.openclaw/workspace/skills/us-cpa/.venv/bin/python` when present and falls back to `python3` otherwise. 
+ ## Run Installed entry point: @@ -24,6 +53,12 @@ Repo-local wrapper without installation: scripts/us-cpa --help ``` +OpenClaw workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + Module execution: ```bash diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 010a6e4..8feccd7 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -40,6 +40,12 @@ skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025- skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json ``` +When OpenClaw is using the installed workspace copy, the entrypoint is: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + ## Rules - federal individual returns only in v1 diff --git a/skills/us-cpa/scripts/us-cpa b/skills/us-cpa/scripts/us-cpa index fbcef77..01fb0bd 100755 --- a/skills/us-cpa/scripts/us-cpa +++ b/skills/us-cpa/scripts/us-cpa @@ -3,6 +3,11 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +PYTHON_BIN="${SKILL_DIR}/.venv/bin/python" export PYTHONPATH="${SKILL_DIR}/src${PYTHONPATH:+:${PYTHONPATH}}" -exec python3 -m us_cpa.cli "$@" +if [[ ! 
-x "${PYTHON_BIN}" ]]; then + PYTHON_BIN="python3" +fi + +exec "${PYTHON_BIN}" -m us_cpa.cli "$@" diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index da341f0..4a2416d 100644 --- a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -40,6 +40,23 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertIn("scripts/us-cpa", readme) self.assertIn("python -m unittest", readme) + def test_docs_explain_openclaw_installation_flow(self) -> None: + readme = (SKILL_DIR / "README.md").read_text() + operator_doc = (SKILL_DIR.parent.parent / "docs" / "us-cpa.md").read_text() + skill_doc = (SKILL_DIR / "SKILL.md").read_text() + + self.assertIn("OpenClaw installation", readme) + self.assertIn("~/.openclaw/workspace/skills/us-cpa", readme) + self.assertIn(".venv/bin/python", readme) + self.assertIn("OpenClaw installation", operator_doc) + self.assertIn("rsync -a --delete", operator_doc) + self.assertIn("~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa", skill_doc) + + def test_wrapper_prefers_local_virtualenv_python(self) -> None: + wrapper = (SKILL_DIR / "scripts" / "us-cpa").read_text() + self.assertIn('.venv/bin/python', wrapper) + self.assertIn('PYTHON_BIN', wrapper) + def test_fixture_directories_exist(self) -> None: fixtures_dir = SKILL_DIR / "tests" / "fixtures" for name in ("irs", "facts", "documents", "returns"): From fdfc9f099684abe2a2125fca0ce329a69bc8b679 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:35:24 -0500 Subject: [PATCH 14/14] docs: use home-relative us-cpa install paths --- docs/us-cpa.md | 6 +++--- skills/us-cpa/README.md | 6 +++--- skills/us-cpa/tests/test_cli.py | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/us-cpa.md b/docs/us-cpa.md index 7eb7a0e..5c7344a 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -25,14 +25,14 @@ To install the skill for OpenClaw itself, copy the repo skill into the workspace ```bash rsync -a --delete \ - 
/Users/stefano/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ - /Users/stefano/.openclaw/workspace/skills/us-cpa/ + ~/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + ~/.openclaw/workspace/skills/us-cpa/ ``` 2. Create a workspace-local virtualenv and install the package: ```bash -cd /Users/stefano/.openclaw/workspace/skills/us-cpa +cd ~/.openclaw/workspace/skills/us-cpa python3 -m venv .venv . .venv/bin/activate pip install -e .[dev] diff --git a/skills/us-cpa/README.md b/skills/us-cpa/README.md index d9512a6..8979ca7 100644 --- a/skills/us-cpa/README.md +++ b/skills/us-cpa/README.md @@ -18,14 +18,14 @@ Install the skill into the OpenClaw workspace copy, not only in the repo checkou ```bash rsync -a --delete \ - /Users/stefano/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ - /Users/stefano/.openclaw/workspace/skills/us-cpa/ + ~/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + ~/.openclaw/workspace/skills/us-cpa/ ``` 2. Create a skill-local virtualenv in the workspace copy: ```bash -cd /Users/stefano/.openclaw/workspace/skills/us-cpa +cd ~/.openclaw/workspace/skills/us-cpa python3 -m venv .venv . 
.venv/bin/activate pip install -e .[dev] diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index 4a2416d..62bffc1 100644 --- a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -48,8 +48,11 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertIn("OpenClaw installation", readme) self.assertIn("~/.openclaw/workspace/skills/us-cpa", readme) self.assertIn(".venv/bin/python", readme) + self.assertNotIn("/Users/stefano/", readme) self.assertIn("OpenClaw installation", operator_doc) self.assertIn("rsync -a --delete", operator_doc) + self.assertIn("~/", operator_doc) + self.assertNotIn("/Users/stefano/", operator_doc) self.assertIn("~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa", skill_doc) def test_wrapper_prefers_local_virtualenv_python(self) -> None: