diff --git a/README.md b/README.md index e4710fa..b050c14 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ This repository contains practical OpenClaw skills and companion integrations. I | `nordvpn-client` | Install, log in to, connect, disconnect, and verify NordVPN sessions across Linux CLI and macOS NordLynx/WireGuard backends. | `skills/nordvpn-client` | | `portainer` | Manage Portainer stacks via API (list, start/stop/restart, update, prune images). | `skills/portainer` | | `searxng` | Search through a local or self-hosted SearXNG instance for web, news, images, and more. | `skills/searxng` | +| `us-cpa` | Federal individual 1040 workflow for tax questions, case intake, preparation, review, and draft e-file-ready export. | `skills/us-cpa` | | `web-automation` | One-shot extraction plus broader browsing/scraping with Playwright-compatible CloakBrowser (auth flows, extraction, bot-protected sites). | `skills/web-automation` | ## Integrations diff --git a/docs/README.md b/docs/README.md index 339baf3..68757c5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -9,6 +9,7 @@ This folder contains detailed docs for each skill in this repository. 
- [`nordvpn-client`](nordvpn-client.md) — Cross-platform NordVPN install, login, connect, disconnect, and verification with Linux CLI and macOS NordLynx/WireGuard support - [`portainer`](portainer.md) — Portainer stack management (list, lifecycle, updates, image pruning) - [`searxng`](searxng.md) — Privacy-respecting metasearch via a local or self-hosted SearXNG instance +- [`us-cpa`](us-cpa.md) — Federal individual 1040 workflow for tax questions, case intake, preparation, review, and draft e-file-ready export - [`web-automation`](web-automation.md) — One-shot extraction plus Playwright-compatible CloakBrowser browser automation and scraping ## Integrations diff --git a/docs/us-cpa.md b/docs/us-cpa.md new file mode 100644 index 0000000..5c7344a --- /dev/null +++ b/docs/us-cpa.md @@ -0,0 +1,298 @@ +# us-cpa + +`us-cpa` is a Python CLI plus OpenClaw skill wrapper for U.S. federal individual tax work. + +## Standalone package usage + +From `skills/us-cpa/`: + +```bash +pip install -e .[dev] +us-cpa --help +``` + +Without installing, the repo-local wrapper works directly: + +```bash +skills/us-cpa/scripts/us-cpa --help +``` + +## OpenClaw installation + +To install the skill for OpenClaw itself, copy the repo skill into the workspace skill directory and install its Python dependencies there. + +1. Sync the repo copy into the workspace: + +```bash +rsync -a --delete \ + ~/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + ~/.openclaw/workspace/skills/us-cpa/ +``` + +2. Create a workspace-local virtualenv and install the package: + +```bash +cd ~/.openclaw/workspace/skills/us-cpa +python3 -m venv .venv +. .venv/bin/activate +pip install -e .[dev] +``` + +3. Verify the installed workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +The wrapper prefers `.venv/bin/python` inside the skill directory when present, so OpenClaw can run the workspace copy without relying on global Python packages. 
+ +## Current Milestone + +Current implementation now includes: + +- deterministic cache layout under `~/.cache/us-cpa` by default +- `fetch-year` download flow for the bootstrap IRS corpus +- source manifest with URL, hash, authority rank, and local path traceability +- primary-law URL building for IRC and Treasury regulation escalation +- case-folder intake, document registration, and machine-usable fact extraction from JSON, text, and PDF inputs +- question workflow with conversation and memo output +- prepare workflow for the current supported multi-form 1040 package +- review workflow with findings-first output +- fillable-PDF first rendering with overlay fallback +- e-file-ready draft export payload generation + +## CLI Surface + +```bash +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown +skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 +skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json +skills/us-cpa/scripts/us-cpa render-forms --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +``` + +## Tax-Year Cache + +Default cache root: + +```text +~/.cache/us-cpa +``` + +Override for isolated runs: + +```bash +US_CPA_CACHE_DIR=/tmp/us-cpa-cache skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 +``` + +Current `fetch-year` bootstrap corpus for tax year `2025` is verified against live IRS `irs-prior` PDFs for: + +- Form 1040 +- Schedules 1, 2, 3, A, B, C, D, E, SE, and 8812 +- Forms 8949, 
4562, 4797, 6251, 8606, 8863, 8889, 8959, 8960, 8995, 8995-A, 5329, 5695, and 1116 +- General Form 1040 instructions and selected schedule/form instructions + +Current bundled tax-year computation data: + +- 2024 +- 2025 + +Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added. + +Adding a new supported year is a deliberate data-table change in `tax_years.py`, not an automatic runtime discovery step. That is intentional for tax-engine correctness. + +## Interaction Model + +- `question` + - stateless by default + - optional case context +- `prepare` + - requires a case directory + - if none exists, OpenClaw should ask whether to create one and where +- `review` + - requires a case directory + - can operate on an existing or newly-created review case + +## Planned Case Layout + +```text +CASE_DIR/ + input/ + extracted/ + return/ + output/ + reports/ + issues/ + sources/ +``` + +Current implementation writes: + +- `case-manifest.json` +- `extracted/facts.json` +- `issues/open-issues.json` + +## Intake Flow + +Current `extract-docs` supports: + +- `--create-case` +- `--case-label` +- `--facts-json PATH` +- repeated `--input-file PATH` + +Behavior: + +- creates the full case directory layout when `--create-case` is used +- copies input documents into `input/` +- stores normalized facts with source metadata in `extracted/facts.json` +- extracts machine-usable facts from JSON/text/PDF documents where supported +- appends document registry entries to `case-manifest.json` +- stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact + +## Output Contract + +- JSON by default +- markdown available with `--format markdown` +- `question` supports: + - `--style conversation` + - `--style memo` +- `question` emits answered analysis output +- `prepare` emits a prepared return package summary +- `export-efile-ready` emits a draft e-file-ready payload +- 
`review` emits a findings-first review result +- `fetch-year` emits a downloaded manifest location and source count + +## Question Engine + +Current `question` implementation: + +- loads the cached tax-year corpus +- searches a small IRS-first topical rule set +- returns one canonical analysis object +- renders that analysis as: + - conversational output + - memo output +- marks questions outside the current topical rule set as requiring primary-law escalation + +Current implemented topics: + +- standard deduction +- Schedule C / sole proprietorship reporting trigger +- Schedule D / capital gains reporting trigger +- Schedule E / rental income reporting trigger + +## Form Rendering + +Current rendering path: + +- official IRS PDFs from the cached tax-year corpus +- deterministic field-fill when usable AcroForm fields are present +- overlay rendering onto those official PDFs using `reportlab` + `pypdf` as fallback +- artifact manifest written to `output/artifacts.json` + +Current rendered form support: + +- field-fill support for known mapped fillable forms +- overlay generation for the current required-form set resolved by the return model + +Current review rule: + +- field-filled artifacts are not automatically flagged for review +- overlay-rendered artifacts are marked `reviewRequired: true` + +Overlay coordinates are currently a fallback heuristic and are not treated as line-perfect authoritative field maps. Overlay output must be visually reviewed before any filing/export handoff. 
+ +## Preparation Workflow + +Current `prepare` implementation: + +- loads case facts from `extracted/facts.json` +- normalizes them into the current supported federal return model +- preserves source provenance for normalized values +- computes the current supported 1040 package +- resolves required forms across the current supported subset +- writes: + - `return/normalized-return.json` + - `output/artifacts.json` + - `reports/prepare-summary.json` + +Current supported calculation inputs: + +- `filingStatus` +- `spouse.fullName` +- `dependents` +- `wages` +- `taxableInterest` +- `businessIncome` +- `capitalGainLoss` +- `rentalIncome` +- `federalWithholding` +- `itemizedDeductions` +- `hsaContribution` +- `educationCredit` +- `foreignTaxCredit` +- `qualifiedBusinessIncome` +- `traditionalIraBasis` +- `additionalMedicareTax` +- `netInvestmentIncomeTax` +- `alternativeMinimumTax` +- `additionalTaxPenalty` +- `energyCredit` +- `depreciationExpense` +- `section1231GainLoss` + +## E-file-ready Export + +`export-efile-ready` writes: + +- `output/efile-ready.json` + +Current export behavior: + +- draft-only +- includes required forms +- includes refund or balance due summary +- includes attachment manifest +- includes unresolved issues + +## Review Workflow + +Current `review` implementation: + +- recomputes the return from current case facts +- compares stored normalized return values to recomputed values +- flags source-fact mismatches for key income fields +- flags likely omitted income when document-extracted facts support an amount the stored return omits +- checks whether required rendered artifacts are present +- flags high-complexity forms for specialist follow-up +- flags overlay-rendered artifacts as requiring human review +- sorts findings by severity + +Current render modes: + +- `--style conversation` +- `--style memo` + +## Scope Rules + +- U.S. 
federal individual returns only in v1 +- official IRS artifacts are the target output for compiled forms +- conflicting facts must stop the workflow for user resolution + +## Authority Ranking + +Current authority classes are ranked to preserve source hierarchy: + +- IRS forms +- IRS instructions +- IRS publications +- IRS FAQs +- Internal Revenue Code +- Treasury regulations +- other primary authority + +Later research and review flows should consume this ranking rather than inventing their own. diff --git a/skills/us-cpa/README.md b/skills/us-cpa/README.md new file mode 100644 index 0000000..8979ca7 --- /dev/null +++ b/skills/us-cpa/README.md @@ -0,0 +1,80 @@ +# us-cpa package + +Standalone Python CLI package for the `us-cpa` skill. + +## Install + +From `skills/us-cpa/`: + +```bash +pip install -e .[dev] +``` + +## OpenClaw installation + +Install the skill into the OpenClaw workspace copy, not only in the repo checkout. + +1. Sync the skill into the workspace: + +```bash +rsync -a --delete \ + ~/.openclaw/workspace/projects/stef-openclaw-skills/skills/us-cpa/ \ + ~/.openclaw/workspace/skills/us-cpa/ +``` + +2. Create a skill-local virtualenv in the workspace copy: + +```bash +cd ~/.openclaw/workspace/skills/us-cpa +python3 -m venv .venv +. .venv/bin/activate +pip install -e .[dev] +``` + +3. Run the workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +The wrapper now prefers `~/.openclaw/workspace/skills/us-cpa/.venv/bin/python` when present and falls back to `python3` otherwise. 
+ +## Run + +Installed entry point: + +```bash +us-cpa --help +``` + +Repo-local wrapper without installation: + +```bash +scripts/us-cpa --help +``` + +OpenClaw workspace wrapper: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +Module execution: + +```bash +python3 -m us_cpa.cli --help +``` + +## Tests + +From `skills/us-cpa/`: + +```bash +PYTHONPATH=src python3 -m unittest +``` + +Or with the dev extra installed: + +```bash +python -m unittest +``` diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md new file mode 100644 index 0000000..8feccd7 --- /dev/null +++ b/skills/us-cpa/SKILL.md @@ -0,0 +1,72 @@ +--- +name: us-cpa +description: Use when answering U.S. federal individual tax questions, preparing a federal Form 1040 return package, or reviewing a draft/completed federal individual return. +--- + +# US CPA + +`us-cpa` is a Python-first federal individual tax workflow skill. The CLI is the canonical engine. Use the skill to classify the request, gather missing inputs, and invoke the CLI. + +## Modes + +- `question` + - one-off federal tax question + - case folder optional +- `prepare` + - new or existing return-preparation case + - case folder required +- `review` + - new or existing return-review case + - case folder required + +## Agent Workflow + +1. Determine whether the request is: + - question-only + - a new preparation/review case + - work on an existing case +2. If the request is `prepare` or `review` and no case folder is supplied: + - ask whether to create a new case + - ask where to store it +3. Use the bundled CLI: + +```bash +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" 
--tax-year 2025 --style memo --format markdown +skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --style memo --format markdown +skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json +``` + +When OpenClaw is using the installed workspace copy, the entrypoint is: + +```bash +~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa --help +``` + +## Rules + +- federal individual returns only in v1 +- IRS materials first; escalate to primary law only when needed +- stop on conflicting facts and ask the user to resolve the issue before continuing +- official IRS PDFs are the target compiled-form artifacts +- deterministic field-fill is the preferred render path when the official PDF exposes usable fields +- overlay-rendered forms are the fallback and must be flagged for human review + +## Output + +- JSON by default +- markdown output available with `--format markdown` +- `question` supports `--style conversation|memo` +- `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default +- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation +- `extract-docs` creates or opens a case, registers documents, stores facts, extracts machine-usable facts from JSON/text/PDF sources where possible, and stops with a structured issue if facts conflict +- `question` currently has explicit IRS-first answers for standard deduction, Schedule C, Schedule D, and Schedule E questions; other questions escalate to primary-law research with official IRC/regulation URLs +- rendered form artifacts 
prefer fillable-field output when possible and otherwise fall back to overlay output +- `prepare` computes the current supported federal 1040 package, preserves fact provenance in the normalized return, and writes normalized return/artifact/report files into the case directory +- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything +- `review` recomputes the return from case facts, checks artifacts, flags source-fact mismatches and likely omissions, and returns findings-first output in conversation or memo style + +For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/pyproject.toml b/skills/us-cpa/pyproject.toml new file mode 100644 index 0000000..a448842 --- /dev/null +++ b/skills/us-cpa/pyproject.toml @@ -0,0 +1,27 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "us-cpa" +version = "0.1.0" +description = "US federal individual tax workflow CLI for questions, preparation, and review." +requires-python = ">=3.9" +dependencies = [ + "pypdf>=5.0.0", + "reportlab>=4.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", +] + +[project.scripts] +us-cpa = "us_cpa.cli:main" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/skills/us-cpa/scripts/us-cpa b/skills/us-cpa/scripts/us-cpa new file mode 100755 index 0000000..01fb0bd --- /dev/null +++ b/skills/us-cpa/scripts/us-cpa @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SKILL_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" +PYTHON_BIN="${SKILL_DIR}/.venv/bin/python" +export PYTHONPATH="${SKILL_DIR}/src${PYTHONPATH:+:${PYTHONPATH}}" + +if [[ ! 
# Sub-directories created under every case folder by ``CaseManager.create_case``.
CASE_SUBDIRECTORIES = (
    "input",
    "extracted",
    "return",
    "output",
    "reports",
    "issues",
    "sources",
)


def _timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _sha256_path(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 64 KiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()


class CaseConflictError(Exception):
    """Raised when a newly captured fact contradicts an already stored fact.

    The structured issue that was appended to the open-issues file is kept on
    ``issue`` so callers can emit it verbatim.
    """

    def __init__(self, issue: dict[str, Any]) -> None:
        super().__init__(issue["message"])
        self.issue = issue


@dataclass
class CaseManager:
    """Manage the on-disk state of one tax case directory.

    The manager owns three JSON files inside ``case_dir``: the manifest
    (``case-manifest.json``), the fact store (``extracted/facts.json``) and
    the open-issue list (``issues/open-issues.json``).
    """

    # Root of the case; expanded and resolved to an absolute path on init.
    case_dir: Path

    def __post_init__(self) -> None:
        self.case_dir = self.case_dir.expanduser().resolve()

    @property
    def manifest_path(self) -> Path:
        """Location of the case manifest."""
        return self.case_dir / "case-manifest.json"

    @property
    def facts_path(self) -> Path:
        """Location of the stored facts."""
        return self.case_dir / "extracted" / "facts.json"

    @property
    def issues_path(self) -> Path:
        """Location of the open-issue list."""
        return self.case_dir / "issues" / "open-issues.json"

    @staticmethod
    def _read_json(path: Path, default: dict[str, Any]) -> dict[str, Any]:
        """Load *path* as UTF-8 JSON, returning *default* when the file is absent."""
        if not path.exists():
            return default
        return json.loads(path.read_text(encoding="utf-8"))

    @staticmethod
    def _write_json(path: Path, payload: dict[str, Any]) -> None:
        """Write *payload* to *path* as indented UTF-8 JSON, creating the parent folder."""
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    def create_case(self, *, case_label: str, tax_year: int) -> dict[str, Any]:
        """Create the case layout and return the case manifest.

        Missing directories and state files are created.  An existing manifest
        is returned untouched rather than overwritten, so re-running case
        creation cannot erase the document registry or ``createdAt``; a
        tax-year mismatch with such a manifest is reported later by ``intake``.
        """
        self.case_dir.mkdir(parents=True, exist_ok=True)
        for name in CASE_SUBDIRECTORIES:
            (self.case_dir / name).mkdir(exist_ok=True)

        if self.manifest_path.exists():
            manifest = self.load_manifest()
        else:
            now = _timestamp()  # one reading so createdAt == updatedAt
            manifest = {
                "caseLabel": case_label,
                "taxYear": tax_year,
                "createdAt": now,
                "updatedAt": now,
                "status": "open",
                "documents": [],
            }
            self._write_json(self.manifest_path, manifest)

        if not self.facts_path.exists():
            self._write_json(self.facts_path, {"facts": {}})
        if not self.issues_path.exists():
            self._write_json(self.issues_path, {"issues": []})
        return manifest

    def load_manifest(self) -> dict[str, Any]:
        """Return the parsed case manifest; raises if the file does not exist."""
        return json.loads(self.manifest_path.read_text(encoding="utf-8"))

    def _load_facts(self) -> dict[str, Any]:
        """Return the stored facts payload, or an empty store when none exists yet."""
        return self._read_json(self.facts_path, {"facts": {}})

    def _write_manifest(self, manifest: dict[str, Any]) -> None:
        """Stamp ``updatedAt`` on *manifest* and persist it."""
        manifest["updatedAt"] = _timestamp()
        self._write_json(self.manifest_path, manifest)

    def _write_facts(self, facts: dict[str, Any]) -> None:
        """Persist the facts payload."""
        self._write_json(self.facts_path, facts)

    def _write_issue(self, issue: dict[str, Any]) -> None:
        """Append *issue* to the open-issue list, creating the list if needed."""
        current = self._read_json(self.issues_path, {"issues": []})
        current["issues"].append(issue)
        self._write_json(self.issues_path, current)

    def _record_fact(
        self,
        facts_payload: dict[str, Any],
        *,
        field: str,
        value: Any,
        source_type: str,
        source_name: str,
        tax_year: int,
    ) -> None:
        """Merge one fact into *facts_payload* in place.

        A new field is stored with its first source.  A repeated field with an
        equal value only gains another source entry.  A repeated field with a
        different value is written to the open-issue list and raises
        ``CaseConflictError``; *facts_payload* is left unchanged in that case.
        """
        existing = facts_payload["facts"].get(field)
        if existing is not None and existing["value"] != value:
            issue = {
                "status": "needs_resolution",
                "issueType": "fact_conflict",
                "field": field,
                "existingValue": existing["value"],
                "newValue": value,
                "message": f"Conflicting values for {field}. Resolve before continuing.",
                "createdAt": _timestamp(),
                "taxYear": tax_year,
            }
            self._write_issue(issue)
            raise CaseConflictError(issue)

        captured_at = _timestamp()
        source_entry = {
            "sourceType": source_type,
            "sourceName": source_name,
            "capturedAt": captured_at,
        }
        if existing is not None:
            # Same value seen again: keep the value, remember the extra source.
            existing.setdefault("sources", []).append(source_entry)
            return

        facts_payload["facts"][field] = {
            "value": value,
            "sourceType": source_type,
            "capturedAt": captured_at,
            "sources": [source_entry],
        }

    def intake(
        self,
        *,
        tax_year: int,
        user_facts: dict[str, Any],
        document_paths: list[Path],
    ) -> dict[str, Any]:
        """Register documents and record facts for this case.

        Each document is copied into ``input/``, hashed, and added to the
        manifest; facts extracted from the documents are recorded first, then
        the user-supplied facts.  The manifest and fact store are written only
        after every fact was accepted.

        Raises:
            ValueError: the manifest's tax year differs from *tax_year*.
            CaseConflictError: a fact contradicts a stored value.
        """
        manifest = self.load_manifest()
        if manifest["taxYear"] != tax_year:
            raise ValueError(
                f"Case tax year {manifest['taxYear']} does not match requested tax year {tax_year}."
            )

        registered_documents = []
        for raw_path in document_paths:
            source_path = raw_path.expanduser().resolve()
            destination = self.case_dir / "input" / source_path.name
            try:
                shutil.copy2(source_path, destination)
            except shutil.SameFileError:
                # The document already lives in input/; register it as-is.
                pass
            document_entry = {
                "name": source_path.name,
                "sourcePath": str(source_path),
                "storedPath": str(destination),
                "sha256": _sha256_path(destination),
                "registeredAt": _timestamp(),
            }
            manifest["documents"].append(document_entry)
            registered_documents.append(document_entry)

        facts_payload = self._load_facts()
        for document_entry in registered_documents:
            extracted = extract_document_facts(Path(document_entry["storedPath"]))
            document_entry["extractedFacts"] = extracted
            for field, value in extracted.items():
                self._record_fact(
                    facts_payload,
                    field=field,
                    value=value,
                    source_type="document_extract",
                    source_name=document_entry["name"],
                    tax_year=tax_year,
                )

        for field, value in user_facts.items():
            self._record_fact(
                facts_payload,
                field=field,
                value=value,
                source_type="user_statement",
                source_name="interactive-intake",
                tax_year=tax_year,
            )

        self._write_manifest(manifest)
        self._write_facts(facts_payload)
        return {
            "status": "accepted",
            "caseDir": str(self.case_dir),
            "taxYear": tax_year,
            "registeredDocuments": registered_documents,
            "factCount": len(facts_payload["facts"]),
        }
def _add_common_arguments(
    parser: argparse.ArgumentParser, *, include_tax_year: bool = True
) -> None:
    """Attach the options shared by every sub-command to *parser*."""
    if include_tax_year:
        parser.add_argument("--tax-year", type=int, default=None)
    parser.add_argument("--case-dir", default=None)
    parser.add_argument("--format", choices=("json", "markdown"), default="json")


def _emit(payload: dict[str, Any], output_format: str) -> int:
    """Print *payload* as JSON or as a flat markdown bullet list; return exit code 0."""
    if output_format == "markdown":
        lines = [f"# {payload['command']}"]
        lines.extend(
            f"- **{key}**: {value}" for key, value in payload.items() if key != "command"
        )
        print("\n".join(lines))
    else:
        print(json.dumps(payload, indent=2))
    return 0


def _require_case_dir(args: argparse.Namespace) -> Path:
    """Return the resolved ``--case-dir`` or exit when it was not supplied."""
    if not args.case_dir:
        raise SystemExit("A case directory is required for this command.")
    return Path(args.case_dir).expanduser().resolve()


def _load_json_file(path_value: str | None) -> dict[str, Any]:
    """Load a JSON object from *path_value*; an empty/absent path yields ``{}``.

    Exits with a readable message when the file holds something other than a
    JSON object, because callers iterate the result as field/value pairs.
    """
    if not path_value:
        return {}
    payload = json.loads(Path(path_value).expanduser().resolve().read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise SystemExit("--facts-json must contain a JSON object of field/value pairs.")
    return payload


def build_parser() -> argparse.ArgumentParser:
    """Build the ``us-cpa`` argument parser with one sub-parser per command."""
    parser = argparse.ArgumentParser(
        prog="us-cpa",
        description="US federal individual tax workflow CLI.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    question = subparsers.add_parser("question", help="Answer a tax question.")
    _add_common_arguments(question)
    question.add_argument("--question", required=True)
    question.add_argument("--style", choices=("conversation", "memo"), default="conversation")

    prepare = subparsers.add_parser("prepare", help="Prepare a return case.")
    _add_common_arguments(prepare)

    review = subparsers.add_parser("review", help="Review a return case.")
    _add_common_arguments(review)
    review.add_argument("--style", choices=("conversation", "memo"), default="conversation")

    fetch_year = subparsers.add_parser(
        "fetch-year", help="Fetch tax-year forms and instructions."
    )
    # fetch-year cannot work without a year, so it declares its own required flag.
    _add_common_arguments(fetch_year, include_tax_year=False)
    fetch_year.add_argument("--tax-year", type=int, required=True)

    extract_docs = subparsers.add_parser(
        "extract-docs", help="Extract facts from case documents."
    )
    _add_common_arguments(extract_docs)
    extract_docs.add_argument("--create-case", action="store_true")
    extract_docs.add_argument("--case-label")
    extract_docs.add_argument("--facts-json")
    extract_docs.add_argument("--input-file", action="append", default=[])

    render_forms = subparsers.add_parser(
        "render-forms", help="Render compiled IRS forms."
    )
    _add_common_arguments(render_forms)

    export_efile = subparsers.add_parser(
        "export-efile-ready", help="Export an e-file-ready payload."
    )
    _add_common_arguments(export_efile)

    return parser


def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return the process exit code.

    Returns 0 on success and 1 when intake stops on a fact conflict.  Usage
    problems (missing case directory, missing tax year for a new case, tax
    year mismatch, missing prepared return) exit through ``SystemExit`` with
    a one-line message instead of a traceback.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.command == "question":
        corpus = TaxYearCorpus()
        engine = QuestionEngine(corpus=corpus)
        case_facts: dict[str, Any] = {}
        if args.case_dir:
            manager = CaseManager(Path(args.case_dir))
            if manager.facts_path.exists():
                case_facts = {
                    key: value["value"]
                    for key, value in json.loads(manager.facts_path.read_text())["facts"].items()
                }
        # NOTE(review): --tax-year is optional here, so None can reach the
        # engine; confirm QuestionEngine.answer accepts a missing year.
        analysis = engine.answer(
            question=args.question,
            tax_year=args.tax_year,
            case_facts=case_facts,
        )
        payload = {
            "command": "question",
            "format": args.format,
            "style": args.style,
            "taxYear": args.tax_year,
            "caseDir": args.case_dir,
            "question": args.question,
            "status": "answered",
            "analysis": analysis,
        }
        payload["rendered"] = (
            render_memo(analysis) if args.style == "memo" else render_analysis(analysis)
        )
        if args.format == "markdown":
            print(payload["rendered"])
            return 0
        return _emit(payload, args.format)

    if args.command == "extract-docs":
        case_dir = _require_case_dir(args)
        if args.create_case:
            # Validate before touching the filesystem: a new manifest must
            # never be written with a null tax year.
            if not args.case_label:
                raise SystemExit("--case-label is required when --create-case is used.")
            if args.tax_year is None:
                raise SystemExit("--tax-year is required when --create-case is used.")
        manager = CaseManager(case_dir)
        if args.create_case:
            manager.create_case(case_label=args.case_label, tax_year=args.tax_year)
        elif not manager.manifest_path.exists():
            raise SystemExit("Case manifest not found. Use --create-case for a new case.")

        tax_year = args.tax_year
        if tax_year is None:
            # Existing case and no explicit year: use the year the case was opened for.
            tax_year = manager.load_manifest()["taxYear"]

        try:
            result = manager.intake(
                tax_year=tax_year,
                user_facts=_load_json_file(args.facts_json),
                document_paths=[
                    Path(path_value).expanduser().resolve() for path_value in args.input_file
                ],
            )
        except CaseConflictError as exc:
            print(json.dumps(exc.issue, indent=2))
            return 1
        except ValueError as exc:
            # Tax-year mismatch or malformed JSON input: report, don't trace.
            raise SystemExit(str(exc)) from exc
        payload = {
            "command": args.command,
            "format": args.format,
            **result,
        }
        return _emit(payload, args.format)

    if args.command == "prepare":
        case_dir = _require_case_dir(args)
        payload = {
            "command": args.command,
            "format": args.format,
            **PrepareEngine().prepare_case(case_dir),
        }
        return _emit(payload, args.format)

    if args.command == "render-forms":
        case_dir = _require_case_dir(args)
        manager = CaseManager(case_dir)
        manifest = manager.load_manifest()
        normalized_path = case_dir / "return" / "normalized-return.json"
        if not normalized_path.exists():
            raise SystemExit(
                "Normalized return not found. Run prepare before render-forms."
            )
        normalized = json.loads(normalized_path.read_text())
        artifacts = render_case_forms(case_dir, TaxYearCorpus(), normalized)
        payload = {
            "command": "render-forms",
            "format": args.format,
            "taxYear": manifest["taxYear"],
            "status": "rendered",
            **artifacts,
        }
        return _emit(payload, args.format)

    if args.command == "export-efile-ready":
        case_dir = _require_case_dir(args)
        payload = {
            "command": "export-efile-ready",
            "format": args.format,
            **EfileExporter().export_case(case_dir),
        }
        return _emit(payload, args.format)

    if args.command == "review":
        case_dir = _require_case_dir(args)
        review_payload = ReviewEngine().review_case(case_dir)
        payload = {
            "command": "review",
            "format": args.format,
            "style": args.style,
            **review_payload,
        }
        payload["rendered"] = (
            render_review_memo(review_payload)
            if args.style == "memo"
            else render_review_summary(review_payload)
        )
        if args.format == "markdown":
            print(payload["rendered"])
            return 0
        return _emit(payload, args.format)

    if args.command == "fetch-year":
        corpus = TaxYearCorpus()
        manifest = corpus.download_catalog(args.tax_year, bootstrap_irs_catalog(args.tax_year))
        payload = {
            "command": "fetch-year",
            "format": args.format,
            "taxYear": args.tax_year,
            "status": "downloaded",
            "sourceCount": manifest["sourceCount"],
            "manifestPath": corpus.paths_for_year(args.tax_year).manifest_path.as_posix(),
        }
        return _emit(payload, args.format)

    # argparse rejects unknown sub-commands already; this is a defensive fallback.
    parser.error(f"Unsupported command: {args.command}")
    return 2


if __name__ == "__main__":
    sys.exit(main())
_parse_number(match.group(1)) + if match := re.search(r"Box 17 State income tax\s+" + _NUMBER, text, re.I): + extracted["stateWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Social security wages\s+" + _NUMBER, text, re.I): + extracted["socialSecurityWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 5 Medicare wages and tips\s+" + _NUMBER, text, re.I): + extracted["medicareWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I): + extracted["taxableInterest"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1a Total ordinary dividends\s+" + _NUMBER, text, re.I): + extracted["ordinaryDividends"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Gross distribution\s+" + _NUMBER, text, re.I): + extracted["retirementDistribution"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Other income\s+" + _NUMBER, text, re.I): + extracted["otherIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I): + extracted["businessIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Adjusted gross income\s+" + _NUMBER, text, re.I): + extracted["priorYear.adjustedGrossIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Taxable income\s+" + _NUMBER, text, re.I): + extracted["priorYear.taxableIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Refund\s+" + _NUMBER, text, re.I): + extracted["priorYear.refund"] = _parse_number(match.group(1)) + + return extracted + + +def extract_document_facts(path: Path) -> dict[str, Any]: + suffix = path.suffix.lower() + if suffix == ".json": + payload = json.loads(path.read_text()) + if isinstance(payload, dict): + return payload + return {} + return _facts_from_text(_extract_text(path)) diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py new file mode 100644 index 
0000000..1338b83 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/prepare.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from us_cpa.cases import CaseManager +from us_cpa.renderers import render_case_forms +from us_cpa.returns import normalize_case_facts +from us_cpa.sources import TaxYearCorpus + + +def _load_case_facts(case_dir: Path) -> dict[str, Any]: + facts_path = case_dir / "extracted" / "facts.json" + payload = json.loads(facts_path.read_text()) + facts = {key: value["value"] for key, value in payload["facts"].items()} + facts["_factMetadata"] = { + key: {"sources": value.get("sources", [])} for key, value in payload["facts"].items() + } + return facts + + + +class PrepareEngine: + def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None: + self.corpus = corpus or TaxYearCorpus() + + def prepare_case(self, case_dir: Path) -> dict[str, Any]: + manager = CaseManager(case_dir) + manifest = manager.load_manifest() + facts = _load_case_facts(manager.case_dir) + normalized = normalize_case_facts(facts, manifest["taxYear"]) + normalized_path = manager.case_dir / "return" / "normalized-return.json" + normalized_path.write_text(json.dumps(normalized, indent=2)) + + artifacts = render_case_forms(manager.case_dir, self.corpus, normalized) + unresolved_issues = json.loads(manager.issues_path.read_text())["issues"] + + summary = { + "requiredForms": normalized["requiredForms"], + "reviewRequiredArtifacts": [ + artifact["formCode"] for artifact in artifacts["artifacts"] if artifact["reviewRequired"] + ], + "refund": normalized["totals"]["refund"], + "balanceDue": normalized["totals"]["balanceDue"], + "unresolvedIssueCount": len(unresolved_issues), + } + result = { + "status": "prepared", + "caseDir": str(manager.case_dir), + "taxYear": manifest["taxYear"], + "normalizedReturnPath": str(normalized_path), + "artifactManifestPath": str(manager.case_dir / "output" / "artifacts.json"), + 
class EfileExporter:
    """Assemble the draft e-file hand-off payload for a prepared case."""

    def export_case(self, case_dir: Path) -> dict[str, Any]:
        """Build, persist and return the e-file-ready payload.

        Reads ``return/normalized-return.json``, ``output/artifacts.json``
        and ``issues/open-issues.json`` under *case_dir*.  The payload status
        is ``"draft"`` when any open issue exists or any artifact is flagged
        ``reviewRequired``; otherwise it is ``"ready"``.  The payload is
        written to ``output/efile-ready.json`` and also returned.
        """
        root = Path(case_dir).expanduser().resolve()

        def read_json(*parts: str) -> Any:
            return json.loads(root.joinpath(*parts).read_text())

        return_data = read_json("return", "normalized-return.json")
        artifact_manifest = read_json("output", "artifacts.json")
        open_issues = read_json("issues", "open-issues.json")["issues"]

        # Artifacts are only inspected when there are no open issues.
        status = "ready"
        if open_issues or any(item["reviewRequired"] for item in artifact_manifest["artifacts"]):
            status = "draft"

        export = {
            "status": status,
            "taxYear": return_data["taxYear"],
            "returnSummary": {
                "requiredForms": return_data["requiredForms"],
                "refund": return_data["totals"]["refund"],
                "balanceDue": return_data["totals"]["balanceDue"],
            },
            "attachments": artifact_manifest["artifacts"],
            "unresolvedIssues": open_issues,
        }
        (root / "output" / "efile-ready.json").write_text(json.dumps(export, indent=2))
        return export
# Canned answers keyed by topic.  ``keywords`` are alternative phrasings: a
# question matches a rule when it contains ANY of them.  Rules are tried in
# list order and the first match wins.
# NOTE(review): the standard-deduction amounts are fixed strings that do not
# vary with the requested tax year — confirm they match every year served.
TOPIC_RULES = [
    {
        "issue": "standard_deduction",
        "keywords": ("standard deduction",),
        "authority_slugs": ("i1040gi",),
        "answer_by_status": {
            "single": "$15,750",
            "married_filing_jointly": "$31,500",
            "head_of_household": "$23,625",
        },
        "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.",
        "confidence": "high",
    },
    {
        "issue": "schedule_c_required",
        "keywords": ("schedule c", "sole proprietor", "self-employment"),
        "authority_slugs": ("f1040sc", "i1040sc"),
        "answer": "Schedule C is generally required when a taxpayer reports sole proprietorship business income or expenses.",
        "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.",
        "confidence": "medium",
    },
    {
        "issue": "schedule_d_required",
        "keywords": ("schedule d", "capital gains"),
        "authority_slugs": ("f1040sd", "i1040sd", "f8949", "i8949"),
        "answer": "Schedule D is generally required when a taxpayer reports capital gains or losses, often alongside Form 8949.",
        "summary": "Capital gains and losses generally flow through Schedule D, with Form 8949 supporting detail when required.",
        "confidence": "medium",
    },
    {
        "issue": "schedule_e_required",
        "keywords": ("schedule e", "rental income"),
        "authority_slugs": ("f1040se", "i1040se"),
        "answer": "Schedule E is generally required when a taxpayer reports rental real-estate income or expenses.",
        "summary": "Rental income and expenses generally belong on Schedule E.",
        "confidence": "medium",
    },
]


# Answer confidence -> risk level reported alongside it.
RISK_BY_CONFIDENCE = {
    "high": "low",
    "medium": "medium",
    "low": "high",
}


def _normalize_question(question: str) -> str:
    """Return *question* trimmed and lower-cased for keyword matching."""
    return question.strip().lower()


def _filing_status_label(status: str) -> str:
    """Turn a status key such as ``married_filing_jointly`` into a title."""
    return status.replace("_", " ").title()


@dataclass
class QuestionEngine:
    """Answer tax questions from canned topic rules backed by the IRS corpus."""

    # Corpus used to locate the downloaded manifest for a tax year.
    corpus: TaxYearCorpus

    def _manifest(self, tax_year: int) -> dict[str, Any]:
        """Load the corpus manifest for *tax_year*.

        Raises:
            FileNotFoundError: if the manifest has not been downloaded yet.
        """
        path = self.corpus.paths_for_year(tax_year).manifest_path
        if not path.exists():
            raise FileNotFoundError(
                f"Tax year {tax_year} corpus not found at {path}. Run fetch-year first."
            )
        return json.loads(path.read_text())

    def _authorities_for(self, manifest: dict[str, Any], slugs: tuple[str, ...]) -> list[dict[str, Any]]:
        """Return citation records for *slugs*, in the order given.

        Slugs that are not present in the manifest are skipped.
        """
        by_slug = {item["slug"]: item for item in manifest["sources"]}
        citation_keys = ("slug", "title", "sourceClass", "url", "localPath", "authorityRank")
        return [
            {key: by_slug[slug][key] for key in citation_keys}
            for slug in slugs
            if slug in by_slug
        ]

    def answer(self, *, question: str, tax_year: int, case_facts: dict[str, Any]) -> dict[str, Any]:
        """Analyse *question* and return a structured analysis record.

        The first topic rule with any keyword contained in the normalized
        question supplies the answer and its supporting authorities.  When no
        rule matches, the result is a low-confidence escalation whose
        authorities are the primary-law citations found in the question.

        Raises:
            FileNotFoundError: if the corpus for *tax_year* is missing.
        """
        manifest = self._manifest(tax_year)
        normalized = _normalize_question(question)
        facts_used = [{"field": key, "value": value} for key, value in sorted(case_facts.items())]

        # ``any``: the keywords of a rule are synonyms.  Requiring all of them
        # meant "Do I need Schedule C?" could never match its own rule.
        rule = next(
            (
                candidate
                for candidate in TOPIC_RULES
                if any(keyword in normalized for keyword in candidate["keywords"])
            ),
            None,
        )

        if rule is None:
            return {
                "issue": "requires_primary_law_escalation",
                "taxYear": tax_year,
                "factsUsed": facts_used,
                "missingFacts": [
                    "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
                ],
                "authorities": build_primary_law_authorities(question),
                "conclusion": {
                    "answer": "Insufficient IRS-form and instruction support for a confident answer.",
                    "summary": "This question needs primary-law analysis before a reliable answer can be given.",
                },
                "confidence": "low",
                "riskLevel": "high",
                "followUpQuestions": [
                    "What facts drive the section-level issue?",
                    "Is there an existing return position or drafted treatment to review?",
                ],
                "primaryLawRequired": True,
            }

        authorities = self._authorities_for(manifest, rule["authority_slugs"])
        if rule["issue"] == "standard_deduction":
            # A missing or unlisted filing status falls back to the "single" amount.
            filing_status = case_facts.get("filingStatus", "single")
            answer = rule["answer_by_status"].get(filing_status, rule["answer_by_status"]["single"])
            summary = rule["summary_template"].format(
                filing_status_label=_filing_status_label(filing_status),
                answer=answer,
                tax_year=tax_year,
            )
        else:
            answer = rule["answer"]
            summary = rule["summary"]

        return {
            "issue": rule["issue"],
            "taxYear": tax_year,
            "factsUsed": facts_used,
            "missingFacts": [],
            "authorities": authorities,
            "conclusion": {"answer": answer, "summary": summary},
            "confidence": rule["confidence"],
            "riskLevel": RISK_BY_CONFIDENCE[rule["confidence"]],
            "followUpQuestions": [],
            "primaryLawRequired": False,
        }
analysis["missingFacts"]: + lines.append(f"- {item}") + return "\n".join(lines) diff --git a/skills/us-cpa/src/us_cpa/renderers.py b/skills/us-cpa/src/us_cpa/renderers.py new file mode 100644 index 0000000..f41c16a --- /dev/null +++ b/skills/us-cpa/src/us_cpa/renderers.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import json +from io import BytesIO +from pathlib import Path +from typing import Any + +from pypdf import PdfReader, PdfWriter +from reportlab.pdfgen import canvas + +from us_cpa.sources import TaxYearCorpus + + +FORM_TEMPLATES = { + "f1040": "f1040", + "f1040sb": "f1040sb", + "f1040sc": "f1040sc", + "f1040se": "f1040se", + "f1040s1": "f1040s1", +} + + +OVERLAY_FIELDS = { + "f1040": [ + (72, 725, lambda data: f"Taxpayer: {data['taxpayer']['fullName']}"), + (72, 705, lambda data: f"Filing status: {data['filingStatus']}"), + (72, 685, lambda data: f"Wages: {data['income']['wages']:.2f}"), + (72, 665, lambda data: f"Taxable interest: {data['income']['taxableInterest']:.2f}"), + (72, 645, lambda data: f"AGI: {data['totals']['adjustedGrossIncome']:.2f}"), + (72, 625, lambda data: f"Standard deduction: {data['deductions']['standardDeduction']:.2f}"), + (72, 605, lambda data: f"Taxable income: {data['totals']['taxableIncome']:.2f}"), + (72, 585, lambda data: f"Total tax: {data['taxes']['totalTax']:.2f}"), + (72, 565, lambda data: f"Withholding: {data['payments']['federalWithholding']:.2f}"), + (72, 545, lambda data: f"Refund: {data['totals']['refund']:.2f}"), + (72, 525, lambda data: f"Balance due: {data['totals']['balanceDue']:.2f}"), + ], +} + + +FIELD_FILL_VALUES = { + "f1040": lambda data: { + "taxpayer_full_name": data["taxpayer"]["fullName"], + "filing_status": data["filingStatus"], + "wages": f"{data['income']['wages']:.2f}", + "taxable_interest": f"{data['income']['taxableInterest']:.2f}", + } +} + + +def _field_fill_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> bool: + reader = 
def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
    """Stamp return values as text onto the first page of a form template.

    The template is cloned into a writer, the ``OVERLAY_FIELDS`` entries for
    *form_code* are drawn on an in-memory canvas sized to the first page's
    media box, and that canvas is merged onto the first page before the
    result is written to *output_path*.  Forms without overlay entries get an
    overlay with nothing drawn on it.  Only the first page is touched.
    """
    # ``clone_from`` reads the template itself; the previous extra
    # ``PdfReader(template_path)`` parsed the file a second time and its
    # result was never used.
    writer = PdfWriter(clone_from=str(template_path))

    page = writer.pages[0]
    width = float(page.mediabox.width)
    height = float(page.mediabox.height)

    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=(width, height))
    for x, y, getter in OVERLAY_FIELDS.get(form_code, []):
        pdf.drawString(x, y, getter(normalized))
    pdf.save()

    # Rewind so the freshly written overlay can be read back from the start.
    buffer.seek(0)
    overlay = PdfReader(buffer)
    page.merge_page(overlay.pages[0])

    with output_path.open("wb") as handle:
        writer.write(handle)
"templatePath": str(template_path), + "outputPath": str(output_path), + "renderMethod": render_method, + "reviewRequired": review_required, + } + ) + + artifact_manifest = { + "taxYear": normalized["taxYear"], + "artifactCount": len(artifacts), + "artifacts": artifacts, + } + (case_dir / "output" / "artifacts.json").write_text(json.dumps(artifact_manifest, indent=2)) + return artifact_manifest diff --git a/skills/us-cpa/src/us_cpa/returns.py b/skills/us-cpa/src/us_cpa/returns.py new file mode 100644 index 0000000..7f0bcc5 --- /dev/null +++ b/skills/us-cpa/src/us_cpa/returns.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +from typing import Any + +from us_cpa.tax_years import tax_year_rules + + +def _as_float(value: Any) -> float: + if value in (None, ""): + return 0.0 + return float(value) + + +def _fact_metadata(facts: dict[str, Any]) -> dict[str, Any]: + return facts.get("_factMetadata", {}) + + +def _provenance_for(field: str, metadata: dict[str, Any]) -> dict[str, Any]: + entry = metadata.get(field, {}) + return {"sources": list(entry.get("sources", []))} + + +def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float: + taxable = max(0.0, amount) + brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status] + lower = 0.0 + tax = 0.0 + for upper, rate in brackets: + if taxable <= lower: + break + portion = min(taxable, upper) - lower + tax += portion * rate + lower = upper + return round(tax, 2) + + +def resolve_required_forms(normalized: dict[str, Any]) -> list[str]: + forms = ["f1040"] + if normalized["income"]["taxableInterest"] > 1500: + forms.append("f1040sb") + if normalized["income"]["businessIncome"] != 0: + forms.extend(["f1040sc", "f1040sse", "f1040s1", "f8995"]) + if normalized["income"]["capitalGainLoss"] != 0: + forms.extend(["f1040sd", "f8949"]) + if normalized["income"]["rentalIncome"] != 0: + forms.extend(["f1040se", "f1040s1"]) + if normalized["deductions"]["deductionType"] == "itemized": 
def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]:
    """Build the normalized return for *tax_year* from a flat fact mapping.

    Numeric facts are read through ``_as_float`` (absent facts count as
    zero).  Adjusted gross income is the sum of the five income facts; the
    larger of the itemized and standard deduction is applied; total tax adds
    self-employment tax and the supplied additional taxes to the ordinary
    income tax; refund and balance due compare withholding plus credits with
    total tax.  The result also records per-field provenance taken from
    ``facts["_factMetadata"]`` and the list of required forms.
    """
    year_rules = tax_year_rules(tax_year)
    fact_sources = _fact_metadata(facts)
    status = facts.get("filingStatus", "single")

    def amount(fact_name: str) -> float:
        return _as_float(facts.get(fact_name))

    # Facts are converted in a fixed sequence, one name per line.
    wage_income = amount("wages")
    interest_income = amount("taxableInterest")
    business_net = amount("businessIncome")
    capital_net = amount("capitalGainLoss")
    rental_net = amount("rentalIncome")
    withheld = amount("federalWithholding")
    itemized = amount("itemizedDeductions")
    hsa = amount("hsaContribution")
    education = amount("educationCredit")
    foreign = amount("foreignTaxCredit")
    qbi = amount("qualifiedBusinessIncome")
    ira_basis = amount("traditionalIraBasis")
    medicare_surtax = amount("additionalMedicareTax")
    niit = amount("netInvestmentIncomeTax")
    amt = amount("alternativeMinimumTax")
    penalty = amount("additionalTaxPenalty")
    energy = amount("energyCredit")
    depreciation = amount("depreciationExpense")
    sec_1231 = amount("section1231GainLoss")

    agi = wage_income + interest_income + business_net + capital_net + rental_net

    standard = year_rules["standardDeduction"][status]
    if itemized > standard:
        deduction_kind, deduction = "itemized", itemized
    else:
        deduction_kind, deduction = "standard", standard

    taxable = max(0.0, agi - deduction)
    ordinary_tax = tax_on_ordinary_income(taxable, status, tax_year)
    # Only positive business income is subject to self-employment tax here.
    se_tax = round(max(0.0, business_net) * 0.9235 * 0.153, 2)
    tax_total = round(
        ordinary_tax + se_tax + medicare_surtax + niit + amt + penalty,
        2,
    )

    payments = withheld
    credit_total = round(education + foreign + energy, 2)
    refund_due = round(max(0.0, payments + credit_total - tax_total), 2)
    amount_owed = round(max(0.0, tax_total - payments - credit_total), 2)

    # (normalized field path, source fact name) pairs that carry provenance.
    traced_fields = (
        ("income.wages", "wages"),
        ("income.taxableInterest", "taxableInterest"),
        ("income.businessIncome", "businessIncome"),
        ("income.capitalGainLoss", "capitalGainLoss"),
        ("income.rentalIncome", "rentalIncome"),
        ("payments.federalWithholding", "federalWithholding"),
    )

    result: dict[str, Any] = {
        "taxYear": tax_year,
        "taxpayer": {"fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer")},
        "spouse": {"fullName": facts.get("spouse.fullName", "")},
        "dependents": list(facts.get("dependents", [])),
        "filingStatus": status,
        "income": {
            "wages": wage_income,
            "taxableInterest": interest_income,
            "businessIncome": business_net,
            "capitalGainLoss": capital_net,
            "rentalIncome": rental_net,
        },
        "adjustments": {"hsaContribution": hsa},
        "payments": {"federalWithholding": withheld},
        "deductions": {
            "standardDeduction": standard,
            "itemizedDeductions": itemized,
            "deductionType": deduction_kind,
            "deductionAmount": deduction,
        },
        "credits": {
            "educationCredit": education,
            "foreignTaxCredit": foreign,
            "energyCredit": energy,
        },
        "taxes": {
            "incomeTax": ordinary_tax,
            "selfEmploymentTax": se_tax,
            "additionalMedicareTax": medicare_surtax,
            "netInvestmentIncomeTax": niit,
            "alternativeMinimumTax": amt,
            "additionalTaxPenalty": penalty,
            "totalTax": tax_total,
        },
        "business": {"qualifiedBusinessIncome": qbi},
        "basis": {"traditionalIraBasis": ira_basis},
        "depreciation": {"depreciationExpense": depreciation},
        "assetSales": {"section1231GainLoss": sec_1231},
        "totals": {
            "adjustedGrossIncome": round(agi, 2),
            "taxableIncome": round(taxable, 2),
            "totalPayments": round(payments, 2),
            "totalCredits": credit_total,
            "refund": refund_due,
            "balanceDue": amount_owed,
        },
        "provenance": {
            field_path: _provenance_for(fact_name, fact_sources)
            for field_path, fact_name in traced_fields
        },
    }
    # Form selection reads the finished sections, so it runs last.
    result["requiredForms"] = resolve_required_forms(result)
    return result
class ReviewEngine:
    """Recompute a prepared case from its facts and report discrepancies."""

    # (income key, wording used in findings) pairs compared between the
    # stored return and the recomputed one.
    _INCOME_CHECKS = (
        ("wages", "wages"),
        ("taxableInterest", "taxable interest"),
        ("businessIncome", "business income"),
        ("capitalGainLoss", "capital gains or losses"),
        ("rentalIncome", "rental income"),
    )

    # Forms whose presence triggers the specialist follow-up finding.
    _SPECIALIST_FORMS = ("f6251", "f8960", "f8959", "f1116")

    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
        self.corpus = corpus or TaxYearCorpus()

    @staticmethod
    def _finding(
        severity: str,
        title: str,
        explanation: str,
        action: str,
        authority_title: str,
        source_class: str,
    ) -> dict[str, Any]:
        """Assemble one finding record with a single supporting authority."""
        return {
            "severity": severity,
            "title": title,
            "explanation": explanation,
            "suggestedAction": action,
            "authorities": [{"title": authority_title, "sourceClass": source_class}],
        }

    def review_case(self, case_dir: Path) -> dict[str, Any]:
        """Review the case under *case_dir* and return the review report.

        Reads the case manifest, the stored normalized return, the extracted
        facts and the artifact manifest; recomputes the return from the
        facts; and records findings for AGI or income mismatches, likely
        omitted document-backed income, required forms with no rendered
        artifact, artifacts flagged for review, and high-complexity forms.
        Findings are ordered by severity, then title.  The report is written
        to ``reports/review-report.json`` and returned.
        """
        root = Path(case_dir).expanduser().resolve()

        def load(*parts: str) -> Any:
            return json.loads(root.joinpath(*parts).read_text())

        manifest = load("case-manifest.json")
        filed = load("return", "normalized-return.json")
        registry = load("extracted", "facts.json")["facts"]

        facts: dict[str, Any] = {}
        fact_sources: dict[str, Any] = {}
        for name, record in registry.items():
            facts[name] = record["value"]
            fact_sources[name] = {"sources": record.get("sources", [])}
        facts["_factMetadata"] = fact_sources

        fresh = normalize_case_facts(facts, manifest["taxYear"])
        artifact_manifest = load("output", "artifacts.json")

        report_items: list[dict[str, Any]] = []

        if filed["totals"]["adjustedGrossIncome"] != fresh["totals"]["adjustedGrossIncome"]:
            report_items.append(
                self._finding(
                    "high",
                    "Adjusted gross income mismatch",
                    "Stored adjusted gross income does not match the recomputed return from case facts.",
                    f"Update AGI to {fresh['totals']['adjustedGrossIncome']:.2f} on Form 1040 line 11.",
                    "Instructions for Form 1040 and Schedules 1-3",
                    "irs_instructions",
                )
            )

        for field, label in self._INCOME_CHECKS:
            stored_value = filed["income"].get(field, 0.0)
            recomputed_value = fresh["income"].get(field, 0.0)
            sources = fresh.get("provenance", {}).get(f"income.{field}", {}).get("sources", [])
            document_backed = any(item.get("sourceType") == "document_extract" for item in sources)

            if stored_value != recomputed_value:
                # A mismatch backed by an extracted document is more serious.
                report_items.append(
                    self._finding(
                        "high" if document_backed else "medium",
                        f"Source fact mismatch for {label}",
                        f"Stored return reports {stored_value:.2f} for {label}, but case facts support {recomputed_value:.2f}.",
                        f"Reconcile {label} to {recomputed_value:.2f} before treating the return as final.",
                        "Case fact registry",
                        "irs_form",
                    )
                )
            if stored_value == 0 and recomputed_value > 0 and document_backed:
                report_items.append(
                    self._finding(
                        "high",
                        f"Likely omitted {label}",
                        f"Document-extracted facts support {recomputed_value:.2f} of {label}, but the stored return reports none.",
                        f"Add {label} to the return and regenerate the required forms.",
                        "Case document extraction",
                        "irs_form",
                    )
                )

        rendered = {artifact["formCode"] for artifact in artifact_manifest["artifacts"]}
        for required_form in fresh["requiredForms"]:
            if required_form in rendered:
                continue
            report_items.append(
                self._finding(
                    "high",
                    f"Missing rendered artifact for {required_form}",
                    "The return requires this form, but no rendered artifact is present in the artifact manifest.",
                    f"Render and review {required_form} before treating the package as complete.",
                    "Supported form manifest",
                    "irs_form",
                )
            )

        for artifact in artifact_manifest["artifacts"]:
            if not artifact.get("reviewRequired"):
                continue
            report_items.append(
                self._finding(
                    "medium",
                    f"Human review required for {artifact['formCode']}",
                    "The form was overlay-rendered on the official IRS PDF and must be reviewed before filing.",
                    f"Review the rendered {artifact['formCode']} artifact visually before any filing/export handoff.",
                    "Artifact render policy",
                    "irs_form",
                )
            )

        all_required = set(fresh["requiredForms"]) | set(filed.get("requiredForms", []))
        if not all_required.isdisjoint(self._SPECIALIST_FORMS):
            report_items.append(
                self._finding(
                    "medium",
                    "High-complexity tax position requires specialist follow-up",
                    "The return includes forms or computations that usually require deeper technical support and careful authority review.",
                    "Review the supporting authority and computations for the high-complexity forms before treating the return as filing-ready.",
                    "Required form analysis",
                    "irs_instructions",
                )
            )

        ordered = sorted(
            report_items,
            key=lambda item: (_severity_rank(item["severity"]), item["title"]),
        )
        report = {
            "status": "reviewed",
            "taxYear": manifest["taxYear"],
            "caseDir": str(root),
            "findingCount": len(ordered),
            "findings": ordered,
        }
        (root / "reports" / "review-report.json").write_text(json.dumps(report, indent=2))
        return report
def render_review_memo(review: dict[str, Any]) -> str:
    """Render a review report as a Markdown memo.

    Each finding becomes a numbered section with its severity, explanation,
    suggested correction and a bullet list of authority titles.  A report
    without findings yields a short "No findings detected." memo.  Trailing
    whitespace is stripped from the finished memo.
    """
    header = ["# Review Memo", ""]
    if not review["findings"]:
        return "\n".join(header + ["No findings detected."])

    memo = list(header)
    number = 0
    for finding in review["findings"]:
        number += 1
        memo += [
            f"## Finding {number}: {finding['title']}",
            f"Severity: {finding['severity']}",
            "",
            "### Explanation",
            finding["explanation"],
            "",
            "### Suggested correction",
            finding["suggestedAction"],
            "",
            "### Authorities",
        ]
        memo += [f"- {authority['title']}" for authority in finding["authorities"]]
        # Blank line separating this finding from the next one.
        memo.append("")
    return "\n".join(memo).rstrip()
def build_primary_law_authorities(question: str) -> list[dict[str, str | int]]:
    """Turn primary-law citations found in *question* into authority records.

    Two citation styles are recognised, case-insensitively:

    * Internal Revenue Code sections — ``section 162``, ``sec. 199A``,
      ``§ 1031``.
    * Treasury regulations — ``Treas. Reg. 1.162-1``, optionally with a
      section marker as in ``Treasury Regulation section 1.162-1``.

    Code sections are listed first, then regulations, each in order of first
    appearance; a citation repeated in the question is listed once.  A
    question with no recognisable citation yields an empty list.
    """
    authorities: list[dict[str, str | int]] = []
    seen_slugs: set[str] = set()
    normalized = question.lower()

    def add(slug: str, title: str, source_class: str, url: str, rank: int) -> None:
        # Repeating a citation in the question must not duplicate it.
        if slug in seen_slugs:
            return
        seen_slugs.add(slug)
        authorities.append(
            {
                "slug": slug,
                "title": title,
                "sourceClass": source_class,
                "url": url,
                "authorityRank": rank,
            }
        )

    # The lookahead rejects regulation-style numbers ("section 1.162-1"),
    # which would otherwise be reported as Internal Revenue Code section 1.
    code_pattern = r"(?:section|sec\.|§)\s*(\d+[a-z0-9-]*)(?![a-z0-9-]|\.\d)"
    for match in re.finditer(code_pattern, normalized):
        # A dash left over from surrounding punctuation is not part of the number.
        section = match.group(1).rstrip("-")
        add(
            f"irc-{section}",
            f"Internal Revenue Code section {section}",
            "internal_revenue_code",
            f"https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title26-section{section}&num=0&edition=prelim",
            int(AuthorityRank.INTERNAL_REVENUE_CODE),
        )

    # An optional "section" / "sec." / "§" may sit between "Reg." and the number.
    regulation_pattern = (
        r"treas(?:ury)?\.?\s+reg(?:ulation)?s?\.?\s*"
        r"(?:(?:section|sec\.|§)\s*)?([\d.]+-\d+)"
    )
    for match in re.finditer(regulation_pattern, normalized):
        section = match.group(1)
        add(
            f"reg-{section}",
            f"Treasury Regulation {section}",
            "treasury_regulation",
            f"https://www.ecfr.gov/current/title-26/section-{section}",
            int(AuthorityRank.TREASURY_REGULATION),
        )

    return authorities
"irs_form"), + ("f1040sb", "Schedule B (Form 1040)", "irs_form"), + ("f1040sc", "Schedule C (Form 1040)", "irs_form"), + ("f1040sd", "Schedule D (Form 1040)", "irs_form"), + ("f1040se", "Schedule E (Form 1040)", "irs_form"), + ("f1040sse", "Schedule SE (Form 1040)", "irs_form"), + ("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"), + ("f8949", "Form 8949", "irs_form"), + ("f4562", "Form 4562", "irs_form"), + ("f4797", "Form 4797", "irs_form"), + ("f6251", "Form 6251", "irs_form"), + ("f8606", "Form 8606", "irs_form"), + ("f8863", "Form 8863", "irs_form"), + ("f8889", "Form 8889", "irs_form"), + ("f8959", "Form 8959", "irs_form"), + ("f8960", "Form 8960", "irs_form"), + ("f8995", "Form 8995", "irs_form"), + ("f8995a", "Form 8995-A", "irs_form"), + ("f5329", "Form 5329", "irs_form"), + ("f5695", "Form 5695", "irs_form"), + ("f1116", "Form 1116", "irs_form"), + ("i1040gi", "Instructions for Form 1040 and Schedules 1-3", "irs_instructions"), + ("i1040sca", "Instructions for Schedule A", "irs_instructions"), + ("i1040sc", "Instructions for Schedule C", "irs_instructions"), + ("i1040sd", "Instructions for Schedule D", "irs_instructions"), + ("i1040se", "Instructions for Schedule E (Form 1040)", "irs_instructions"), + ("i1040sse", "Instructions for Schedule SE", "irs_instructions"), + ("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"), + ("i8949", "Instructions for Form 8949", "irs_instructions"), + ("i4562", "Instructions for Form 4562", "irs_instructions"), + ("i4797", "Instructions for Form 4797", "irs_instructions"), + ("i6251", "Instructions for Form 6251", "irs_instructions"), + ("i8606", "Instructions for Form 8606", "irs_instructions"), + ("i8863", "Instructions for Form 8863", "irs_instructions"), + ("i8889", "Instructions for Form 8889", "irs_instructions"), + ("i8959", "Instructions for Form 8959", "irs_instructions"), + ("i8960", "Instructions for Form 8960", "irs_instructions"), + ("i8995", "Instructions for Form 8995", 
"irs_instructions"), + ("i8995a", "Instructions for Form 8995-A", "irs_instructions"), + ("i5329", "Instructions for Form 5329", "irs_instructions"), + ("i5695", "Instructions for Form 5695", "irs_instructions"), + ("i1116", "Instructions for Form 1116", "irs_instructions"), + ] + return [ + SourceDescriptor( + slug=slug, + title=title, + source_class=source_class, + media_type="application/pdf", + url=build_irs_prior_pdf_url(slug, tax_year), + ) + for slug, title, source_class in entries + ] + + +def _sha256_bytes(payload: bytes) -> str: + return hashlib.sha256(payload).hexdigest() + + +def _http_fetch(url: str) -> bytes: + with urlopen(url) as response: + return response.read() + + +class TaxYearCorpus: + def __init__(self, cache_root: Path | None = None) -> None: + self.cache_root = cache_root or default_cache_root() + + def paths_for_year(self, tax_year: int) -> TaxYearPaths: + year_dir = self.cache_root / "tax-years" / str(tax_year) + return TaxYearPaths( + year_dir=year_dir, + irs_dir=year_dir / "irs", + manifest_path=year_dir / "manifest.json", + ) + + def download_catalog( + self, + tax_year: int, + catalog: list[SourceDescriptor], + *, + fetcher: Callable[[str], bytes] = _http_fetch, + ) -> dict: + paths = self.paths_for_year(tax_year) + paths.irs_dir.mkdir(parents=True, exist_ok=True) + + fetched_at = datetime.now(timezone.utc).isoformat() + sources: list[dict] = [] + for descriptor in catalog: + payload = fetcher(descriptor.url) + destination = paths.irs_dir / f"{descriptor.slug}.pdf" + destination.write_bytes(payload) + sources.append( + { + "slug": descriptor.slug, + "title": descriptor.title, + "sourceClass": descriptor.source_class, + "mediaType": descriptor.media_type, + "url": descriptor.url, + "localPath": str(destination), + "sha256": _sha256_bytes(payload), + "fetchedAt": fetched_at, + "authorityRank": int(authority_rank_for(descriptor.source_class)), + } + ) + + manifest = { + "taxYear": tax_year, + "fetchedAt": fetched_at, + "cacheRoot": 
str(self.cache_root), + "sourceCount": len(sources), + "sources": sources, + "indexes": self.index_manifest(sources), + "primaryLawHooks": [ + { + "sourceClass": "internal_revenue_code", + "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE), + }, + { + "sourceClass": "treasury_regulation", + "authorityRank": int(AuthorityRank.TREASURY_REGULATION), + }, + ], + } + paths.manifest_path.write_text(json.dumps(manifest, indent=2)) + return manifest + + @staticmethod + def index_manifest(sources: list[dict]) -> dict[str, dict[str, list[str]]]: + by_class: dict[str, list[str]] = {} + by_slug: dict[str, list[str]] = {} + for source in sources: + by_class.setdefault(source["sourceClass"], []).append(source["slug"]) + by_slug.setdefault(source["slug"], []).append(source["localPath"]) + return {"bySourceClass": by_class, "bySlug": by_slug} diff --git a/skills/us-cpa/src/us_cpa/tax_years.py b/skills/us-cpa/src/us_cpa/tax_years.py new file mode 100644 index 0000000..d5ba36f --- /dev/null +++ b/skills/us-cpa/src/us_cpa/tax_years.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from typing import Any + + +TAX_YEAR_DATA: dict[int, dict[str, Any]] = { + 2024: { + "standardDeduction": { + "single": 14600.0, + "married_filing_jointly": 29200.0, + "head_of_household": 21900.0, + }, + "ordinaryIncomeBrackets": { + "single": [ + (11600.0, 0.10), + (47150.0, 0.12), + (100525.0, 0.22), + (191950.0, 0.24), + (243725.0, 0.32), + (609350.0, 0.35), + (float("inf"), 0.37), + ], + "married_filing_jointly": [ + (23200.0, 0.10), + (94300.0, 0.12), + (201050.0, 0.22), + (383900.0, 0.24), + (487450.0, 0.32), + (731200.0, 0.35), + (float("inf"), 0.37), + ], + "head_of_household": [ + (16550.0, 0.10), + (63100.0, 0.12), + (100500.0, 0.22), + (191950.0, 0.24), + (243700.0, 0.32), + (609350.0, 0.35), + (float("inf"), 0.37), + ], + }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. 
Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + }, + }, + 2025: { + "standardDeduction": { + "single": 15750.0, + "married_filing_jointly": 31500.0, + "head_of_household": 23625.0, + }, + "ordinaryIncomeBrackets": { + "single": [ + (11925.0, 0.10), + (48475.0, 0.12), + (103350.0, 0.22), + (197300.0, 0.24), + (250525.0, 0.32), + (626350.0, 0.35), + (float("inf"), 0.37), + ], + "married_filing_jointly": [ + (23850.0, 0.10), + (96950.0, 0.12), + (206700.0, 0.22), + (394600.0, 0.24), + (501050.0, 0.32), + (751600.0, 0.35), + (float("inf"), 0.37), + ], + "head_of_household": [ + (17000.0, 0.10), + (64850.0, 0.12), + (103350.0, 0.22), + (197300.0, 0.24), + (250500.0, 0.32), + (626350.0, 0.35), + (float("inf"), 0.37), + ], + }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + }, + }, +} + + +def supported_tax_years() -> list[int]: + return sorted(TAX_YEAR_DATA) + + +def tax_year_rules(tax_year: int) -> dict[str, Any]: + try: + return TAX_YEAR_DATA[tax_year] + except KeyError as exc: + years = ", ".join(str(year) for year in supported_tax_years()) + raise ValueError( + f"Unsupported tax year {tax_year}. Supported tax years: {years}." 
+ ) from exc diff --git a/skills/us-cpa/tests/fixtures/documents/.gitkeep b/skills/us-cpa/tests/fixtures/documents/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/documents/interest-1099.txt b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt new file mode 100644 index 0000000..89d54f7 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt @@ -0,0 +1,3 @@ +Form 1099-INT +Recipient: Jane Doe +Box 1 Interest Income 1750 diff --git a/skills/us-cpa/tests/fixtures/documents/simple-w2.txt b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt new file mode 100644 index 0000000..b3336b1 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt @@ -0,0 +1,4 @@ +Form W-2 Wage and Tax Statement +Employee: Jane Doe +Box 1 Wages, tips, other compensation 50000 +Box 2 Federal income tax withheld 6000 diff --git a/skills/us-cpa/tests/fixtures/facts/.gitkeep b/skills/us-cpa/tests/fixtures/facts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json b/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json new file mode 100644 index 0000000..40fb20c --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/overlay-case-2025.json @@ -0,0 +1,6 @@ +{ + "taxpayer.fullName": "Olivia Overlay", + "filingStatus": "single", + "wages": 42000, + "federalWithholding": 5000 +} diff --git a/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json b/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json new file mode 100644 index 0000000..05b0809 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/review-mismatch-2025.json @@ -0,0 +1,8 @@ +{ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + 
"federalWithholding": 6000, + "expectedIssue": "agi_mismatch" +} diff --git a/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json b/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json new file mode 100644 index 0000000..7c50db4 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/schedule-c-2025.json @@ -0,0 +1,6 @@ +{ + "taxpayer.fullName": "Jamie Owner", + "filingStatus": "single", + "businessIncome": 12000, + "federalWithholding": 0 +} diff --git a/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json b/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json new file mode 100644 index 0000000..2a2c367 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/facts/simple-w2-interest-2025.json @@ -0,0 +1,7 @@ +{ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000 +} diff --git a/skills/us-cpa/tests/fixtures/irs/.gitkeep b/skills/us-cpa/tests/fixtures/irs/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/irs/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/returns/.gitkeep b/skills/us-cpa/tests/fixtures/returns/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/returns/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json new file mode 100644 index 0000000..39fbd78 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json @@ -0,0 +1,16 @@ +{ + "taxYear": 2025, + "filingStatus": "single", + "requiredForms": ["f1040", "f1040sb"], + "income": { + "wages": 50000.0, + "taxableInterest": 1750.0, + "businessIncome": 0.0, + "capitalGainLoss": 0.0, + "rentalIncome": 0.0 + }, + "totals": { + "adjustedGrossIncome": 51750.0, + "taxableIncome": 36000.0 + } +} diff --git 
a/skills/us-cpa/tests/test_cases.py b/skills/us-cpa/tests/test_cases.py new file mode 100644 index 0000000..7ddb47f --- /dev/null +++ b/skills/us-cpa/tests/test_cases.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import Path + +from us_cpa.cases import CaseConflictError, CaseManager + + +class CaseManagerTests(unittest.TestCase): + def test_create_case_builds_expected_layout(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + + manifest = manager.create_case(case_label="Jane Doe", tax_year=2025) + + self.assertEqual(manifest["caseLabel"], "Jane Doe") + self.assertEqual(manifest["taxYear"], 2025) + for name in ( + "input", + "extracted", + "return", + "output", + "reports", + "issues", + "sources", + ): + self.assertTrue((case_dir / name).is_dir()) + self.assertTrue((case_dir / "case-manifest.json").exists()) + + def test_intake_registers_documents_and_user_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + case_dir = root / "2025-jane-doe" + document = root / "w2.txt" + document.write_text("sample w2") + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + + result = manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single", "taxpayer.ssnLast4": "1234"}, + document_paths=[document], + ) + + self.assertEqual(result["status"], "accepted") + self.assertEqual(len(result["registeredDocuments"]), 1) + self.assertTrue((case_dir / "input" / "w2.txt").exists()) + facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) + self.assertEqual(facts["facts"]["filingStatus"]["value"], "single") + + def test_intake_extracts_machine_usable_facts_from_text_documents(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + case_dir = root / "2025-jane-doe" + w2 = root / 
"w2.txt" + w2.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + ) + interest = root / "1099-int.txt" + interest.write_text( + "Form 1099-INT\n" + "Recipient: Jane Doe\n" + "Box 1 Interest Income 1750\n" + ) + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + + result = manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single"}, + document_paths=[w2, interest], + ) + + self.assertEqual(result["status"], "accepted") + facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) + self.assertEqual(facts["facts"]["wages"]["value"], 50000.0) + self.assertEqual(facts["facts"]["federalWithholding"]["value"], 6000.0) + self.assertEqual(facts["facts"]["taxableInterest"]["value"], 1750.0) + self.assertEqual(facts["facts"]["wages"]["sources"][0]["sourceType"], "document_extract") + + def test_conflicting_facts_raise_structured_issue(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single"}, + document_paths=[], + ) + + with self.assertRaises(CaseConflictError) as context: + manager.intake( + tax_year=2025, + user_facts={"filingStatus": "married_filing_jointly"}, + document_paths=[], + ) + + issue = context.exception.issue + self.assertEqual(issue["status"], "needs_resolution") + self.assertEqual(issue["issueType"], "fact_conflict") + self.assertEqual(issue["field"], "filingStatus") + self.assertTrue((case_dir / "issues" / "open-issues.json").exists()) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py new file mode 100644 index 0000000..62bffc1 --- /dev/null +++ 
b/skills/us-cpa/tests/test_cli.py @@ -0,0 +1,392 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + + +SKILL_DIR = Path(__file__).resolve().parents[1] +SRC_DIR = SKILL_DIR / "src" + + +def _pyproject_text() -> str: + return (SKILL_DIR / "pyproject.toml").read_text() + + +class UsCpaCliSmokeTests(unittest.TestCase): + def test_skill_scaffold_files_exist(self) -> None: + self.assertTrue((SKILL_DIR / "SKILL.md").exists()) + self.assertTrue((SKILL_DIR / "pyproject.toml").exists()) + self.assertTrue((SKILL_DIR / "README.md").exists()) + self.assertTrue((SKILL_DIR / "scripts" / "us-cpa").exists()) + self.assertTrue( + (SKILL_DIR.parent.parent / "docs" / "us-cpa.md").exists() + ) + + def test_pyproject_declares_runtime_and_dev_dependencies(self) -> None: + pyproject = _pyproject_text() + self.assertIn('"pypdf>=', pyproject) + self.assertIn('"reportlab>=', pyproject) + self.assertIn("[project.optional-dependencies]", pyproject) + self.assertIn('"pytest>=', pyproject) + + def test_readme_documents_install_and_script_usage(self) -> None: + readme = (SKILL_DIR / "README.md").read_text() + self.assertIn("pip install -e .[dev]", readme) + self.assertIn("scripts/us-cpa", readme) + self.assertIn("python -m unittest", readme) + + def test_docs_explain_openclaw_installation_flow(self) -> None: + readme = (SKILL_DIR / "README.md").read_text() + operator_doc = (SKILL_DIR.parent.parent / "docs" / "us-cpa.md").read_text() + skill_doc = (SKILL_DIR / "SKILL.md").read_text() + + self.assertIn("OpenClaw installation", readme) + self.assertIn("~/.openclaw/workspace/skills/us-cpa", readme) + self.assertIn(".venv/bin/python", readme) + self.assertNotIn("/Users/stefano/", readme) + self.assertIn("OpenClaw installation", operator_doc) + self.assertIn("rsync -a --delete", operator_doc) + self.assertIn("~/", operator_doc) + self.assertNotIn("/Users/stefano/", operator_doc) + 
self.assertIn("~/.openclaw/workspace/skills/us-cpa/scripts/us-cpa", skill_doc) + + def test_wrapper_prefers_local_virtualenv_python(self) -> None: + wrapper = (SKILL_DIR / "scripts" / "us-cpa").read_text() + self.assertIn('.venv/bin/python', wrapper) + self.assertIn('PYTHON_BIN', wrapper) + + def test_fixture_directories_exist(self) -> None: + fixtures_dir = SKILL_DIR / "tests" / "fixtures" + for name in ("irs", "facts", "documents", "returns"): + self.assertTrue((fixtures_dir / name).exists()) + + def run_cli(self, *args: str) -> subprocess.CompletedProcess[str]: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + return subprocess.run( + [sys.executable, "-m", "us_cpa.cli", *args], + text=True, + capture_output=True, + env=env, + ) + + def test_help_lists_all_commands(self) -> None: + result = self.run_cli("--help") + + self.assertEqual(result.returncode, 0, result.stderr) + for command in ( + "question", + "prepare", + "review", + "fetch-year", + "extract-docs", + "render-forms", + "export-efile-ready", + ): + self.assertIn(command, result.stdout) + + def test_question_command_emits_json_by_default(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = temp_dir + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "question", + "--tax-year", + "2025", + "--question", + "What is the standard deduction?", + ], + text=True, + capture_output=True, + env=env, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["command"], "question") + self.assertEqual(payload["format"], "json") + self.assertEqual(payload["question"], "What is the standard deduction?") + self.assertEqual(payload["status"], "answered") + 
self.assertIn("analysis", payload) + + def test_prepare_requires_case_dir(self) -> None: + result = self.run_cli("prepare", "--tax-year", "2025") + + self.assertNotEqual(result.returncode, 0) + self.assertIn("case directory", result.stderr.lower()) + + def test_extract_docs_can_create_case_and_register_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + facts_path = Path(temp_dir) / "facts.json" + facts_path.write_text(json.dumps({"filingStatus": "single"})) + + result = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(facts_path), + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "accepted") + self.assertEqual(payload["factCount"], 1) + self.assertTrue((case_dir / "case-manifest.json").exists()) + + def test_extract_docs_stops_on_conflicts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "2025-jane-doe" + first_facts = Path(temp_dir) / "facts-1.json" + second_facts = Path(temp_dir) / "facts-2.json" + first_facts.write_text(json.dumps({"filingStatus": "single"})) + second_facts.write_text(json.dumps({"filingStatus": "married_filing_jointly"})) + + first = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(first_facts), + ) + self.assertEqual(first.returncode, 0, first.stderr) + + second = self.run_cli( + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--facts-json", + str(second_facts), + ) + self.assertNotEqual(second.returncode, 0) + payload = json.loads(second.stdout) + self.assertEqual(payload["status"], "needs_resolution") + self.assertEqual(payload["issueType"], "fact_conflict") + + def 
test_question_markdown_memo_mode_renders_tax_memo(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = temp_dir + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "question", + "--tax-year", + "2025", + "--format", + "markdown", + "--style", + "memo", + "--question", + "What is the standard deduction?", + ], + text=True, + capture_output=True, + env=env, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertIn("# Tax Memo", result.stdout) + self.assertIn("## Conclusion", result.stdout) + + def test_prepare_command_generates_return_package(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = str(Path(temp_dir) / "cache") + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + + case_dir = Path(temp_dir) / "2025-jane-doe" + facts_path = Path(temp_dir) / "facts.json" + facts_path.write_text( + json.dumps( + { + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + } + ) + ) + subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(facts_path), + ], + text=True, + capture_output=True, + env=env, + check=True, + ) + + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "prepare", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + ], + text=True, + capture_output=True, + env=env, + ) + + 
self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "prepared") + self.assertEqual(payload["summary"]["requiredForms"], ["f1040"]) + self.assertTrue((case_dir / "output" / "artifacts.json").exists()) + + def test_review_command_returns_findings(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = str(Path(temp_dir) / "cache") + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + case_dir = Path(temp_dir) / "2025-jane-doe" + facts_path = Path(temp_dir) / "facts.json" + facts_path.write_text( + json.dumps( + { + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + } + ) + ) + subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(facts_path), + ], + text=True, + capture_output=True, + env=env, + check=True, + ) + subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "prepare", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + ], + text=True, + capture_output=True, + env=env, + check=True, + ) + normalized_path = case_dir / "return" / "normalized-return.json" + normalized = json.loads(normalized_path.read_text()) + normalized["totals"]["adjustedGrossIncome"] = 99999.0 + normalized_path.write_text(json.dumps(normalized, indent=2)) + + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "review", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + ], + text=True, + capture_output=True, + env=env, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + 
self.assertEqual(payload["status"], "reviewed") + self.assertEqual(payload["findingCount"], 2) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_document_extractors.py b/skills/us-cpa/tests/test_document_extractors.py new file mode 100644 index 0000000..54f1000 --- /dev/null +++ b/skills/us-cpa/tests/test_document_extractors.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from us_cpa.document_extractors import extract_document_facts + + +class DocumentExtractorTests(unittest.TestCase): + def test_extracts_common_w2_fields(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "w2.txt" + path.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + "Box 16 State wages, tips, etc. 50000\n" + "Box 17 State income tax 1200\n" + "Box 3 Social security wages 50000\n" + "Box 5 Medicare wages and tips 50000\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["taxpayer.fullName"], "Jane Doe") + self.assertEqual(extracted["wages"], 50000.0) + self.assertEqual(extracted["federalWithholding"], 6000.0) + self.assertEqual(extracted["stateWages"], 50000.0) + self.assertEqual(extracted["stateWithholding"], 1200.0) + self.assertEqual(extracted["socialSecurityWages"], 50000.0) + self.assertEqual(extracted["medicareWages"], 50000.0) + + def test_extracts_common_1099_patterns(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + div_path = Path(temp_dir) / "1099-div.txt" + div_path.write_text("Form 1099-DIV\nRecipient: Jane Doe\nBox 1a Total ordinary dividends 250\n") + ret_path = Path(temp_dir) / "1099-r.txt" + ret_path.write_text("Form 1099-R\nRecipient: Jane Doe\nBox 1 Gross distribution 10000\n") + misc_path = Path(temp_dir) / "1099-misc.txt" + misc_path.write_text("Form 
1099-MISC\nRecipient: Jane Doe\nBox 3 Other income 900\n") + + self.assertEqual(extract_document_facts(div_path)["ordinaryDividends"], 250.0) + self.assertEqual(extract_document_facts(ret_path)["retirementDistribution"], 10000.0) + self.assertEqual(extract_document_facts(misc_path)["otherIncome"], 900.0) + + def test_extracts_prior_year_return_summary_values(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "prior-return.txt" + path.write_text( + "2024 Form 1040 Summary\n" + "Adjusted gross income 72100\n" + "Taxable income 49800\n" + "Refund 2100\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["priorYear.adjustedGrossIncome"], 72100.0) + self.assertEqual(extracted["priorYear.taxableIncome"], 49800.0) + self.assertEqual(extracted["priorYear.refund"], 2100.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_prepare.py b/skills/us-cpa/tests/test_prepare.py new file mode 100644 index 0000000..bba2e91 --- /dev/null +++ b/skills/us-cpa/tests/test_prepare.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from io import BytesIO +from pathlib import Path + +from reportlab.pdfgen import canvas + +from us_cpa.cases import CaseManager +from us_cpa.prepare import EfileExporter, PrepareEngine +from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog + + +class PrepareEngineTests(unittest.TestCase): + def build_case(self, temp_dir: str) -> tuple[CaseManager, TaxYearCorpus]: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + manager.intake( + tax_year=2025, + user_facts={ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + }, + document_paths=[], + ) + + corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache") + + def 
fake_fetch(url: str) -> bytes: + buffer = BytesIO() + pdf = canvas.Canvas(buffer) + pdf.drawString(72, 720, f"Template for {url}") + pdf.save() + return buffer.getvalue() + + corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch) + return manager, corpus + + def test_prepare_creates_normalized_return_and_artifacts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + manager, corpus = self.build_case(temp_dir) + engine = PrepareEngine(corpus=corpus) + + result = engine.prepare_case(manager.case_dir) + + self.assertEqual(result["status"], "prepared") + self.assertEqual(result["summary"]["requiredForms"], ["f1040"]) + self.assertEqual(result["summary"]["reviewRequiredArtifacts"], ["f1040"]) + self.assertTrue((manager.case_dir / "return" / "normalized-return.json").exists()) + self.assertTrue((manager.case_dir / "output" / "artifacts.json").exists()) + normalized = json.loads((manager.case_dir / "return" / "normalized-return.json").read_text()) + self.assertEqual(normalized["totals"]["adjustedGrossIncome"], 50100.0) + self.assertEqual(normalized["totals"]["taxableIncome"], 34350.0) + + def test_exporter_writes_efile_ready_payload(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + manager, corpus = self.build_case(temp_dir) + engine = PrepareEngine(corpus=corpus) + engine.prepare_case(manager.case_dir) + + export = EfileExporter().export_case(manager.case_dir) + + self.assertEqual(export["status"], "draft") + self.assertTrue((manager.case_dir / "output" / "efile-ready.json").exists()) + self.assertEqual(export["returnSummary"]["requiredForms"], ["f1040"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py new file mode 100644 index 0000000..f464488 --- /dev/null +++ b/skills/us-cpa/tests/test_questions.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import 
Path
+
+from us_cpa.questions import QuestionEngine, render_analysis, render_memo
+from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
+
+
+class QuestionEngineTests(unittest.TestCase):
+    def build_engine(self, temp_dir: str) -> QuestionEngine:
+        corpus = TaxYearCorpus(cache_root=Path(temp_dir))
+
+        def fake_fetch(url: str) -> bytes:
+            return f"source for {url}".encode()
+
+        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
+        return QuestionEngine(corpus=corpus)
+
+    def test_standard_deduction_question_returns_structured_analysis(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            engine = self.build_engine(temp_dir)
+
+            analysis = engine.answer(
+                question="What is the standard deduction for single filers?",
+                tax_year=2025,
+                case_facts={"filingStatus": "single"},
+            )
+
+            self.assertEqual(analysis["issue"], "standard_deduction")
+            self.assertEqual(analysis["taxYear"], 2025)
+            self.assertEqual(analysis["conclusion"]["answer"], "$15,750")
+            self.assertEqual(analysis["confidence"], "high")
+            self.assertEqual(analysis["riskLevel"], "low")
+            self.assertTrue(analysis["authorities"])
+            self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions")
+
+    def test_complex_question_flags_primary_law_escalation(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            engine = self.build_engine(temp_dir)
+
+            analysis = engine.answer(
+                question="Does section 469 passive activity loss limitation apply here?",
+                tax_year=2025,
+                case_facts={},
+            )
+
+            self.assertEqual(analysis["confidence"], "low")
+            self.assertEqual(analysis["riskLevel"], "high")
+            self.assertTrue(analysis["primaryLawRequired"])
+            self.assertIn("Internal Revenue Code", analysis["missingFacts"][0])
+            self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in analysis["authorities"]))
+
+    def test_capital_gains_question_returns_schedule_d_guidance(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            engine = self.build_engine(temp_dir)
+
+            analysis = engine.answer(
+                question="Do I need Schedule D for capital gains?",
+                tax_year=2025,
+                case_facts={"capitalGainLoss": 400},
+            )
+
+            self.assertEqual(analysis["issue"], "schedule_d_required")
+            self.assertEqual(analysis["confidence"], "medium")
+            self.assertFalse(analysis["primaryLawRequired"])
+            self.assertTrue(any(item["slug"] == "f1040sd" for item in analysis["authorities"]))
+
+    def test_schedule_e_question_returns_rental_guidance(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            engine = self.build_engine(temp_dir)
+
+            analysis = engine.answer(
+                question="Do I need Schedule E for rental income?",
+                tax_year=2025,
+                case_facts={"rentalIncome": 1200},
+            )
+
+            self.assertEqual(analysis["issue"], "schedule_e_required")
+            self.assertFalse(analysis["primaryLawRequired"])
+            self.assertTrue(any(item["slug"] == "f1040se" for item in analysis["authorities"]))
+
+    def test_renderers_produce_conversation_and_memo(self) -> None:
+        analysis = {
+            "issue": "standard_deduction",
+            "taxYear": 2025,
+            "factsUsed": [{"field": "filingStatus", "value": "single"}],
+            "missingFacts": [],
+            "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
+            "conclusion": {"answer": "$15,750", "summary": "Single filers use a $15,750 standard deduction for tax year 2025."},
+            "confidence": "high",
+            "riskLevel": "low",
+            "followUpQuestions": [],
+            "primaryLawRequired": False,
+        }
+
+        conversation = render_analysis(analysis)
+        memo = render_memo(analysis)
+
+        self.assertIn("$15,750", conversation)
+        self.assertIn("Issue", memo)
+        self.assertIn("Authorities", memo)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/skills/us-cpa/tests/test_renderers.py b/skills/us-cpa/tests/test_renderers.py
new file mode 100644
index 0000000..cc0e506
--- /dev/null
+++ b/skills/us-cpa/tests/test_renderers.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+import json
+import tempfile
+import unittest
+from io import BytesIO
+from pathlib import Path
+
+from reportlab.pdfgen import canvas
+
+from us_cpa.renderers import render_case_forms
+from us_cpa.sources import TaxYearCorpus
+
+
+class RendererTests(unittest.TestCase):
+    def test_render_case_forms_prefers_fillable_pdf_fields_when_available(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir = Path(temp_dir) / "case"
+            (case_dir / "output").mkdir(parents=True)
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+            irs_dir = corpus.paths_for_year(2025).irs_dir
+            irs_dir.mkdir(parents=True, exist_ok=True)
+
+            buffer = BytesIO()
+            pdf = canvas.Canvas(buffer)
+            form = pdf.acroForm
+            pdf.drawString(72, 720, "Name")
+            form.textfield(name="taxpayer_full_name", x=120, y=710, width=200, height=20)
+            pdf.drawString(72, 680, "Wages")
+            form.textfield(name="wages", x=120, y=670, width=200, height=20)
+            pdf.save()
+            (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue())
+
+            normalized = {
+                "taxYear": 2025,
+                "requiredForms": ["f1040"],
+                "taxpayer": {"fullName": "Jane Doe"},
+                "filingStatus": "single",
+                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0},
+                "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0},
+                "adjustments": {"hsaContribution": 0.0},
+                "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0},
+                "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0},
+                "payments": {"federalWithholding": 6000.0},
+                "business": {"qualifiedBusinessIncome": 0.0},
+                "basis": {"traditionalIraBasis": 0.0},
+                "depreciation": {"depreciationExpense": 0.0},
+                "assetSales": {"section1231GainLoss": 0.0},
+                "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
+            }
+
+            artifacts = render_case_forms(case_dir, corpus, normalized)
+
+            self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "field_fill")
+            self.assertFalse(artifacts["artifacts"][0]["reviewRequired"])
+
+    def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir = Path(temp_dir) / "case"
+            (case_dir / "output").mkdir(parents=True)
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+            irs_dir = corpus.paths_for_year(2025).irs_dir
+            irs_dir.mkdir(parents=True, exist_ok=True)
+
+            buffer = BytesIO()
+            pdf = canvas.Canvas(buffer)
+            pdf.drawString(72, 720, "Template")
+            pdf.save()
+            (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue())
+
+            normalized = {
+                "taxYear": 2025,
+                "requiredForms": ["f1040"],
+                "taxpayer": {"fullName": "Jane Doe"},
+                "filingStatus": "single",
+                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0},
+                "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0},
+                "adjustments": {"hsaContribution": 0.0},
+                "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0},
+                "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0},
+                "payments": {"federalWithholding": 6000.0},
+                "business": {"qualifiedBusinessIncome": 0.0},
+                "basis": {"traditionalIraBasis": 0.0},
+                "depreciation": {"depreciationExpense": 0.0},
+                "assetSales": {"section1231GainLoss": 0.0},
+                "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
+            }
+
+            artifacts = render_case_forms(case_dir, corpus, normalized)
+
+            self.assertEqual(artifacts["artifactCount"], 1)
+            self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "overlay")
+            self.assertTrue(artifacts["artifacts"][0]["reviewRequired"])
+            self.assertTrue((case_dir / "output" / "forms" / "f1040.pdf").exists())
+            manifest = json.loads((case_dir / "output" / "artifacts.json").read_text())
+            self.assertEqual(manifest["artifacts"][0]["formCode"], "f1040")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/skills/us-cpa/tests/test_returns.py b/skills/us-cpa/tests/test_returns.py
new file mode 100644
index 0000000..f905877
--- /dev/null
+++ b/skills/us-cpa/tests/test_returns.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import unittest
+
+from us_cpa.returns import normalize_case_facts, resolve_required_forms, tax_on_ordinary_income
+
+
+class ReturnModelTests(unittest.TestCase):
+    def test_normalize_case_facts_computes_basic_1040_totals(self) -> None:
+        normalized = normalize_case_facts(
+            {
+                "taxpayer.fullName": "Jane Doe",
+                "filingStatus": "single",
+                "wages": 50000,
+                "taxableInterest": 100,
+                "federalWithholding": 6000,
+            },
+            2025,
+        )
+
+        self.assertEqual(normalized["requiredForms"], ["f1040"])
+        self.assertEqual(normalized["deductions"]["standardDeduction"], 15750.0)
+        self.assertEqual(normalized["totals"]["adjustedGrossIncome"], 50100.0)
+        self.assertEqual(normalized["totals"]["taxableIncome"], 34350.0)
+        self.assertEqual(normalized["totals"]["refund"], 2116.5)
+
+    def test_resolve_required_forms_adds_business_and_interest_forms(self) -> None:
+        normalized = normalize_case_facts(
+            {
+                "filingStatus": "single",
+                "wages": 0,
+                "taxableInterest": 2000,
+                "businessIncome": 12000,
+            },
+            2025,
+        )
+
+        self.assertEqual(
+            resolve_required_forms(normalized),
+            ["f1040", "f1040sb", "f1040sc", "f1040sse", "f1040s1", "f8995"],
+        )
+
+    def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None:
+        self.assertEqual(tax_on_ordinary_income(34350.0, "single", 2025), 3883.5)
+
+    def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None:
+        self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0)
+
+    def test_normalize_case_facts_rejects_unsupported_tax_year(self) -> None:
+        with self.assertRaisesRegex(ValueError, "Unsupported tax year"):
+            normalize_case_facts({"filingStatus": "single"}, 2023)
+
+    def test_normalize_case_facts_preserves_provenance_and_expands_form_resolution(self) -> None:
+        normalized = normalize_case_facts(
+            {
+                "taxpayer.fullName": "Jane Doe",
+                "spouse.fullName": "John Doe",
+                "dependents": [{"fullName": "Kid Doe", "ssnLast4": "4321"}],
+                "filingStatus": "married_filing_jointly",
+                "wages": 50000,
+                "taxableInterest": 2001,
+                "capitalGainLoss": 400,
+                "rentalIncome": 1200,
+                "itemizedDeductions": 40000,
+                "hsaContribution": 1000,
+                "educationCredit": 500,
+                "foreignTaxCredit": 250,
+                "qualifiedBusinessIncome": 12000,
+                "traditionalIraBasis": 6000,
+                "additionalMedicareTax": 100,
+                "netInvestmentIncomeTax": 200,
+                "alternativeMinimumTax": 300,
+                "additionalTaxPenalty": 50,
+                "energyCredit": 600,
+                "_factMetadata": {
+                    "wages": {"sources": [{"sourceType": "document_extract", "documentName": "w2.txt"}]},
+                },
+            },
+            2025,
+        )
+
+        self.assertEqual(normalized["spouse"]["fullName"], "John Doe")
+        self.assertEqual(normalized["dependents"][0]["fullName"], "Kid Doe")
+        self.assertEqual(normalized["provenance"]["income.wages"]["sources"][0]["documentName"], "w2.txt")
+        self.assertIn("f1040sa", normalized["requiredForms"])
+        self.assertIn("f1040sd", normalized["requiredForms"])
+        self.assertIn("f8949", normalized["requiredForms"])
+        self.assertIn("f1040se", normalized["requiredForms"])
+        self.assertIn("f8889", normalized["requiredForms"])
+        self.assertIn("f8863", normalized["requiredForms"])
+        self.assertIn("f1116", normalized["requiredForms"])
+        self.assertIn("f8995", normalized["requiredForms"])
+        self.assertIn("f8606", normalized["requiredForms"])
+        self.assertIn("f8959", normalized["requiredForms"])
+        self.assertIn("f8960", normalized["requiredForms"])
+        self.assertIn("f6251", normalized["requiredForms"])
+        self.assertIn("f5329", normalized["requiredForms"])
+        self.assertIn("f5695", normalized["requiredForms"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/skills/us-cpa/tests/test_review.py b/skills/us-cpa/tests/test_review.py
new file mode 100644
index 0000000..eb0cbb1
--- /dev/null
+++ b/skills/us-cpa/tests/test_review.py
@@ -0,0 +1,128 @@
+from __future__ import annotations
+
+import json
+import tempfile
+import unittest
+from io import BytesIO
+from pathlib import Path
+
+from reportlab.pdfgen import canvas
+
+from us_cpa.cases import CaseManager
+from us_cpa.prepare import PrepareEngine
+from us_cpa.review import ReviewEngine, render_review_memo, render_review_summary
+from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
+
+
+class ReviewEngineTests(unittest.TestCase):
+    def build_prepared_case(self, temp_dir: str) -> tuple[Path, TaxYearCorpus]:
+        case_dir = Path(temp_dir) / "2025-jane-doe"
+        manager = CaseManager(case_dir)
+        manager.create_case(case_label="Jane Doe", tax_year=2025)
+        manager.intake(
+            tax_year=2025,
+            user_facts={
+                "taxpayer.fullName": "Jane Doe",
+                "filingStatus": "single",
+                "wages": 50000,
+                "taxableInterest": 100,
+                "federalWithholding": 6000,
+            },
+            document_paths=[],
+        )
+        corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+
+        def fake_fetch(url: str) -> bytes:
+            buffer = BytesIO()
+            pdf = canvas.Canvas(buffer)
+            pdf.drawString(72, 720, f"Template for {url}")
+            pdf.save()
+            return buffer.getvalue()
+
+        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
+        PrepareEngine(corpus=corpus).prepare_case(case_dir)
+        return case_dir, corpus
+
+    def test_review_detects_mismatched_return_and_missing_artifacts(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir, corpus = self.build_prepared_case(temp_dir)
+            normalized_path = case_dir / "return" / "normalized-return.json"
+            normalized = json.loads(normalized_path.read_text())
+            normalized["totals"]["adjustedGrossIncome"] = 99999.0
+            normalized_path.write_text(json.dumps(normalized, indent=2))
+
+            artifacts_path = case_dir / "output" / "artifacts.json"
+            artifacts = json.loads(artifacts_path.read_text())
+            artifacts["artifacts"] = []
+            artifacts["artifactCount"] = 0
+            artifacts_path.write_text(json.dumps(artifacts, indent=2))
+
+            review = ReviewEngine(corpus=corpus).review_case(case_dir)
+
+            self.assertEqual(review["status"], "reviewed")
+            self.assertEqual(review["findings"][0]["severity"], "high")
+            self.assertIn("adjusted gross income", review["findings"][0]["title"].lower())
+            self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"]))
+
+    def test_review_detects_reporting_omissions_from_source_facts(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir, corpus = self.build_prepared_case(temp_dir)
+            normalized_path = case_dir / "return" / "normalized-return.json"
+            normalized = json.loads(normalized_path.read_text())
+            normalized["income"]["taxableInterest"] = 0.0
+            normalized["totals"]["adjustedGrossIncome"] = 50000.0
+            normalized_path.write_text(json.dumps(normalized, indent=2))
+
+            facts_path = case_dir / "extracted" / "facts.json"
+            facts_payload = json.loads(facts_path.read_text())
+            facts_payload["facts"]["taxableInterest"] = {
+                "value": 1750.0,
+                "sources": [{"sourceType": "document_extract", "sourceName": "1099-int.txt"}],
+            }
+            facts_path.write_text(json.dumps(facts_payload, indent=2))
+
+            review = ReviewEngine(corpus=corpus).review_case(case_dir)
+
+            self.assertTrue(
+                any("likely omitted taxable interest" in item["title"].lower() for item in review["findings"])
+            )
+
+    def test_review_flags_high_complexity_positions_for_specialist_follow_up(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir, corpus = self.build_prepared_case(temp_dir)
+            normalized_path = case_dir / "return" / "normalized-return.json"
+            normalized = json.loads(normalized_path.read_text())
+            normalized["requiredForms"].append("f6251")
+            normalized["taxes"]["alternativeMinimumTax"] = 300.0
+            normalized_path.write_text(json.dumps(normalized, indent=2))
+
+            review = ReviewEngine(corpus=corpus).review_case(case_dir)
+
+            self.assertTrue(
+                any("high-complexity tax position" in item["title"].lower() for item in review["findings"])
+            )
+
+    def test_review_renderers_produce_summary_and_memo(self) -> None:
+        review = {
+            "status": "reviewed",
+            "findings": [
+                {
+                    "severity": "high",
+                    "title": "Adjusted gross income mismatch",
+                    "explanation": "Stored AGI does not match recomputed AGI.",
+                    "suggestedAction": "Update Form 1040 line 11.",
+                    "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
+                }
+            ],
+        }
+
+        summary = render_review_summary(review)
+        memo = render_review_memo(review)
+
+        self.assertIn("Adjusted gross income mismatch", summary)
+        self.assertIn("# Review Memo", memo)
+        self.assertIn("Suggested correction", memo)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/skills/us-cpa/tests/test_sources.py b/skills/us-cpa/tests/test_sources.py
new file mode 100644
index 0000000..a180ff9
--- /dev/null
+++ b/skills/us-cpa/tests/test_sources.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import json
+import tempfile
+import unittest
+from pathlib import Path
+
+from us_cpa.sources import (
+    AuthorityRank,
+    SourceDescriptor,
+    TaxYearCorpus,
+    authority_rank_for,
+    bootstrap_irs_catalog,
+    build_irs_prior_pdf_url,
+    build_primary_law_authorities,
+)
+
+
+class SourceCatalogTests(unittest.TestCase):
+    def test_build_irs_prior_pdf_url_uses_expected_pattern(self) -> None:
+        self.assertEqual(
+            build_irs_prior_pdf_url("f1040", 2025),
+            "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf",
+        )
+        self.assertEqual(
+            build_irs_prior_pdf_url("i1040gi", 2025),
+            "https://www.irs.gov/pub/irs-prior/i1040gi--2025.pdf",
+        )
+
+    def test_authority_ranking_orders_irs_before_primary_law(self) -> None:
+        self.assertEqual(authority_rank_for("irs_form"), AuthorityRank.IRS_FORM)
+        self.assertEqual(
+            authority_rank_for("treasury_regulation"),
+            AuthorityRank.TREASURY_REGULATION,
+        )
+        self.assertLess(
+            authority_rank_for("irs_form"), authority_rank_for("internal_revenue_code")
+        )
+
+    def test_bootstrap_catalog_builds_tax_year_specific_urls(self) -> None:
+        catalog = bootstrap_irs_catalog(2025)
+
+        self.assertGreaterEqual(len(catalog), 5)
+        self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf")
+        self.assertTrue(any(item.slug == "i1040gi" for item in catalog))
+        self.assertTrue(any(item.slug == "f1040sse" for item in catalog))
+
+    def test_primary_law_authorities_build_official_urls(self) -> None:
+        authorities = build_primary_law_authorities(
+            "Does section 469 apply and what does Treas. Reg. 1.469-1 say?"
+        )
+
+        self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in authorities))
+        self.assertTrue(any(item["sourceClass"] == "treasury_regulation" for item in authorities))
+        self.assertTrue(any("uscode.house.gov" in item["url"] for item in authorities))
+        self.assertTrue(any("ecfr.gov" in item["url"] for item in authorities))
+
+
+class TaxYearCorpusTests(unittest.TestCase):
+    def test_tax_year_layout_is_deterministic(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir))
+            paths = corpus.paths_for_year(2025)
+
+            self.assertEqual(paths.year_dir, Path(temp_dir) / "tax-years" / "2025")
+            self.assertEqual(paths.irs_dir, paths.year_dir / "irs")
+            self.assertEqual(paths.manifest_path, paths.year_dir / "manifest.json")
+
+    def test_download_catalog_writes_files_and_manifest(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir))
+            catalog = [
+                SourceDescriptor(
+                    slug="f1040",
+                    title="Form 1040",
+                    source_class="irs_form",
+                    media_type="application/pdf",
+                    url=build_irs_prior_pdf_url("f1040", 2025),
+                ),
+                SourceDescriptor(
+                    slug="i1040gi",
+                    title="Instructions for Form 1040",
+                    source_class="irs_instructions",
+                    media_type="application/pdf",
+                    url=build_irs_prior_pdf_url("i1040gi", 2025),
+                ),
+            ]
+
+            def fake_fetch(url: str) -> bytes:
+                return f"downloaded:{url}".encode()
+
+            manifest = corpus.download_catalog(2025, catalog, fetcher=fake_fetch)
+
+            self.assertEqual(manifest["taxYear"], 2025)
+            self.assertEqual(manifest["sourceCount"], 2)
+            self.assertTrue(corpus.paths_for_year(2025).manifest_path.exists())
+
+            first = manifest["sources"][0]
+            self.assertEqual(first["slug"], "f1040")
+            self.assertEqual(first["authorityRank"], int(AuthorityRank.IRS_FORM))
+            self.assertTrue(Path(first["localPath"]).exists())
+
+            saved = json.loads(corpus.paths_for_year(2025).manifest_path.read_text())
+            self.assertEqual(saved["sourceCount"], 2)
+            self.assertEqual(saved["sources"][1]["slug"], "i1040gi")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/skills/us-cpa/tests/test_tax_years.py b/skills/us-cpa/tests/test_tax_years.py
new file mode 100644
index 0000000..5bc6621
--- /dev/null
+++ b/skills/us-cpa/tests/test_tax_years.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+import unittest
+
+from us_cpa.tax_years import supported_tax_years, tax_year_rules
+
+
+class TaxYearRuleTests(unittest.TestCase):
+    def test_supported_years_are_listed(self) -> None:
+        self.assertEqual(supported_tax_years(), [2024, 2025])
+
+    def test_tax_year_rules_include_source_citations(self) -> None:
+        rules = tax_year_rules(2025)
+
+        self.assertIn("sourceCitations", rules)
+        self.assertIn("standardDeduction", rules["sourceCitations"])
+        self.assertIn("ordinaryIncomeBrackets", rules["sourceCitations"])
+
+    def test_unsupported_tax_year_raises_clear_error(self) -> None:
+        with self.assertRaisesRegex(ValueError, "Unsupported tax year 2023"):
+            tax_year_rules(2023)
+
+
+if __name__ == "__main__":
+    unittest.main()