From 82cf3d9010d1069374357d868567af6450fcc631 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 01:28:22 -0500 Subject: [PATCH] feat: add us-cpa preparation workflow --- docs/us-cpa.md | 48 +++++++++++++++-- skills/us-cpa/SKILL.md | 3 ++ skills/us-cpa/src/us_cpa/cli.py | 36 ++++++++++++- skills/us-cpa/src/us_cpa/prepare.py | 75 +++++++++++++++++++++++++++ skills/us-cpa/src/us_cpa/questions.py | 6 +-- skills/us-cpa/tests/test_cli.py | 70 +++++++++++++++++++++++++ skills/us-cpa/tests/test_prepare.py | 75 +++++++++++++++++++++++++++ skills/us-cpa/tests/test_questions.py | 6 +-- 8 files changed, 309 insertions(+), 10 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/prepare.py create mode 100644 skills/us-cpa/tests/test_prepare.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index df8f9e1..0a5b7a1 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -4,14 +4,18 @@ ## Current Milestone -Milestone 2 now adds the first tax-year corpus layer: +Current implementation now includes: - deterministic cache layout under `~/.cache/us-cpa` by default - `fetch-year` download flow for the bootstrap IRS corpus - source manifest with URL, hash, authority rank, and local path traceability - authority ranking hooks for IRS materials and future primary-law escalation +- case-folder intake and conflict-stop handling +- question workflow with conversation and memo output +- prepare workflow for the current supported 1040 subset +- e-file-ready draft export payload generation -Tax logic, case workflows, rendering, and review logic are still pending. +Review logic and broader form coverage are still pending. ## CLI Surface @@ -102,7 +106,10 @@ Behavior: - `question` supports: - `--style conversation` - `--style memo` -- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` +- `question` emits answered analysis output +- `prepare` emits a prepared return package summary +- `export-efile-ready` emits a draft e-file-ready payload +- `review` is still scaffolded - `fetch-year` emits a downloaded manifest location and source count ## Question Engine @@ -138,6 +145,41 @@ Current review rule: - overlay-rendered artifacts are marked `reviewRequired: true` +## Preparation Workflow + +Current `prepare` implementation: + +- loads case facts from `extracted/facts.json` +- normalizes them into the current supported 2025 federal return model +- computes the current supported 1040 subset +- resolves required forms for the current supported subset +- writes: + - `return/normalized-return.json` + - `output/artifacts.json` + - `reports/prepare-summary.json` + +Current supported calculation inputs: + +- `filingStatus` +- `wages` +- `taxableInterest` +- `businessIncome` +- `federalWithholding` + +## E-file-ready Export + +`export-efile-ready` writes: + +- `output/efile-ready.json` + +Current export behavior: + +- draft-only +- includes required forms +- includes refund or balance due summary +- includes attachment manifest +- includes unresolved issues + ## Scope Rules - U.S. federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 8c39ebe..8b34301 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe +skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json ``` @@ -55,5 +56,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict - rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review +- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory +- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 1249c51..ee1a471 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any from us_cpa.cases import CaseConflictError, CaseManager +from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms from us_cpa.questions import QuestionEngine, render_analysis, render_memo from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog @@ -167,7 +168,40 @@ def main(argv: list[str] | None = None) -> int: } return _emit(payload, args.format) - if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}: + if args.command == "prepare": + case_dir = _require_case_dir(args) + payload = { + "command": args.command, + "format": args.format, + **PrepareEngine().prepare_case(case_dir), + } + return _emit(payload, args.format) + + if args.command == "render-forms": + case_dir = _require_case_dir(args) + manager = CaseManager(case_dir) + manifest = manager.load_manifest() + normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text()) + artifacts = render_case_forms(case_dir, TaxYearCorpus(), normalized) + payload = { + "command": "render-forms", + "format": args.format, + "taxYear": manifest["taxYear"], + "status": "rendered", + **artifacts, + } + return _emit(payload, args.format) + + if args.command == "export-efile-ready": + case_dir = _require_case_dir(args) + payload = { + "command": "export-efile-ready", + "format": args.format, + **EfileExporter().export_case(case_dir), + } + return _emit(payload, args.format) + + if args.command == "review": case_dir = _require_case_dir(args) payload = { "command": args.command, diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py new file mode 100644 index 0000000..72ef8cd --- /dev/null +++ b/skills/us-cpa/src/us_cpa/prepare.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from us_cpa.cases import CaseManager +from us_cpa.renderers import render_case_forms +from us_cpa.returns import normalize_case_facts +from us_cpa.sources import TaxYearCorpus + + +def _load_case_facts(case_dir: Path) -> dict[str, Any]: + facts_path = case_dir / "extracted" / "facts.json" + payload = json.loads(facts_path.read_text()) + return {key: value["value"] for key, value in payload["facts"].items()} + + + +class PrepareEngine: + def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None: + self.corpus = corpus or TaxYearCorpus() + + def prepare_case(self, case_dir: Path) -> dict[str, Any]: + manager = CaseManager(case_dir) + manifest = manager.load_manifest() + facts = _load_case_facts(manager.case_dir) + normalized = normalize_case_facts(facts, manifest["taxYear"]) + normalized_path = manager.case_dir / "return" / "normalized-return.json" + normalized_path.write_text(json.dumps(normalized, indent=2)) + + artifacts = render_case_forms(manager.case_dir, self.corpus, normalized) + unresolved_issues = json.loads(manager.issues_path.read_text())["issues"] + + summary = { + "requiredForms": normalized["requiredForms"], + "reviewRequiredArtifacts": [ + artifact["formCode"] for artifact in artifacts["artifacts"] if artifact["reviewRequired"] + ], + "refund": normalized["totals"]["refund"], + "balanceDue": normalized["totals"]["balanceDue"], + "unresolvedIssueCount": len(unresolved_issues), + } + result = { + "status": "prepared", + "caseDir": str(manager.case_dir), + "taxYear": manifest["taxYear"], + "normalizedReturnPath": str(normalized_path), + "artifactManifestPath": str(manager.case_dir / "output" / "artifacts.json"), + "summary": summary, + } + (manager.case_dir / "reports" / "prepare-summary.json").write_text(json.dumps(result, indent=2)) + return result + + +class EfileExporter: + def export_case(self, case_dir: Path) -> dict[str, Any]: + case_dir = Path(case_dir).expanduser().resolve() + normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text()) + artifacts = json.loads((case_dir / "output" / "artifacts.json").read_text()) + issues = json.loads((case_dir / "issues" / "open-issues.json").read_text())["issues"] + payload = { + "status": "draft" if issues or any(a["reviewRequired"] for a in artifacts["artifacts"]) else "ready", + "taxYear": normalized["taxYear"], + "returnSummary": { + "requiredForms": normalized["requiredForms"], + "refund": normalized["totals"]["refund"], + "balanceDue": normalized["totals"]["balanceDue"], + }, + "attachments": artifacts["artifacts"], + "unresolvedIssues": issues, + } + output_path = case_dir / "output" / "efile-ready.json" + output_path.write_text(json.dumps(payload, indent=2)) + return payload diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py index be4b103..3c65a43 100644 --- a/skills/us-cpa/src/us_cpa/questions.py +++ b/skills/us-cpa/src/us_cpa/questions.py @@ -14,9 +14,9 @@ TOPIC_RULES = [ "keywords": ("standard deduction",), "authority_slugs": ("i1040gi",), "answer_by_status": { - "single": "$15,000", - "married_filing_jointly": "$30,000", - "head_of_household": "$22,500", + "single": "$15,750", + "married_filing_jointly": "$31,500", + "head_of_household": "$23,625", }, "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.", "confidence": "high", diff --git a/skills/us-cpa/tests/test_cli.py b/skills/us-cpa/tests/test_cli.py index 3d21a17..32b458b 100644 --- a/skills/us-cpa/tests/test_cli.py +++ b/skills/us-cpa/tests/test_cli.py @@ -191,6 +191,76 @@ class UsCpaCliSmokeTests(unittest.TestCase): self.assertIn("# Tax Memo", result.stdout) self.assertIn("## Conclusion", result.stdout) + def test_prepare_command_generates_return_package(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + env = os.environ.copy() + env["PYTHONPATH"] = str(SRC_DIR) + env["US_CPA_CACHE_DIR"] = str(Path(temp_dir) / "cache") + subprocess.run( + [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"], + text=True, + capture_output=True, + env=env, + check=True, + ) + + case_dir = Path(temp_dir) / "2025-jane-doe" + facts_path = Path(temp_dir) / "facts.json" + facts_path.write_text( + json.dumps( + { + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + } + ) + ) + subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "extract-docs", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + "--create-case", + "--case-label", + "Jane Doe", + "--facts-json", + str(facts_path), + ], + text=True, + capture_output=True, + env=env, + check=True, + ) + + result = subprocess.run( + [ + sys.executable, + "-m", + "us_cpa.cli", + "prepare", + "--tax-year", + "2025", + "--case-dir", + str(case_dir), + ], + text=True, + capture_output=True, + env=env, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + payload = json.loads(result.stdout) + self.assertEqual(payload["status"], "prepared") + self.assertEqual(payload["summary"]["requiredForms"], ["f1040"]) + self.assertTrue((case_dir / "output" / "artifacts.json").exists()) + if __name__ == "__main__": unittest.main() diff --git a/skills/us-cpa/tests/test_prepare.py b/skills/us-cpa/tests/test_prepare.py new file mode 100644 index 0000000..bba2e91 --- /dev/null +++ b/skills/us-cpa/tests/test_prepare.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +import tempfile +import unittest +from io import BytesIO +from pathlib import Path + +from reportlab.pdfgen import canvas + +from us_cpa.cases import CaseManager +from us_cpa.prepare import EfileExporter, PrepareEngine +from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog + + +class PrepareEngineTests(unittest.TestCase): + def build_case(self, temp_dir: str) -> tuple[CaseManager, TaxYearCorpus]: + case_dir = Path(temp_dir) / "2025-jane-doe" + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + manager.intake( + tax_year=2025, + user_facts={ + "taxpayer.fullName": "Jane Doe", + "filingStatus": "single", + "wages": 50000, + "taxableInterest": 100, + "federalWithholding": 6000, + }, + document_paths=[], + ) + + corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache") + + def fake_fetch(url: str) -> bytes: + buffer = BytesIO() + pdf = canvas.Canvas(buffer) + pdf.drawString(72, 720, f"Template for {url}") + pdf.save() + return buffer.getvalue() + + corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch) + return manager, corpus + + def test_prepare_creates_normalized_return_and_artifacts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + manager, corpus = self.build_case(temp_dir) + engine = PrepareEngine(corpus=corpus) + + result = engine.prepare_case(manager.case_dir) + + self.assertEqual(result["status"], "prepared") + self.assertEqual(result["summary"]["requiredForms"], ["f1040"]) + self.assertEqual(result["summary"]["reviewRequiredArtifacts"], ["f1040"]) + self.assertTrue((manager.case_dir / "return" / "normalized-return.json").exists()) + self.assertTrue((manager.case_dir / "output" / "artifacts.json").exists()) + normalized = json.loads((manager.case_dir / "return" / "normalized-return.json").read_text()) + self.assertEqual(normalized["totals"]["adjustedGrossIncome"], 50100.0) + self.assertEqual(normalized["totals"]["taxableIncome"], 34350.0) + + def test_exporter_writes_efile_ready_payload(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + manager, corpus = self.build_case(temp_dir) + engine = PrepareEngine(corpus=corpus) + engine.prepare_case(manager.case_dir) + + export = EfileExporter().export_case(manager.case_dir) + + self.assertEqual(export["status"], "draft") + self.assertTrue((manager.case_dir / "output" / "efile-ready.json").exists()) + self.assertEqual(export["returnSummary"]["requiredForms"], ["f1040"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py index e392af2..b2fceef 100644 --- a/skills/us-cpa/tests/test_questions.py +++ b/skills/us-cpa/tests/test_questions.py @@ -31,7 +31,7 @@ class QuestionEngineTests(unittest.TestCase): self.assertEqual(analysis["issue"], "standard_deduction") self.assertEqual(analysis["taxYear"], 2025) - self.assertEqual(analysis["conclusion"]["answer"], "$15,000") + self.assertEqual(analysis["conclusion"]["answer"], "$15,750") self.assertEqual(analysis["confidence"], "high") self.assertTrue(analysis["authorities"]) self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions") @@ -57,7 +57,7 @@ class QuestionEngineTests(unittest.TestCase): "factsUsed": [{"field": "filingStatus", "value": "single"}], "missingFacts": [], "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}], - "conclusion": {"answer": "$15,000", "summary": "Single filers use a $15,000 standard deduction for tax year 2025."}, + "conclusion": {"answer": "$15,750", "summary": "Single filers use a $15,750 standard deduction for tax year 2025."}, "confidence": "high", "followUpQuestions": [], "primaryLawRequired": False, @@ -66,7 +66,7 @@ class QuestionEngineTests(unittest.TestCase): conversation = render_analysis(analysis) memo = render_memo(analysis) - self.assertIn("$15,000", conversation) + self.assertIn("$15,750", conversation) self.assertIn("Issue", memo) self.assertIn("Authorities", memo)