feat: add us-cpa preparation workflow

2026-03-15 01:28:22 -05:00
parent decf3132d5
commit 82cf3d9010
8 changed files with 309 additions and 10 deletions
@@ -4,14 +4,18 @@

 ## Current Milestone

-Milestone 2 now adds the first tax-year corpus layer:
+The current implementation includes:

 - deterministic cache layout under `~/.cache/us-cpa` by default
 - `fetch-year` download flow for the bootstrap IRS corpus
 - source manifest with URL, hash, authority rank, and local path traceability
 - authority ranking hooks for IRS materials and future primary-law escalation
+- case-folder intake and conflict-stop handling
+- question workflow with conversation and memo output
+- prepare workflow for the current supported 1040 subset
+- e-file-ready draft export payload generation

-Tax logic, case workflows, rendering, and review logic are still pending.
+Review logic and broader form coverage are still pending.

 ## CLI Surface

@@ -102,7 +106,10 @@ Behavior:
 - `question` supports:
  - `--style conversation`
  - `--style memo`
- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"`
+- `question` emits answered analysis output
+- `prepare` emits a prepared return package summary
+- `export-efile-ready` emits a draft e-file-ready payload
+- `review` is still scaffolded
 - `fetch-year` emits a downloaded manifest location and source count

 ## Question Engine
@@ -138,6 +145,41 @@ Current review rule:

 - overlay-rendered artifacts are marked `reviewRequired: true`

+## Preparation Workflow
+
+Current `prepare` implementation:
+
+- loads case facts from `extracted/facts.json`
+- normalizes them into the current supported 2025 federal return model
+- computes the current supported 1040 subset
+- resolves required forms for the current supported subset
+- writes:
+  - `return/normalized-return.json`
+  - `output/artifacts.json`
+  - `reports/prepare-summary.json`
+
+Current supported calculation inputs:
+
+- `filingStatus`
+- `wages`
+- `taxableInterest`
+- `businessIncome`
+- `federalWithholding`
+
+## E-file-ready Export
+
+`export-efile-ready` writes:
+
+- `output/efile-ready.json`
+
+Current export behavior:
+
+- draft-only
+- includes required forms
+- includes refund or balance due summary
+- includes attachment manifest
+- includes unresolved issues
+
 ## Scope Rules

 - U.S. federal individual returns only in v1
@@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing
 skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025
 skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown
 skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
+skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
 skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
 skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
 ```
@@ -55,5 +56,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
 - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
 - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
 - rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
+- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory
+- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything

 For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
@@ -7,6 +7,7 @@ from pathlib import Path
 from typing import Any

 from us_cpa.cases import CaseConflictError, CaseManager
+from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms
 from us_cpa.questions import QuestionEngine, render_analysis, render_memo
 from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog

@@ -167,7 +168,40 @@ def main(argv: list[str] | None = None) -> int:
        }
        return _emit(payload, args.format)

-    if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}:
+    if args.command == "prepare":
+        case_dir = _require_case_dir(args)
+        payload = {
+            "command": args.command,
+            "format": args.format,
+            **PrepareEngine().prepare_case(case_dir),
+        }
+        return _emit(payload, args.format)
+
+    if args.command == "render-forms":
+        case_dir = _require_case_dir(args)
+        manager = CaseManager(case_dir)
+        manifest = manager.load_manifest()
+        normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text())
+        artifacts = render_case_forms(case_dir, TaxYearCorpus(), normalized)
+        payload = {
+            "command": "render-forms",
+            "format": args.format,
+            "taxYear": manifest["taxYear"],
+            "status": "rendered",
+            **artifacts,
+        }
+        return _emit(payload, args.format)
+
+    if args.command == "export-efile-ready":
+        case_dir = _require_case_dir(args)
+        payload = {
+            "command": "export-efile-ready",
+            "format": args.format,
+            **EfileExporter().export_case(case_dir),
+        }
+        return _emit(payload, args.format)
+
+    if args.command == "review":
        case_dir = _require_case_dir(args)
        payload = {
            "command": args.command,
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from us_cpa.cases import CaseManager
+from us_cpa.renderers import render_case_forms
+from us_cpa.returns import normalize_case_facts
+from us_cpa.sources import TaxYearCorpus
+
+
+def _load_case_facts(case_dir: Path) -> dict[str, Any]:
+    """Load a case's extracted facts as a flat ``{name: value}`` mapping.
+
+    Reads ``extracted/facts.json`` under ``case_dir`` and, for every entry of
+    its top-level ``"facts"`` object, keeps only that entry's ``"value"``
+    field; any other per-fact fields are dropped.
+
+    Errors from ``Path.read_text`` / ``json.loads`` (missing or malformed
+    file) and ``KeyError`` for a missing ``"facts"`` or ``"value"`` key are
+    not caught here and propagate to the caller.
+    """
+    facts_path = case_dir / "extracted" / "facts.json"
+    payload = json.loads(facts_path.read_text())
+    return {key: value["value"] for key, value in payload["facts"].items()}
+
+
+
+class PrepareEngine:
+    """Turn a case's extracted facts into a normalized return plus artifacts."""
+
+    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
+        """Store the corpus to render against.
+
+        A default ``TaxYearCorpus()`` is constructed when ``corpus`` is not
+        supplied (or is falsy).
+        """
+        self.corpus = corpus or TaxYearCorpus()
+
+    def prepare_case(self, case_dir: Path) -> dict[str, Any]:
+        """Prepare the return for the case at ``case_dir`` and summarize it.
+
+        Steps, in order:
+
+        1. load the case manifest and the extracted facts;
+        2. normalize the facts for the manifest's ``taxYear`` and write them
+           to ``return/normalized-return.json``;
+        3. render the case forms with ``render_case_forms``;
+        4. read the open issues from ``manager.issues_path``;
+        5. write the result to ``reports/prepare-summary.json``.
+
+        Returns the same dict that is written to the summary report: status,
+        case directory, tax year, the two output paths, and a ``summary`` of
+        required forms, review-required form codes, refund, balance due and
+        the unresolved issue count.
+        """
+        manager = CaseManager(case_dir)
+        manifest = manager.load_manifest()
+        # Paths are built from manager.case_dir rather than the raw argument;
+        # presumably CaseManager normalizes the path -- TODO confirm.
+        facts = _load_case_facts(manager.case_dir)
+        normalized = normalize_case_facts(facts, manifest["taxYear"])
+        # NOTE(review): assumes the "return" directory already exists;
+        # nothing in this method creates it.
+        normalized_path = manager.case_dir / "return" / "normalized-return.json"
+        normalized_path.write_text(json.dumps(normalized, indent=2))
+
+        artifacts = render_case_forms(manager.case_dir, self.corpus, normalized)
+        unresolved_issues = json.loads(manager.issues_path.read_text())["issues"]
+
+        summary = {
+            "requiredForms": normalized["requiredForms"],
+            # Form codes of rendered artifacts flagged for human review.
+            "reviewRequiredArtifacts": [
+                artifact["formCode"] for artifact in artifacts["artifacts"] if artifact["reviewRequired"]
+            ],
+            "refund": normalized["totals"]["refund"],
+            "balanceDue": normalized["totals"]["balanceDue"],
+            "unresolvedIssueCount": len(unresolved_issues),
+        }
+        result = {
+            "status": "prepared",
+            "caseDir": str(manager.case_dir),
+            "taxYear": manifest["taxYear"],
+            "normalizedReturnPath": str(normalized_path),
+            # This method only reports the artifact manifest path; the file is
+            # presumably written by render_case_forms -- TODO confirm.
+            "artifactManifestPath": str(manager.case_dir / "output" / "artifacts.json"),
+            "summary": summary,
+        }
+        # NOTE(review): assumes the "reports" directory already exists.
+        (manager.case_dir / "reports" / "prepare-summary.json").write_text(json.dumps(result, indent=2))
+        return result
+
+
+class EfileExporter:
+    """Build the e-file-ready payload from files already present in a case."""
+
+    def export_case(self, case_dir: Path) -> dict[str, Any]:
+        """Write ``output/efile-ready.json`` for ``case_dir`` and return it.
+
+        Reads ``return/normalized-return.json``, ``output/artifacts.json`` and
+        ``issues/open-issues.json``; a missing or malformed file raises from
+        ``Path.read_text`` / ``json.loads`` and is not handled here.
+
+        The payload's ``status`` is ``"draft"`` when there is at least one
+        open issue or at least one artifact with a truthy ``reviewRequired``,
+        and ``"ready"`` otherwise. Nothing is transmitted; the payload is only
+        written to disk and returned.
+        """
+        case_dir = Path(case_dir).expanduser().resolve()
+        normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text())
+        artifacts = json.loads((case_dir / "output" / "artifacts.json").read_text())
+        # NOTE(review): the issues file location is hard-coded here, whereas
+        # PrepareEngine reads it through CaseManager.issues_path -- confirm
+        # that both point at the same file.
+        issues = json.loads((case_dir / "issues" / "open-issues.json").read_text())["issues"]
+        payload = {
+            # NOTE(review): a "ready" status is possible here although the
+            # docs describe the export as draft-only -- confirm intent.
+            "status": "draft" if issues or any(a["reviewRequired"] for a in artifacts["artifacts"]) else "ready",
+            "taxYear": normalized["taxYear"],
+            "returnSummary": {
+                "requiredForms": normalized["requiredForms"],
+                "refund": normalized["totals"]["refund"],
+                "balanceDue": normalized["totals"]["balanceDue"],
+            },
+            # The artifact entries are passed through as the attachment list.
+            "attachments": artifacts["artifacts"],
+            "unresolvedIssues": issues,
+        }
+        output_path = case_dir / "output" / "efile-ready.json"
+        output_path.write_text(json.dumps(payload, indent=2))
+        return payload
@@ -14,9 +14,9 @@ TOPIC_RULES = [
        "keywords": ("standard deduction",),
        "authority_slugs": ("i1040gi",),
        "answer_by_status": {
-            "single": "$15,000",
-            "married_filing_jointly": "$30,000",
-            "head_of_household": "$22,500",
+            "single": "$15,750",
+            "married_filing_jointly": "$31,500",
+            "head_of_household": "$23,625",
        },
        "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.",
        "confidence": "high",
@@ -191,6 +191,76 @@ class UsCpaCliSmokeTests(unittest.TestCase):
            self.assertIn("# Tax Memo", result.stdout)
            self.assertIn("## Conclusion", result.stdout)

+    def test_prepare_command_generates_return_package(self) -> None:
+        """Run fetch-year, extract-docs and prepare through the CLI end to end.
+
+        Each step is a separate ``python -m us_cpa.cli`` subprocess sharing
+        one temporary directory for both the cache and the case folder.
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            env = os.environ.copy()
+            env["PYTHONPATH"] = str(SRC_DIR)
+            # Keep the corpus cache inside the temp dir so the run is isolated.
+            env["US_CPA_CACHE_DIR"] = str(Path(temp_dir) / "cache")
+            # Step 1: populate the 2025 corpus cache.
+            # NOTE(review): this goes through the real fetch-year command, so
+            # it presumably needs network access -- confirm.
+            subprocess.run(
+                [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"],
+                text=True,
+                capture_output=True,
+                env=env,
+                check=True,
+            )
+
+            case_dir = Path(temp_dir) / "2025-jane-doe"
+            facts_path = Path(temp_dir) / "facts.json"
+            facts_path.write_text(
+                json.dumps(
+                    {
+                        "taxpayer.fullName": "Jane Doe",
+                        "filingStatus": "single",
+                        "wages": 50000,
+                        "taxableInterest": 100,
+                        "federalWithholding": 6000,
+                    }
+                )
+            )
+            # Step 2: create the case and store the facts via extract-docs.
+            subprocess.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "us_cpa.cli",
+                    "extract-docs",
+                    "--tax-year",
+                    "2025",
+                    "--case-dir",
+                    str(case_dir),
+                    "--create-case",
+                    "--case-label",
+                    "Jane Doe",
+                    "--facts-json",
+                    str(facts_path),
+                ],
+                text=True,
+                capture_output=True,
+                env=env,
+                check=True,
+            )
+
+            # Step 3: the command under test. No check=True here, so a failure
+            # is reported by the returncode assertion together with stderr.
+            result = subprocess.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "us_cpa.cli",
+                    "prepare",
+                    "--tax-year",
+                    "2025",
+                    "--case-dir",
+                    str(case_dir),
+                ],
+                text=True,
+                capture_output=True,
+                env=env,
+            )
+
+            self.assertEqual(result.returncode, 0, result.stderr)
+            payload = json.loads(result.stdout)
+            self.assertEqual(payload["status"], "prepared")
+            self.assertEqual(payload["summary"]["requiredForms"], ["f1040"])
+            self.assertTrue((case_dir / "output" / "artifacts.json").exists())
+

 if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import json
+import tempfile
+import unittest
+from io import BytesIO
+from pathlib import Path
+
+from reportlab.pdfgen import canvas
+
+from us_cpa.cases import CaseManager
+from us_cpa.prepare import EfileExporter, PrepareEngine
+from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
+
+
+class PrepareEngineTests(unittest.TestCase):
+    """Unit tests for PrepareEngine and EfileExporter against a stubbed corpus."""
+
+    def build_case(self, temp_dir: str) -> tuple[CaseManager, TaxYearCorpus]:
+        """Create a 2025 case with fixed facts and a corpus of generated PDFs.
+
+        Returns the ``CaseManager`` for the new case and a ``TaxYearCorpus``
+        whose cache lives under ``temp_dir``.
+        """
+        case_dir = Path(temp_dir) / "2025-jane-doe"
+        manager = CaseManager(case_dir)
+        manager.create_case(case_label="Jane Doe", tax_year=2025)
+        manager.intake(
+            tax_year=2025,
+            user_facts={
+                "taxpayer.fullName": "Jane Doe",
+                "filingStatus": "single",
+                "wages": 50000,
+                "taxableInterest": 100,
+                "federalWithholding": 6000,
+            },
+            document_paths=[],
+        )
+
+        corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+
+        def fake_fetch(url: str) -> bytes:
+            # Stand-in for a real download: returns a small in-memory PDF
+            # whose only content is a line naming the requested URL.
+            buffer = BytesIO()
+            pdf = canvas.Canvas(buffer)
+            pdf.drawString(72, 720, f"Template for {url}")
+            pdf.save()
+            return buffer.getvalue()
+
+        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
+        return manager, corpus
+
+    def test_prepare_creates_normalized_return_and_artifacts(self) -> None:
+        """prepare_case reports f1040 and writes the return and artifact files."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            manager, corpus = self.build_case(temp_dir)
+            engine = PrepareEngine(corpus=corpus)
+
+            result = engine.prepare_case(manager.case_dir)
+
+            self.assertEqual(result["status"], "prepared")
+            self.assertEqual(result["summary"]["requiredForms"], ["f1040"])
+            self.assertEqual(result["summary"]["reviewRequiredArtifacts"], ["f1040"])
+            self.assertTrue((manager.case_dir / "return" / "normalized-return.json").exists())
+            self.assertTrue((manager.case_dir / "output" / "artifacts.json").exists())
+            normalized = json.loads((manager.case_dir / "return" / "normalized-return.json").read_text())
+            # 50,000 wages + 100 taxable interest.
+            self.assertEqual(normalized["totals"]["adjustedGrossIncome"], 50100.0)
+            # 50,100 - 15,750; presumably the 2025 single standard deduction
+            # applied by the returns module -- confirm.
+            self.assertEqual(normalized["totals"]["taxableIncome"], 34350.0)
+
+    def test_exporter_writes_efile_ready_payload(self) -> None:
+        """export_case after prepare_case yields a draft payload on disk."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            manager, corpus = self.build_case(temp_dir)
+            engine = PrepareEngine(corpus=corpus)
+            engine.prepare_case(manager.case_dir)
+
+            export = EfileExporter().export_case(manager.case_dir)
+
+            # "draft" is expected because the exporter downgrades the status
+            # whenever an artifact is marked reviewRequired or an issue is open.
+            self.assertEqual(export["status"], "draft")
+            self.assertTrue((manager.case_dir / "output" / "efile-ready.json").exists())
+            self.assertEqual(export["returnSummary"]["requiredForms"], ["f1040"])
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -31,7 +31,7 @@ class QuestionEngineTests(unittest.TestCase):

            self.assertEqual(analysis["issue"], "standard_deduction")
            self.assertEqual(analysis["taxYear"], 2025)
-            self.assertEqual(analysis["conclusion"]["answer"], "$15,000")
+            self.assertEqual(analysis["conclusion"]["answer"], "$15,750")
            self.assertEqual(analysis["confidence"], "high")
            self.assertTrue(analysis["authorities"])
            self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions")
@@ -57,7 +57,7 @@ class QuestionEngineTests(unittest.TestCase):
            "factsUsed": [{"field": "filingStatus", "value": "single"}],
            "missingFacts": [],
            "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
-            "conclusion": {"answer": "$15,000", "summary": "Single filers use a $15,000 standard deduction for tax year 2025."},
+            "conclusion": {"answer": "$15,750", "summary": "Single filers use a $15,750 standard deduction for tax year 2025."},
            "confidence": "high",
            "followUpQuestions": [],
            "primaryLawRequired": False,
@@ -66,7 +66,7 @@ class QuestionEngineTests(unittest.TestCase):
        conversation = render_analysis(analysis)
        memo = render_memo(analysis)

-        self.assertIn("$15,000", conversation)
+        self.assertIn("$15,750", conversation)
        self.assertIn("Issue", memo)
        self.assertIn("Authorities", memo)