feat: add us-cpa preparation workflow
This commit is contained in:
@@ -4,14 +4,18 @@
|
||||
|
||||
## Current Milestone
|
||||
|
||||
Milestone 2 now adds the first tax-year corpus layer:
|
||||
Current implementation now includes:
|
||||
|
||||
- deterministic cache layout under `~/.cache/us-cpa` by default
|
||||
- `fetch-year` download flow for the bootstrap IRS corpus
|
||||
- source manifest with URL, hash, authority rank, and local path traceability
|
||||
- authority ranking hooks for IRS materials and future primary-law escalation
|
||||
- case-folder intake and conflict-stop handling
|
||||
- question workflow with conversation and memo output
|
||||
- prepare workflow for the current supported 1040 subset
|
||||
- e-file-ready draft export payload generation
|
||||
|
||||
Tax logic, case workflows, rendering, and review logic are still pending.
|
||||
Review logic and broader form coverage are still pending.
|
||||
|
||||
## CLI Surface
|
||||
|
||||
@@ -102,7 +106,10 @@ Behavior:
|
||||
- `question` supports:
|
||||
- `--style conversation`
|
||||
- `--style memo`
|
||||
- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"`
|
||||
- `question` emits answered analysis output
|
||||
- `prepare` emits a prepared return package summary
|
||||
- `export-efile-ready` emits a draft e-file-ready payload
|
||||
- `review` is still scaffolded
|
||||
- `fetch-year` emits a downloaded manifest location and source count
|
||||
|
||||
## Question Engine
|
||||
@@ -138,6 +145,41 @@ Current review rule:
|
||||
|
||||
- overlay-rendered artifacts are marked `reviewRequired: true`
|
||||
|
||||
## Preparation Workflow
|
||||
|
||||
Current `prepare` implementation:
|
||||
|
||||
- loads case facts from `extracted/facts.json`
|
||||
- normalizes them into the current supported 2025 federal return model
|
||||
- computes the current supported 1040 subset
|
||||
- resolves required forms for the current supported subset
|
||||
- writes:
|
||||
- `return/normalized-return.json`
|
||||
- `output/artifacts.json`
|
||||
- `reports/prepare-summary.json`
|
||||
|
||||
Current supported calculation inputs:
|
||||
|
||||
- `filingStatus`
|
||||
- `wages`
|
||||
- `taxableInterest`
|
||||
- `businessIncome`
|
||||
- `federalWithholding`
|
||||
|
||||
## E-file-ready Export
|
||||
|
||||
`export-efile-ready` writes:
|
||||
|
||||
- `output/efile-ready.json`
|
||||
|
||||
Current export behavior:
|
||||
|
||||
- draft-only
|
||||
- includes required forms
|
||||
- includes refund or balance due summary
|
||||
- includes attachment manifest
|
||||
- includes unresolved issues
|
||||
|
||||
## Scope Rules
|
||||
|
||||
- U.S. federal individual returns only in v1
|
||||
|
||||
@@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown
|
||||
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
|
||||
```
|
||||
@@ -55,5 +56,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
|
||||
- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
|
||||
- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
|
||||
- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
|
||||
- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory
|
||||
- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything
|
||||
|
||||
For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
|
||||
|
||||
@@ -7,6 +7,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.cases import CaseConflictError, CaseManager
|
||||
from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms
|
||||
from us_cpa.questions import QuestionEngine, render_analysis, render_memo
|
||||
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
|
||||
|
||||
@@ -167,7 +168,40 @@ def main(argv: list[str] | None = None) -> int:
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}:
|
||||
if args.command == "prepare":
|
||||
case_dir = _require_case_dir(args)
|
||||
payload = {
|
||||
"command": args.command,
|
||||
"format": args.format,
|
||||
**PrepareEngine().prepare_case(case_dir),
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command == "render-forms":
|
||||
case_dir = _require_case_dir(args)
|
||||
manager = CaseManager(case_dir)
|
||||
manifest = manager.load_manifest()
|
||||
normalized = json.loads((case_dir / "return" / "normalized-return.json").read_text())
|
||||
artifacts = render_case_forms(case_dir, TaxYearCorpus(), normalized)
|
||||
payload = {
|
||||
"command": "render-forms",
|
||||
"format": args.format,
|
||||
"taxYear": manifest["taxYear"],
|
||||
"status": "rendered",
|
||||
**artifacts,
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command == "export-efile-ready":
|
||||
case_dir = _require_case_dir(args)
|
||||
payload = {
|
||||
"command": "export-efile-ready",
|
||||
"format": args.format,
|
||||
**EfileExporter().export_case(case_dir),
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command == "review":
|
||||
case_dir = _require_case_dir(args)
|
||||
payload = {
|
||||
"command": args.command,
|
||||
|
||||
75
skills/us-cpa/src/us_cpa/prepare.py
Normal file
75
skills/us-cpa/src/us_cpa/prepare.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.cases import CaseManager
|
||||
from us_cpa.renderers import render_case_forms
|
||||
from us_cpa.returns import normalize_case_facts
|
||||
from us_cpa.sources import TaxYearCorpus
|
||||
|
||||
|
||||
def _load_case_facts(case_dir: Path) -> dict[str, Any]:
    """Load a case's extracted facts as a flat ``{name: value}`` mapping.

    Reads ``extracted/facts.json`` under *case_dir*. The file must contain a
    top-level ``"facts"`` object whose entries each carry a ``"value"`` key;
    only that value is kept, any other per-fact fields are dropped.

    Args:
        case_dir: Case directory (a ``Path`` or a path string).

    Returns:
        Mapping of fact name to its bare value.

    Raises:
        FileNotFoundError: If ``extracted/facts.json`` does not exist.
        KeyError: If ``"facts"`` or a per-fact ``"value"`` key is missing.
    """
    facts_path = Path(case_dir) / "extracted" / "facts.json"
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    payload = json.loads(facts_path.read_text(encoding="utf-8"))
    return {name: entry["value"] for name, entry in payload["facts"].items()}
|
||||
|
||||
|
||||
|
||||
class PrepareEngine:
    """Prepare a return package for a case from its extracted facts."""

    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
        """Store the corpus that is handed to the form renderer.

        Args:
            corpus: Corpus to use. A default ``TaxYearCorpus()`` is built
                when this is omitted.
        """
        # Compare against None explicitly so a caller-supplied corpus is never
        # swapped for a default one just because it evaluates as falsy.
        self.corpus = corpus if corpus is not None else TaxYearCorpus()

    def prepare_case(self, case_dir: Path) -> dict[str, Any]:
        """Normalize the case facts, render forms, and write a summary report.

        Writes ``return/normalized-return.json`` and
        ``reports/prepare-summary.json`` under the case directory, creating
        those two directories when they are missing.

        Args:
            case_dir: Directory of the case to prepare.

        Returns:
            The result payload that is also written to
            ``reports/prepare-summary.json``: status, case directory, tax
            year, output paths, and a ``summary`` of required forms,
            review-required artifacts, refund, balance due, and the number of
            unresolved issues.
        """
        manager = CaseManager(case_dir)
        manifest = manager.load_manifest()
        tax_year = manifest["taxYear"]
        root = manager.case_dir

        facts = _load_case_facts(root)
        normalized = normalize_case_facts(facts, tax_year)

        normalized_path = root / "return" / "normalized-return.json"
        # Do not assume the case skeleton already contains this directory.
        normalized_path.parent.mkdir(parents=True, exist_ok=True)
        normalized_path.write_text(json.dumps(normalized, indent=2), encoding="utf-8")

        # NOTE(review): render_case_forms is presumably what writes
        # output/artifacts.json - confirm against us_cpa.renderers.
        artifacts = render_case_forms(root, self.corpus, normalized)
        unresolved_issues = json.loads(
            manager.issues_path.read_text(encoding="utf-8")
        )["issues"]

        totals = normalized["totals"]
        summary = {
            "requiredForms": normalized["requiredForms"],
            "reviewRequiredArtifacts": [
                artifact["formCode"]
                for artifact in artifacts["artifacts"]
                if artifact["reviewRequired"]
            ],
            "refund": totals["refund"],
            "balanceDue": totals["balanceDue"],
            "unresolvedIssueCount": len(unresolved_issues),
        }
        result = {
            "status": "prepared",
            "caseDir": str(root),
            "taxYear": tax_year,
            "normalizedReturnPath": str(normalized_path),
            "artifactManifestPath": str(root / "output" / "artifacts.json"),
            "summary": summary,
        }

        report_path = root / "reports" / "prepare-summary.json"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
        return result
|
||||
|
||||
|
||||
class EfileExporter:
    """Write the e-file-ready export payload for an already prepared case."""

    @staticmethod
    def _read_json(path: Path) -> Any:
        """Parse *path* as JSON, decoding it as UTF-8 regardless of locale."""
        return json.loads(path.read_text(encoding="utf-8"))

    def export_case(self, case_dir: Path) -> dict[str, Any]:
        """Assemble the export payload and write ``output/efile-ready.json``.

        Reads ``return/normalized-return.json``, ``output/artifacts.json`` and
        ``issues/open-issues.json`` from the case directory.

        Args:
            case_dir: Case directory; ``~`` is expanded and the path resolved.

        Returns:
            The payload that was written. ``status`` is ``"draft"`` while any
            open issue exists or any artifact is flagged ``reviewRequired``,
            otherwise ``"ready"``.

        Raises:
            FileNotFoundError: If one of the three input files is missing.
        """
        case_dir = Path(case_dir).expanduser().resolve()
        normalized = self._read_json(case_dir / "return" / "normalized-return.json")
        artifacts = self._read_json(case_dir / "output" / "artifacts.json")["artifacts"]
        issues = self._read_json(case_dir / "issues" / "open-issues.json")["issues"]

        totals = normalized["totals"]
        # Anything still needing human attention keeps the export in draft.
        needs_attention = bool(issues) or any(
            artifact["reviewRequired"] for artifact in artifacts
        )
        payload = {
            "status": "draft" if needs_attention else "ready",
            "taxYear": normalized["taxYear"],
            "returnSummary": {
                "requiredForms": normalized["requiredForms"],
                "refund": totals["refund"],
                "balanceDue": totals["balanceDue"],
            },
            "attachments": artifacts,
            "unresolvedIssues": issues,
        }

        output_path = case_dir / "output" / "efile-ready.json"
        output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        return payload
|
||||
@@ -14,9 +14,9 @@ TOPIC_RULES = [
|
||||
"keywords": ("standard deduction",),
|
||||
"authority_slugs": ("i1040gi",),
|
||||
"answer_by_status": {
|
||||
"single": "$15,000",
|
||||
"married_filing_jointly": "$30,000",
|
||||
"head_of_household": "$22,500",
|
||||
"single": "$15,750",
|
||||
"married_filing_jointly": "$31,500",
|
||||
"head_of_household": "$23,625",
|
||||
},
|
||||
"summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.",
|
||||
"confidence": "high",
|
||||
|
||||
@@ -191,6 +191,76 @@ class UsCpaCliSmokeTests(unittest.TestCase):
|
||||
self.assertIn("# Tax Memo", result.stdout)
|
||||
self.assertIn("## Conclusion", result.stdout)
|
||||
|
||||
def test_prepare_command_generates_return_package(self) -> None:
    """Run fetch-year, extract-docs, then prepare through the CLI and check the output."""
    with tempfile.TemporaryDirectory() as temp_dir:
        workspace = Path(temp_dir)

        env = os.environ.copy()
        env["PYTHONPATH"] = str(SRC_DIR)
        env["US_CPA_CACHE_DIR"] = str(workspace / "cache")

        cli = [sys.executable, "-m", "us_cpa.cli"]
        run_options = {"text": True, "capture_output": True, "env": env}

        # Populate the isolated cache before any case work.
        subprocess.run(
            [*cli, "fetch-year", "--tax-year", "2025"],
            check=True,
            **run_options,
        )

        case_dir = workspace / "2025-jane-doe"
        facts_path = workspace / "facts.json"
        facts = {
            "taxpayer.fullName": "Jane Doe",
            "filingStatus": "single",
            "wages": 50000,
            "taxableInterest": 100,
            "federalWithholding": 6000,
        }
        facts_path.write_text(json.dumps(facts))

        # Create the case and store the facts that prepare will read.
        subprocess.run(
            [
                *cli,
                "extract-docs",
                "--tax-year",
                "2025",
                "--case-dir",
                str(case_dir),
                "--create-case",
                "--case-label",
                "Jane Doe",
                "--facts-json",
                str(facts_path),
            ],
            check=True,
            **run_options,
        )

        # No check=True here: the return code is asserted below so that
        # stderr shows up in the failure message.
        result = subprocess.run(
            [*cli, "prepare", "--tax-year", "2025", "--case-dir", str(case_dir)],
            **run_options,
        )

        self.assertEqual(result.returncode, 0, result.stderr)
        payload = json.loads(result.stdout)
        self.assertEqual(payload["status"], "prepared")
        self.assertEqual(payload["summary"]["requiredForms"], ["f1040"])
        self.assertTrue((case_dir / "output" / "artifacts.json").exists())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
75
skills/us-cpa/tests/test_prepare.py
Normal file
75
skills/us-cpa/tests/test_prepare.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from us_cpa.cases import CaseManager
|
||||
from us_cpa.prepare import EfileExporter, PrepareEngine
|
||||
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
|
||||
|
||||
|
||||
class PrepareEngineTests(unittest.TestCase):
    """Exercise PrepareEngine and EfileExporter against a temporary case."""

    def build_case(self, temp_dir: str) -> tuple[CaseManager, TaxYearCorpus]:
        """Create a 2025 case with user facts and a corpus filled with stub PDFs."""
        workspace = Path(temp_dir)

        manager = CaseManager(workspace / "2025-jane-doe")
        manager.create_case(case_label="Jane Doe", tax_year=2025)
        user_facts = {
            "taxpayer.fullName": "Jane Doe",
            "filingStatus": "single",
            "wages": 50000,
            "taxableInterest": 100,
            "federalWithholding": 6000,
        }
        manager.intake(tax_year=2025, user_facts=user_facts, document_paths=[])

        corpus = TaxYearCorpus(cache_root=workspace / "cache")

        def stub_pdf(url: str) -> bytes:
            # Stand-in for the fetcher: return an in-memory one-line PDF.
            stream = BytesIO()
            page = canvas.Canvas(stream)
            page.drawString(72, 720, f"Template for {url}")
            page.save()
            return stream.getvalue()

        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=stub_pdf)
        return manager, corpus

    def test_prepare_creates_normalized_return_and_artifacts(self) -> None:
        """prepare_case reports f1040 and writes the normalized return and artifacts."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager, corpus = self.build_case(temp_dir)

            result = PrepareEngine(corpus=corpus).prepare_case(manager.case_dir)
            summary = result["summary"]
            normalized_path = manager.case_dir / "return" / "normalized-return.json"

            self.assertEqual(result["status"], "prepared")
            self.assertEqual(summary["requiredForms"], ["f1040"])
            self.assertEqual(summary["reviewRequiredArtifacts"], ["f1040"])
            self.assertTrue(normalized_path.exists())
            self.assertTrue((manager.case_dir / "output" / "artifacts.json").exists())

            totals = json.loads(normalized_path.read_text())["totals"]
            self.assertEqual(totals["adjustedGrossIncome"], 50100.0)
            self.assertEqual(totals["taxableIncome"], 34350.0)

    def test_exporter_writes_efile_ready_payload(self) -> None:
        """export_case on a prepared case yields a draft payload and writes it to disk."""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager, corpus = self.build_case(temp_dir)
            PrepareEngine(corpus=corpus).prepare_case(manager.case_dir)

            export = EfileExporter().export_case(manager.case_dir)

            self.assertEqual(export["status"], "draft")
            self.assertTrue((manager.case_dir / "output" / "efile-ready.json").exists())
            self.assertEqual(export["returnSummary"]["requiredForms"], ["f1040"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -31,7 +31,7 @@ class QuestionEngineTests(unittest.TestCase):
|
||||
|
||||
self.assertEqual(analysis["issue"], "standard_deduction")
|
||||
self.assertEqual(analysis["taxYear"], 2025)
|
||||
self.assertEqual(analysis["conclusion"]["answer"], "$15,000")
|
||||
self.assertEqual(analysis["conclusion"]["answer"], "$15,750")
|
||||
self.assertEqual(analysis["confidence"], "high")
|
||||
self.assertTrue(analysis["authorities"])
|
||||
self.assertEqual(analysis["authorities"][0]["sourceClass"], "irs_instructions")
|
||||
@@ -57,7 +57,7 @@ class QuestionEngineTests(unittest.TestCase):
|
||||
"factsUsed": [{"field": "filingStatus", "value": "single"}],
|
||||
"missingFacts": [],
|
||||
"authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
|
||||
"conclusion": {"answer": "$15,000", "summary": "Single filers use a $15,000 standard deduction for tax year 2025."},
|
||||
"conclusion": {"answer": "$15,750", "summary": "Single filers use a $15,750 standard deduction for tax year 2025."},
|
||||
"confidence": "high",
|
||||
"followUpQuestions": [],
|
||||
"primaryLawRequired": False,
|
||||
@@ -66,7 +66,7 @@ class QuestionEngineTests(unittest.TestCase):
|
||||
conversation = render_analysis(analysis)
|
||||
memo = render_memo(analysis)
|
||||
|
||||
self.assertIn("$15,000", conversation)
|
||||
self.assertIn("$15,750", conversation)
|
||||
self.assertIn("Issue", memo)
|
||||
self.assertIn("Authorities", memo)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user