feat: add us-cpa review workflow

This commit is contained in:
Stefano Fiorini
2026-03-15 01:31:43 -05:00
parent 82cf3d9010
commit 10a9d40f1d
6 changed files with 321 additions and 5 deletions

View File

@@ -109,7 +109,7 @@ Behavior:
- `question` emits answered analysis output
- `prepare` emits a prepared return package summary
- `export-efile-ready` emits a draft e-file-ready payload
- `review` is still scaffolded
- `review` emits a findings-first review result
- `fetch-year` emits a downloaded manifest location and source count
## Question Engine
@@ -180,6 +180,21 @@ Current export behavior:
- includes attachment manifest
- includes unresolved issues
## Review Workflow
Current `review` implementation:
- recomputes the return from current case facts
- compares stored normalized return values to recomputed values
- checks whether required rendered artifacts are present
- flags overlay-rendered artifacts as requiring human review
- sorts findings by severity
Current render modes:
- `--style conversation`
- `--style memo`
## Scope Rules
- U.S. federal individual returns only in v1

View File

@@ -36,6 +36,7 @@ skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --style memo --format markdown
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
```
@@ -58,5 +59,6 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory
- `export-efile-ready` writes a draft transmission-ready payload without transmitting anything
- `review` recomputes the return from case facts, checks artifacts, and returns findings-first output in conversation or memo style
For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.

View File

@@ -9,6 +9,7 @@ from typing import Any
from us_cpa.cases import CaseConflictError, CaseManager
from us_cpa.prepare import EfileExporter, PrepareEngine, render_case_forms
from us_cpa.questions import QuestionEngine, render_analysis, render_memo
from us_cpa.review import ReviewEngine, render_review_memo, render_review_summary
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
COMMANDS = (
@@ -73,6 +74,7 @@ def build_parser() -> argparse.ArgumentParser:
review = subparsers.add_parser("review", help="Review a return case.")
_add_common_arguments(review)
review.add_argument("--style", choices=("conversation", "memo"), default="conversation")
fetch_year = subparsers.add_parser(
"fetch-year", help="Fetch tax-year forms and instructions."
@@ -203,13 +205,21 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "review":
case_dir = _require_case_dir(args)
review_payload = ReviewEngine().review_case(case_dir)
payload = {
"command": args.command,
"command": "review",
"format": args.format,
"taxYear": args.tax_year,
"caseDir": str(case_dir),
"status": "not_implemented",
"style": args.style,
**review_payload,
}
payload["rendered"] = (
render_review_memo(review_payload)
if args.style == "memo"
else render_review_summary(review_payload)
)
if args.format == "markdown":
print(payload["rendered"])
return 0
return _emit(payload, args.format)
if args.command == "fetch-year":

View File

@@ -0,0 +1,111 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from us_cpa.returns import normalize_case_facts
from us_cpa.sources import TaxYearCorpus
def _severity_rank(severity: str) -> int:
return {"high": 0, "medium": 1, "low": 2}[severity]
class ReviewEngine:
    """Recompute a prepared return from case facts and report discrepancies.

    ``review_case`` reads the case manifest, the stored normalized return,
    the extracted facts, and the rendered-artifact manifest from the case
    directory, recomputes the return with ``normalize_case_facts``, and
    produces a findings-first review payload. The payload is also persisted
    to ``reports/review-report.json`` inside the case directory.
    """

    def __init__(self, *, corpus: TaxYearCorpus | None = None) -> None:
        # The corpus is accepted for parity with the prepare/export engines
        # (callers may inject a shared cache); review itself only reads the
        # case directory.
        self.corpus = corpus or TaxYearCorpus()

    def review_case(self, case_dir: Path) -> dict[str, Any]:
        """Review the prepared case at *case_dir* and return the review payload.

        Raises ``FileNotFoundError``/``KeyError`` if the directory is missing
        the files that ``prepare`` writes (manifest, normalized return,
        extracted facts, artifact manifest).
        """
        case_dir = Path(case_dir).expanduser().resolve()
        manifest = json.loads(
            (case_dir / "case-manifest.json").read_text(encoding="utf-8")
        )
        stored_return = json.loads(
            (case_dir / "return" / "normalized-return.json").read_text(encoding="utf-8")
        )
        facts_payload = json.loads(
            (case_dir / "extracted" / "facts.json").read_text(encoding="utf-8")
        )
        # Facts are stored as {"facts": {name: {"value": ...}}}; flatten to
        # the plain name -> value mapping the normalizer expects.
        facts = {key: value["value"] for key, value in facts_payload["facts"].items()}
        recomputed = normalize_case_facts(facts, manifest["taxYear"])
        artifacts_payload = json.loads(
            (case_dir / "output" / "artifacts.json").read_text(encoding="utf-8")
        )

        findings: list[dict[str, Any]] = []
        # High severity: the stored return disagrees with a fresh recomputation.
        if (
            stored_return["totals"]["adjustedGrossIncome"]
            != recomputed["totals"]["adjustedGrossIncome"]
        ):
            findings.append(
                {
                    "severity": "high",
                    "title": "Adjusted gross income mismatch",
                    "explanation": "Stored adjusted gross income does not match the recomputed return from case facts.",
                    "suggestedAction": f"Update AGI to {recomputed['totals']['adjustedGrossIncome']:.2f} on Form 1040 line 11.",
                    "authorities": [
                        {"title": "Instructions for Form 1040 and Schedules 1-3", "sourceClass": "irs_instructions"}
                    ],
                }
            )

        # High severity: a form the recomputed return requires has no rendered artifact.
        rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]}
        for required_form in recomputed["requiredForms"]:
            if required_form not in rendered_forms:
                findings.append(
                    {
                        "severity": "high",
                        "title": f"Missing rendered artifact for {required_form}",
                        "explanation": "The return requires this form, but no rendered artifact is present in the artifact manifest.",
                        "suggestedAction": f"Render and review {required_form} before treating the package as complete.",
                        "authorities": [{"title": "Supported form manifest", "sourceClass": "irs_form"}],
                    }
                )

        # Medium severity: overlay-rendered artifacts always require human review.
        for artifact in artifacts_payload["artifacts"]:
            if artifact.get("reviewRequired"):
                findings.append(
                    {
                        "severity": "medium",
                        "title": f"Human review required for {artifact['formCode']}",
                        "explanation": "The form was overlay-rendered on the official IRS PDF and must be reviewed before filing.",
                        "suggestedAction": f"Review the rendered {artifact['formCode']} artifact visually before any filing/export handoff.",
                        "authorities": [{"title": "Artifact render policy", "sourceClass": "irs_form"}],
                    }
                )

        # Findings-first ordering: severity first, then title for a stable order.
        findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"]))
        review = {
            "status": "reviewed",
            "taxYear": manifest["taxYear"],
            "caseDir": str(case_dir),
            "findingCount": len(findings),
            "findings": findings,
        }
        reports_dir = case_dir / "reports"
        # prepare normally creates this directory, but do not let a missing
        # directory (e.g. a hand-assembled case) fail the whole review.
        reports_dir.mkdir(parents=True, exist_ok=True)
        (reports_dir / "review-report.json").write_text(
            json.dumps(review, indent=2), encoding="utf-8"
        )
        return review
def render_review_summary(review: dict[str, Any]) -> str:
    """Render *review* as a short plain-text summary, one bullet per finding."""
    findings = review["findings"]
    if not findings:
        return "No findings detected in the reviewed return package."
    bullets = [
        f"- [{item['severity'].upper()}] {item['title']}: {item['explanation']}"
        for item in findings
    ]
    return "\n".join(["Review findings:", *bullets])
def render_review_memo(review: dict[str, Any]) -> str:
    """Render *review* as a markdown memo with one section per finding."""
    out: list[str] = ["# Review Memo", ""]
    findings = review["findings"]
    if not findings:
        out.append("No findings detected.")
        return "\n".join(out)
    for number, item in enumerate(findings, start=1):
        out.append(f"## Finding {number}: {item['title']}")
        out.append(f"Severity: {item['severity']}")
        out.append("")
        out.append("### Explanation")
        out.append(item["explanation"])
        out.append("")
        out.append("### Suggested correction")
        out.append(item["suggestedAction"])
        out.append("")
        out.append("### Authorities")
        out.extend(f"- {source['title']}" for source in item["authorities"])
        out.append("")
    # Drop the trailing blank line left after the last finding section.
    return "\n".join(out).rstrip()

View File

@@ -261,6 +261,94 @@ class UsCpaCliSmokeTests(unittest.TestCase):
self.assertEqual(payload["summary"]["requiredForms"], ["f1040"])
self.assertTrue((case_dir / "output" / "artifacts.json").exists())
def test_review_command_returns_findings(self) -> None:
    """End-to-end CLI check: a seeded AGI mismatch yields a reviewed payload with findings."""
    with tempfile.TemporaryDirectory() as temp_dir:
        env = os.environ.copy()
        # Run the CLI module from the source tree with an isolated corpus cache.
        env["PYTHONPATH"] = str(SRC_DIR)
        env["US_CPA_CACHE_DIR"] = str(Path(temp_dir) / "cache")
        # Step 1: download the tax-year corpus the later commands rely on.
        subprocess.run(
            [sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"],
            text=True,
            capture_output=True,
            env=env,
            check=True,
        )
        case_dir = Path(temp_dir) / "2025-jane-doe"
        facts_path = Path(temp_dir) / "facts.json"
        # Minimal single-filer facts used to seed the case.
        facts_path.write_text(
            json.dumps(
                {
                    "taxpayer.fullName": "Jane Doe",
                    "filingStatus": "single",
                    "wages": 50000,
                    "taxableInterest": 100,
                    "federalWithholding": 6000,
                }
            )
        )
        # Step 2: create the case and ingest the facts file.
        subprocess.run(
            [
                sys.executable,
                "-m",
                "us_cpa.cli",
                "extract-docs",
                "--tax-year",
                "2025",
                "--case-dir",
                str(case_dir),
                "--create-case",
                "--case-label",
                "Jane Doe",
                "--facts-json",
                str(facts_path),
            ],
            text=True,
            capture_output=True,
            env=env,
            check=True,
        )
        # Step 3: prepare the return so normalized-return.json and the
        # artifact manifest exist for review to compare against.
        subprocess.run(
            [
                sys.executable,
                "-m",
                "us_cpa.cli",
                "prepare",
                "--tax-year",
                "2025",
                "--case-dir",
                str(case_dir),
            ],
            text=True,
            capture_output=True,
            env=env,
            check=True,
        )
        # Corrupt the stored AGI so review must detect a mismatch against
        # the value it recomputes from the case facts.
        normalized_path = case_dir / "return" / "normalized-return.json"
        normalized = json.loads(normalized_path.read_text())
        normalized["totals"]["adjustedGrossIncome"] = 99999.0
        normalized_path.write_text(json.dumps(normalized, indent=2))
        # Step 4: run review (no check=True — the return code is asserted below).
        result = subprocess.run(
            [
                sys.executable,
                "-m",
                "us_cpa.cli",
                "review",
                "--tax-year",
                "2025",
                "--case-dir",
                str(case_dir),
            ],
            text=True,
            capture_output=True,
            env=env,
        )
        self.assertEqual(result.returncode, 0, result.stderr)
        payload = json.loads(result.stdout)
        self.assertEqual(payload["status"], "reviewed")
        # Expected findings: the AGI mismatch plus (presumably) the
        # overlay-rendered artifact's review-required flag — confirm if the
        # prepare output changes.
        self.assertEqual(payload["findingCount"], 2)
if __name__ == "__main__":
    # Allow running this test module directly without a test runner wrapper.
    unittest.main()

View File

@@ -0,0 +1,90 @@
from __future__ import annotations
import json
import tempfile
import unittest
from io import BytesIO
from pathlib import Path
from reportlab.pdfgen import canvas
from us_cpa.cases import CaseManager
from us_cpa.prepare import PrepareEngine
from us_cpa.review import ReviewEngine, render_review_memo, render_review_summary
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
class ReviewEngineTests(unittest.TestCase):
    """Unit tests for ReviewEngine and the review renderers."""

    def build_prepared_case(self, temp_dir: str) -> tuple[Path, TaxYearCorpus]:
        """Create, intake, and prepare a minimal single-filer case under *temp_dir*.

        Returns the case directory and the corpus used, so tests can reuse
        the same (offline) corpus when running review.
        """
        case_dir = Path(temp_dir) / "2025-jane-doe"
        manager = CaseManager(case_dir)
        manager.create_case(case_label="Jane Doe", tax_year=2025)
        manager.intake(
            tax_year=2025,
            user_facts={
                "taxpayer.fullName": "Jane Doe",
                "filingStatus": "single",
                "wages": 50000,
                "taxableInterest": 100,
                "federalWithholding": 6000,
            },
            document_paths=[],
        )
        corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")

        def fake_fetch(url: str) -> bytes:
            # Stand-in fetcher: generate a one-line PDF in memory so the
            # test never touches the network for IRS templates.
            buffer = BytesIO()
            pdf = canvas.Canvas(buffer)
            pdf.drawString(72, 720, f"Template for {url}")
            pdf.save()
            return buffer.getvalue()

        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=fake_fetch)
        PrepareEngine(corpus=corpus).prepare_case(case_dir)
        return case_dir, corpus

    def test_review_detects_mismatched_return_and_missing_artifacts(self) -> None:
        """Corrupting AGI and emptying the artifact manifest must both be flagged."""
        with tempfile.TemporaryDirectory() as temp_dir:
            case_dir, corpus = self.build_prepared_case(temp_dir)
            # Corrupt the stored AGI so it diverges from the recomputation.
            normalized_path = case_dir / "return" / "normalized-return.json"
            normalized = json.loads(normalized_path.read_text())
            normalized["totals"]["adjustedGrossIncome"] = 99999.0
            normalized_path.write_text(json.dumps(normalized, indent=2))
            # Remove all rendered artifacts so required forms appear missing.
            artifacts_path = case_dir / "output" / "artifacts.json"
            artifacts = json.loads(artifacts_path.read_text())
            artifacts["artifacts"] = []
            artifacts["artifactCount"] = 0
            artifacts_path.write_text(json.dumps(artifacts, indent=2))
            review = ReviewEngine(corpus=corpus).review_case(case_dir)
            self.assertEqual(review["status"], "reviewed")
            # Findings are sorted severity-first, so a high finding leads.
            self.assertEqual(review["findings"][0]["severity"], "high")
            self.assertIn("adjusted gross income", review["findings"][0]["title"].lower())
            self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"]))

    def test_review_renderers_produce_summary_and_memo(self) -> None:
        """The summary and memo renderers include the finding details verbatim."""
        # Hand-built review payload: no engine/filesystem involvement needed.
        review = {
            "status": "reviewed",
            "findings": [
                {
                    "severity": "high",
                    "title": "Adjusted gross income mismatch",
                    "explanation": "Stored AGI does not match recomputed AGI.",
                    "suggestedAction": "Update Form 1040 line 11.",
                    "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
                }
            ],
        }
        summary = render_review_summary(review)
        memo = render_review_memo(review)
        self.assertIn("Adjusted gross income mismatch", summary)
        self.assertIn("# Review Memo", memo)
        self.assertIn("Suggested correction", memo)
if __name__ == "__main__":
    # Allow running this test module directly without a test runner wrapper.
    unittest.main()