feat: add us-cpa case intake workflow
This commit is contained in:
@@ -20,7 +20,7 @@ skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction
|
||||
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025
|
||||
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
|
||||
skills/us-cpa/scripts/us-cpa render-forms --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa export-efile-ready --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
```
|
||||
@@ -71,6 +71,29 @@ Current `fetch-year` bootstrap corpus for tax year `2025` is verified against li
|
||||
sources/
|
||||
```
|
||||
|
||||
Current implementation writes:
|
||||
|
||||
- `case-manifest.json`
|
||||
- `extracted/facts.json`
|
||||
- `issues/open-issues.json`
|
||||
|
||||
## Intake Flow
|
||||
|
||||
Current `extract-docs` supports:
|
||||
|
||||
- `--create-case`
|
||||
- `--case-label`
|
||||
- `--facts-json <path>`
|
||||
- repeated `--input-file <path>`
|
||||
|
||||
Behavior:
|
||||
|
||||
- creates the full case directory layout when `--create-case` is used
|
||||
- copies input documents into `input/`
|
||||
- stores normalized user-statement facts in `extracted/facts.json`
|
||||
- appends document registry entries to `case-manifest.json`
|
||||
- stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact
|
||||
|
||||
## Output Contract
|
||||
|
||||
- JSON by default
|
||||
|
||||
@@ -34,6 +34,7 @@ description: Use when answering U.S. federal individual tax questions, preparing
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025
|
||||
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
|
||||
```
|
||||
|
||||
## Rules
|
||||
@@ -50,5 +51,6 @@ skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-
|
||||
- markdown output available with `--format markdown`
|
||||
- `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default
|
||||
- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
|
||||
- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
|
||||
|
||||
For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
|
||||
|
||||
157
skills/us-cpa/src/us_cpa/cases.py
Normal file
157
skills/us-cpa/src/us_cpa/cases.py
Normal file
@@ -0,0 +1,157 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Sub-directories created directly under every case directory by
# CaseManager.create_case.
CASE_SUBDIRECTORIES = (
    "input",
    "extracted",
    "return",
    "output",
    "reports",
    "issues",
    "sources",
)
|
||||
|
||||
|
||||
def _timestamp() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _sha256_path(path: Path) -> str:
|
||||
digest = hashlib.sha256()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(65536), b""):
|
||||
digest.update(chunk)
|
||||
return digest.hexdigest()
|
||||
|
||||
|
||||
class CaseConflictError(Exception):
    """Signal that a new fact contradicts one already stored for the case.

    The structured issue dict is kept on ``issue``; its ``"message"`` entry
    becomes the exception message.
    """

    def __init__(self, issue: dict[str, Any]) -> None:
        message = issue["message"]
        super().__init__(message)
        self.issue = issue
|
||||
|
||||
|
||||
@dataclass
class CaseManager:
    """Manage the on-disk state of a single tax case directory.

    The manager owns three JSON files inside ``case_dir``:
    ``case-manifest.json`` (case metadata and the document registry),
    ``extracted/facts.json`` (stored facts) and ``issues/open-issues.json``
    (structured issues such as fact conflicts).
    """

    case_dir: Path

    def __post_init__(self) -> None:
        # Normalize once so every derived path is absolute and "~" is expanded.
        self.case_dir = self.case_dir.expanduser().resolve()

    @property
    def manifest_path(self) -> Path:
        """Path of the case manifest file."""
        return self.case_dir / "case-manifest.json"

    @property
    def facts_path(self) -> Path:
        """Path of the stored facts file."""
        return self.case_dir / "extracted" / "facts.json"

    @property
    def issues_path(self) -> Path:
        """Path of the open issues file."""
        return self.case_dir / "issues" / "open-issues.json"

    def create_case(self, *, case_label: str, tax_year: int) -> dict[str, Any]:
        """Create the case directory layout and return the case manifest.

        Missing directories and missing JSON files are created. Files that
        already exist are left untouched: in particular an existing manifest
        is loaded and returned as-is, so re-running case creation does not
        discard the registered documents.

        Args:
            case_label: Human-readable label stored in a new manifest.
            tax_year: Tax year stored in a new manifest.

        Returns:
            The manifest of the case (newly written or already present).
        """
        self.case_dir.mkdir(parents=True, exist_ok=True)
        for name in CASE_SUBDIRECTORIES:
            (self.case_dir / name).mkdir(exist_ok=True)

        if self.manifest_path.exists():
            # Same policy as facts/issues below: never clobber existing state.
            manifest = self.load_manifest()
        else:
            # One clock read so createdAt and updatedAt agree on a new case.
            created_at = _timestamp()
            manifest = {
                "caseLabel": case_label,
                "taxYear": tax_year,
                "createdAt": created_at,
                "updatedAt": created_at,
                "status": "open",
                "documents": [],
            }
            self.manifest_path.write_text(
                json.dumps(manifest, indent=2), encoding="utf-8"
            )

        if not self.facts_path.exists():
            self.facts_path.write_text(
                json.dumps({"facts": {}}, indent=2), encoding="utf-8"
            )
        if not self.issues_path.exists():
            self.issues_path.write_text(
                json.dumps({"issues": []}, indent=2), encoding="utf-8"
            )
        return manifest

    def load_manifest(self) -> dict[str, Any]:
        """Read and return the case manifest."""
        return json.loads(self.manifest_path.read_text(encoding="utf-8"))

    def _load_facts(self) -> dict[str, Any]:
        """Read and return the stored facts payload."""
        return json.loads(self.facts_path.read_text(encoding="utf-8"))

    def _write_manifest(self, manifest: dict[str, Any]) -> None:
        """Stamp ``updatedAt`` on *manifest* and write it to disk."""
        manifest["updatedAt"] = _timestamp()
        self.manifest_path.write_text(
            json.dumps(manifest, indent=2), encoding="utf-8"
        )

    def _write_facts(self, facts: dict[str, Any]) -> None:
        """Write the facts payload to disk."""
        self.facts_path.write_text(json.dumps(facts, indent=2), encoding="utf-8")

    def _write_issue(self, issue: dict[str, Any]) -> None:
        """Append *issue* to the open issues file."""
        current = json.loads(self.issues_path.read_text(encoding="utf-8"))
        current["issues"].append(issue)
        self.issues_path.write_text(
            json.dumps(current, indent=2), encoding="utf-8"
        )

    def intake(
        self,
        *,
        tax_year: int,
        user_facts: dict[str, Any],
        document_paths: list[Path],
    ) -> dict[str, Any]:
        """Register user facts and input documents for the case.

        Facts are checked against the stored facts before any document is
        copied, so a conflict leaves ``input/``, the manifest and the facts
        file unchanged; only the issue is appended to the open issues file.

        Args:
            tax_year: Tax year of the request; must equal the manifest's.
            user_facts: Mapping of fact field name to value.
            document_paths: Files to copy into ``input/`` and register.

        Returns:
            A summary with ``status``, ``caseDir``, ``taxYear``,
            ``registeredDocuments`` and ``factCount``.

        Raises:
            ValueError: If *tax_year* differs from the manifest's tax year.
            CaseConflictError: If a new fact value differs from the stored
                value for the same field.
        """
        manifest = self.load_manifest()
        if manifest["taxYear"] != tax_year:
            raise ValueError(
                f"Case tax year {manifest['taxYear']} does not match requested tax year {tax_year}."
            )

        # Validate facts first: a conflict must not leave unregistered copies
        # of the input documents behind.
        facts_payload = self._load_facts()
        for field, value in user_facts.items():
            existing = facts_payload["facts"].get(field)
            if existing and existing["value"] != value:
                issue = {
                    "status": "needs_resolution",
                    "issueType": "fact_conflict",
                    "field": field,
                    "existingValue": existing["value"],
                    "newValue": value,
                    "message": f"Conflicting values for {field}. Resolve before continuing.",
                    "createdAt": _timestamp(),
                    "taxYear": tax_year,
                }
                self._write_issue(issue)
                raise CaseConflictError(issue)

            facts_payload["facts"][field] = {
                "value": value,
                "sourceType": "user_statement",
                "capturedAt": _timestamp(),
            }

        registered_documents = []
        for source_path in document_paths:
            source_path = source_path.expanduser().resolve()
            destination = self.case_dir / "input" / source_path.name
            shutil.copy2(source_path, destination)
            document_entry = {
                "name": source_path.name,
                "sourcePath": str(source_path),
                "storedPath": str(destination),
                # Hash the stored copy so the registry describes what is kept.
                "sha256": _sha256_path(destination),
                "registeredAt": _timestamp(),
            }
            manifest["documents"].append(document_entry)
            registered_documents.append(document_entry)

        self._write_manifest(manifest)
        self._write_facts(facts_payload)
        return {
            "status": "accepted",
            "caseDir": str(self.case_dir),
            "taxYear": tax_year,
            "registeredDocuments": registered_documents,
            "factCount": len(facts_payload["facts"]),
        }
|
||||
@@ -6,6 +6,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.cases import CaseConflictError, CaseManager
|
||||
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
|
||||
|
||||
COMMANDS = (
|
||||
@@ -47,6 +48,12 @@ def _require_case_dir(args: argparse.Namespace) -> Path:
|
||||
return Path(args.case_dir).expanduser().resolve()
|
||||
|
||||
|
||||
def _load_json_file(path_value: str | None) -> dict[str, Any]:
|
||||
if not path_value:
|
||||
return {}
|
||||
return json.loads(Path(path_value).expanduser().resolve().read_text())
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="us-cpa",
|
||||
@@ -74,6 +81,10 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
"extract-docs", help="Extract facts from case documents."
|
||||
)
|
||||
_add_common_arguments(extract_docs)
|
||||
extract_docs.add_argument("--create-case", action="store_true")
|
||||
extract_docs.add_argument("--case-label")
|
||||
extract_docs.add_argument("--facts-json")
|
||||
extract_docs.add_argument("--input-file", action="append", default=[])
|
||||
|
||||
render_forms = subparsers.add_parser(
|
||||
"render-forms", help="Render compiled IRS forms."
|
||||
@@ -103,7 +114,35 @@ def main(argv: list[str] | None = None) -> int:
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command in {"prepare", "review", "extract-docs", "render-forms", "export-efile-ready"}:
|
||||
if args.command == "extract-docs":
|
||||
case_dir = _require_case_dir(args)
|
||||
manager = CaseManager(case_dir)
|
||||
if args.create_case:
|
||||
if not args.case_label:
|
||||
raise SystemExit("--case-label is required when --create-case is used.")
|
||||
manager.create_case(case_label=args.case_label, tax_year=args.tax_year)
|
||||
elif not manager.manifest_path.exists():
|
||||
raise SystemExit("Case manifest not found. Use --create-case for a new case.")
|
||||
|
||||
try:
|
||||
result = manager.intake(
|
||||
tax_year=args.tax_year,
|
||||
user_facts=_load_json_file(args.facts_json),
|
||||
document_paths=[
|
||||
Path(path_value).expanduser().resolve() for path_value in args.input_file
|
||||
],
|
||||
)
|
||||
except CaseConflictError as exc:
|
||||
print(json.dumps(exc.issue, indent=2))
|
||||
return 1
|
||||
payload = {
|
||||
"command": args.command,
|
||||
"format": args.format,
|
||||
**result,
|
||||
}
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command in {"prepare", "review", "render-forms", "export-efile-ready"}:
|
||||
case_dir = _require_case_dir(args)
|
||||
payload = {
|
||||
"command": args.command,
|
||||
|
||||
80
skills/us-cpa/tests/test_cases.py
Normal file
80
skills/us-cpa/tests/test_cases.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from us_cpa.cases import CaseConflictError, CaseManager
|
||||
|
||||
|
||||
class CaseManagerTests(unittest.TestCase):
    """Exercise CaseManager case creation, intake and conflict handling."""

    def test_create_case_builds_expected_layout(self) -> None:
        """A new case gets a manifest and every expected sub-directory."""
        expected_dirs = (
            "input",
            "extracted",
            "return",
            "output",
            "reports",
            "issues",
            "sources",
        )
        with tempfile.TemporaryDirectory() as workspace:
            case_root = Path(workspace) / "2025-jane-doe"
            case_manager = CaseManager(case_root)

            created = case_manager.create_case(case_label="Jane Doe", tax_year=2025)

            self.assertEqual(created["caseLabel"], "Jane Doe")
            self.assertEqual(created["taxYear"], 2025)
            for dirname in expected_dirs:
                self.assertTrue((case_root / dirname).is_dir())
            self.assertTrue((case_root / "case-manifest.json").exists())

    def test_intake_registers_documents_and_user_facts(self) -> None:
        """Intake copies the document into input/ and stores the facts."""
        with tempfile.TemporaryDirectory() as workspace:
            workspace_path = Path(workspace)
            case_root = workspace_path / "2025-jane-doe"
            w2_file = workspace_path / "w2.txt"
            w2_file.write_text("sample w2")
            case_manager = CaseManager(case_root)
            case_manager.create_case(case_label="Jane Doe", tax_year=2025)

            submitted_facts = {"filingStatus": "single", "taxpayer.ssnLast4": "1234"}
            outcome = case_manager.intake(
                tax_year=2025,
                user_facts=submitted_facts,
                document_paths=[w2_file],
            )

            self.assertEqual(outcome["status"], "accepted")
            self.assertEqual(len(outcome["registeredDocuments"]), 1)
            self.assertTrue((case_root / "input" / "w2.txt").exists())
            stored_text = (case_root / "extracted" / "facts.json").read_text()
            stored = json.loads(stored_text)
            self.assertEqual(stored["facts"]["filingStatus"]["value"], "single")

    def test_conflicting_facts_raise_structured_issue(self) -> None:
        """A contradicting fact raises CaseConflictError carrying the issue."""
        with tempfile.TemporaryDirectory() as workspace:
            case_root = Path(workspace) / "2025-jane-doe"
            case_manager = CaseManager(case_root)
            case_manager.create_case(case_label="Jane Doe", tax_year=2025)
            case_manager.intake(
                tax_year=2025,
                user_facts={"filingStatus": "single"},
                document_paths=[],
            )

            with self.assertRaises(CaseConflictError) as raised:
                case_manager.intake(
                    tax_year=2025,
                    user_facts={"filingStatus": "married_filing_jointly"},
                    document_paths=[],
                )

            conflict = raised.exception.issue
            self.assertEqual(conflict["status"], "needs_resolution")
            self.assertEqual(conflict["issueType"], "fact_conflict")
            self.assertEqual(conflict["field"], "filingStatus")
            self.assertTrue((case_root / "issues" / "open-issues.json").exists())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -4,6 +4,7 @@ import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
@@ -66,6 +67,67 @@ class UsCpaCliSmokeTests(unittest.TestCase):
|
||||
self.assertNotEqual(result.returncode, 0)
|
||||
self.assertIn("case directory", result.stderr.lower())
|
||||
|
||||
def test_extract_docs_can_create_case_and_register_facts(self) -> None:
    """`extract-docs --create-case` creates the case and stores the facts."""
    with tempfile.TemporaryDirectory() as workspace:
        workspace_path = Path(workspace)
        case_root = workspace_path / "2025-jane-doe"
        facts_file = workspace_path / "facts.json"
        facts_file.write_text(json.dumps({"filingStatus": "single"}))

        cli_args = [
            "extract-docs",
            "--tax-year",
            "2025",
            "--case-dir",
            str(case_root),
            "--create-case",
            "--case-label",
            "Jane Doe",
            "--facts-json",
            str(facts_file),
        ]
        completed = self.run_cli(*cli_args)

        self.assertEqual(completed.returncode, 0, completed.stderr)
        response = json.loads(completed.stdout)
        self.assertEqual(response["status"], "accepted")
        self.assertEqual(response["factCount"], 1)
        self.assertTrue((case_root / "case-manifest.json").exists())
|
||||
|
||||
def test_extract_docs_stops_on_conflicts(self) -> None:
    """A second run with a contradicting fact exits non-zero with the issue."""
    with tempfile.TemporaryDirectory() as workspace:
        workspace_path = Path(workspace)
        case_root = workspace_path / "2025-jane-doe"
        initial_facts = workspace_path / "facts-1.json"
        conflicting_facts = workspace_path / "facts-2.json"
        initial_facts.write_text(json.dumps({"filingStatus": "single"}))
        conflicting_facts.write_text(
            json.dumps({"filingStatus": "married_filing_jointly"})
        )
        # Arguments shared by both invocations.
        base_args = [
            "extract-docs",
            "--tax-year",
            "2025",
            "--case-dir",
            str(case_root),
        ]

        created = self.run_cli(
            *base_args,
            "--create-case",
            "--case-label",
            "Jane Doe",
            "--facts-json",
            str(initial_facts),
        )
        self.assertEqual(created.returncode, 0, created.stderr)

        rejected = self.run_cli(*base_args, "--facts-json", str(conflicting_facts))
        self.assertNotEqual(rejected.returncode, 0)
        response = json.loads(rejected.stdout)
        self.assertEqual(response["status"], "needs_resolution")
        self.assertEqual(response["issueType"], "fact_conflict")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user