fix: expand us-cpa extraction review and rendering

This commit is contained in:
Stefano Fiorini
2026-03-15 03:01:16 -05:00
parent 6c02e0b7c6
commit fb39fe76cb
19 changed files with 693 additions and 56 deletions

View File

@@ -8,6 +8,8 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from us_cpa.document_extractors import extract_document_facts
CASE_SUBDIRECTORIES = (
"input",
@@ -95,6 +97,48 @@ class CaseManager:
current["issues"].append(issue)
self.issues_path.write_text(json.dumps(current, indent=2))
def _record_fact(
    self,
    facts_payload: dict[str, Any],
    *,
    field: str,
    value: Any,
    source_type: str,
    source_name: str,
    tax_year: int,
) -> None:
    """Record a single fact in ``facts_payload`` and track where it came from.

    ``facts_payload`` is mutated in place; nothing is persisted here other
    than the conflict issue written through ``self._write_issue``.

    Behaviour by case:

    * The field is not recorded yet: a new record is stored with the value,
      the first source's type and capture time, and a one-element
      ``sources`` list.
    * The field is recorded with an equal value: the new source is appended
      to that record's ``sources`` list; the stored value, ``sourceType``
      and ``capturedAt`` are left untouched.
    * The field is recorded with a different value: a ``fact_conflict``
      issue is written and ``CaseConflictError`` is raised.

    Args:
        facts_payload: Mapping holding a ``"facts"`` dict keyed by field name.
        field: Name of the fact being recorded.
        value: Value reported for the fact.
        source_type: Kind of source reporting the value.
        source_name: Identifier of the specific source.
        tax_year: Tax year stored on the conflict issue, if one is raised.

    Raises:
        CaseConflictError: If the field already holds a different value.
    """
    existing = facts_payload["facts"].get(field)
    if existing and existing["value"] != value:
        issue = {
            "status": "needs_resolution",
            "issueType": "fact_conflict",
            "field": field,
            "existingValue": existing["value"],
            "newValue": value,
            "message": f"Conflicting values for {field}. Resolve before continuing.",
            "createdAt": _timestamp(),
            "taxYear": tax_year,
        }
        # Write the issue before raising so the conflict is kept even if the
        # caller does not handle the exception.
        self._write_issue(issue)
        raise CaseConflictError(issue)

    captured_at = _timestamp()
    source_entry = {
        "sourceType": source_type,
        "sourceName": source_name,
        "capturedAt": captured_at,
    }
    if existing:
        # Records stored without a "sources" list (only value / sourceType /
        # capturedAt) would raise KeyError on a plain lookup; create the list
        # on demand so they are upgraded the first time they are confirmed.
        existing.setdefault("sources", []).append(source_entry)
        return

    facts_payload["facts"][field] = {
        "value": value,
        "sourceType": source_type,
        "capturedAt": captured_at,
        "sources": [source_entry],
    }
def intake(
self,
*,
@@ -124,27 +168,28 @@ class CaseManager:
registered_documents.append(document_entry)
facts_payload = self._load_facts()
for field, value in user_facts.items():
existing = facts_payload["facts"].get(field)
if existing and existing["value"] != value:
issue = {
"status": "needs_resolution",
"issueType": "fact_conflict",
"field": field,
"existingValue": existing["value"],
"newValue": value,
"message": f"Conflicting values for {field}. Resolve before continuing.",
"createdAt": _timestamp(),
"taxYear": tax_year,
}
self._write_issue(issue)
raise CaseConflictError(issue)
for document_entry in registered_documents:
extracted = extract_document_facts(Path(document_entry["storedPath"]))
document_entry["extractedFacts"] = extracted
for field, value in extracted.items():
self._record_fact(
facts_payload,
field=field,
value=value,
source_type="document_extract",
source_name=document_entry["name"],
tax_year=tax_year,
)
facts_payload["facts"][field] = {
"value": value,
"sourceType": "user_statement",
"capturedAt": _timestamp(),
}
for field, value in user_facts.items():
self._record_fact(
facts_payload,
field=field,
value=value,
source_type="user_statement",
source_name="interactive-intake",
tax_year=tax_year,
)
self._write_manifest(manifest)
self._write_facts(facts_payload)