fix: expand us-cpa extraction review and rendering
This commit is contained in:
3
skills/us-cpa/tests/fixtures/documents/interest-1099.txt
vendored
Normal file
3
skills/us-cpa/tests/fixtures/documents/interest-1099.txt
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
Form 1099-INT
|
||||
Recipient: Jane Doe
|
||||
Box 1 Interest Income 1750
|
||||
4
skills/us-cpa/tests/fixtures/documents/simple-w2.txt
vendored
Normal file
4
skills/us-cpa/tests/fixtures/documents/simple-w2.txt
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
Form W-2 Wage and Tax Statement
|
||||
Employee: Jane Doe
|
||||
Box 1 Wages, tips, other compensation 50000
|
||||
Box 2 Federal income tax withheld 6000
|
||||
16
skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json
vendored
Normal file
16
skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"taxYear": 2025,
|
||||
"filingStatus": "single",
|
||||
"requiredForms": ["f1040", "f1040sb"],
|
||||
"income": {
|
||||
"wages": 50000.0,
|
||||
"taxableInterest": 1750.0,
|
||||
"businessIncome": 0.0,
|
||||
"capitalGainLoss": 0.0,
|
||||
"rentalIncome": 0.0
|
||||
},
|
||||
"totals": {
|
||||
"adjustedGrossIncome": 51750.0,
|
||||
"taxableIncome": 36000.0
|
||||
}
|
||||
}
|
||||
@@ -51,6 +51,39 @@ class CaseManagerTests(unittest.TestCase):
|
||||
facts = json.loads((case_dir / "extracted" / "facts.json").read_text())
|
||||
self.assertEqual(facts["facts"]["filingStatus"]["value"], "single")
|
||||
|
||||
def test_intake_extracts_machine_usable_facts_from_text_documents(self) -> None:
    """Check that intake of W-2 and 1099-INT text files yields numeric facts.

    Two small text documents are written to a scratch directory and passed to
    ``CaseManager.intake``. The ``extracted/facts.json`` file read back from
    the case directory must hold the wage, withholding and interest amounts,
    and the first wage source must be tagged ``document_extract``.
    """
    w2_text = (
        "Form W-2 Wage and Tax Statement\n"
        "Employee: Jane Doe\n"
        "Box 1 Wages, tips, other compensation 50000\n"
        "Box 2 Federal income tax withheld 6000\n"
    )
    interest_text = (
        "Form 1099-INT\n"
        "Recipient: Jane Doe\n"
        "Box 1 Interest Income 1750\n"
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        workspace = Path(temp_dir)
        case_dir = workspace / "2025-jane-doe"

        # Source documents live next to (not inside) the case directory.
        w2_path = workspace / "w2.txt"
        w2_path.write_text(w2_text)
        interest_path = workspace / "1099-int.txt"
        interest_path.write_text(interest_text)

        case_manager = CaseManager(case_dir)
        case_manager.create_case(case_label="Jane Doe", tax_year=2025)
        outcome = case_manager.intake(
            tax_year=2025,
            user_facts={"filingStatus": "single"},
            document_paths=[w2_path, interest_path],
        )
        self.assertEqual(outcome["status"], "accepted")

        facts_file = case_dir / "extracted" / "facts.json"
        extracted = json.loads(facts_file.read_text())["facts"]
        self.assertEqual(extracted["wages"]["value"], 50000.0)
        self.assertEqual(extracted["federalWithholding"]["value"], 6000.0)
        self.assertEqual(extracted["taxableInterest"]["value"], 1750.0)

        first_wage_source = extracted["wages"]["sources"][0]
        self.assertEqual(first_wage_source["sourceType"], "document_extract")
|
||||
|
||||
def test_conflicting_facts_raise_structured_issue(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
case_dir = Path(temp_dir) / "2025-jane-doe"
|
||||
|
||||
@@ -51,6 +51,36 @@ class QuestionEngineTests(unittest.TestCase):
|
||||
self.assertEqual(analysis["riskLevel"], "high")
|
||||
self.assertTrue(analysis["primaryLawRequired"])
|
||||
self.assertIn("Internal Revenue Code", analysis["missingFacts"][0])
|
||||
self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in analysis["authorities"]))
|
||||
|
||||
def test_capital_gains_question_returns_schedule_d_guidance(self) -> None:
    """A Schedule D question with a capital gain fact should yield Schedule D guidance.

    The analysis must name the ``schedule_d_required`` issue at medium
    confidence, must not require primary law, and must cite an authority
    whose slug is ``f1040sd``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        question_engine = self.build_engine(temp_dir)
        request = {
            "question": "Do I need Schedule D for capital gains?",
            "tax_year": 2025,
            "case_facts": {"capitalGainLoss": 400},
        }
        analysis = question_engine.answer(**request)

        self.assertEqual(analysis["issue"], "schedule_d_required")
        self.assertEqual(analysis["confidence"], "medium")
        self.assertFalse(analysis["primaryLawRequired"])

        cites_schedule_d = any(
            authority["slug"] == "f1040sd" for authority in analysis["authorities"]
        )
        self.assertTrue(cites_schedule_d)
|
||||
|
||||
def test_schedule_e_question_returns_rental_guidance(self) -> None:
    """A Schedule E question with a rental income fact should yield Schedule E guidance.

    The analysis must name the ``schedule_e_required`` issue, must not require
    primary law, and must cite an authority whose slug is ``f1040se``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        question_engine = self.build_engine(temp_dir)
        request = {
            "question": "Do I need Schedule E for rental income?",
            "tax_year": 2025,
            "case_facts": {"rentalIncome": 1200},
        }
        analysis = question_engine.answer(**request)

        self.assertEqual(analysis["issue"], "schedule_e_required")
        self.assertFalse(analysis["primaryLawRequired"])

        cites_schedule_e = any(
            authority["slug"] == "f1040se" for authority in analysis["authorities"]
        )
        self.assertTrue(cites_schedule_e)
|
||||
|
||||
def test_renderers_produce_conversation_and_memo(self) -> None:
|
||||
analysis = {
|
||||
|
||||
@@ -13,6 +13,47 @@ from us_cpa.sources import TaxYearCorpus
|
||||
|
||||
|
||||
class RendererTests(unittest.TestCase):
|
||||
def test_render_case_forms_prefers_fillable_pdf_fields_when_available(self) -> None:
    """Expect ``field_fill`` rendering, with no review flag, for a fillable form.

    A PDF with two AcroForm text fields is generated in memory and stored as
    ``f1040.pdf`` in the corpus IRS directory for 2025 before
    ``render_case_forms`` is called with a single-form normalized return.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        scratch = Path(temp_dir)
        case_dir = scratch / "case"
        (case_dir / "output").mkdir(parents=True)

        corpus = TaxYearCorpus(cache_root=scratch / "cache")
        irs_dir = corpus.paths_for_year(2025).irs_dir
        irs_dir.mkdir(parents=True, exist_ok=True)

        # Each row is: printed label, field name, label baseline, field y-origin.
        field_rows = (
            ("Name", "taxpayer_full_name", 720, 710),
            ("Wages", "wages", 680, 670),
        )
        pdf_bytes = BytesIO()
        sheet = canvas.Canvas(pdf_bytes)
        acro_form = sheet.acroForm
        for label, field_name, label_y, field_y in field_rows:
            sheet.drawString(72, label_y, label)
            acro_form.textfield(name=field_name, x=120, y=field_y, width=200, height=20)
        sheet.save()
        (irs_dir / "f1040.pdf").write_bytes(pdf_bytes.getvalue())

        normalized = {
            "taxYear": 2025,
            "requiredForms": ["f1040"],
            "taxpayer": {"fullName": "Jane Doe"},
            "filingStatus": "single",
            "income": {
                "wages": 50000.0,
                "taxableInterest": 100.0,
                "businessIncome": 0.0,
                "capitalGainLoss": 0.0,
                "rentalIncome": 0.0,
            },
            "deductions": {
                "standardDeduction": 15750.0,
                "deductionType": "standard",
                "deductionAmount": 15750.0,
            },
            "adjustments": {"hsaContribution": 0.0},
            "credits": {
                "educationCredit": 0.0,
                "foreignTaxCredit": 0.0,
                "energyCredit": 0.0,
            },
            "taxes": {
                "totalTax": 3883.5,
                "additionalMedicareTax": 0.0,
                "netInvestmentIncomeTax": 0.0,
                "alternativeMinimumTax": 0.0,
                "additionalTaxPenalty": 0.0,
            },
            "payments": {"federalWithholding": 6000.0},
            "business": {"qualifiedBusinessIncome": 0.0},
            "basis": {"traditionalIraBasis": 0.0},
            "depreciation": {"depreciationExpense": 0.0},
            "assetSales": {"section1231GainLoss": 0.0},
            "totals": {
                "adjustedGrossIncome": 50100.0,
                "taxableIncome": 34350.0,
                "refund": 2116.5,
                "balanceDue": 0.0,
            },
        }

        rendered = render_case_forms(case_dir, corpus, normalized)

        self.assertEqual(rendered["artifacts"][0]["renderMethod"], "field_fill")
        self.assertFalse(rendered["artifacts"][0]["reviewRequired"])
|
||||
|
||||
def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
case_dir = Path(temp_dir) / "case"
|
||||
@@ -32,10 +73,16 @@ class RendererTests(unittest.TestCase):
|
||||
"requiredForms": ["f1040"],
|
||||
"taxpayer": {"fullName": "Jane Doe"},
|
||||
"filingStatus": "single",
|
||||
"income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0},
|
||||
"deductions": {"standardDeduction": 15750.0},
|
||||
"taxes": {"totalTax": 3883.5},
|
||||
"income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0},
|
||||
"deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0},
|
||||
"adjustments": {"hsaContribution": 0.0},
|
||||
"credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0},
|
||||
"taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0},
|
||||
"payments": {"federalWithholding": 6000.0},
|
||||
"business": {"qualifiedBusinessIncome": 0.0},
|
||||
"basis": {"traditionalIraBasis": 0.0},
|
||||
"depreciation": {"depreciationExpense": 0.0},
|
||||
"assetSales": {"section1231GainLoss": 0.0},
|
||||
"totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
|
||||
}
|
||||
|
||||
|
||||
@@ -37,11 +37,11 @@ class ReturnModelTests(unittest.TestCase):
|
||||
|
||||
self.assertEqual(
|
||||
resolve_required_forms(normalized),
|
||||
["f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1"],
|
||||
["f1040", "f1040sb", "f1040sc", "f1040sse", "f1040s1", "f8995"],
|
||||
)
|
||||
|
||||
def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None:
|
||||
self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5)
|
||||
self.assertEqual(tax_on_ordinary_income(34350.0, "single", 2025), 3883.5)
|
||||
|
||||
def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None:
|
||||
self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0)
|
||||
@@ -50,6 +50,53 @@ class ReturnModelTests(unittest.TestCase):
|
||||
with self.assertRaisesRegex(ValueError, "Unsupported tax year"):
|
||||
normalize_case_facts({"filingStatus": "single"}, 2023)
|
||||
|
||||
def test_normalize_case_facts_preserves_provenance_and_expands_form_resolution(self) -> None:
    """Normalize a fact set touching many schedules and check the outcome.

    Verifies that spouse and dependent names are carried into the normalized
    return, that the wage source metadata appears under the ``income.wages``
    provenance key, and that every expected form slug is listed in
    ``requiredForms``.
    """
    raw_facts = {
        "taxpayer.fullName": "Jane Doe",
        "spouse.fullName": "John Doe",
        "dependents": [{"fullName": "Kid Doe", "ssnLast4": "4321"}],
        "filingStatus": "married_filing_jointly",
        "wages": 50000,
        "taxableInterest": 2001,
        "capitalGainLoss": 400,
        "rentalIncome": 1200,
        "itemizedDeductions": 40000,
        "hsaContribution": 1000,
        "educationCredit": 500,
        "foreignTaxCredit": 250,
        "qualifiedBusinessIncome": 12000,
        "traditionalIraBasis": 6000,
        "additionalMedicareTax": 100,
        "netInvestmentIncomeTax": 200,
        "alternativeMinimumTax": 300,
        "additionalTaxPenalty": 50,
        "energyCredit": 600,
        "_factMetadata": {
            "wages": {"sources": [{"sourceType": "document_extract", "documentName": "w2.txt"}]},
        },
    }
    normalized = normalize_case_facts(raw_facts, 2025)

    self.assertEqual(normalized["spouse"]["fullName"], "John Doe")
    self.assertEqual(normalized["dependents"][0]["fullName"], "Kid Doe")
    wage_provenance = normalized["provenance"]["income.wages"]
    self.assertEqual(wage_provenance["sources"][0]["documentName"], "w2.txt")

    # Checked in a fixed order so the first missing slug is the one reported.
    expected_forms = (
        "f1040sa",
        "f1040sd",
        "f8949",
        "f1040se",
        "f8889",
        "f8863",
        "f1116",
        "f8995",
        "f8606",
        "f8959",
        "f8960",
        "f6251",
        "f5329",
        "f5695",
    )
    for form_slug in expected_forms:
        self.assertIn(form_slug, normalized["requiredForms"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -64,6 +64,44 @@ class ReviewEngineTests(unittest.TestCase):
|
||||
self.assertIn("adjusted gross income", review["findings"][0]["title"].lower())
|
||||
self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"]))
|
||||
|
||||
def test_review_detects_reporting_omissions_from_source_facts(self) -> None:
    """Review should flag interest present in extracted facts but zeroed on the return.

    The prepared case's normalized return is rewritten with zero taxable
    interest and an adjusted gross income of 50000.0, while ``facts.json`` is
    given a document-sourced 1750.0 interest fact. The review must then
    contain a finding whose title mentions "likely omitted taxable interest".
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        case_dir, corpus = self.build_prepared_case(temp_dir)

        # Drop the interest from the stored return.
        return_file = case_dir / "return" / "normalized-return.json"
        stored_return = json.loads(return_file.read_text())
        stored_return["income"]["taxableInterest"] = 0.0
        stored_return["totals"]["adjustedGrossIncome"] = 50000.0
        return_file.write_text(json.dumps(stored_return, indent=2))

        # Record the same interest as an extracted, document-backed fact.
        facts_file = case_dir / "extracted" / "facts.json"
        stored_facts = json.loads(facts_file.read_text())
        stored_facts["facts"]["taxableInterest"] = {
            "value": 1750.0,
            "sources": [{"sourceType": "document_extract", "sourceName": "1099-int.txt"}],
        }
        facts_file.write_text(json.dumps(stored_facts, indent=2))

        review = ReviewEngine(corpus=corpus).review_case(case_dir)

        omission_reported = any(
            "likely omitted taxable interest" in finding["title"].lower()
            for finding in review["findings"]
        )
        self.assertTrue(omission_reported)
|
||||
|
||||
def test_review_flags_high_complexity_positions_for_specialist_follow_up(self) -> None:
    """Review should raise a high-complexity finding when form f6251 is on the return.

    The prepared case's normalized return gets ``f6251`` appended to its
    required forms and an alternative minimum tax of 300.0. The review must
    then contain a finding whose title mentions "high-complexity tax position".
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        case_dir, corpus = self.build_prepared_case(temp_dir)

        return_file = case_dir / "return" / "normalized-return.json"
        stored_return = json.loads(return_file.read_text())
        stored_return["requiredForms"].append("f6251")
        stored_return["taxes"]["alternativeMinimumTax"] = 300.0
        return_file.write_text(json.dumps(stored_return, indent=2))

        review = ReviewEngine(corpus=corpus).review_case(case_dir)

        complexity_reported = any(
            "high-complexity tax position" in finding["title"].lower()
            for finding in review["findings"]
        )
        self.assertTrue(complexity_reported)
|
||||
|
||||
def test_review_renderers_produce_summary_and_memo(self) -> None:
|
||||
review = {
|
||||
"status": "reviewed",
|
||||
|
||||
@@ -12,6 +12,7 @@ from us_cpa.sources import (
|
||||
authority_rank_for,
|
||||
bootstrap_irs_catalog,
|
||||
build_irs_prior_pdf_url,
|
||||
build_primary_law_authorities,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,6 +43,17 @@ class SourceCatalogTests(unittest.TestCase):
|
||||
self.assertGreaterEqual(len(catalog), 5)
|
||||
self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf")
|
||||
self.assertTrue(any(item.slug == "i1040gi" for item in catalog))
|
||||
self.assertTrue(any(item.slug == "f1040sse" for item in catalog))
|
||||
|
||||
def test_primary_law_authorities_build_official_urls(self) -> None:
    """A question citing a Code section and a regulation should yield both authority kinds.

    The returned authorities must include the ``internal_revenue_code`` and
    ``treasury_regulation`` source classes, and at least one URL on each of
    uscode.house.gov and ecfr.gov.
    """
    question = "Does section 469 apply and what does Treas. Reg. 1.469-1 say?"
    authorities = build_primary_law_authorities(question)

    for source_class in ("internal_revenue_code", "treasury_regulation"):
        self.assertTrue(
            any(entry["sourceClass"] == source_class for entry in authorities)
        )

    for official_host in ("uscode.house.gov", "ecfr.gov"):
        self.assertTrue(
            any(official_host in entry["url"] for entry in authorities)
        )
|
||||
|
||||
|
||||
class TaxYearCorpusTests(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user