From 1be03171928cf8ad857e9f400c97bdc28588cde2 Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:11:24 -0500 Subject: [PATCH] test: add us-cpa module coverage and citations --- docs/us-cpa.md | 4 ++ .../us-cpa/src/us_cpa/document_extractors.py | 20 ++++++ skills/us-cpa/src/us_cpa/tax_years.py | 8 +++ .../us-cpa/tests/test_document_extractors.py | 66 +++++++++++++++++++ skills/us-cpa/tests/test_tax_years.py | 25 +++++++ 5 files changed, 123 insertions(+) create mode 100644 skills/us-cpa/tests/test_document_extractors.py create mode 100644 skills/us-cpa/tests/test_tax_years.py diff --git a/docs/us-cpa.md b/docs/us-cpa.md index e93ada0..1a6cdcb 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -73,6 +73,8 @@ Current bundled tax-year computation data: Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added. +Adding a new supported year is a deliberate data-table change in `tax_years.py`, not an automatic runtime discovery step. That is intentional for tax-engine correctness. + ## Interaction Model - `question` @@ -173,6 +175,8 @@ Current review rule: - field-filled artifacts are not automatically flagged for review - overlay-rendered artifacts are marked `reviewRequired: true` +Overlay coordinates are currently a fallback heuristic and are not treated as line-perfect authoritative field maps. Overlay output must be visually reviewed before any filing/export handoff. + ## Preparation Workflow Current `prepare` implementation: diff --git a/skills/us-cpa/src/us_cpa/document_extractors.py b/skills/us-cpa/src/us_cpa/document_extractors.py index e5034db..1afff11 100644 --- a/skills/us-cpa/src/us_cpa/document_extractors.py +++ b/skills/us-cpa/src/us_cpa/document_extractors.py @@ -36,10 +36,30 @@ def _facts_from_text(text: str) -> dict[str, Any]: extracted["wages"] = _parse_number(match.group(1)) if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I): extracted["federalWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 16 State wages, tips, etc\.\s+" + _NUMBER, text, re.I): + extracted["stateWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 17 State income tax\s+" + _NUMBER, text, re.I): + extracted["stateWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Social security wages\s+" + _NUMBER, text, re.I): + extracted["socialSecurityWages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 5 Medicare wages and tips\s+" + _NUMBER, text, re.I): + extracted["medicareWages"] = _parse_number(match.group(1)) if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I): extracted["taxableInterest"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1a Total ordinary dividends\s+" + _NUMBER, text, re.I): + extracted["ordinaryDividends"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Gross distribution\s+" + _NUMBER, text, re.I): + extracted["retirementDistribution"] = _parse_number(match.group(1)) + if match := re.search(r"Box 3 Other income\s+" + _NUMBER, text, re.I): + extracted["otherIncome"] = _parse_number(match.group(1)) if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I): extracted["businessIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Adjusted gross income\s+" + _NUMBER, text, re.I): + extracted["priorYear.adjustedGrossIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Taxable income\s+" + _NUMBER, text, re.I): + extracted["priorYear.taxableIncome"] = _parse_number(match.group(1)) + if match := re.search(r"Refund\s+" + _NUMBER, text, re.I): + extracted["priorYear.refund"] = _parse_number(match.group(1)) return extracted diff --git a/skills/us-cpa/src/us_cpa/tax_years.py b/skills/us-cpa/src/us_cpa/tax_years.py index aef9b4f..d5ba36f 100644 --- a/skills/us-cpa/src/us_cpa/tax_years.py +++ b/skills/us-cpa/src/us_cpa/tax_years.py @@ -39,6 +39,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = { (float("inf"), 0.37), ], }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.", + }, }, 2025: { "standardDeduction": { @@ -75,6 +79,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = { (float("inf"), 0.37), ], }, + "sourceCitations": { + "standardDeduction": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + "ordinaryIncomeBrackets": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.", + }, }, } diff --git a/skills/us-cpa/tests/test_document_extractors.py b/skills/us-cpa/tests/test_document_extractors.py new file mode 100644 index 0000000..54f1000 --- /dev/null +++ b/skills/us-cpa/tests/test_document_extractors.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from us_cpa.document_extractors import extract_document_facts + + +class DocumentExtractorTests(unittest.TestCase): + def test_extracts_common_w2_fields(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "w2.txt" + path.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + "Box 16 State wages, tips, etc. 50000\n" + "Box 17 State income tax 1200\n" + "Box 3 Social security wages 50000\n" + "Box 5 Medicare wages and tips 50000\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["taxpayer.fullName"], "Jane Doe") + self.assertEqual(extracted["wages"], 50000.0) + self.assertEqual(extracted["federalWithholding"], 6000.0) + self.assertEqual(extracted["stateWages"], 50000.0) + self.assertEqual(extracted["stateWithholding"], 1200.0) + self.assertEqual(extracted["socialSecurityWages"], 50000.0) + self.assertEqual(extracted["medicareWages"], 50000.0) + + def test_extracts_common_1099_patterns(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + div_path = Path(temp_dir) / "1099-div.txt" + div_path.write_text("Form 1099-DIV\nRecipient: Jane Doe\nBox 1a Total ordinary dividends 250\n") + ret_path = Path(temp_dir) / "1099-r.txt" + ret_path.write_text("Form 1099-R\nRecipient: Jane Doe\nBox 1 Gross distribution 10000\n") + misc_path = Path(temp_dir) / "1099-misc.txt" + misc_path.write_text("Form 1099-MISC\nRecipient: Jane Doe\nBox 3 Other income 900\n") + + self.assertEqual(extract_document_facts(div_path)["ordinaryDividends"], 250.0) + self.assertEqual(extract_document_facts(ret_path)["retirementDistribution"], 10000.0) + self.assertEqual(extract_document_facts(misc_path)["otherIncome"], 900.0) + + def test_extracts_prior_year_return_summary_values(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + path = Path(temp_dir) / "prior-return.txt" + path.write_text( + "2024 Form 1040 Summary\n" + "Adjusted gross income 72100\n" + "Taxable income 49800\n" + "Refund 2100\n" + ) + + extracted = extract_document_facts(path) + + self.assertEqual(extracted["priorYear.adjustedGrossIncome"], 72100.0) + self.assertEqual(extracted["priorYear.taxableIncome"], 49800.0) + self.assertEqual(extracted["priorYear.refund"], 2100.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/skills/us-cpa/tests/test_tax_years.py b/skills/us-cpa/tests/test_tax_years.py new file mode 100644 index 0000000..5bc6621 --- /dev/null +++ b/skills/us-cpa/tests/test_tax_years.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import unittest + +from us_cpa.tax_years import supported_tax_years, tax_year_rules + + +class TaxYearRuleTests(unittest.TestCase): + def test_supported_years_are_listed(self) -> None: + self.assertEqual(supported_tax_years(), [2024, 2025]) + + def test_tax_year_rules_include_source_citations(self) -> None: + rules = tax_year_rules(2025) + + self.assertIn("sourceCitations", rules) + self.assertIn("standardDeduction", rules["sourceCitations"]) + self.assertIn("ordinaryIncomeBrackets", rules["sourceCitations"]) + + def test_unsupported_tax_year_raises_clear_error(self) -> None: + with self.assertRaisesRegex(ValueError, "Unsupported tax year 2023"): + tax_year_rules(2023) + + +if __name__ == "__main__": + unittest.main()