test: add us-cpa module coverage and citations
This commit is contained in:
@@ -73,6 +73,8 @@ Current bundled tax-year computation data:
|
|||||||
|
|
||||||
Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added.
|
Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added.
|
||||||
|
|
||||||
|
Adding a new supported year is a deliberate data-table change in `tax_years.py`, not an automatic runtime discovery step. That is intentional for tax-engine correctness.
|
||||||
|
|
||||||
## Interaction Model
|
## Interaction Model
|
||||||
|
|
||||||
- `question`
|
- `question`
|
||||||
@@ -173,6 +175,8 @@ Current review rule:
|
|||||||
- field-filled artifacts are not automatically flagged for review
|
- field-filled artifacts are not automatically flagged for review
|
||||||
- overlay-rendered artifacts are marked `reviewRequired: true`
|
- overlay-rendered artifacts are marked `reviewRequired: true`
|
||||||
|
|
||||||
|
Overlay coordinates are currently a fallback heuristic and are not treated as line-perfect authoritative field maps. Overlay output must be visually reviewed before any filing/export handoff.
|
||||||
|
|
||||||
## Preparation Workflow
|
## Preparation Workflow
|
||||||
|
|
||||||
Current `prepare` implementation:
|
Current `prepare` implementation:
|
||||||
|
|||||||
@@ -36,10 +36,30 @@ def _facts_from_text(text: str) -> dict[str, Any]:
|
|||||||
extracted["wages"] = _parse_number(match.group(1))
|
extracted["wages"] = _parse_number(match.group(1))
|
||||||
if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I):
|
if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I):
|
||||||
extracted["federalWithholding"] = _parse_number(match.group(1))
|
extracted["federalWithholding"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 16 State wages, tips, etc\.\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["stateWages"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 17 State income tax\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["stateWithholding"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 3 Social security wages\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["socialSecurityWages"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 5 Medicare wages and tips\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["medicareWages"] = _parse_number(match.group(1))
|
||||||
if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I):
|
if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I):
|
||||||
extracted["taxableInterest"] = _parse_number(match.group(1))
|
extracted["taxableInterest"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 1a Total ordinary dividends\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["ordinaryDividends"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 1 Gross distribution\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["retirementDistribution"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Box 3 Other income\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["otherIncome"] = _parse_number(match.group(1))
|
||||||
if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I):
|
if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I):
|
||||||
extracted["businessIncome"] = _parse_number(match.group(1))
|
extracted["businessIncome"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Adjusted gross income\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["priorYear.adjustedGrossIncome"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Taxable income\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["priorYear.taxableIncome"] = _parse_number(match.group(1))
|
||||||
|
if match := re.search(r"Refund\s+" + _NUMBER, text, re.I):
|
||||||
|
extracted["priorYear.refund"] = _parse_number(match.group(1))
|
||||||
|
|
||||||
return extracted
|
return extracted
|
||||||
|
|
||||||
|
|||||||
@@ -39,6 +39,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = {
|
|||||||
(float("inf"), 0.37),
|
(float("inf"), 0.37),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"sourceCitations": {
|
||||||
|
"standardDeduction": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.",
|
||||||
|
"ordinaryIncomeBrackets": "IRS Rev. Proc. 2023-34, section 3.01; 2024 Form 1040 instructions.",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
2025: {
|
2025: {
|
||||||
"standardDeduction": {
|
"standardDeduction": {
|
||||||
@@ -75,6 +79,10 @@ TAX_YEAR_DATA: dict[int, dict[str, Any]] = {
|
|||||||
(float("inf"), 0.37),
|
(float("inf"), 0.37),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
"sourceCitations": {
|
||||||
|
"standardDeduction": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.",
|
||||||
|
"ordinaryIncomeBrackets": "IRS Rev. Proc. 2024-40, section 3.01; 2025 Form 1040 instructions.",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
66
skills/us-cpa/tests/test_document_extractors.py
Normal file
66
skills/us-cpa/tests/test_document_extractors.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from us_cpa.document_extractors import extract_document_facts
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentExtractorTests(unittest.TestCase):
    """Run ``extract_document_facts`` against small plain-text fixture files.

    Each test writes its fixture text into a throwaway directory, runs the
    extractor on the resulting path, and compares individual fact keys with
    the numbers that appear in the fixture.
    """

    # W-2 fixture: the employee name plus six labelled boxes.
    def test_extracts_common_w2_fields(self) -> None:
        w2_text = (
            "Form W-2 Wage and Tax Statement\n"
            "Employee: Jane Doe\n"
            "Box 1 Wages, tips, other compensation 50000\n"
            "Box 2 Federal income tax withheld 6000\n"
            "Box 16 State wages, tips, etc. 50000\n"
            "Box 17 State income tax 1200\n"
            "Box 3 Social security wages 50000\n"
            "Box 5 Medicare wages and tips 50000\n"
        )
        expected_facts = {
            "taxpayer.fullName": "Jane Doe",
            "wages": 50000.0,
            "federalWithholding": 6000.0,
            "stateWages": 50000.0,
            "stateWithholding": 1200.0,
            "socialSecurityWages": 50000.0,
            "medicareWages": 50000.0,
        }

        with tempfile.TemporaryDirectory() as workdir:
            w2_file = Path(workdir) / "w2.txt"
            w2_file.write_text(w2_text)
            facts = extract_document_facts(w2_file)

        # Checked in the same order as the fixture's expectations are listed.
        for fact_key, fact_value in expected_facts.items():
            self.assertEqual(facts[fact_key], fact_value)

    # 1099-DIV / 1099-R / 1099-MISC fixtures: one labelled box per form.
    def test_extracts_common_1099_patterns(self) -> None:
        # (file name, file text, fact key, expected value)
        cases = [
            (
                "1099-div.txt",
                "Form 1099-DIV\nRecipient: Jane Doe\nBox 1a Total ordinary dividends 250\n",
                "ordinaryDividends",
                250.0,
            ),
            (
                "1099-r.txt",
                "Form 1099-R\nRecipient: Jane Doe\nBox 1 Gross distribution 10000\n",
                "retirementDistribution",
                10000.0,
            ),
            (
                "1099-misc.txt",
                "Form 1099-MISC\nRecipient: Jane Doe\nBox 3 Other income 900\n",
                "otherIncome",
                900.0,
            ),
        ]

        with tempfile.TemporaryDirectory() as workdir:
            # Write every fixture first, then extract and compare each one.
            written = []
            for file_name, file_text, fact_key, fact_value in cases:
                form_file = Path(workdir) / file_name
                form_file.write_text(file_text)
                written.append((form_file, fact_key, fact_value))

            for form_file, fact_key, fact_value in written:
                self.assertEqual(extract_document_facts(form_file)[fact_key], fact_value)

    # Prior-year 1040 summary fixture: AGI, taxable income and refund lines.
    def test_extracts_prior_year_return_summary_values(self) -> None:
        summary_text = (
            "2024 Form 1040 Summary\n"
            "Adjusted gross income 72100\n"
            "Taxable income 49800\n"
            "Refund 2100\n"
        )
        expected_facts = {
            "priorYear.adjustedGrossIncome": 72100.0,
            "priorYear.taxableIncome": 49800.0,
            "priorYear.refund": 2100.0,
        }

        with tempfile.TemporaryDirectory() as workdir:
            summary_file = Path(workdir) / "prior-return.txt"
            summary_file.write_text(summary_text)
            facts = extract_document_facts(summary_file)

        for fact_key, fact_value in expected_facts.items():
            self.assertEqual(facts[fact_key], fact_value)
|
||||||
|
|
||||||
|
|
||||||
|
# Run this module's unittest cases when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||||
25
skills/us-cpa/tests/test_tax_years.py
Normal file
25
skills/us-cpa/tests/test_tax_years.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from us_cpa.tax_years import supported_tax_years, tax_year_rules
|
||||||
|
|
||||||
|
|
||||||
|
class TaxYearRuleTests(unittest.TestCase):
    """Check the bundled tax-year rule tables exposed by ``us_cpa.tax_years``."""

    # The supported-year listing must be exactly 2024 and 2025, in that order.
    def test_supported_years_are_listed(self) -> None:
        expected_years = [2024, 2025]
        self.assertEqual(supported_tax_years(), expected_years)

    # The 2025 rules must carry a citation for each of the two rate tables.
    def test_tax_year_rules_include_source_citations(self) -> None:
        rules_2025 = tax_year_rules(2025)

        self.assertIn("sourceCitations", rules_2025)
        citations = rules_2025["sourceCitations"]
        for cited_table in ("standardDeduction", "ordinaryIncomeBrackets"):
            self.assertIn(cited_table, citations)

    # A year without bundled data must raise ValueError naming that year.
    def test_unsupported_tax_year_raises_clear_error(self) -> None:
        self.assertRaisesRegex(
            ValueError,
            "Unsupported tax year 2023",
            tax_year_rules,
            2023,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Run this module's unittest cases when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||||
Reference in New Issue
Block a user