From fb39fe76cb8b8e8c0eb455dbba0c933acb81b82f Mon Sep 17 00:00:00 2001 From: Stefano Fiorini Date: Sun, 15 Mar 2026 03:01:16 -0500 Subject: [PATCH] fix: expand us-cpa extraction review and rendering --- docs/us-cpa.md | 57 ++++++-- skills/us-cpa/SKILL.md | 12 +- skills/us-cpa/src/us_cpa/cases.py | 85 +++++++++--- .../us-cpa/src/us_cpa/document_extractors.py | 54 ++++++++ skills/us-cpa/src/us_cpa/prepare.py | 6 +- skills/us-cpa/src/us_cpa/questions.py | 20 ++- skills/us-cpa/src/us_cpa/renderers.py | 38 +++++- skills/us-cpa/src/us_cpa/returns.py | 122 +++++++++++++++++- skills/us-cpa/src/us_cpa/review.py | 51 ++++++++ skills/us-cpa/src/us_cpa/sources.py | 64 ++++++++- .../fixtures/documents/interest-1099.txt | 3 + .../tests/fixtures/documents/simple-w2.txt | 4 + .../simple-w2-interest-2025-normalized.json | 16 +++ skills/us-cpa/tests/test_cases.py | 33 +++++ skills/us-cpa/tests/test_questions.py | 30 +++++ skills/us-cpa/tests/test_renderers.py | 53 +++++++- skills/us-cpa/tests/test_returns.py | 51 +++++++- skills/us-cpa/tests/test_review.py | 38 ++++++ skills/us-cpa/tests/test_sources.py | 12 ++ 19 files changed, 693 insertions(+), 56 deletions(-) create mode 100644 skills/us-cpa/src/us_cpa/document_extractors.py create mode 100644 skills/us-cpa/tests/fixtures/documents/interest-1099.txt create mode 100644 skills/us-cpa/tests/fixtures/documents/simple-w2.txt create mode 100644 skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json diff --git a/docs/us-cpa.md b/docs/us-cpa.md index 66dc9eb..e93ada0 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -24,11 +24,12 @@ Current implementation now includes: - deterministic cache layout under `~/.cache/us-cpa` by default - `fetch-year` download flow for the bootstrap IRS corpus - source manifest with URL, hash, authority rank, and local path traceability -- authority ranking hooks for IRS materials and future primary-law escalation -- case-folder intake and conflict-stop handling +- primary-law URL building for IRC and Treasury regulation escalation +- case-folder intake, document registration, and machine-usable fact extraction from JSON, text, and PDF inputs - question workflow with conversation and memo output -- prepare workflow for the current supported 1040 subset +- prepare workflow for the current supported multi-form 1040 package - review workflow with findings-first output +- fillable-PDF first rendering with overlay fallback - e-file-ready draft export payload generation ## CLI Surface @@ -61,10 +62,17 @@ US_CPA_CACHE_DIR=/tmp/us-cpa-cache skills/us-cpa/scripts/us-cpa fetch-year --tax Current `fetch-year` bootstrap corpus for tax year `2025` is verified against live IRS `irs-prior` PDFs for: - Form 1040 -- Schedules 1, 2, 3, A, B, C, D, SE, and 8812 -- Form 8949 +- Schedules 1, 2, 3, A, B, C, D, E, SE, and 8812 +- Forms 8949, 4562, 4797, 6251, 8606, 8863, 8889, 8959, 8960, 8995, 8995-A, 5329, 5695, and 1116 - General Form 1040 instructions and selected schedule/form instructions +Current bundled tax-year computation data: + +- 2024 +- 2025 + +Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added. + ## Interaction Model - `question` @@ -109,7 +117,8 @@ Behavior: - creates the full case directory layout when `--create-case` is used - copies input documents into `input/` -- stores normalized user-statement facts in `extracted/facts.json` +- stores normalized facts with source metadata in `extracted/facts.json` +- extracts machine-usable facts from JSON/text/PDF documents where supported - appends document registry entries to `case-manifest.json` - stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact @@ -142,21 +151,26 @@ Current implemented topics: - standard deduction - Schedule C / sole proprietorship reporting trigger +- Schedule D / capital gains reporting trigger +- Schedule E / rental income reporting trigger ## Form Rendering Current rendering path: - official IRS PDFs from the cached tax-year corpus -- overlay rendering onto those official PDFs using `reportlab` + `pypdf` +- deterministic field-fill when usable AcroForm fields are present +- overlay rendering onto those official PDFs using `reportlab` + `pypdf` as fallback - artifact manifest written to `output/artifacts.json` Current rendered form support: -- Form 1040 overlay artifact generation +- field-fill support for known mapped fillable forms +- overlay generation for the current required-form set resolved by the return model Current review rule: +- field-filled artifacts are not automatically flagged for review - overlay-rendered artifacts are marked `reviewRequired: true` ## Preparation Workflow @@ -164,9 +178,10 @@ Current review rule: Current `prepare` implementation: - loads case facts from `extracted/facts.json` -- normalizes them into the current supported 2025 federal return model -- computes the current supported 1040 subset -- resolves required forms for the current supported subset +- normalizes them into the current supported federal return model +- preserves source provenance for normalized values +- computes the current supported 1040 package +- resolves required forms across the current supported subset - writes: - `return/normalized-return.json` - `output/artifacts.json` @@ -175,10 +190,27 @@ Current `prepare` implementation: Current supported calculation inputs: - `filingStatus` +- `spouse.fullName` +- `dependents` - `wages` - `taxableInterest` - `businessIncome` +- `capitalGainLoss` +- `rentalIncome` - `federalWithholding` +- `itemizedDeductions` +- `hsaContribution` +- `educationCredit` +- `foreignTaxCredit` +- `qualifiedBusinessIncome` +- `traditionalIraBasis` +- `additionalMedicareTax` +- `netInvestmentIncomeTax` +- `alternativeMinimumTax` +- `additionalTaxPenalty` +- `energyCredit` +- `depreciationExpense` +- `section1231GainLoss` ## E-file-ready Export @@ -200,7 +232,10 @@ Current `review` implementation: - recomputes the return from current case facts - compares stored normalized return values to recomputed values +- flags source-fact mismatches for key income fields +- flags likely omitted income when document-extracted facts support an amount the stored return omits - checks whether required rendered artifacts are present +- flags high-complexity forms for specialist follow-up - flags overlay-rendered artifacts as requiring human review - sorts findings by severity diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index a8d73f5..010a6e4 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -46,7 +46,8 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - IRS materials first; escalate to primary law only when needed - stop on conflicting facts and ask the user to resolve the issue before continuing - official IRS PDFs are the target compiled-form artifacts -- overlay-rendered forms must be flagged for human review +- deterministic field-fill is the preferred render path when the official PDF exposes usable fields +- overlay-rendered forms are the fallback and must be flagged for human review ## Output @@ -55,10 +56,11 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - `question` supports `--style conversation|memo` - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation -- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict -- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review -- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory +- `extract-docs` creates or opens a case, registers documents, stores facts, extracts machine-usable facts from JSON/text/PDF sources where possible, and stops with a structured issue if facts conflict +- `question` currently has explicit IRS-first answers for standard deduction, Schedule C, Schedule D, and Schedule E questions; other questions escalate to primary-law research with official IRC/regulation URLs +- rendered form artifacts prefer fillable-field output when possible and otherwise fall back to overlay output +- `prepare` computes the current supported federal 1040 package, preserves fact provenance in the normalized return, and writes normalized return/artifact/report files into the case directory - `export-efile-ready` writes a draft transmission-ready payload without transmitting anything -- `review` recomputes the return from case facts, checks artifacts, and returns findings-first output in conversation or memo style +- `review` recomputes the return from case facts, checks artifacts, flags source-fact mismatches and likely omissions, and returns findings-first output in conversation or memo style For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`. diff --git a/skills/us-cpa/src/us_cpa/cases.py b/skills/us-cpa/src/us_cpa/cases.py index 9f8827d..68cc90c 100644 --- a/skills/us-cpa/src/us_cpa/cases.py +++ b/skills/us-cpa/src/us_cpa/cases.py @@ -8,6 +8,8 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any +from us_cpa.document_extractors import extract_document_facts + CASE_SUBDIRECTORIES = ( "input", @@ -95,6 +97,48 @@ class CaseManager: current["issues"].append(issue) self.issues_path.write_text(json.dumps(current, indent=2)) + def _record_fact( + self, + facts_payload: dict[str, Any], + *, + field: str, + value: Any, + source_type: str, + source_name: str, + tax_year: int, + ) -> None: + existing = facts_payload["facts"].get(field) + if existing and existing["value"] != value: + issue = { + "status": "needs_resolution", + "issueType": "fact_conflict", + "field": field, + "existingValue": existing["value"], + "newValue": value, + "message": f"Conflicting values for {field}. Resolve before continuing.", + "createdAt": _timestamp(), + "taxYear": tax_year, + } + self._write_issue(issue) + raise CaseConflictError(issue) + + captured_at = _timestamp() + source_entry = { + "sourceType": source_type, + "sourceName": source_name, + "capturedAt": captured_at, + } + if existing: + existing["sources"].append(source_entry) + return + + facts_payload["facts"][field] = { + "value": value, + "sourceType": source_type, + "capturedAt": captured_at, + "sources": [source_entry], + } + def intake( self, *, @@ -124,27 +168,28 @@ class CaseManager: registered_documents.append(document_entry) facts_payload = self._load_facts() - for field, value in user_facts.items(): - existing = facts_payload["facts"].get(field) - if existing and existing["value"] != value: - issue = { - "status": "needs_resolution", - "issueType": "fact_conflict", - "field": field, - "existingValue": existing["value"], - "newValue": value, - "message": f"Conflicting values for {field}. Resolve before continuing.", - "createdAt": _timestamp(), - "taxYear": tax_year, - } - self._write_issue(issue) - raise CaseConflictError(issue) + for document_entry in registered_documents: + extracted = extract_document_facts(Path(document_entry["storedPath"])) + document_entry["extractedFacts"] = extracted + for field, value in extracted.items(): + self._record_fact( + facts_payload, + field=field, + value=value, + source_type="document_extract", + source_name=document_entry["name"], + tax_year=tax_year, + ) - facts_payload["facts"][field] = { - "value": value, - "sourceType": "user_statement", - "capturedAt": _timestamp(), - } + for field, value in user_facts.items(): + self._record_fact( + facts_payload, + field=field, + value=value, + source_type="user_statement", + source_name="interactive-intake", + tax_year=tax_year, + ) self._write_manifest(manifest) self._write_facts(facts_payload) diff --git a/skills/us-cpa/src/us_cpa/document_extractors.py b/skills/us-cpa/src/us_cpa/document_extractors.py new file mode 100644 index 0000000..e5034db --- /dev/null +++ b/skills/us-cpa/src/us_cpa/document_extractors.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Any + +from pypdf import PdfReader + + +_NUMBER = r"(-?\d+(?:,\d{3})*(?:\.\d+)?)" + + +def _parse_number(raw: str) -> float: + return float(raw.replace(",", "")) + + +def _extract_text(path: Path) -> str: + suffix = path.suffix.lower() + if suffix in {".txt", ".md"}: + return path.read_text() + if suffix == ".pdf": + reader = PdfReader(str(path)) + return "\n".join((page.extract_text() or "") for page in reader.pages) + return "" + + +def _facts_from_text(text: str) -> dict[str, Any]: + extracted: dict[str, Any] = {} + + if match := re.search(r"Employee:\s*(.+)", text): + extracted["taxpayer.fullName"] = match.group(1).strip() + if match := re.search(r"Recipient:\s*(.+)", text): + extracted.setdefault("taxpayer.fullName", match.group(1).strip()) + if match := re.search(r"Box 1 Wages, tips, other compensation\s+" + _NUMBER, text, re.I): + extracted["wages"] = _parse_number(match.group(1)) + if match := re.search(r"Box 2 Federal income tax withheld\s+" + _NUMBER, text, re.I): + extracted["federalWithholding"] = _parse_number(match.group(1)) + if match := re.search(r"Box 1 Interest Income\s+" + _NUMBER, text, re.I): + extracted["taxableInterest"] = _parse_number(match.group(1)) + if match := re.search(r"Net profit(?: or loss)?\s+" + _NUMBER, text, re.I): + extracted["businessIncome"] = _parse_number(match.group(1)) + + return extracted + + +def extract_document_facts(path: Path) -> dict[str, Any]: + suffix = path.suffix.lower() + if suffix == ".json": + payload = json.loads(path.read_text()) + if isinstance(payload, dict): + return payload + return {} + return _facts_from_text(_extract_text(path)) diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py index 72ef8cd..1338b83 100644 --- a/skills/us-cpa/src/us_cpa/prepare.py +++ b/skills/us-cpa/src/us_cpa/prepare.py @@ -13,7 +13,11 @@ from us_cpa.sources import TaxYearCorpus def _load_case_facts(case_dir: Path) -> dict[str, Any]: facts_path = case_dir / "extracted" / "facts.json" payload = json.loads(facts_path.read_text()) - return {key: value["value"] for key, value in payload["facts"].items()} + facts = {key: value["value"] for key, value in payload["facts"].items()} + facts["_factMetadata"] = { + key: {"sources": value.get("sources", [])} for key, value in payload["facts"].items() + } + return facts diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py index 1ee9554..3ee1502 100644 --- a/skills/us-cpa/src/us_cpa/questions.py +++ b/skills/us-cpa/src/us_cpa/questions.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Any -from us_cpa.sources import TaxYearCorpus +from us_cpa.sources import TaxYearCorpus, build_primary_law_authorities TOPIC_RULES = [ @@ -29,6 +29,22 @@ TOPIC_RULES = [ "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.", "confidence": "medium", }, + { + "issue": "schedule_d_required", + "keywords": ("schedule d", "capital gains"), + "authority_slugs": ("f1040sd", "i1040sd", "f8949", "i8949"), + "answer": "Schedule D is generally required when a taxpayer reports capital gains or losses, often alongside Form 8949.", + "summary": "Capital gains and losses generally flow through Schedule D, with Form 8949 supporting detail when required.", + "confidence": "medium", + }, + { + "issue": "schedule_e_required", + "keywords": ("schedule e", "rental income"), + "authority_slugs": ("f1040se", "i1040se"), + "answer": "Schedule E is generally required when a taxpayer reports rental real-estate income or expenses.", + "summary": "Rental income and expenses generally belong on Schedule E.", + "confidence": "medium", + }, ] @@ -117,7 +133,7 @@ class QuestionEngine: "missingFacts": [ "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently." ], - "authorities": [], + "authorities": build_primary_law_authorities(question), "conclusion": { "answer": "Insufficient IRS-form and instruction support for a confident answer.", "summary": "This question needs primary-law analysis before a reliable answer can be given.", diff --git a/skills/us-cpa/src/us_cpa/renderers.py b/skills/us-cpa/src/us_cpa/renderers.py index 440d8de..f41c16a 100644 --- a/skills/us-cpa/src/us_cpa/renderers.py +++ b/skills/us-cpa/src/us_cpa/renderers.py @@ -37,6 +37,32 @@ OVERLAY_FIELDS = { } +FIELD_FILL_VALUES = { + "f1040": lambda data: { + "taxpayer_full_name": data["taxpayer"]["fullName"], + "filing_status": data["filingStatus"], + "wages": f"{data['income']['wages']:.2f}", + "taxable_interest": f"{data['income']['taxableInterest']:.2f}", + } +} + + +def _field_fill_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> bool: + reader = PdfReader(str(template_path)) + fields = reader.get_fields() or {} + values = FIELD_FILL_VALUES.get(form_code, lambda _: {})(normalized) + matched = {key: value for key, value in values.items() if key in fields} + if not matched: + return False + + writer = PdfWriter(clone_from=str(template_path)) + writer.update_page_form_field_values(writer.pages[0], matched, auto_regenerate=False) + writer.set_need_appearances_writer() + with output_path.open("wb") as handle: + writer.write(handle) + return True + + def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None: reader = PdfReader(str(template_path)) writer = PdfWriter(clone_from=str(template_path)) @@ -68,14 +94,20 @@ def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[st continue template_path = irs_dir / f"{template_slug}.pdf" output_path = output_dir / f"{form_code}.pdf" - _overlay_page(template_path, output_path, form_code, normalized) + render_method = "overlay" + review_required = True + if _field_fill_page(template_path, output_path, form_code, normalized): + render_method = "field_fill" + review_required = False + else: + _overlay_page(template_path, output_path, form_code, normalized) artifacts.append( { "formCode": form_code, "templatePath": str(template_path), "outputPath": str(output_path), - "renderMethod": "overlay", - "reviewRequired": True, + "renderMethod": render_method, + "reviewRequired": review_required, } ) diff --git a/skills/us-cpa/src/us_cpa/returns.py b/skills/us-cpa/src/us_cpa/returns.py index 3af4d10..7f0bcc5 100644 --- a/skills/us-cpa/src/us_cpa/returns.py +++ b/skills/us-cpa/src/us_cpa/returns.py @@ -11,6 +11,15 @@ def _as_float(value: Any) -> float: return float(value) +def _fact_metadata(facts: dict[str, Any]) -> dict[str, Any]: + return facts.get("_factMetadata", {}) + + +def _provenance_for(field: str, metadata: dict[str, Any]) -> dict[str, Any]: + entry = metadata.get(field, {}) + return {"sources": list(entry.get("sources", []))} + + def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float: taxable = max(0.0, amount) brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status] @@ -30,57 +39,156 @@ def resolve_required_forms(normalized: dict[str, Any]) -> list[str]: if normalized["income"]["taxableInterest"] > 1500: forms.append("f1040sb") if normalized["income"]["businessIncome"] != 0: - forms.extend(["f1040sc", "f1040se", "f1040s1"]) - return forms + forms.extend(["f1040sc", "f1040sse", "f1040s1", "f8995"]) + if normalized["income"]["capitalGainLoss"] != 0: + forms.extend(["f1040sd", "f8949"]) + if normalized["income"]["rentalIncome"] != 0: + forms.extend(["f1040se", "f1040s1"]) + if normalized["deductions"]["deductionType"] == "itemized": + forms.append("f1040sa") + if normalized["adjustments"]["hsaContribution"] != 0: + forms.append("f8889") + if normalized["credits"]["educationCredit"] != 0: + forms.append("f8863") + if normalized["credits"]["foreignTaxCredit"] != 0: + forms.append("f1116") + if normalized["business"]["qualifiedBusinessIncome"] != 0 and "f8995" not in forms: + forms.append("f8995") + if normalized["basis"]["traditionalIraBasis"] != 0: + forms.append("f8606") + if normalized["taxes"]["additionalMedicareTax"] != 0: + forms.append("f8959") + if normalized["taxes"]["netInvestmentIncomeTax"] != 0: + forms.append("f8960") + if normalized["taxes"]["alternativeMinimumTax"] != 0: + forms.append("f6251") + if normalized["taxes"]["additionalTaxPenalty"] != 0: + forms.append("f5329") + if normalized["credits"]["energyCredit"] != 0: + forms.append("f5695") + if normalized["depreciation"]["depreciationExpense"] != 0: + forms.append("f4562") + if normalized["assetSales"]["section1231GainLoss"] != 0: + forms.append("f4797") + return list(dict.fromkeys(forms)) def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]: rules = tax_year_rules(tax_year) + metadata = _fact_metadata(facts) filing_status = facts.get("filingStatus", "single") wages = _as_float(facts.get("wages")) interest = _as_float(facts.get("taxableInterest")) business_income = _as_float(facts.get("businessIncome")) + capital_gain_loss = _as_float(facts.get("capitalGainLoss")) + rental_income = _as_float(facts.get("rentalIncome")) withholding = _as_float(facts.get("federalWithholding")) + itemized_deductions = _as_float(facts.get("itemizedDeductions")) + hsa_contribution = _as_float(facts.get("hsaContribution")) + education_credit = _as_float(facts.get("educationCredit")) + foreign_tax_credit = _as_float(facts.get("foreignTaxCredit")) + qualified_business_income = _as_float(facts.get("qualifiedBusinessIncome")) + traditional_ira_basis = _as_float(facts.get("traditionalIraBasis")) + additional_medicare_tax = _as_float(facts.get("additionalMedicareTax")) + net_investment_income_tax = _as_float(facts.get("netInvestmentIncomeTax")) + alternative_minimum_tax = _as_float(facts.get("alternativeMinimumTax")) + additional_tax_penalty = _as_float(facts.get("additionalTaxPenalty")) + energy_credit = _as_float(facts.get("energyCredit")) + depreciation_expense = _as_float(facts.get("depreciationExpense")) + section1231_gain_loss = _as_float(facts.get("section1231GainLoss")) - adjusted_gross_income = wages + interest + business_income + adjusted_gross_income = wages + interest + business_income + capital_gain_loss + rental_income standard_deduction = rules["standardDeduction"][filing_status] - taxable_income = max(0.0, adjusted_gross_income - standard_deduction) + deduction_type = "itemized" if itemized_deductions > standard_deduction else "standard" + deduction_amount = itemized_deductions if deduction_type == "itemized" else standard_deduction + taxable_income = max(0.0, adjusted_gross_income - deduction_amount) income_tax = tax_on_ordinary_income(taxable_income, filing_status, tax_year) self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2) - total_tax = round(income_tax + self_employment_tax, 2) + total_tax = round( + income_tax + + self_employment_tax + + additional_medicare_tax + + net_investment_income_tax + + alternative_minimum_tax + + additional_tax_penalty, + 2, + ) total_payments = withholding - refund = round(max(0.0, total_payments - total_tax), 2) - balance_due = round(max(0.0, total_tax - total_payments), 2) + total_credits = round(education_credit + foreign_tax_credit + energy_credit, 2) + refund = round(max(0.0, total_payments + total_credits - total_tax), 2) + balance_due = round(max(0.0, total_tax - total_payments - total_credits), 2) normalized = { "taxYear": tax_year, "taxpayer": { "fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer"), }, + "spouse": { + "fullName": facts.get("spouse.fullName", ""), + }, + "dependents": list(facts.get("dependents", [])), "filingStatus": filing_status, "income": { "wages": wages, "taxableInterest": interest, "businessIncome": business_income, + "capitalGainLoss": capital_gain_loss, + "rentalIncome": rental_income, + }, + "adjustments": { + "hsaContribution": hsa_contribution, }, "payments": { "federalWithholding": withholding, }, "deductions": { "standardDeduction": standard_deduction, + "itemizedDeductions": itemized_deductions, + "deductionType": deduction_type, + "deductionAmount": deduction_amount, + }, + "credits": { + "educationCredit": education_credit, + "foreignTaxCredit": foreign_tax_credit, + "energyCredit": energy_credit, }, "taxes": { "incomeTax": income_tax, "selfEmploymentTax": self_employment_tax, + "additionalMedicareTax": additional_medicare_tax, + "netInvestmentIncomeTax": net_investment_income_tax, + "alternativeMinimumTax": alternative_minimum_tax, + "additionalTaxPenalty": additional_tax_penalty, "totalTax": total_tax, }, + "business": { + "qualifiedBusinessIncome": qualified_business_income, + }, + "basis": { + "traditionalIraBasis": traditional_ira_basis, + }, + "depreciation": { + "depreciationExpense": depreciation_expense, + }, + "assetSales": { + "section1231GainLoss": section1231_gain_loss, + }, "totals": { "adjustedGrossIncome": round(adjusted_gross_income, 2), "taxableIncome": round(taxable_income, 2), "totalPayments": round(total_payments, 2), + "totalCredits": total_credits, "refund": refund, "balanceDue": balance_due, }, + "provenance": { + "income.wages": _provenance_for("wages", metadata), + "income.taxableInterest": _provenance_for("taxableInterest", metadata), + "income.businessIncome": _provenance_for("businessIncome", metadata), + "income.capitalGainLoss": _provenance_for("capitalGainLoss", metadata), + "income.rentalIncome": _provenance_for("rentalIncome", metadata), + "payments.federalWithholding": _provenance_for("federalWithholding", metadata), + }, } normalized["requiredForms"] = resolve_required_forms(normalized) return normalized diff --git a/skills/us-cpa/src/us_cpa/review.py b/skills/us-cpa/src/us_cpa/review.py index 82576b9..8e50f85 100644 --- a/skills/us-cpa/src/us_cpa/review.py +++ b/skills/us-cpa/src/us_cpa/review.py @@ -22,6 +22,9 @@ class ReviewEngine: stored_return = json.loads((case_dir / "return" / "normalized-return.json").read_text()) facts_payload = json.loads((case_dir / "extracted" / "facts.json").read_text()) facts = {key: value["value"] for key, value in facts_payload["facts"].items()} + facts["_factMetadata"] = { + key: {"sources": value.get("sources", [])} for key, value in facts_payload["facts"].items() + } recomputed = normalize_case_facts(facts, manifest["taxYear"]) artifacts_payload = json.loads((case_dir / "output" / "artifacts.json").read_text()) @@ -39,6 +42,42 @@ class ReviewEngine: } ) + for field, label in ( + ("wages", "wages"), + ("taxableInterest", "taxable interest"), + ("businessIncome", "business income"), + ("capitalGainLoss", "capital gains or losses"), + ("rentalIncome", "rental income"), + ): + stored_value = stored_return["income"].get(field, 0.0) + recomputed_value = recomputed["income"].get(field, 0.0) + sources = recomputed.get("provenance", {}).get(f"income.{field}", {}).get("sources", []) + has_document_source = any(item.get("sourceType") == "document_extract" for item in sources) + if stored_value != recomputed_value: + findings.append( + { + "severity": "high" if has_document_source else "medium", + "title": f"Source fact mismatch for {label}", + "explanation": f"Stored return reports {stored_value:.2f} for {label}, but case facts support {recomputed_value:.2f}.", + "suggestedAction": f"Reconcile {label} to {recomputed_value:.2f} before treating the return as final.", + "authorities": [ + {"title": "Case fact registry", "sourceClass": "irs_form"} + ], + } + ) + if stored_value == 0 and recomputed_value > 0 and has_document_source: + findings.append( + { + "severity": "high", + "title": f"Likely omitted {label}", + "explanation": f"Document-extracted facts support {recomputed_value:.2f} of {label}, but the stored return reports none.", + "suggestedAction": f"Add {label} to the return and regenerate the required forms.", + "authorities": [ + {"title": "Case document extraction", "sourceClass": "irs_form"} + ], + } + ) + rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]} for required_form in recomputed["requiredForms"]: if required_form not in rendered_forms: @@ -64,6 +103,18 @@ class ReviewEngine: } ) + required_forms_union = set(recomputed["requiredForms"]) | set(stored_return.get("requiredForms", [])) + if any(form in required_forms_union for form in ("f6251", "f8960", "f8959", "f1116")): + findings.append( + { + "severity": "medium", + "title": "High-complexity tax position requires specialist follow-up", + "explanation": "The return includes forms or computations that usually require deeper technical support and careful authority review.", + "suggestedAction": "Review the supporting authority and computations for the high-complexity forms before treating the return as filing-ready.", + "authorities": [{"title": "Required form analysis", "sourceClass": "irs_instructions"}], + } + ) + findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"])) review = { "status": "reviewed", diff --git a/skills/us-cpa/src/us_cpa/sources.py b/skills/us-cpa/src/us_cpa/sources.py index 1f4190a..852b5aa 100644 --- a/skills/us-cpa/src/us_cpa/sources.py +++ b/skills/us-cpa/src/us_cpa/sources.py @@ -3,6 +3,7 @@ from __future__ import annotations import hashlib import json import os +import re from dataclasses import dataclass from datetime import datetime, timezone from enum import IntEnum @@ -63,6 +64,37 @@ def build_irs_prior_pdf_url(slug: str, tax_year: int) -> str: return f"https://www.irs.gov/pub/irs-prior/{slug}--{tax_year}.pdf" +def build_primary_law_authorities(question: str) -> list[dict[str, str | int]]: + authorities: list[dict[str, str | int]] = [] + normalized = question.lower() + + for match in re.finditer(r"(?:section|sec\.)\s+(\d+[a-z0-9-]*)", normalized): + section = match.group(1) + authorities.append( + { + "slug": f"irc-{section}", + "title": f"Internal Revenue Code section {section}", + "sourceClass": "internal_revenue_code", + "url": f"https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title26-section{section}&num=0&edition=prelim", + "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE), + } + ) + + for match in re.finditer(r"(?:treas(?:ury)?\.?\s+reg(?:ulation)?\.?\s*)([\d.]+-\d+)", normalized): + section = match.group(1) + authorities.append( + { + "slug": f"reg-{section}", + "title": f"Treasury Regulation {section}", + "sourceClass": "treasury_regulation", + "url": f"https://www.ecfr.gov/current/title-26/section-{section}", + "authorityRank": int(AuthorityRank.TREASURY_REGULATION), + } + ) + + return authorities + + def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]: entries = [ ("f1040", "Form 1040", "irs_form"), @@ -73,16 +105,44 @@ def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]: ("f1040sb", "Schedule B (Form 1040)", "irs_form"), ("f1040sc", "Schedule C (Form 1040)", "irs_form"), ("f1040sd", "Schedule D (Form 1040)", "irs_form"), - ("f1040se", "Schedule SE (Form 1040)", "irs_form"), + ("f1040se", "Schedule E (Form 1040)", "irs_form"), + ("f1040sse", "Schedule SE (Form 1040)", "irs_form"), ("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"), ("f8949", "Form 8949", "irs_form"), + ("f4562", "Form 4562", "irs_form"), + ("f4797", "Form 4797", "irs_form"), + ("f6251", "Form 6251", "irs_form"), + ("f8606", "Form 8606", "irs_form"), + ("f8863", "Form 8863", "irs_form"), + ("f8889", "Form 8889", "irs_form"), + ("f8959", "Form 8959", "irs_form"), + ("f8960", "Form 8960", "irs_form"), + ("f8995", "Form 8995", "irs_form"), + ("f8995a", "Form 8995-A", "irs_form"), + ("f5329", "Form 5329", "irs_form"), + ("f5695", "Form 5695", "irs_form"), + ("f1116", "Form 1116", "irs_form"), ("i1040gi", "Instructions for Form 1040 and Schedules 1-3", "irs_instructions"), ("i1040sca", "Instructions for Schedule A", "irs_instructions"), ("i1040sc", "Instructions for Schedule C", "irs_instructions"), ("i1040sd", "Instructions for Schedule D", "irs_instructions"), - ("i1040se", "Instructions for Schedule SE", "irs_instructions"), + ("i1040se", "Instructions for Schedule E (Form 1040)", "irs_instructions"), + ("i1040sse", "Instructions for Schedule SE", "irs_instructions"), ("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"), ("i8949", "Instructions for Form 8949", "irs_instructions"), + ("i4562", "Instructions for Form 4562", "irs_instructions"), + ("i4797", "Instructions for Form 4797", "irs_instructions"), + ("i6251", "Instructions for Form 6251", "irs_instructions"), + ("i8606", "Instructions for Form 8606", "irs_instructions"), + ("i8863", "Instructions for Form 8863", "irs_instructions"), + ("i8889", "Instructions for Form 8889", "irs_instructions"), + ("i8959", "Instructions for Form 8959", "irs_instructions"), + ("i8960", "Instructions for Form 8960", "irs_instructions"), + ("i8995", "Instructions for Form 8995", "irs_instructions"), + ("i8995a", "Instructions for Form 8995-A", "irs_instructions"), + ("i5329", "Instructions for Form 5329", "irs_instructions"), + ("i5695", "Instructions for Form 5695", "irs_instructions"), + ("i1116", "Instructions for Form 1116", "irs_instructions"), ] return [ SourceDescriptor( diff --git a/skills/us-cpa/tests/fixtures/documents/interest-1099.txt b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt new file mode 100644 index 0000000..89d54f7 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt @@ -0,0 +1,3 @@ +Form 1099-INT +Recipient: Jane Doe +Box 1 Interest Income 1750 diff --git a/skills/us-cpa/tests/fixtures/documents/simple-w2.txt b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt new file mode 100644 index 0000000..b3336b1 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt @@ -0,0 +1,4 @@ +Form W-2 Wage and Tax Statement +Employee: Jane Doe +Box 1 Wages, tips, other compensation 50000 +Box 2 Federal income tax withheld 6000 diff --git a/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json new file mode 100644 index 0000000..39fbd78 --- /dev/null +++ b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json @@ -0,0 +1,16 @@ +{ + "taxYear": 2025, + "filingStatus": "single", + "requiredForms": ["f1040", "f1040sb"], + "income": { + "wages": 50000.0, + "taxableInterest": 1750.0, + "businessIncome": 0.0, + "capitalGainLoss": 0.0, + "rentalIncome": 0.0 + }, + "totals": { + "adjustedGrossIncome": 51750.0, + "taxableIncome": 36000.0 + } +} diff --git a/skills/us-cpa/tests/test_cases.py b/skills/us-cpa/tests/test_cases.py index 71fa379..7ddb47f 100644 --- a/skills/us-cpa/tests/test_cases.py +++ b/skills/us-cpa/tests/test_cases.py @@ -51,6 +51,39 @@ class CaseManagerTests(unittest.TestCase): facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) self.assertEqual(facts["facts"]["filingStatus"]["value"], "single") + def test_intake_extracts_machine_usable_facts_from_text_documents(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + root = Path(temp_dir) + case_dir = root / "2025-jane-doe" + w2 = root / "w2.txt" + w2.write_text( + "Form W-2 Wage and Tax Statement\n" + "Employee: Jane Doe\n" + "Box 1 Wages, tips, other compensation 50000\n" + "Box 2 Federal income tax withheld 6000\n" + ) + interest = root / "1099-int.txt" + interest.write_text( + "Form 1099-INT\n" + "Recipient: Jane Doe\n" + "Box 1 Interest Income 1750\n" + ) + manager = CaseManager(case_dir) + manager.create_case(case_label="Jane Doe", tax_year=2025) + + result = manager.intake( + tax_year=2025, + user_facts={"filingStatus": "single"}, + document_paths=[w2, interest], + ) + + self.assertEqual(result["status"], "accepted") + facts = json.loads((case_dir / "extracted" / "facts.json").read_text()) + self.assertEqual(facts["facts"]["wages"]["value"], 50000.0) + self.assertEqual(facts["facts"]["federalWithholding"]["value"], 6000.0) + self.assertEqual(facts["facts"]["taxableInterest"]["value"], 1750.0) + self.assertEqual(facts["facts"]["wages"]["sources"][0]["sourceType"], "document_extract") + def test_conflicting_facts_raise_structured_issue(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: case_dir = Path(temp_dir) / "2025-jane-doe" diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py index 7ab1d34..f464488 100644 --- a/skills/us-cpa/tests/test_questions.py +++ b/skills/us-cpa/tests/test_questions.py @@ -51,6 +51,36 @@ class QuestionEngineTests(unittest.TestCase): self.assertEqual(analysis["riskLevel"], "high") self.assertTrue(analysis["primaryLawRequired"]) self.assertIn("Internal Revenue Code", analysis["missingFacts"][0]) + self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in analysis["authorities"])) + + def test_capital_gains_question_returns_schedule_d_guidance(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="Do I need Schedule D for capital gains?", + tax_year=2025, + case_facts={"capitalGainLoss": 400}, + ) + + self.assertEqual(analysis["issue"], "schedule_d_required") + self.assertEqual(analysis["confidence"], "medium") + self.assertFalse(analysis["primaryLawRequired"]) + self.assertTrue(any(item["slug"] == "f1040sd" for item in analysis["authorities"])) + + def test_schedule_e_question_returns_rental_guidance(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + engine = self.build_engine(temp_dir) + + analysis = engine.answer( + question="Do I need Schedule E for rental income?", + tax_year=2025, + case_facts={"rentalIncome": 1200}, + ) + + self.assertEqual(analysis["issue"], "schedule_e_required") + self.assertFalse(analysis["primaryLawRequired"]) + self.assertTrue(any(item["slug"] == "f1040se" for item in analysis["authorities"])) def test_renderers_produce_conversation_and_memo(self) -> None: analysis = { diff --git a/skills/us-cpa/tests/test_renderers.py b/skills/us-cpa/tests/test_renderers.py index 33f5358..cc0e506 100644 --- a/skills/us-cpa/tests/test_renderers.py +++ b/skills/us-cpa/tests/test_renderers.py @@ -13,6 +13,47 @@ from us_cpa.sources import TaxYearCorpus class RendererTests(unittest.TestCase): + def test_render_case_forms_prefers_fillable_pdf_fields_when_available(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir = Path(temp_dir) / "case" + (case_dir / "output").mkdir(parents=True) + corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache") + irs_dir = corpus.paths_for_year(2025).irs_dir + irs_dir.mkdir(parents=True, exist_ok=True) + + buffer = BytesIO() + pdf = canvas.Canvas(buffer) + form = pdf.acroForm + pdf.drawString(72, 720, "Name") + form.textfield(name="taxpayer_full_name", x=120, y=710, width=200, height=20) + pdf.drawString(72, 680, "Wages") + form.textfield(name="wages", x=120, y=670, width=200, height=20) + pdf.save() + (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue()) + + normalized = { + "taxYear": 2025, + "requiredForms": ["f1040"], + "taxpayer": {"fullName": "Jane Doe"}, + "filingStatus": "single", + "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0}, + "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0}, + "adjustments": {"hsaContribution": 0.0}, + "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0}, + "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0}, + "payments": {"federalWithholding": 6000.0}, + "business": {"qualifiedBusinessIncome": 0.0}, + "basis": {"traditionalIraBasis": 0.0}, + "depreciation": {"depreciationExpense": 0.0}, + "assetSales": {"section1231GainLoss": 0.0}, + "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0}, + } + + artifacts = render_case_forms(case_dir, corpus, normalized) + + self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "field_fill") + self.assertFalse(artifacts["artifacts"][0]["reviewRequired"]) + def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None: with tempfile.TemporaryDirectory() as temp_dir: case_dir = Path(temp_dir) / "case" @@ -32,10 +73,16 @@ class RendererTests(unittest.TestCase): "requiredForms": ["f1040"], "taxpayer": {"fullName": "Jane Doe"}, "filingStatus": "single", - "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0}, - "deductions": {"standardDeduction": 15750.0}, - "taxes": {"totalTax": 3883.5}, + "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0}, + "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0}, + "adjustments": {"hsaContribution": 0.0}, + "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0}, + "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0}, "payments": {"federalWithholding": 6000.0}, + "business": {"qualifiedBusinessIncome": 0.0}, + "basis": {"traditionalIraBasis": 0.0}, + "depreciation": {"depreciationExpense": 0.0}, + "assetSales": {"section1231GainLoss": 0.0}, "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0}, } diff --git a/skills/us-cpa/tests/test_returns.py b/skills/us-cpa/tests/test_returns.py index ed5b66d..f905877 100644 --- a/skills/us-cpa/tests/test_returns.py +++ b/skills/us-cpa/tests/test_returns.py @@ -37,11 +37,11 @@ class ReturnModelTests(unittest.TestCase): self.assertEqual( resolve_required_forms(normalized), - ["f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1"], + ["f1040", "f1040sb", "f1040sc", "f1040sse", "f1040s1", "f8995"], ) def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None: - self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5) + self.assertEqual(tax_on_ordinary_income(34350.0, "single", 2025), 3883.5) def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None: self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0) @@ -50,6 +50,53 @@ class ReturnModelTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "Unsupported tax year"): normalize_case_facts({"filingStatus": "single"}, 2023) + def test_normalize_case_facts_preserves_provenance_and_expands_form_resolution(self) -> None: + normalized = normalize_case_facts( + { + "taxpayer.fullName": "Jane Doe", + "spouse.fullName": "John Doe", + "dependents": [{"fullName": "Kid Doe", "ssnLast4": "4321"}], + "filingStatus": "married_filing_jointly", + "wages": 50000, + "taxableInterest": 2001, + "capitalGainLoss": 400, + "rentalIncome": 1200, + "itemizedDeductions": 40000, + "hsaContribution": 1000, + "educationCredit": 500, + "foreignTaxCredit": 250, + "qualifiedBusinessIncome": 12000, + "traditionalIraBasis": 6000, + "additionalMedicareTax": 100, + "netInvestmentIncomeTax": 200, + "alternativeMinimumTax": 300, + "additionalTaxPenalty": 50, + "energyCredit": 600, + "_factMetadata": { + "wages": {"sources": [{"sourceType": "document_extract", "documentName": "w2.txt"}]}, + }, + }, + 2025, + ) + + self.assertEqual(normalized["spouse"]["fullName"], "John Doe") + self.assertEqual(normalized["dependents"][0]["fullName"], "Kid Doe") + self.assertEqual(normalized["provenance"]["income.wages"]["sources"][0]["documentName"], "w2.txt") + self.assertIn("f1040sa", normalized["requiredForms"]) + self.assertIn("f1040sd", normalized["requiredForms"]) + self.assertIn("f8949", normalized["requiredForms"]) + self.assertIn("f1040se", normalized["requiredForms"]) + self.assertIn("f8889", normalized["requiredForms"]) + self.assertIn("f8863", normalized["requiredForms"]) + self.assertIn("f1116", normalized["requiredForms"]) + self.assertIn("f8995", normalized["requiredForms"]) + self.assertIn("f8606", normalized["requiredForms"]) + self.assertIn("f8959", normalized["requiredForms"]) + self.assertIn("f8960", normalized["requiredForms"]) + self.assertIn("f6251", normalized["requiredForms"]) + self.assertIn("f5329", normalized["requiredForms"]) + self.assertIn("f5695", normalized["requiredForms"]) + if __name__ == "__main__": unittest.main() diff --git a/skills/us-cpa/tests/test_review.py b/skills/us-cpa/tests/test_review.py index c46014d..eb0cbb1 100644 --- a/skills/us-cpa/tests/test_review.py +++ b/skills/us-cpa/tests/test_review.py @@ -64,6 +64,44 @@ class ReviewEngineTests(unittest.TestCase): self.assertIn("adjusted gross income", review["findings"][0]["title"].lower()) self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"])) + def test_review_detects_reporting_omissions_from_source_facts(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir, corpus = self.build_prepared_case(temp_dir) + normalized_path = case_dir / "return" / "normalized-return.json" + normalized = json.loads(normalized_path.read_text()) + normalized["income"]["taxableInterest"] = 0.0 + normalized["totals"]["adjustedGrossIncome"] = 50000.0 + normalized_path.write_text(json.dumps(normalized, indent=2)) + + facts_path = case_dir / "extracted" / "facts.json" + facts_payload = json.loads(facts_path.read_text()) + facts_payload["facts"]["taxableInterest"] = { + "value": 1750.0, + "sources": [{"sourceType": "document_extract", "sourceName": "1099-int.txt"}], + } + facts_path.write_text(json.dumps(facts_payload, indent=2)) + + review = ReviewEngine(corpus=corpus).review_case(case_dir) + + self.assertTrue( + any("likely omitted taxable interest" in item["title"].lower() for item in review["findings"]) + ) + + def test_review_flags_high_complexity_positions_for_specialist_follow_up(self) -> None: + with tempfile.TemporaryDirectory() as temp_dir: + case_dir, corpus = self.build_prepared_case(temp_dir) + normalized_path = case_dir / "return" / "normalized-return.json" + normalized = json.loads(normalized_path.read_text()) + normalized["requiredForms"].append("f6251") + normalized["taxes"]["alternativeMinimumTax"] = 300.0 + normalized_path.write_text(json.dumps(normalized, indent=2)) + + review = ReviewEngine(corpus=corpus).review_case(case_dir) + + self.assertTrue( + any("high-complexity tax position" in item["title"].lower() for item in review["findings"]) + ) + def test_review_renderers_produce_summary_and_memo(self) -> None: review = { "status": "reviewed", diff --git a/skills/us-cpa/tests/test_sources.py b/skills/us-cpa/tests/test_sources.py index 206e4d0..a180ff9 100644 --- a/skills/us-cpa/tests/test_sources.py +++ b/skills/us-cpa/tests/test_sources.py @@ -12,6 +12,7 @@ from us_cpa.sources import ( authority_rank_for, bootstrap_irs_catalog, build_irs_prior_pdf_url, + build_primary_law_authorities, ) @@ -42,6 +43,17 @@ class SourceCatalogTests(unittest.TestCase): self.assertGreaterEqual(len(catalog), 5) self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf") self.assertTrue(any(item.slug == "i1040gi" for item in catalog)) + self.assertTrue(any(item.slug == "f1040sse" for item in catalog)) + + def test_primary_law_authorities_build_official_urls(self) -> None: + authorities = build_primary_law_authorities( + "Does section 469 apply and what does Treas. Reg. 1.469-1 say?" + ) + + self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in authorities)) + self.assertTrue(any(item["sourceClass"] == "treasury_regulation" for item in authorities)) + self.assertTrue(any("uscode.house.gov" in item["url"] for item in authorities)) + self.assertTrue(any("ecfr.gov" in item["url"] for item in authorities)) class TaxYearCorpusTests(unittest.TestCase):