From fb39fe76cb8b8e8c0eb455dbba0c933acb81b82f Mon Sep 17 00:00:00 2001
From: Stefano Fiorini <stefano@Stefanos-MacBook-Air.local>
Date: Sun, 15 Mar 2026 03:01:16 -0500
Subject: [PATCH] fix: expand us-cpa extraction review and rendering

---
 docs/us-cpa.md                                |  57 ++++++--
 skills/us-cpa/SKILL.md                        |  12 +-
 skills/us-cpa/src/us_cpa/cases.py             |  85 +++++++++---
 .../us-cpa/src/us_cpa/document_extractors.py  |  54 ++++++++
 skills/us-cpa/src/us_cpa/prepare.py           |   6 +-
 skills/us-cpa/src/us_cpa/questions.py         |  20 ++-
 skills/us-cpa/src/us_cpa/renderers.py         |  38 +++++-
 skills/us-cpa/src/us_cpa/returns.py           | 122 +++++++++++++++++-
 skills/us-cpa/src/us_cpa/review.py            |  51 ++++++++
 skills/us-cpa/src/us_cpa/sources.py           |  64 ++++++++-
 .../fixtures/documents/interest-1099.txt      |   3 +
 .../tests/fixtures/documents/simple-w2.txt    |   4 +
 .../simple-w2-interest-2025-normalized.json   |  16 +++
 skills/us-cpa/tests/test_cases.py             |  33 +++++
 skills/us-cpa/tests/test_questions.py         |  30 +++++
 skills/us-cpa/tests/test_renderers.py         |  53 +++++++-
 skills/us-cpa/tests/test_returns.py           |  51 +++++++-
 skills/us-cpa/tests/test_review.py            |  38 ++++++
 skills/us-cpa/tests/test_sources.py           |  12 ++
 19 files changed, 693 insertions(+), 56 deletions(-)
 create mode 100644 skills/us-cpa/src/us_cpa/document_extractors.py
 create mode 100644 skills/us-cpa/tests/fixtures/documents/interest-1099.txt
 create mode 100644 skills/us-cpa/tests/fixtures/documents/simple-w2.txt
 create mode 100644 skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json

diff --git a/docs/us-cpa.md b/docs/us-cpa.md
index 66dc9eb..e93ada0 100644
--- a/docs/us-cpa.md
+++ b/docs/us-cpa.md
@@ -24,11 +24,12 @@ Current implementation now includes:
 - deterministic cache layout under `~/.cache/us-cpa` by default
 - `fetch-year` download flow for the bootstrap IRS corpus
 - source manifest with URL, hash, authority rank, and local path traceability
-- authority ranking hooks for IRS materials and future primary-law escalation
-- case-folder intake and conflict-stop handling
+- primary-law URL building for IRC and Treasury regulation escalation
+- case-folder intake, document registration, and machine-usable fact extraction from JSON, text, and PDF inputs
 - question workflow with conversation and memo output
-- prepare workflow for the current supported 1040 subset
+- prepare workflow for the current supported multi-form 1040 package
 - review workflow with findings-first output
+- fillable-PDF-first rendering with overlay fallback
 - e-file-ready draft export payload generation
 
 ## CLI Surface
@@ -61,10 +62,17 @@ US_CPA_CACHE_DIR=/tmp/us-cpa-cache skills/us-cpa/scripts/us-cpa fetch-year --tax
 Current `fetch-year` bootstrap corpus for tax year `2025` is verified against live IRS `irs-prior` PDFs for:
 
 - Form 1040
-- Schedules 1, 2, 3, A, B, C, D, SE, and 8812
-- Form 8949
+- Schedules 1, 2, 3, A, B, C, D, E, SE, and 8812
+- Forms 8949, 4562, 4797, 6251, 8606, 8863, 8889, 8959, 8960, 8995, 8995-A, 5329, 5695, and 1116
 - General Form 1040 instructions and selected schedule/form instructions
 
+Current bundled tax-year computation data:
+
+- 2024
+- 2025
+
+Other years fetch/source correctly, but deterministic return calculations currently stop with an explicit unsupported-year error until rate tables are added.
+
 ## Interaction Model
 
 - `question`
@@ -109,7 +117,8 @@ Behavior:
 
 - creates the full case directory layout when `--create-case` is used
 - copies input documents into `input/`
-- stores normalized user-statement facts in `extracted/facts.json`
+- stores normalized facts with source metadata in `extracted/facts.json`
+- extracts machine-usable facts from JSON/text/PDF documents where supported
 - appends document registry entries to `case-manifest.json`
 - stops with a structured issue and non-zero exit if a new fact conflicts with an existing stored fact
 
@@ -142,21 +151,26 @@ Current implemented topics:
 
 - standard deduction
 - Schedule C / sole proprietorship reporting trigger
+- Schedule D / capital gains reporting trigger
+- Schedule E / rental income reporting trigger
 
 ## Form Rendering
 
 Current rendering path:
 
 - official IRS PDFs from the cached tax-year corpus
-- overlay rendering onto those official PDFs using `reportlab` + `pypdf`
+- deterministic field-fill when usable AcroForm fields are present
+- overlay rendering onto those official PDFs using `reportlab` + `pypdf` as fallback
 - artifact manifest written to `output/artifacts.json`
 
 Current rendered form support:
 
-- Form 1040 overlay artifact generation
+- field-fill support for known mapped fillable forms
+- overlay generation for the current required-form set resolved by the return model
 
 Current review rule:
 
+- field-filled artifacts are not automatically flagged for review
 - overlay-rendered artifacts are marked `reviewRequired: true`
 
 ## Preparation Workflow
@@ -164,9 +178,10 @@ Current review rule:
 Current `prepare` implementation:
 
 - loads case facts from `extracted/facts.json`
-- normalizes them into the current supported 2025 federal return model
-- computes the current supported 1040 subset
-- resolves required forms for the current supported subset
+- normalizes them into the current supported federal return model
+- preserves source provenance for normalized values
+- computes the current supported 1040 package
+- resolves required forms across the current supported subset
 - writes:
   - `return/normalized-return.json`
   - `output/artifacts.json`
@@ -175,10 +190,27 @@ Current `prepare` implementation:
 Current supported calculation inputs:
 
 - `filingStatus`
+- `spouse.fullName`
+- `dependents`
 - `wages`
 - `taxableInterest`
 - `businessIncome`
+- `capitalGainLoss`
+- `rentalIncome`
 - `federalWithholding`
+- `itemizedDeductions`
+- `hsaContribution`
+- `educationCredit`
+- `foreignTaxCredit`
+- `qualifiedBusinessIncome`
+- `traditionalIraBasis`
+- `additionalMedicareTax`
+- `netInvestmentIncomeTax`
+- `alternativeMinimumTax`
+- `additionalTaxPenalty`
+- `energyCredit`
+- `depreciationExpense`
+- `section1231GainLoss`
 
 ## E-file-ready Export
 
@@ -200,7 +232,10 @@ Current `review` implementation:
 
 - recomputes the return from current case facts
 - compares stored normalized return values to recomputed values
+- flags source-fact mismatches for key income fields
+- flags likely omitted income when document-extracted facts support an amount the stored return omits
 - checks whether required rendered artifacts are present
+- flags high-complexity forms for specialist follow-up
 - flags overlay-rendered artifacts as requiring human review
 - sorts findings by severity
 
diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md
index a8d73f5..010a6e4 100644
--- a/skills/us-cpa/SKILL.md
+++ b/skills/us-cpa/SKILL.md
@@ -46,7 +46,8 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
 - IRS materials first; escalate to primary law only when needed
 - stop on conflicting facts and ask the user to resolve the issue before continuing
 - official IRS PDFs are the target compiled-form artifacts
-- overlay-rendered forms must be flagged for human review
+- deterministic field-fill is the preferred render path when the official PDF exposes usable fields
+- overlay-rendered forms are the fallback and must be flagged for human review
 
 ## Output
 
@@ -55,10 +56,11 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
 - `question` supports `--style conversation|memo`
 - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default
 - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
-- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
-- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
-- `prepare` computes the supported 1040 subset and writes normalized return/artifact/report files into the case directory
+- `extract-docs` creates or opens a case, registers documents, stores facts, extracts machine-usable facts from JSON/text/PDF sources where possible, and stops with a structured issue if facts conflict
+- `question` currently has explicit IRS-first answers for standard deduction, Schedule C, Schedule D, and Schedule E questions; other questions escalate to primary-law research with official IRC/regulation URLs
+- rendered form artifacts prefer fillable-field output when possible and otherwise fall back to overlay output
+- `prepare` computes the current supported federal 1040 package, preserves fact provenance in the normalized return, and writes normalized return/artifact/report files into the case directory
 - `export-efile-ready` writes a draft transmission-ready payload without transmitting anything
-- `review` recomputes the return from case facts, checks artifacts, and returns findings-first output in conversation or memo style
+- `review` recomputes the return from case facts, checks artifacts, flags source-fact mismatches and likely omissions, and returns findings-first output in conversation or memo style
 
 For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
diff --git a/skills/us-cpa/src/us_cpa/cases.py b/skills/us-cpa/src/us_cpa/cases.py
index 9f8827d..68cc90c 100644
--- a/skills/us-cpa/src/us_cpa/cases.py
+++ b/skills/us-cpa/src/us_cpa/cases.py
@@ -8,6 +8,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 
+from us_cpa.document_extractors import extract_document_facts
+
 
 CASE_SUBDIRECTORIES = (
     "input",
@@ -95,6 +97,48 @@ class CaseManager:
         current["issues"].append(issue)
         self.issues_path.write_text(json.dumps(current, indent=2))
 
+    def _record_fact(
+        self,
+        facts_payload: dict[str, Any],
+        *,
+        field: str,
+        value: Any,
+        source_type: str,
+        source_name: str,
+        tax_year: int,
+    ) -> None:
+        """Record a fact, or add a source to an equal existing one; raise CaseConflictError on conflict."""
+        existing = facts_payload["facts"].get(field)
+        if existing and existing["value"] != value:
+            issue = {
+                "status": "needs_resolution",
+                "issueType": "fact_conflict",
+                "field": field,
+                "existingValue": existing["value"],
+                "newValue": value,
+                "message": f"Conflicting values for {field}. Resolve before continuing.",
+                "createdAt": _timestamp(),
+                "taxYear": tax_year,
+            }
+            self._write_issue(issue)  # persist the conflict before stopping the intake
+            raise CaseConflictError(issue)
+        captured_at = _timestamp()
+        source_entry = {
+            "sourceType": source_type,
+            "sourceName": source_name,
+            "capturedAt": captured_at,
+        }
+        if existing:
+            existing.setdefault("sources", []).append(source_entry)  # stored facts may lack "sources"
+            return
+
+        facts_payload["facts"][field] = {
+            "value": value,
+            "sourceType": source_type,
+            "capturedAt": captured_at,
+            "sources": [source_entry],
+        }
+
     def intake(
         self,
         *,
@@ -124,27 +168,28 @@ class CaseManager:
             registered_documents.append(document_entry)
 
         facts_payload = self._load_facts()
-        for field, value in user_facts.items():
-            existing = facts_payload["facts"].get(field)
-            if existing and existing["value"] != value:
-                issue = {
-                    "status": "needs_resolution",
-                    "issueType": "fact_conflict",
-                    "field": field,
-                    "existingValue": existing["value"],
-                    "newValue": value,
-                    "message": f"Conflicting values for {field}. Resolve before continuing.",
-                    "createdAt": _timestamp(),
-                    "taxYear": tax_year,
-                }
-                self._write_issue(issue)
-                raise CaseConflictError(issue)
+        for document_entry in registered_documents:
+            extracted = extract_document_facts(Path(document_entry["storedPath"]))
+            document_entry["extractedFacts"] = extracted
+            for field, value in extracted.items():
+                self._record_fact(
+                    facts_payload,
+                    field=field,
+                    value=value,
+                    source_type="document_extract",
+                    source_name=document_entry["name"],
+                    tax_year=tax_year,
+                )
 
-            facts_payload["facts"][field] = {
-                "value": value,
-                "sourceType": "user_statement",
-                "capturedAt": _timestamp(),
-            }
+        for field, value in user_facts.items():
+            self._record_fact(
+                facts_payload,
+                field=field,
+                value=value,
+                source_type="user_statement",
+                source_name="interactive-intake",
+                tax_year=tax_year,
+            )
 
         self._write_manifest(manifest)
         self._write_facts(facts_payload)
diff --git a/skills/us-cpa/src/us_cpa/document_extractors.py b/skills/us-cpa/src/us_cpa/document_extractors.py
new file mode 100644
index 0000000..e5034db
--- /dev/null
+++ b/skills/us-cpa/src/us_cpa/document_extractors.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+from pypdf import PdfReader
+
+
+_NUMBER = r"(-?\d+(?:,\d{3})*(?:\.\d+)?)"  # one group: optional minus, digits, optional ",ddd" groups, optional decimals
+
+
+def _parse_number(raw: str) -> float:
+    return float("".join(raw.split(",")))  # drop every comma, then parse as float
+
+
+def _extract_text(path: Path) -> str:
+    suffix = path.suffix.lower()  # case-insensitive extension match
+    if suffix in {".txt", ".md"}:
+        return path.read_text(encoding="utf-8")  # explicit so decoding does not depend on the locale
+    if suffix == ".pdf":
+        reader = PdfReader(str(path))
+        return "\n".join((page.extract_text() or "") for page in reader.pages)  # one chunk per page
+    return ""  # any other extension yields no text
+
+
+def _facts_from_text(text: str) -> dict[str, Any]:
+    """Pull the taxpayer name and labelled amounts out of *text* by regex."""
+    found: dict[str, Any] = {}
+    for pattern in (r"Employee:\s*(.+)", r"Recipient:\s*(.+)"):  # first label that matches wins
+        if "taxpayer.fullName" not in found and (hit := re.search(pattern, text)):
+            found["taxpayer.fullName"] = hit.group(1).strip()
+    amount_labels = (
+        ("wages", r"Box 1 Wages, tips, other compensation\s+"),
+        ("federalWithholding", r"Box 2 Federal income tax withheld\s+"),
+        ("taxableInterest", r"Box 1 Interest Income\s+"),
+        ("businessIncome", r"Net profit(?: or loss)?\s+"),
+    )
+    for field, label in amount_labels:  # labels are matched case-insensitively
+        if hit := re.search(label + _NUMBER, text, re.I):
+            found[field] = _parse_number(hit.group(1))
+
+    return found
+
+
+def extract_document_facts(path: Path) -> dict[str, Any]:
+    """Return facts from *path*: a JSON object as-is, otherwise label-matched text facts."""
+    if path.suffix.lower() != ".json":
+        return _facts_from_text(_extract_text(path))
+    # Bytes let json detect UTF-8/16/32 and a UTF-8 BOM instead of using the locale encoding.
+    payload = json.loads(path.read_bytes())
+    # Only a top-level object is treated as a fact mapping; arrays and scalars yield no facts.
+    return payload if isinstance(payload, dict) else {}
diff --git a/skills/us-cpa/src/us_cpa/prepare.py b/skills/us-cpa/src/us_cpa/prepare.py
index 72ef8cd..1338b83 100644
--- a/skills/us-cpa/src/us_cpa/prepare.py
+++ b/skills/us-cpa/src/us_cpa/prepare.py
@@ -13,7 +13,11 @@ from us_cpa.sources import TaxYearCorpus
 def _load_case_facts(case_dir: Path) -> dict[str, Any]:
     facts_path = case_dir / "extracted" / "facts.json"
     payload = json.loads(facts_path.read_text())
-    return {key: value["value"] for key, value in payload["facts"].items()}
+    facts = {key: value["value"] for key, value in payload["facts"].items()}
+    facts["_factMetadata"] = {
+        key: {"sources": value.get("sources", [])} for key, value in payload["facts"].items()
+    }
+    return facts
 
 
 
diff --git a/skills/us-cpa/src/us_cpa/questions.py b/skills/us-cpa/src/us_cpa/questions.py
index 1ee9554..3ee1502 100644
--- a/skills/us-cpa/src/us_cpa/questions.py
+++ b/skills/us-cpa/src/us_cpa/questions.py
@@ -5,7 +5,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 
-from us_cpa.sources import TaxYearCorpus
+from us_cpa.sources import TaxYearCorpus, build_primary_law_authorities
 
 
 TOPIC_RULES = [
@@ -29,6 +29,22 @@ TOPIC_RULES = [
         "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.",
         "confidence": "medium",
     },
+    {
+        "issue": "schedule_d_required",
+        "keywords": ("schedule d", "capital gains"),
+        "authority_slugs": ("f1040sd", "i1040sd", "f8949", "i8949"),
+        "answer": "Schedule D is generally required when a taxpayer reports capital gains or losses, often alongside Form 8949.",
+        "summary": "Capital gains and losses generally flow through Schedule D, with Form 8949 supporting detail when required.",
+        "confidence": "medium",
+    },
+    {
+        "issue": "schedule_e_required",
+        "keywords": ("schedule e", "rental income"),
+        "authority_slugs": ("f1040se", "i1040se"),
+        "answer": "Schedule E is generally required when a taxpayer reports rental real-estate income or expenses.",
+        "summary": "Rental income and expenses generally belong on Schedule E.",
+        "confidence": "medium",
+    },
 ]
 
 
@@ -117,7 +133,7 @@ class QuestionEngine:
             "missingFacts": [
                 "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
             ],
-            "authorities": [],
+            "authorities": build_primary_law_authorities(question),
             "conclusion": {
                 "answer": "Insufficient IRS-form and instruction support for a confident answer.",
                 "summary": "This question needs primary-law analysis before a reliable answer can be given.",
diff --git a/skills/us-cpa/src/us_cpa/renderers.py b/skills/us-cpa/src/us_cpa/renderers.py
index 440d8de..f41c16a 100644
--- a/skills/us-cpa/src/us_cpa/renderers.py
+++ b/skills/us-cpa/src/us_cpa/renderers.py
@@ -37,6 +37,32 @@ OVERLAY_FIELDS = {
 }
 
 
+FIELD_FILL_VALUES = {  # form code -> callable building {field name: text} from the normalized return
+    "f1040": lambda data: {
+        "taxpayer_full_name": data["taxpayer"]["fullName"],
+        "filing_status": data["filingStatus"],
+        "wages": f"{data['income']['wages']:.2f}",  # amounts are rendered with two decimals
+        "taxable_interest": f"{data['income']['taxableInterest']:.2f}",
+    }
+}
+
+
+def _field_fill_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> bool:
+    """Fill mapped form fields on the first page and write the PDF; return False if none match."""
+    available = PdfReader(str(template_path)).get_fields() or {}
+    builder = FIELD_FILL_VALUES.get(form_code)
+    candidates = builder(normalized) if builder is not None else {}
+    matched = {name: text for name, text in candidates.items() if name in available}
+    if not matched:
+        return False  # nothing written; the caller falls back to another render path
+    writer = PdfWriter(clone_from=str(template_path))
+    writer.update_page_form_field_values(writer.pages[0], matched, auto_regenerate=False)
+    writer.set_need_appearances_writer()
+    with output_path.open("wb") as handle:
+        writer.write(handle)
+    return True
+
+
 def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
     reader = PdfReader(str(template_path))
     writer = PdfWriter(clone_from=str(template_path))
@@ -68,14 +94,20 @@ def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[st
             continue
         template_path = irs_dir / f"{template_slug}.pdf"
         output_path = output_dir / f"{form_code}.pdf"
-        _overlay_page(template_path, output_path, form_code, normalized)
+        render_method = "overlay"
+        review_required = True
+        if _field_fill_page(template_path, output_path, form_code, normalized):
+            render_method = "field_fill"
+            review_required = False
+        else:
+            _overlay_page(template_path, output_path, form_code, normalized)
         artifacts.append(
             {
                 "formCode": form_code,
                 "templatePath": str(template_path),
                 "outputPath": str(output_path),
-                "renderMethod": "overlay",
-                "reviewRequired": True,
+                "renderMethod": render_method,
+                "reviewRequired": review_required,
             }
         )
 
diff --git a/skills/us-cpa/src/us_cpa/returns.py b/skills/us-cpa/src/us_cpa/returns.py
index 3af4d10..7f0bcc5 100644
--- a/skills/us-cpa/src/us_cpa/returns.py
+++ b/skills/us-cpa/src/us_cpa/returns.py
@@ -11,6 +11,15 @@ def _as_float(value: Any) -> float:
     return float(value)
 
 
+def _fact_metadata(facts: dict[str, Any]) -> dict[str, Any]:
+    return facts["_factMetadata"] if "_factMetadata" in facts else {}  # missing key means no metadata
+
+
+def _provenance_for(field: str, metadata: dict[str, Any]) -> dict[str, Any]:
+    recorded = metadata.get(field, {}).get("sources", [])  # unknown field or no sources -> empty
+    return {"sources": [*recorded]}  # new list object; the entries themselves are shared
+
+
 def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float:
     taxable = max(0.0, amount)
     brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status]
@@ -30,57 +39,156 @@ def resolve_required_forms(normalized: dict[str, Any]) -> list[str]:
     if normalized["income"]["taxableInterest"] > 1500:
         forms.append("f1040sb")
     if normalized["income"]["businessIncome"] != 0:
-        forms.extend(["f1040sc", "f1040se", "f1040s1"])
-    return forms
+        forms.extend(["f1040sc", "f1040sse", "f1040s1", "f8995"])
+    if normalized["income"]["capitalGainLoss"] != 0:
+        forms.extend(["f1040sd", "f8949"])
+    if normalized["income"]["rentalIncome"] != 0:
+        forms.extend(["f1040se", "f1040s1"])
+    if normalized["deductions"]["deductionType"] == "itemized":
+        forms.append("f1040sa")
+    if normalized["adjustments"]["hsaContribution"] != 0:
+        forms.append("f8889")
+    if normalized["credits"]["educationCredit"] != 0:
+        forms.append("f8863")
+    if normalized["credits"]["foreignTaxCredit"] != 0:
+        forms.append("f1116")
+    if normalized["business"]["qualifiedBusinessIncome"] != 0 and "f8995" not in forms:
+        forms.append("f8995")
+    if normalized["basis"]["traditionalIraBasis"] != 0:
+        forms.append("f8606")
+    if normalized["taxes"]["additionalMedicareTax"] != 0:
+        forms.append("f8959")
+    if normalized["taxes"]["netInvestmentIncomeTax"] != 0:
+        forms.append("f8960")
+    if normalized["taxes"]["alternativeMinimumTax"] != 0:
+        forms.append("f6251")
+    if normalized["taxes"]["additionalTaxPenalty"] != 0:
+        forms.append("f5329")
+    if normalized["credits"]["energyCredit"] != 0:
+        forms.append("f5695")
+    if normalized["depreciation"]["depreciationExpense"] != 0:
+        forms.append("f4562")
+    if normalized["assetSales"]["section1231GainLoss"] != 0:
+        forms.append("f4797")
+    return list(dict.fromkeys(forms))
 
 
 def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]:
     rules = tax_year_rules(tax_year)
+    metadata = _fact_metadata(facts)
     filing_status = facts.get("filingStatus", "single")
     wages = _as_float(facts.get("wages"))
     interest = _as_float(facts.get("taxableInterest"))
     business_income = _as_float(facts.get("businessIncome"))
+    capital_gain_loss = _as_float(facts.get("capitalGainLoss"))
+    rental_income = _as_float(facts.get("rentalIncome"))
     withholding = _as_float(facts.get("federalWithholding"))
+    itemized_deductions = _as_float(facts.get("itemizedDeductions"))
+    hsa_contribution = _as_float(facts.get("hsaContribution"))
+    education_credit = _as_float(facts.get("educationCredit"))
+    foreign_tax_credit = _as_float(facts.get("foreignTaxCredit"))
+    qualified_business_income = _as_float(facts.get("qualifiedBusinessIncome"))
+    traditional_ira_basis = _as_float(facts.get("traditionalIraBasis"))
+    additional_medicare_tax = _as_float(facts.get("additionalMedicareTax"))
+    net_investment_income_tax = _as_float(facts.get("netInvestmentIncomeTax"))
+    alternative_minimum_tax = _as_float(facts.get("alternativeMinimumTax"))
+    additional_tax_penalty = _as_float(facts.get("additionalTaxPenalty"))
+    energy_credit = _as_float(facts.get("energyCredit"))
+    depreciation_expense = _as_float(facts.get("depreciationExpense"))
+    section1231_gain_loss = _as_float(facts.get("section1231GainLoss"))
 
-    adjusted_gross_income = wages + interest + business_income
+    adjusted_gross_income = wages + interest + business_income + capital_gain_loss + rental_income
     standard_deduction = rules["standardDeduction"][filing_status]
-    taxable_income = max(0.0, adjusted_gross_income - standard_deduction)
+    deduction_type = "itemized" if itemized_deductions > standard_deduction else "standard"
+    deduction_amount = itemized_deductions if deduction_type == "itemized" else standard_deduction
+    taxable_income = max(0.0, adjusted_gross_income - deduction_amount)
     income_tax = tax_on_ordinary_income(taxable_income, filing_status, tax_year)
     self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2)
-    total_tax = round(income_tax + self_employment_tax, 2)
+    total_tax = round(
+        income_tax
+        + self_employment_tax
+        + additional_medicare_tax
+        + net_investment_income_tax
+        + alternative_minimum_tax
+        + additional_tax_penalty,
+        2,
+    )
     total_payments = withholding
-    refund = round(max(0.0, total_payments - total_tax), 2)
-    balance_due = round(max(0.0, total_tax - total_payments), 2)
+    total_credits = round(education_credit + foreign_tax_credit + energy_credit, 2)
+    refund = round(max(0.0, total_payments + total_credits - total_tax), 2)
+    balance_due = round(max(0.0, total_tax - total_payments - total_credits), 2)
 
     normalized = {
         "taxYear": tax_year,
         "taxpayer": {
             "fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer"),
         },
+        "spouse": {
+            "fullName": facts.get("spouse.fullName", ""),
+        },
+        "dependents": list(facts.get("dependents", [])),
         "filingStatus": filing_status,
         "income": {
             "wages": wages,
             "taxableInterest": interest,
             "businessIncome": business_income,
+            "capitalGainLoss": capital_gain_loss,
+            "rentalIncome": rental_income,
+        },
+        "adjustments": {
+            "hsaContribution": hsa_contribution,
         },
         "payments": {
             "federalWithholding": withholding,
         },
         "deductions": {
             "standardDeduction": standard_deduction,
+            "itemizedDeductions": itemized_deductions,
+            "deductionType": deduction_type,
+            "deductionAmount": deduction_amount,
+        },
+        "credits": {
+            "educationCredit": education_credit,
+            "foreignTaxCredit": foreign_tax_credit,
+            "energyCredit": energy_credit,
         },
         "taxes": {
             "incomeTax": income_tax,
             "selfEmploymentTax": self_employment_tax,
+            "additionalMedicareTax": additional_medicare_tax,
+            "netInvestmentIncomeTax": net_investment_income_tax,
+            "alternativeMinimumTax": alternative_minimum_tax,
+            "additionalTaxPenalty": additional_tax_penalty,
             "totalTax": total_tax,
         },
+        "business": {
+            "qualifiedBusinessIncome": qualified_business_income,
+        },
+        "basis": {
+            "traditionalIraBasis": traditional_ira_basis,
+        },
+        "depreciation": {
+            "depreciationExpense": depreciation_expense,
+        },
+        "assetSales": {
+            "section1231GainLoss": section1231_gain_loss,
+        },
         "totals": {
             "adjustedGrossIncome": round(adjusted_gross_income, 2),
             "taxableIncome": round(taxable_income, 2),
             "totalPayments": round(total_payments, 2),
+            "totalCredits": total_credits,
             "refund": refund,
             "balanceDue": balance_due,
         },
+        "provenance": {
+            "income.wages": _provenance_for("wages", metadata),
+            "income.taxableInterest": _provenance_for("taxableInterest", metadata),
+            "income.businessIncome": _provenance_for("businessIncome", metadata),
+            "income.capitalGainLoss": _provenance_for("capitalGainLoss", metadata),
+            "income.rentalIncome": _provenance_for("rentalIncome", metadata),
+            "payments.federalWithholding": _provenance_for("federalWithholding", metadata),
+        },
     }
     normalized["requiredForms"] = resolve_required_forms(normalized)
     return normalized
diff --git a/skills/us-cpa/src/us_cpa/review.py b/skills/us-cpa/src/us_cpa/review.py
index 82576b9..8e50f85 100644
--- a/skills/us-cpa/src/us_cpa/review.py
+++ b/skills/us-cpa/src/us_cpa/review.py
@@ -22,6 +22,9 @@ class ReviewEngine:
         stored_return = json.loads((case_dir / "return" / "normalized-return.json").read_text())
         facts_payload = json.loads((case_dir / "extracted" / "facts.json").read_text())
         facts = {key: value["value"] for key, value in facts_payload["facts"].items()}
+        facts["_factMetadata"] = {
+            key: {"sources": value.get("sources", [])} for key, value in facts_payload["facts"].items()
+        }
         recomputed = normalize_case_facts(facts, manifest["taxYear"])
         artifacts_payload = json.loads((case_dir / "output" / "artifacts.json").read_text())
 
@@ -39,6 +42,42 @@ class ReviewEngine:
                 }
             )
 
+        for field, label in (
+            ("wages", "wages"),
+            ("taxableInterest", "taxable interest"),
+            ("businessIncome", "business income"),
+            ("capitalGainLoss", "capital gains or losses"),
+            ("rentalIncome", "rental income"),
+        ):
+            stored_value = stored_return["income"].get(field, 0.0)
+            recomputed_value = recomputed["income"].get(field, 0.0)
+            sources = recomputed.get("provenance", {}).get(f"income.{field}", {}).get("sources", [])
+            has_document_source = any(item.get("sourceType") == "document_extract" for item in sources)
+            if stored_value != recomputed_value:
+                findings.append(
+                    {
+                        "severity": "high" if has_document_source else "medium",
+                        "title": f"Source fact mismatch for {label}",
+                        "explanation": f"Stored return reports {stored_value:.2f} for {label}, but case facts support {recomputed_value:.2f}.",
+                        "suggestedAction": f"Reconcile {label} to {recomputed_value:.2f} before treating the return as final.",
+                        "authorities": [
+                            {"title": "Case fact registry", "sourceClass": "irs_form"}
+                        ],
+                    }
+                )
+            if stored_value == 0 and recomputed_value > 0 and has_document_source:
+                findings.append(
+                    {
+                        "severity": "high",
+                        "title": f"Likely omitted {label}",
+                        "explanation": f"Document-extracted facts support {recomputed_value:.2f} of {label}, but the stored return reports none.",
+                        "suggestedAction": f"Add {label} to the return and regenerate the required forms.",
+                        "authorities": [
+                            {"title": "Case document extraction", "sourceClass": "irs_form"}
+                        ],
+                    }
+                )
+
         rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]}
         for required_form in recomputed["requiredForms"]:
             if required_form not in rendered_forms:
@@ -64,6 +103,18 @@ class ReviewEngine:
                     }
                 )
 
+        required_forms_union = set(recomputed["requiredForms"]) | set(stored_return.get("requiredForms", []))
+        if any(form in required_forms_union for form in ("f6251", "f8960", "f8959", "f1116")):
+            findings.append(
+                {
+                    "severity": "medium",
+                    "title": "High-complexity tax position requires specialist follow-up",
+                    "explanation": "The return includes forms or computations that usually require deeper technical support and careful authority review.",
+                    "suggestedAction": "Review the supporting authority and computations for the high-complexity forms before treating the return as filing-ready.",
+                    "authorities": [{"title": "Required form analysis", "sourceClass": "irs_instructions"}],
+                }
+            )
+
         findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"]))
         review = {
             "status": "reviewed",
diff --git a/skills/us-cpa/src/us_cpa/sources.py b/skills/us-cpa/src/us_cpa/sources.py
index 1f4190a..852b5aa 100644
--- a/skills/us-cpa/src/us_cpa/sources.py
+++ b/skills/us-cpa/src/us_cpa/sources.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import hashlib
 import json
 import os
+import re
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from enum import IntEnum
@@ -63,6 +64,37 @@ def build_irs_prior_pdf_url(slug: str, tax_year: int) -> str:
     return f"https://www.irs.gov/pub/irs-prior/{slug}--{tax_year}.pdf"
 
 
+def build_primary_law_authorities(question: str) -> list[dict[str, str | int]]:
+    """Build IRC and Treasury Regulation authority entries cited in *question*.
+
+    Repeated cites collapse to one; "section 1.469-1" is a reg, not IRC section 1.
+    """
+    normalized = question.lower()
+    code_pattern = r"(?:section|sec\.)\s+(\d+[a-z0-9-]*)(?![a-z0-9-]|\.\d)"  # skip reg-style "1.469-1"
+    reg_pattern = r"treas(?:ury)?\.?\s+reg(?:ulation)?\.?\s*(?:(?:section|sec\.|\u00a7)\s*)?([\d.]+-\d+)"
+    code_entries: list[dict[str, str | int]] = [
+        {
+            "slug": f"irc-{section}",
+            "title": f"Internal Revenue Code section {section}",
+            "sourceClass": "internal_revenue_code",
+            "url": f"https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title26-section{section}&num=0&edition=prelim",
+            "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE),
+        }
+        for section in dict.fromkeys(re.findall(code_pattern, normalized))  # dedupe, keep order
+    ]
+    reg_entries: list[dict[str, str | int]] = [
+        {
+            "slug": f"reg-{section}",
+            "title": f"Treasury Regulation {section}",
+            "sourceClass": "treasury_regulation",
+            "url": f"https://www.ecfr.gov/current/title-26/section-{section}",
+            "authorityRank": int(AuthorityRank.TREASURY_REGULATION),
+        }
+        for section in dict.fromkeys(re.findall(reg_pattern, normalized))  # dedupe, keep order
+    ]
+    return code_entries + reg_entries
+
+
 def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]:
     entries = [
         ("f1040", "Form 1040", "irs_form"),
@@ -73,16 +105,44 @@ def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]:
         ("f1040sb", "Schedule B (Form 1040)", "irs_form"),
         ("f1040sc", "Schedule C (Form 1040)", "irs_form"),
         ("f1040sd", "Schedule D (Form 1040)", "irs_form"),
-        ("f1040se", "Schedule SE (Form 1040)", "irs_form"),
+        ("f1040se", "Schedule E (Form 1040)", "irs_form"),
+        ("f1040sse", "Schedule SE (Form 1040)", "irs_form"),
         ("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"),
         ("f8949", "Form 8949", "irs_form"),
+        ("f4562", "Form 4562", "irs_form"),
+        ("f4797", "Form 4797", "irs_form"),
+        ("f6251", "Form 6251", "irs_form"),
+        ("f8606", "Form 8606", "irs_form"),
+        ("f8863", "Form 8863", "irs_form"),
+        ("f8889", "Form 8889", "irs_form"),
+        ("f8959", "Form 8959", "irs_form"),
+        ("f8960", "Form 8960", "irs_form"),
+        ("f8995", "Form 8995", "irs_form"),
+        ("f8995a", "Form 8995-A", "irs_form"),
+        ("f5329", "Form 5329", "irs_form"),
+        ("f5695", "Form 5695", "irs_form"),
+        ("f1116", "Form 1116", "irs_form"),
         ("i1040gi", "Instructions for Form 1040 and Schedules 1-3", "irs_instructions"),
         ("i1040sca", "Instructions for Schedule A", "irs_instructions"),
         ("i1040sc", "Instructions for Schedule C", "irs_instructions"),
         ("i1040sd", "Instructions for Schedule D", "irs_instructions"),
-        ("i1040se", "Instructions for Schedule SE", "irs_instructions"),
+        ("i1040se", "Instructions for Schedule E (Form 1040)", "irs_instructions"),
+        ("i1040sse", "Instructions for Schedule SE", "irs_instructions"),
         ("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"),
         ("i8949", "Instructions for Form 8949", "irs_instructions"),
+        ("i4562", "Instructions for Form 4562", "irs_instructions"),
+        ("i4797", "Instructions for Form 4797", "irs_instructions"),
+        ("i6251", "Instructions for Form 6251", "irs_instructions"),
+        ("i8606", "Instructions for Form 8606", "irs_instructions"),
+        ("i8863", "Instructions for Form 8863", "irs_instructions"),
+        ("i8889", "Instructions for Form 8889", "irs_instructions"),
+        ("i8959", "Instructions for Form 8959", "irs_instructions"),
+        ("i8960", "Instructions for Form 8960", "irs_instructions"),
+        ("i8995", "Instructions for Form 8995", "irs_instructions"),
+        ("i8995a", "Instructions for Form 8995-A", "irs_instructions"),
+        ("i5329", "Instructions for Form 5329", "irs_instructions"),
+        ("i5695", "Instructions for Form 5695", "irs_instructions"),
+        ("i1116", "Instructions for Form 1116", "irs_instructions"),
     ]
     return [
         SourceDescriptor(
diff --git a/skills/us-cpa/tests/fixtures/documents/interest-1099.txt b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt
new file mode 100644
index 0000000..89d54f7
--- /dev/null
+++ b/skills/us-cpa/tests/fixtures/documents/interest-1099.txt
@@ -0,0 +1,3 @@
+Form 1099-INT
+Recipient: Jane Doe
+Box 1 Interest Income 1750
diff --git a/skills/us-cpa/tests/fixtures/documents/simple-w2.txt b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt
new file mode 100644
index 0000000..b3336b1
--- /dev/null
+++ b/skills/us-cpa/tests/fixtures/documents/simple-w2.txt
@@ -0,0 +1,4 @@
+Form W-2 Wage and Tax Statement
+Employee: Jane Doe
+Box 1 Wages, tips, other compensation 50000
+Box 2 Federal income tax withheld 6000
diff --git a/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json
new file mode 100644
index 0000000..39fbd78
--- /dev/null
+++ b/skills/us-cpa/tests/fixtures/returns/simple-w2-interest-2025-normalized.json
@@ -0,0 +1,16 @@
+{
+  "taxYear": 2025,
+  "filingStatus": "single",
+  "requiredForms": ["f1040", "f1040sb"],
+  "income": {
+    "wages": 50000.0,
+    "taxableInterest": 1750.0,
+    "businessIncome": 0.0,
+    "capitalGainLoss": 0.0,
+    "rentalIncome": 0.0
+  },
+  "totals": {
+    "adjustedGrossIncome": 51750.0,
+    "taxableIncome": 36000.0
+  }
+}
diff --git a/skills/us-cpa/tests/test_cases.py b/skills/us-cpa/tests/test_cases.py
index 71fa379..7ddb47f 100644
--- a/skills/us-cpa/tests/test_cases.py
+++ b/skills/us-cpa/tests/test_cases.py
@@ -51,6 +51,39 @@ class CaseManagerTests(unittest.TestCase):
             facts = json.loads((case_dir / "extracted" / "facts.json").read_text())
             self.assertEqual(facts["facts"]["filingStatus"]["value"], "single")
 
+    def test_intake_extracts_machine_usable_facts_from_text_documents(self) -> None:
+        """Intake of plain-text W-2 and 1099-INT files records document-sourced facts."""
+        w2_text = (
+            "Form W-2 Wage and Tax Statement\n"
+            "Employee: Jane Doe\n"
+            "Box 1 Wages, tips, other compensation 50000\n"
+            "Box 2 Federal income tax withheld 6000\n"
+        )
+        interest_text = "Form 1099-INT\nRecipient: Jane Doe\nBox 1 Interest Income 1750\n"
+        with tempfile.TemporaryDirectory() as temp_dir:
+            workspace = Path(temp_dir)
+            case_dir = workspace / "2025-jane-doe"
+            w2_path = workspace / "w2.txt"
+            interest_path = workspace / "1099-int.txt"
+            w2_path.write_text(w2_text)
+            interest_path.write_text(interest_text)
+
+            case_manager = CaseManager(case_dir)
+            case_manager.create_case(case_label="Jane Doe", tax_year=2025)
+            outcome = case_manager.intake(
+                tax_year=2025,
+                user_facts={"filingStatus": "single"},
+                document_paths=[w2_path, interest_path],
+            )
+
+            self.assertEqual(outcome["status"], "accepted")
+            # Load the facts file from the case directory's extracted/ folder.
+            extracted = json.loads((case_dir / "extracted" / "facts.json").read_text())["facts"]
+            self.assertEqual(extracted["wages"]["value"], 50000.0)
+            self.assertEqual(extracted["federalWithholding"]["value"], 6000.0)
+            self.assertEqual(extracted["taxableInterest"]["value"], 1750.0)
+            self.assertEqual(extracted["wages"]["sources"][0]["sourceType"], "document_extract")
+
     def test_conflicting_facts_raise_structured_issue(self) -> None:
         with tempfile.TemporaryDirectory() as temp_dir:
             case_dir = Path(temp_dir) / "2025-jane-doe"
diff --git a/skills/us-cpa/tests/test_questions.py b/skills/us-cpa/tests/test_questions.py
index 7ab1d34..f464488 100644
--- a/skills/us-cpa/tests/test_questions.py
+++ b/skills/us-cpa/tests/test_questions.py
@@ -51,6 +51,36 @@ class QuestionEngineTests(unittest.TestCase):
             self.assertEqual(analysis["riskLevel"], "high")
             self.assertTrue(analysis["primaryLawRequired"])
             self.assertIn("Internal Revenue Code", analysis["missingFacts"][0])
+            self.assertTrue(any(item["sourceClass"] == "internal_revenue_code" for item in analysis["authorities"]))
+
+    def test_capital_gains_question_returns_schedule_d_guidance(self) -> None:
+        """A capital-gains question resolves to the Schedule D issue and cites f1040sd."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            result = self.build_engine(temp_dir).answer(
+                question="Do I need Schedule D for capital gains?",
+                tax_year=2025,
+                case_facts={"capitalGainLoss": 400},
+            )
+
+            self.assertEqual(result["issue"], "schedule_d_required")
+            self.assertEqual(result["confidence"], "medium")
+            self.assertFalse(result["primaryLawRequired"])
+            cited_slugs = (authority["slug"] for authority in result["authorities"])
+            self.assertTrue(any(slug == "f1040sd" for slug in cited_slugs))
+
+    def test_schedule_e_question_returns_rental_guidance(self) -> None:
+        """A rental-income question resolves to the Schedule E issue and cites f1040se."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            result = self.build_engine(temp_dir).answer(
+                question="Do I need Schedule E for rental income?",
+                tax_year=2025,
+                case_facts={"rentalIncome": 1200},
+            )
+
+            self.assertEqual(result["issue"], "schedule_e_required")
+            self.assertFalse(result["primaryLawRequired"])
+            cited_slugs = (authority["slug"] for authority in result["authorities"])
+            self.assertTrue(any(slug == "f1040se" for slug in cited_slugs))
 
     def test_renderers_produce_conversation_and_memo(self) -> None:
         analysis = {
diff --git a/skills/us-cpa/tests/test_renderers.py b/skills/us-cpa/tests/test_renderers.py
index 33f5358..cc0e506 100644
--- a/skills/us-cpa/tests/test_renderers.py
+++ b/skills/us-cpa/tests/test_renderers.py
@@ -13,6 +13,47 @@ from us_cpa.sources import TaxYearCorpus
 
 
 class RendererTests(unittest.TestCase):
+    def test_render_case_forms_prefers_fillable_pdf_fields_when_available(self) -> None:
+        """A cached f1040.pdf with AcroForm text fields is rendered via field_fill."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            sandbox = Path(temp_dir)
+            case_dir = sandbox / "case"
+            (case_dir / "output").mkdir(parents=True)
+            corpus = TaxYearCorpus(cache_root=sandbox / "cache")
+            irs_dir = corpus.paths_for_year(2025).irs_dir
+            irs_dir.mkdir(parents=True, exist_ok=True)
+
+            # Build a minimal fillable PDF: a drawn label plus a text field per row.
+            pdf_bytes = BytesIO()
+            sheet = canvas.Canvas(pdf_bytes)
+            acro_form = sheet.acroForm
+            for label, field_name, baseline in (("Name", "taxpayer_full_name", 720), ("Wages", "wages", 680)):
+                sheet.drawString(72, baseline, label)
+                acro_form.textfield(name=field_name, x=120, y=baseline - 10, width=200, height=20)
+            sheet.save()
+            (irs_dir / "f1040.pdf").write_bytes(pdf_bytes.getvalue())
+
+            return_payload = {
+                "taxYear": 2025,
+                "requiredForms": ["f1040"],
+                "taxpayer": {"fullName": "Jane Doe"},
+                "filingStatus": "single",
+                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0},
+                "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0},
+                "adjustments": {"hsaContribution": 0.0},
+                "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0},
+                "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0},
+                "payments": {"federalWithholding": 6000.0},
+                "business": {"qualifiedBusinessIncome": 0.0},
+                "basis": {"traditionalIraBasis": 0.0},
+                "depreciation": {"depreciationExpense": 0.0},
+                "assetSales": {"section1231GainLoss": 0.0},
+                "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
+            }
+            rendered = render_case_forms(case_dir, corpus, return_payload)["artifacts"]
+            self.assertEqual(rendered[0]["renderMethod"], "field_fill")
+            self.assertFalse(rendered[0]["reviewRequired"])
+
     def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None:
         with tempfile.TemporaryDirectory() as temp_dir:
             case_dir = Path(temp_dir) / "case"
@@ -32,10 +73,16 @@ class RendererTests(unittest.TestCase):
                 "requiredForms": ["f1040"],
                 "taxpayer": {"fullName": "Jane Doe"},
                 "filingStatus": "single",
-                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0},
-                "deductions": {"standardDeduction": 15750.0},
-                "taxes": {"totalTax": 3883.5},
+                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0, "capitalGainLoss": 0.0, "rentalIncome": 0.0},
+                "deductions": {"standardDeduction": 15750.0, "deductionType": "standard", "deductionAmount": 15750.0},
+                "adjustments": {"hsaContribution": 0.0},
+                "credits": {"educationCredit": 0.0, "foreignTaxCredit": 0.0, "energyCredit": 0.0},
+                "taxes": {"totalTax": 3883.5, "additionalMedicareTax": 0.0, "netInvestmentIncomeTax": 0.0, "alternativeMinimumTax": 0.0, "additionalTaxPenalty": 0.0},
                 "payments": {"federalWithholding": 6000.0},
+                "business": {"qualifiedBusinessIncome": 0.0},
+                "basis": {"traditionalIraBasis": 0.0},
+                "depreciation": {"depreciationExpense": 0.0},
+                "assetSales": {"section1231GainLoss": 0.0},
                 "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
             }
 
diff --git a/skills/us-cpa/tests/test_returns.py b/skills/us-cpa/tests/test_returns.py
index ed5b66d..f905877 100644
--- a/skills/us-cpa/tests/test_returns.py
+++ b/skills/us-cpa/tests/test_returns.py
@@ -37,11 +37,11 @@ class ReturnModelTests(unittest.TestCase):
 
         self.assertEqual(
             resolve_required_forms(normalized),
-            ["f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1"],
+            ["f1040", "f1040sb", "f1040sc", "f1040sse", "f1040s1", "f8995"],
         )
 
     def test_tax_bracket_calculation_uses_2025_single_rates(self) -> None:
-        self.assertEqual(tax_on_ordinary_income(34350.0, "single"), 3883.5)
+        self.assertEqual(tax_on_ordinary_income(34350.0, "single", 2025), 3883.5)
 
     def test_tax_bracket_calculation_uses_selected_tax_year(self) -> None:
         self.assertEqual(tax_on_ordinary_income(33650.0, "single", 2024), 3806.0)
@@ -50,6 +50,53 @@ class ReturnModelTests(unittest.TestCase):
         with self.assertRaisesRegex(ValueError, "Unsupported tax year"):
             normalize_case_facts({"filingStatus": "single"}, 2023)
 
+    def test_normalize_case_facts_preserves_provenance_and_expands_form_resolution(self) -> None:
+        """Normalization keeps people and provenance and resolves the expanded form set.
+
+        The facts below are fed to normalize_case_facts for tax year 2025.
+        """
+        case_facts = {
+            "taxpayer.fullName": "Jane Doe",
+            "spouse.fullName": "John Doe",
+            "dependents": [{"fullName": "Kid Doe", "ssnLast4": "4321"}],
+            "filingStatus": "married_filing_jointly",
+            "wages": 50000,
+            "taxableInterest": 2001,
+            "capitalGainLoss": 400,
+            "rentalIncome": 1200,
+            "itemizedDeductions": 40000,
+            "hsaContribution": 1000,
+            "educationCredit": 500,
+            "foreignTaxCredit": 250,
+            "qualifiedBusinessIncome": 12000,
+            "traditionalIraBasis": 6000,
+            "additionalMedicareTax": 100,
+            "netInvestmentIncomeTax": 200,
+            "alternativeMinimumTax": 300,
+            "additionalTaxPenalty": 50,
+            "energyCredit": 600,
+            "_factMetadata": {
+                "wages": {"sources": [{"sourceType": "document_extract", "documentName": "w2.txt"}]},
+            },
+        }
+
+        result = normalize_case_facts(case_facts, 2025)
+
+        # Spouse, dependents, and the wages source must appear in the output.
+        self.assertEqual(result["spouse"]["fullName"], "John Doe")
+        self.assertEqual(result["dependents"][0]["fullName"], "Kid Doe")
+        self.assertEqual(result["provenance"]["income.wages"]["sources"][0]["documentName"], "w2.txt")
+
+        # Each of these form codes is expected in the resolved requiredForms list.
+        expected_forms = (
+            "f1040sa", "f1040sd", "f8949", "f1040se",
+            "f8889", "f8863", "f1116", "f8995", "f8606",
+            "f8959", "f8960", "f6251", "f5329", "f5695",
+        )
+        for form_code in expected_forms:
+            self.assertIn(form_code, result["requiredForms"])
+        # End of required-form checks.
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/skills/us-cpa/tests/test_review.py b/skills/us-cpa/tests/test_review.py
index c46014d..eb0cbb1 100644
--- a/skills/us-cpa/tests/test_review.py
+++ b/skills/us-cpa/tests/test_review.py
@@ -64,6 +64,44 @@ class ReviewEngineTests(unittest.TestCase):
             self.assertIn("adjusted gross income", review["findings"][0]["title"].lower())
             self.assertTrue(any("missing rendered artifact" in item["title"].lower() for item in review["findings"]))
 
+    def test_review_detects_reporting_omissions_from_source_facts(self) -> None:
+        """Interest backed by an extracted document but zeroed on the return is flagged."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir, corpus = self.build_prepared_case(temp_dir)
+
+            # Zero out interest on the stored return and set AGI to 50000.0.
+            return_file = case_dir / "return" / "normalized-return.json"
+            stored = json.loads(return_file.read_text())
+            stored["income"]["taxableInterest"] = 0.0
+            stored["totals"]["adjustedGrossIncome"] = 50000.0
+            return_file.write_text(json.dumps(stored, indent=2))
+
+            # Record 1750.0 of taxable interest as a document-extracted fact.
+            facts_file = case_dir / "extracted" / "facts.json"
+            registry = json.loads(facts_file.read_text())
+            interest_source = {"sourceType": "document_extract", "sourceName": "1099-int.txt"}
+            registry["facts"]["taxableInterest"] = {"value": 1750.0, "sources": [interest_source]}
+            facts_file.write_text(json.dumps(registry, indent=2))
+
+            outcome = ReviewEngine(corpus=corpus).review_case(case_dir)
+            titles = (finding["title"].lower() for finding in outcome["findings"])
+            self.assertTrue(any("likely omitted taxable interest" in title for title in titles))
+
+    def test_review_flags_high_complexity_positions_for_specialist_follow_up(self) -> None:
+        """Adding Form 6251 to the stored return triggers the high-complexity finding."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir, corpus = self.build_prepared_case(temp_dir)
+            return_file = case_dir / "return" / "normalized-return.json"
+            stored = json.loads(return_file.read_text())
+            stored["requiredForms"].append("f6251")
+            stored["taxes"]["alternativeMinimumTax"] = 300.0
+            return_file.write_text(json.dumps(stored, indent=2))
+
+            outcome = ReviewEngine(corpus=corpus).review_case(case_dir)
+
+            titles = (finding["title"].lower() for finding in outcome["findings"])
+            self.assertTrue(any("high-complexity tax position" in title for title in titles))
+
     def test_review_renderers_produce_summary_and_memo(self) -> None:
         review = {
             "status": "reviewed",
diff --git a/skills/us-cpa/tests/test_sources.py b/skills/us-cpa/tests/test_sources.py
index 206e4d0..a180ff9 100644
--- a/skills/us-cpa/tests/test_sources.py
+++ b/skills/us-cpa/tests/test_sources.py
@@ -12,6 +12,7 @@ from us_cpa.sources import (
     authority_rank_for,
     bootstrap_irs_catalog,
     build_irs_prior_pdf_url,
+    build_primary_law_authorities,
 )
 
 
@@ -42,6 +43,17 @@ class SourceCatalogTests(unittest.TestCase):
         self.assertGreaterEqual(len(catalog), 5)
         self.assertEqual(catalog[0].url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf")
         self.assertTrue(any(item.slug == "i1040gi" for item in catalog))
+        self.assertTrue(any(item.slug == "f1040sse" for item in catalog))
+
+    def test_primary_law_authorities_build_official_urls(self) -> None:
+        """A question citing a Code section and a regulation yields both authority kinds."""
+        question = "Does section 469 apply and what does Treas. Reg. 1.469-1 say?"
+        found = build_primary_law_authorities(question)
+
+        for source_class in ("internal_revenue_code", "treasury_regulation"):
+            self.assertTrue(any(entry["sourceClass"] == source_class for entry in found))
+        for host in ("uscode.house.gov", "ecfr.gov"):
+            self.assertTrue(any(host in entry["url"] for entry in found))
 
 
 class TaxYearCorpusTests(unittest.TestCase):