fix: expand us-cpa extraction review and rendering
This commit is contained in:
@@ -8,6 +8,8 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.document_extractors import extract_document_facts
|
||||
|
||||
|
||||
CASE_SUBDIRECTORIES = (
|
||||
"input",
|
||||
@@ -95,6 +97,48 @@ class CaseManager:
|
||||
current["issues"].append(issue)
|
||||
self.issues_path.write_text(json.dumps(current, indent=2))
|
||||
|
||||
def _record_fact(
    self,
    facts_payload: dict[str, Any],
    *,
    field: str,
    value: Any,
    source_type: str,
    source_name: str,
    tax_year: int,
) -> None:
    """Record one fact in ``facts_payload`` together with its provenance.

    Args:
        facts_payload: The loaded facts document; its ``"facts"`` mapping is
            mutated in place.
        field: Fact key, e.g. ``"wages"``.
        value: Value reported by this source.
        source_type: Kind of source (callers in this file pass
            ``"document_extract"`` or ``"user_statement"``).
        source_name: Name of the source that supplied the value.
        tax_year: Tax year copied into the issue record on conflict.

    Raises:
        CaseConflictError: If the field already holds a different value. The
            issue is persisted through ``self._write_issue`` before raising.
    """
    existing = facts_payload["facts"].get(field)
    if existing and existing["value"] != value:
        issue = {
            "status": "needs_resolution",
            "issueType": "fact_conflict",
            "field": field,
            "existingValue": existing["value"],
            "newValue": value,
            "message": f"Conflicting values for {field}. Resolve before continuing.",
            "createdAt": _timestamp(),
            "taxYear": tax_year,
        }
        self._write_issue(issue)
        raise CaseConflictError(issue)

    captured_at = _timestamp()
    source_entry = {
        "sourceType": source_type,
        "sourceName": source_name,
        "capturedAt": captured_at,
    }
    if existing:
        # Same value from another source: keep the stored fact and only
        # extend its provenance. Facts persisted in the earlier format carry
        # no "sources" list, so create one on demand instead of raising
        # KeyError.
        existing.setdefault("sources", []).append(source_entry)
        return

    facts_payload["facts"][field] = {
        "value": value,
        "sourceType": source_type,
        "capturedAt": captured_at,
        "sources": [source_entry],
    }
|
||||
|
||||
def intake(
|
||||
self,
|
||||
*,
|
||||
@@ -124,27 +168,28 @@ class CaseManager:
|
||||
registered_documents.append(document_entry)
|
||||
|
||||
facts_payload = self._load_facts()
|
||||
for field, value in user_facts.items():
|
||||
existing = facts_payload["facts"].get(field)
|
||||
if existing and existing["value"] != value:
|
||||
issue = {
|
||||
"status": "needs_resolution",
|
||||
"issueType": "fact_conflict",
|
||||
"field": field,
|
||||
"existingValue": existing["value"],
|
||||
"newValue": value,
|
||||
"message": f"Conflicting values for {field}. Resolve before continuing.",
|
||||
"createdAt": _timestamp(),
|
||||
"taxYear": tax_year,
|
||||
}
|
||||
self._write_issue(issue)
|
||||
raise CaseConflictError(issue)
|
||||
for document_entry in registered_documents:
|
||||
extracted = extract_document_facts(Path(document_entry["storedPath"]))
|
||||
document_entry["extractedFacts"] = extracted
|
||||
for field, value in extracted.items():
|
||||
self._record_fact(
|
||||
facts_payload,
|
||||
field=field,
|
||||
value=value,
|
||||
source_type="document_extract",
|
||||
source_name=document_entry["name"],
|
||||
tax_year=tax_year,
|
||||
)
|
||||
|
||||
facts_payload["facts"][field] = {
|
||||
"value": value,
|
||||
"sourceType": "user_statement",
|
||||
"capturedAt": _timestamp(),
|
||||
}
|
||||
for field, value in user_facts.items():
|
||||
self._record_fact(
|
||||
facts_payload,
|
||||
field=field,
|
||||
value=value,
|
||||
source_type="user_statement",
|
||||
source_name="interactive-intake",
|
||||
tax_year=tax_year,
|
||||
)
|
||||
|
||||
self._write_manifest(manifest)
|
||||
self._write_facts(facts_payload)
|
||||
|
||||
54
skills/us-cpa/src/us_cpa/document_extractors.py
Normal file
54
skills/us-cpa/src/us_cpa/document_extractors.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from pypdf import PdfReader
|
||||
|
||||
|
||||
_NUMBER = r"(-?\d+(?:,\d{3})*(?:\.\d+)?)"
|
||||
|
||||
|
||||
def _parse_number(raw: str) -> float:
|
||||
return float(raw.replace(",", ""))
|
||||
|
||||
|
||||
def _extract_text(path: Path) -> str:
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in {".txt", ".md"}:
|
||||
return path.read_text()
|
||||
if suffix == ".pdf":
|
||||
reader = PdfReader(str(path))
|
||||
return "\n".join((page.extract_text() or "") for page in reader.pages)
|
||||
return ""
|
||||
|
||||
|
||||
def _facts_from_text(text: str) -> dict[str, Any]:
    """Pull known facts out of document text using label-based patterns.

    Returns a mapping that may contain ``taxpayer.fullName``, ``wages``,
    ``federalWithholding``, ``taxableInterest`` and ``businessIncome``; a key
    is present only when its label was found in ``text``.
    """
    facts: dict[str, Any] = {}

    # Name labels are tried in priority order: the first one that matches
    # wins, so an "Employee:" line takes precedence over "Recipient:".
    name_patterns = (
        r"Employee:\s*(.+)",
        r"Recipient:\s*(.+)",
    )
    for pattern in name_patterns:
        hit = re.search(pattern, text)
        if hit is None:
            continue
        facts.setdefault("taxpayer.fullName", hit.group(1).strip())

    # Amount labels are matched case-insensitively and must be followed by
    # whitespace and a number in the _NUMBER format.
    amount_labels = (
        ("wages", r"Box 1 Wages, tips, other compensation\s+"),
        ("federalWithholding", r"Box 2 Federal income tax withheld\s+"),
        ("taxableInterest", r"Box 1 Interest Income\s+"),
        ("businessIncome", r"Net profit(?: or loss)?\s+"),
    )
    for key, label in amount_labels:
        hit = re.search(label + _NUMBER, text, re.I)
        if hit is not None:
            facts[key] = _parse_number(hit.group(1))

    return facts
|
||||
|
||||
|
||||
def extract_document_facts(path: Path) -> dict[str, Any]:
    """Extract facts from a case document.

    A ``.json`` document is taken as an already-structured fact mapping and
    returned as-is when its top level is an object; any other JSON top-level
    type yields ``{}``. Every other suffix goes through text extraction and
    pattern matching.

    Raises:
        json.JSONDecodeError: If a ``.json`` document is not valid JSON.
    """
    if path.suffix.lower() != ".json":
        return _facts_from_text(_extract_text(path))

    # "utf-8-sig" reads plain UTF-8 and also strips a leading byte-order mark,
    # which json.loads would otherwise reject. It also removes the dependency
    # on the platform's default encoding.
    payload = json.loads(path.read_text(encoding="utf-8-sig"))
    return payload if isinstance(payload, dict) else {}
|
||||
@@ -13,7 +13,11 @@ from us_cpa.sources import TaxYearCorpus
|
||||
def _load_case_facts(case_dir: Path) -> dict[str, Any]:
|
||||
facts_path = case_dir / "extracted" / "facts.json"
|
||||
payload = json.loads(facts_path.read_text())
|
||||
return {key: value["value"] for key, value in payload["facts"].items()}
|
||||
facts = {key: value["value"] for key, value in payload["facts"].items()}
|
||||
facts["_factMetadata"] = {
|
||||
key: {"sources": value.get("sources", [])} for key, value in payload["facts"].items()
|
||||
}
|
||||
return facts
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.sources import TaxYearCorpus
|
||||
from us_cpa.sources import TaxYearCorpus, build_primary_law_authorities
|
||||
|
||||
|
||||
TOPIC_RULES = [
|
||||
@@ -29,6 +29,22 @@ TOPIC_RULES = [
|
||||
"summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.",
|
||||
"confidence": "medium",
|
||||
},
|
||||
{
|
||||
"issue": "schedule_d_required",
|
||||
"keywords": ("schedule d", "capital gains"),
|
||||
"authority_slugs": ("f1040sd", "i1040sd", "f8949", "i8949"),
|
||||
"answer": "Schedule D is generally required when a taxpayer reports capital gains or losses, often alongside Form 8949.",
|
||||
"summary": "Capital gains and losses generally flow through Schedule D, with Form 8949 supporting detail when required.",
|
||||
"confidence": "medium",
|
||||
},
|
||||
{
|
||||
"issue": "schedule_e_required",
|
||||
"keywords": ("schedule e", "rental income"),
|
||||
"authority_slugs": ("f1040se", "i1040se"),
|
||||
"answer": "Schedule E is generally required when a taxpayer reports rental real-estate income or expenses.",
|
||||
"summary": "Rental income and expenses generally belong on Schedule E.",
|
||||
"confidence": "medium",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -117,7 +133,7 @@ class QuestionEngine:
|
||||
"missingFacts": [
|
||||
"Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
|
||||
],
|
||||
"authorities": [],
|
||||
"authorities": build_primary_law_authorities(question),
|
||||
"conclusion": {
|
||||
"answer": "Insufficient IRS-form and instruction support for a confident answer.",
|
||||
"summary": "This question needs primary-law analysis before a reliable answer can be given.",
|
||||
|
||||
@@ -37,6 +37,32 @@ OVERLAY_FIELDS = {
|
||||
}
|
||||
|
||||
|
||||
# Per-form builders mapping the normalized return to {form field name: text}.
# Only forms listed here are eligible for direct field filling.
FIELD_FILL_VALUES = {
    "f1040": lambda data: {
        "taxpayer_full_name": data["taxpayer"]["fullName"],
        "filing_status": data["filingStatus"],
        "wages": f"{data['income']['wages']:.2f}",
        "taxable_interest": f"{data['income']['taxableInterest']:.2f}",
    }
}


def _field_fill_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> bool:
    """Fill the template's form fields and write the result to ``output_path``.

    Returns:
        ``True`` when at least one configured value matched a field in the
        template and the filled PDF was written; ``False`` (nothing written)
        when the form has no builder or none of its field names exist in the
        template.
    """
    reader = PdfReader(str(template_path))
    fields = reader.get_fields() or {}
    builder = FIELD_FILL_VALUES.get(form_code)
    values = builder(normalized) if builder is not None else {}
    matched = {key: value for key, value in values.items() if key in fields}
    if not matched:
        return False

    writer = PdfWriter(clone_from=str(template_path))
    # The matched names come from the reader's document-level field listing,
    # so they are not necessarily on the first page. Updating only pages[0]
    # could report success while leaving fields on later pages empty.
    for page in writer.pages:
        if "/Annots" in page:  # pages without annotations hold no widgets
            writer.update_page_form_field_values(page, matched, auto_regenerate=False)
    writer.set_need_appearances_writer()
    with output_path.open("wb") as handle:
        writer.write(handle)
    return True
|
||||
|
||||
|
||||
def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
|
||||
reader = PdfReader(str(template_path))
|
||||
writer = PdfWriter(clone_from=str(template_path))
|
||||
@@ -68,14 +94,20 @@ def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[st
|
||||
continue
|
||||
template_path = irs_dir / f"{template_slug}.pdf"
|
||||
output_path = output_dir / f"{form_code}.pdf"
|
||||
_overlay_page(template_path, output_path, form_code, normalized)
|
||||
render_method = "overlay"
|
||||
review_required = True
|
||||
if _field_fill_page(template_path, output_path, form_code, normalized):
|
||||
render_method = "field_fill"
|
||||
review_required = False
|
||||
else:
|
||||
_overlay_page(template_path, output_path, form_code, normalized)
|
||||
artifacts.append(
|
||||
{
|
||||
"formCode": form_code,
|
||||
"templatePath": str(template_path),
|
||||
"outputPath": str(output_path),
|
||||
"renderMethod": "overlay",
|
||||
"reviewRequired": True,
|
||||
"renderMethod": render_method,
|
||||
"reviewRequired": review_required,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -11,6 +11,15 @@ def _as_float(value: Any) -> float:
|
||||
return float(value)
|
||||
|
||||
|
||||
def _fact_metadata(facts: dict[str, Any]) -> dict[str, Any]:
|
||||
return facts.get("_factMetadata", {})
|
||||
|
||||
|
||||
def _provenance_for(field: str, metadata: dict[str, Any]) -> dict[str, Any]:
|
||||
entry = metadata.get(field, {})
|
||||
return {"sources": list(entry.get("sources", []))}
|
||||
|
||||
|
||||
def tax_on_ordinary_income(amount: float, filing_status: str, tax_year: int) -> float:
|
||||
taxable = max(0.0, amount)
|
||||
brackets = tax_year_rules(tax_year)["ordinaryIncomeBrackets"][filing_status]
|
||||
@@ -30,57 +39,156 @@ def resolve_required_forms(normalized: dict[str, Any]) -> list[str]:
|
||||
if normalized["income"]["taxableInterest"] > 1500:
|
||||
forms.append("f1040sb")
|
||||
if normalized["income"]["businessIncome"] != 0:
|
||||
forms.extend(["f1040sc", "f1040se", "f1040s1"])
|
||||
return forms
|
||||
forms.extend(["f1040sc", "f1040sse", "f1040s1", "f8995"])
|
||||
if normalized["income"]["capitalGainLoss"] != 0:
|
||||
forms.extend(["f1040sd", "f8949"])
|
||||
if normalized["income"]["rentalIncome"] != 0:
|
||||
forms.extend(["f1040se", "f1040s1"])
|
||||
if normalized["deductions"]["deductionType"] == "itemized":
|
||||
forms.append("f1040sa")
|
||||
if normalized["adjustments"]["hsaContribution"] != 0:
|
||||
forms.append("f8889")
|
||||
if normalized["credits"]["educationCredit"] != 0:
|
||||
forms.append("f8863")
|
||||
if normalized["credits"]["foreignTaxCredit"] != 0:
|
||||
forms.append("f1116")
|
||||
if normalized["business"]["qualifiedBusinessIncome"] != 0 and "f8995" not in forms:
|
||||
forms.append("f8995")
|
||||
if normalized["basis"]["traditionalIraBasis"] != 0:
|
||||
forms.append("f8606")
|
||||
if normalized["taxes"]["additionalMedicareTax"] != 0:
|
||||
forms.append("f8959")
|
||||
if normalized["taxes"]["netInvestmentIncomeTax"] != 0:
|
||||
forms.append("f8960")
|
||||
if normalized["taxes"]["alternativeMinimumTax"] != 0:
|
||||
forms.append("f6251")
|
||||
if normalized["taxes"]["additionalTaxPenalty"] != 0:
|
||||
forms.append("f5329")
|
||||
if normalized["credits"]["energyCredit"] != 0:
|
||||
forms.append("f5695")
|
||||
if normalized["depreciation"]["depreciationExpense"] != 0:
|
||||
forms.append("f4562")
|
||||
if normalized["assetSales"]["section1231GainLoss"] != 0:
|
||||
forms.append("f4797")
|
||||
return list(dict.fromkeys(forms))
|
||||
|
||||
|
||||
def normalize_case_facts(facts: dict[str, Any], tax_year: int) -> dict[str, Any]:
    """Turn flat case facts into the normalized return structure.

    Args:
        facts: Flat ``field -> value`` mapping, optionally carrying a
            ``"_factMetadata"`` entry used to populate ``"provenance"``.
        tax_year: Year whose rules (standard deduction, brackets) apply.

    Returns:
        The normalized return, including computed totals, per-field
        provenance and the ``"requiredForms"`` list.
    """
    rules = tax_year_rules(tax_year)
    metadata = _fact_metadata(facts)
    filing_status = facts.get("filingStatus", "single")

    def amount(key: str) -> float:
        # Every monetary fact goes through the same float coercion.
        return _as_float(facts.get(key))

    wages = amount("wages")
    interest = amount("taxableInterest")
    business_income = amount("businessIncome")
    capital_gain_loss = amount("capitalGainLoss")
    rental_income = amount("rentalIncome")
    withholding = amount("federalWithholding")
    itemized_deductions = amount("itemizedDeductions")
    hsa_contribution = amount("hsaContribution")
    education_credit = amount("educationCredit")
    foreign_tax_credit = amount("foreignTaxCredit")
    qualified_business_income = amount("qualifiedBusinessIncome")
    traditional_ira_basis = amount("traditionalIraBasis")
    additional_medicare_tax = amount("additionalMedicareTax")
    net_investment_income_tax = amount("netInvestmentIncomeTax")
    alternative_minimum_tax = amount("alternativeMinimumTax")
    additional_tax_penalty = amount("additionalTaxPenalty")
    energy_credit = amount("energyCredit")
    depreciation_expense = amount("depreciationExpense")
    section1231_gain_loss = amount("section1231GainLoss")

    # Superseded intermediate assignments (AGI without capital/rental income,
    # taxable income from the standard deduction only, totals without credits)
    # were overwritten before use and have been dropped; results are unchanged.
    adjusted_gross_income = wages + interest + business_income + capital_gain_loss + rental_income
    standard_deduction = rules["standardDeduction"][filing_status]
    # Itemize only when it strictly beats the standard deduction.
    deduction_type = "itemized" if itemized_deductions > standard_deduction else "standard"
    deduction_amount = itemized_deductions if deduction_type == "itemized" else standard_deduction
    taxable_income = max(0.0, adjusted_gross_income - deduction_amount)
    income_tax = tax_on_ordinary_income(taxable_income, filing_status, tax_year)
    # A business loss produces no self-employment tax.
    self_employment_tax = round(max(0.0, business_income) * 0.9235 * 0.153, 2)
    total_tax = round(
        income_tax
        + self_employment_tax
        + additional_medicare_tax
        + net_investment_income_tax
        + alternative_minimum_tax
        + additional_tax_penalty,
        2,
    )
    total_payments = withholding
    total_credits = round(education_credit + foreign_tax_credit + energy_credit, 2)
    refund = round(max(0.0, total_payments + total_credits - total_tax), 2)
    balance_due = round(max(0.0, total_tax - total_payments - total_credits), 2)

    normalized = {
        "taxYear": tax_year,
        "taxpayer": {
            "fullName": facts.get("taxpayer.fullName", "Unknown Taxpayer"),
        },
        "spouse": {
            "fullName": facts.get("spouse.fullName", ""),
        },
        "dependents": list(facts.get("dependents", [])),
        "filingStatus": filing_status,
        "income": {
            "wages": wages,
            "taxableInterest": interest,
            "businessIncome": business_income,
            "capitalGainLoss": capital_gain_loss,
            "rentalIncome": rental_income,
        },
        "adjustments": {
            "hsaContribution": hsa_contribution,
        },
        "payments": {
            "federalWithholding": withholding,
        },
        "deductions": {
            "standardDeduction": standard_deduction,
            "itemizedDeductions": itemized_deductions,
            "deductionType": deduction_type,
            "deductionAmount": deduction_amount,
        },
        "credits": {
            "educationCredit": education_credit,
            "foreignTaxCredit": foreign_tax_credit,
            "energyCredit": energy_credit,
        },
        "taxes": {
            "incomeTax": income_tax,
            "selfEmploymentTax": self_employment_tax,
            "additionalMedicareTax": additional_medicare_tax,
            "netInvestmentIncomeTax": net_investment_income_tax,
            "alternativeMinimumTax": alternative_minimum_tax,
            "additionalTaxPenalty": additional_tax_penalty,
            "totalTax": total_tax,
        },
        "business": {
            "qualifiedBusinessIncome": qualified_business_income,
        },
        "basis": {
            "traditionalIraBasis": traditional_ira_basis,
        },
        "depreciation": {
            "depreciationExpense": depreciation_expense,
        },
        "assetSales": {
            "section1231GainLoss": section1231_gain_loss,
        },
        "totals": {
            "adjustedGrossIncome": round(adjusted_gross_income, 2),
            "taxableIncome": round(taxable_income, 2),
            "totalPayments": round(total_payments, 2),
            "totalCredits": total_credits,
            "refund": refund,
            "balanceDue": balance_due,
        },
        "provenance": {
            "income.wages": _provenance_for("wages", metadata),
            "income.taxableInterest": _provenance_for("taxableInterest", metadata),
            "income.businessIncome": _provenance_for("businessIncome", metadata),
            "income.capitalGainLoss": _provenance_for("capitalGainLoss", metadata),
            "income.rentalIncome": _provenance_for("rentalIncome", metadata),
            "payments.federalWithholding": _provenance_for("federalWithholding", metadata),
        },
    }
    # Required forms are derived from the finished structure.
    normalized["requiredForms"] = resolve_required_forms(normalized)
    return normalized
|
||||
|
||||
@@ -22,6 +22,9 @@ class ReviewEngine:
|
||||
stored_return = json.loads((case_dir / "return" / "normalized-return.json").read_text())
|
||||
facts_payload = json.loads((case_dir / "extracted" / "facts.json").read_text())
|
||||
facts = {key: value["value"] for key, value in facts_payload["facts"].items()}
|
||||
facts["_factMetadata"] = {
|
||||
key: {"sources": value.get("sources", [])} for key, value in facts_payload["facts"].items()
|
||||
}
|
||||
recomputed = normalize_case_facts(facts, manifest["taxYear"])
|
||||
artifacts_payload = json.loads((case_dir / "output" / "artifacts.json").read_text())
|
||||
|
||||
@@ -39,6 +42,42 @@ class ReviewEngine:
|
||||
}
|
||||
)
|
||||
|
||||
for field, label in (
|
||||
("wages", "wages"),
|
||||
("taxableInterest", "taxable interest"),
|
||||
("businessIncome", "business income"),
|
||||
("capitalGainLoss", "capital gains or losses"),
|
||||
("rentalIncome", "rental income"),
|
||||
):
|
||||
stored_value = stored_return["income"].get(field, 0.0)
|
||||
recomputed_value = recomputed["income"].get(field, 0.0)
|
||||
sources = recomputed.get("provenance", {}).get(f"income.{field}", {}).get("sources", [])
|
||||
has_document_source = any(item.get("sourceType") == "document_extract" for item in sources)
|
||||
if stored_value != recomputed_value:
|
||||
findings.append(
|
||||
{
|
||||
"severity": "high" if has_document_source else "medium",
|
||||
"title": f"Source fact mismatch for {label}",
|
||||
"explanation": f"Stored return reports {stored_value:.2f} for {label}, but case facts support {recomputed_value:.2f}.",
|
||||
"suggestedAction": f"Reconcile {label} to {recomputed_value:.2f} before treating the return as final.",
|
||||
"authorities": [
|
||||
{"title": "Case fact registry", "sourceClass": "irs_form"}
|
||||
],
|
||||
}
|
||||
)
|
||||
if stored_value == 0 and recomputed_value > 0 and has_document_source:
|
||||
findings.append(
|
||||
{
|
||||
"severity": "high",
|
||||
"title": f"Likely omitted {label}",
|
||||
"explanation": f"Document-extracted facts support {recomputed_value:.2f} of {label}, but the stored return reports none.",
|
||||
"suggestedAction": f"Add {label} to the return and regenerate the required forms.",
|
||||
"authorities": [
|
||||
{"title": "Case document extraction", "sourceClass": "irs_form"}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
rendered_forms = {artifact["formCode"] for artifact in artifacts_payload["artifacts"]}
|
||||
for required_form in recomputed["requiredForms"]:
|
||||
if required_form not in rendered_forms:
|
||||
@@ -64,6 +103,18 @@ class ReviewEngine:
|
||||
}
|
||||
)
|
||||
|
||||
required_forms_union = set(recomputed["requiredForms"]) | set(stored_return.get("requiredForms", []))
|
||||
if any(form in required_forms_union for form in ("f6251", "f8960", "f8959", "f1116")):
|
||||
findings.append(
|
||||
{
|
||||
"severity": "medium",
|
||||
"title": "High-complexity tax position requires specialist follow-up",
|
||||
"explanation": "The return includes forms or computations that usually require deeper technical support and careful authority review.",
|
||||
"suggestedAction": "Review the supporting authority and computations for the high-complexity forms before treating the return as filing-ready.",
|
||||
"authorities": [{"title": "Required form analysis", "sourceClass": "irs_instructions"}],
|
||||
}
|
||||
)
|
||||
|
||||
findings.sort(key=lambda item: (_severity_rank(item["severity"]), item["title"]))
|
||||
review = {
|
||||
"status": "reviewed",
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from enum import IntEnum
|
||||
@@ -63,6 +64,37 @@ def build_irs_prior_pdf_url(slug: str, tax_year: int) -> str:
|
||||
return f"https://www.irs.gov/pub/irs-prior/{slug}--{tax_year}.pdf"
|
||||
|
||||
|
||||
def build_primary_law_authorities(question: str) -> list[dict[str, str | int]]:
    """Collect primary-law citations mentioned in ``question``.

    Recognizes Internal Revenue Code citations ("section 162", "sec. 199a")
    and Treasury regulation citations ("Treas. Reg. 1.61-1", "Treasury
    Regulations section 1.61-1"). Matching is done on the lower-cased
    question, so section identifiers are reported in lower case.

    Returns:
        One authority dict per distinct citation (``slug``, ``title``,
        ``sourceClass``, ``url``, ``authorityRank``), IRC sections first,
        each group in order of first appearance.
    """
    authorities: list[dict[str, str | int]] = []
    seen_slugs: set[str] = set()
    normalized = question.lower()

    def add(entry: dict[str, str | int]) -> None:
        # A citation repeated in the question is reported once.
        if entry["slug"] not in seen_slugs:
            seen_slugs.add(entry["slug"])
            authorities.append(entry)

    # \b keeps words such as "subsection" from matching. The trailing
    # lookahead rejects numbers that continue as "<n>.<digits>", which is the
    # regulation numbering form ("section 1.61-1") and not an IRC section.
    irc_pattern = r"\b(?:section|sec\.)\s+(\d+[a-z0-9-]*)(?![a-z0-9-]|\.\d)"
    for match in re.finditer(irc_pattern, normalized):
        section = match.group(1)
        add(
            {
                "slug": f"irc-{section}",
                "title": f"Internal Revenue Code section {section}",
                "sourceClass": "internal_revenue_code",
                "url": f"https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title26-section{section}&num=0&edition=prelim",
                "authorityRank": int(AuthorityRank.INTERNAL_REVENUE_CODE),
            }
        )

    # Accept an optional plural and an optional "section" / "sec." / "§"
    # between the "Treas. Reg." prefix and the regulation number.
    reg_pattern = r"\btreas(?:ury)?\.?\s+reg(?:ulation)?s?\.?\s*(?:(?:section|sec\.|§)\s*)?([\d.]+-\d+)"
    for match in re.finditer(reg_pattern, normalized):
        section = match.group(1)
        add(
            {
                "slug": f"reg-{section}",
                "title": f"Treasury Regulation {section}",
                "sourceClass": "treasury_regulation",
                "url": f"https://www.ecfr.gov/current/title-26/section-{section}",
                "authorityRank": int(AuthorityRank.TREASURY_REGULATION),
            }
        )

    return authorities
|
||||
|
||||
|
||||
def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]:
|
||||
entries = [
|
||||
("f1040", "Form 1040", "irs_form"),
|
||||
@@ -73,16 +105,44 @@ def bootstrap_irs_catalog(tax_year: int) -> list[SourceDescriptor]:
|
||||
("f1040sb", "Schedule B (Form 1040)", "irs_form"),
|
||||
("f1040sc", "Schedule C (Form 1040)", "irs_form"),
|
||||
("f1040sd", "Schedule D (Form 1040)", "irs_form"),
|
||||
("f1040se", "Schedule SE (Form 1040)", "irs_form"),
|
||||
("f1040se", "Schedule E (Form 1040)", "irs_form"),
|
||||
("f1040sse", "Schedule SE (Form 1040)", "irs_form"),
|
||||
("f1040s8", "Schedule 8812 (Form 1040)", "irs_form"),
|
||||
("f8949", "Form 8949", "irs_form"),
|
||||
("f4562", "Form 4562", "irs_form"),
|
||||
("f4797", "Form 4797", "irs_form"),
|
||||
("f6251", "Form 6251", "irs_form"),
|
||||
("f8606", "Form 8606", "irs_form"),
|
||||
("f8863", "Form 8863", "irs_form"),
|
||||
("f8889", "Form 8889", "irs_form"),
|
||||
("f8959", "Form 8959", "irs_form"),
|
||||
("f8960", "Form 8960", "irs_form"),
|
||||
("f8995", "Form 8995", "irs_form"),
|
||||
("f8995a", "Form 8995-A", "irs_form"),
|
||||
("f5329", "Form 5329", "irs_form"),
|
||||
("f5695", "Form 5695", "irs_form"),
|
||||
("f1116", "Form 1116", "irs_form"),
|
||||
("i1040gi", "Instructions for Form 1040 and Schedules 1-3", "irs_instructions"),
|
||||
("i1040sca", "Instructions for Schedule A", "irs_instructions"),
|
||||
("i1040sc", "Instructions for Schedule C", "irs_instructions"),
|
||||
("i1040sd", "Instructions for Schedule D", "irs_instructions"),
|
||||
("i1040se", "Instructions for Schedule SE", "irs_instructions"),
|
||||
("i1040se", "Instructions for Schedule E (Form 1040)", "irs_instructions"),
|
||||
("i1040sse", "Instructions for Schedule SE", "irs_instructions"),
|
||||
("i1040s8", "Instructions for Schedule 8812 (Form 1040)", "irs_instructions"),
|
||||
("i8949", "Instructions for Form 8949", "irs_instructions"),
|
||||
("i4562", "Instructions for Form 4562", "irs_instructions"),
|
||||
("i4797", "Instructions for Form 4797", "irs_instructions"),
|
||||
("i6251", "Instructions for Form 6251", "irs_instructions"),
|
||||
("i8606", "Instructions for Form 8606", "irs_instructions"),
|
||||
("i8863", "Instructions for Form 8863", "irs_instructions"),
|
||||
("i8889", "Instructions for Form 8889", "irs_instructions"),
|
||||
("i8959", "Instructions for Form 8959", "irs_instructions"),
|
||||
("i8960", "Instructions for Form 8960", "irs_instructions"),
|
||||
("i8995", "Instructions for Form 8995", "irs_instructions"),
|
||||
("i8995a", "Instructions for Form 8995-A", "irs_instructions"),
|
||||
("i5329", "Instructions for Form 5329", "irs_instructions"),
|
||||
("i5695", "Instructions for Form 5695", "irs_instructions"),
|
||||
("i1116", "Instructions for Form 1116", "irs_instructions"),
|
||||
]
|
||||
return [
|
||||
SourceDescriptor(
|
||||
|
||||
Reference in New Issue
Block a user