feat: add us-cpa pdf renderer
This commit is contained in:
@@ -122,6 +122,22 @@ Current implemented topics:
|
||||
- standard deduction
|
||||
- Schedule C / sole proprietorship reporting trigger
|
||||
|
||||
## Form Rendering
|
||||
|
||||
Current rendering path:
|
||||
|
||||
- official IRS PDFs from the cached tax-year corpus
|
||||
- overlay rendering onto those official PDFs using `reportlab` + `pypdf`
|
||||
- artifact manifest written to `output/artifacts.json` inside the case directory
|
||||
|
||||
Current rendered form support:
|
||||
|
||||
- Form 1040 overlay artifact generation
|
||||
|
||||
Current review rule:
|
||||
|
||||
- overlay-rendered artifacts are marked `reviewRequired: true`
|
||||
|
||||
## Scope Rules
|
||||
|
||||
- U.S. federal individual returns only in v1
|
||||
|
||||
@@ -54,5 +54,6 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
|
||||
- `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default
|
||||
- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
|
||||
- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
|
||||
- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
|
||||
|
||||
For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
|
||||
|
||||
88
skills/us-cpa/src/us_cpa/renderers.py
Normal file
88
skills/us-cpa/src/us_cpa/renderers.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from us_cpa.sources import TaxYearCorpus
|
||||
|
||||
|
||||
# Form codes that can be rendered, mapped to the file stem of the template PDF
# looked up in the tax-year IRS directory (``<stem>.pdf``). Every supported
# form currently uses its own code as the stem.
FORM_TEMPLATES = {
    form_code: form_code
    for form_code in ("f1040", "f1040sb", "f1040sc", "f1040se", "f1040s1")
}
|
||||
|
||||
|
||||
def _text_line(label, *keys):
    """Build a getter that renders ``"<label>: <value>"`` for a nested lookup."""

    def render(data):
        value = data
        for key in keys:
            value = value[key]
        return f"{label}: {value}"

    return render


def _amount_line(label, section, key):
    """Build a getter that renders ``data[section][key]`` with two decimals."""

    def render(data):
        return f"{label}: {data[section][key]:.2f}"

    return render


# Summary lines stamped onto Form 1040, in top-to-bottom order.
_F1040_LINES = (
    _text_line("Taxpayer", "taxpayer", "fullName"),
    _text_line("Filing status", "filingStatus"),
    _amount_line("Wages", "income", "wages"),
    _amount_line("Taxable interest", "income", "taxableInterest"),
    _amount_line("AGI", "totals", "adjustedGrossIncome"),
    _amount_line("Standard deduction", "deductions", "standardDeduction"),
    _amount_line("Taxable income", "totals", "taxableIncome"),
    _amount_line("Total tax", "taxes", "totalTax"),
    _amount_line("Withholding", "payments", "federalWithholding"),
    _amount_line("Refund", "totals", "refund"),
    _amount_line("Balance due", "totals", "balanceDue"),
)

# Per-form overlay layout: a list of ``(x, y, getter)`` tuples, where ``getter``
# takes the normalized return data and produces the string to draw at (x, y).
# The Form 1040 lines share x=72 and step down from y=725 in increments of 20.
OVERLAY_FIELDS = {
    "f1040": [
        (72, 725 - 20 * row, getter)
        for row, getter in enumerate(_F1040_LINES)
    ],
}
|
||||
|
||||
|
||||
def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
    """Write a copy of a template PDF with overlay text merged onto its first page.

    Args:
        template_path: PDF to clone; all of its pages are carried into the output.
        output_path: Destination file, opened in binary write mode (overwritten).
        form_code: Key into ``OVERLAY_FIELDS`` selecting which lines to draw.
        normalized: Data passed to each field getter to produce its text.

    Raises:
        IndexError: If the template has no pages.
        KeyError: If a field getter looks up a key missing from ``normalized``.
    """
    # Cloning already reads the template, so no separate PdfReader is needed.
    writer = PdfWriter(clone_from=str(template_path))
    page = writer.pages[0]

    fields = OVERLAY_FIELDS.get(form_code, [])
    if fields:
        # Size the overlay canvas to the template page so coordinates line up.
        width = float(page.mediabox.width)
        height = float(page.mediabox.height)

        buffer = BytesIO()
        pdf = canvas.Canvas(buffer, pagesize=(width, height))
        for x, y, getter in fields:
            pdf.drawString(x, y, getter(normalized))
        pdf.save()
        buffer.seek(0)

        overlay = PdfReader(buffer)
        page.merge_page(overlay.pages[0])
    # else: nothing to draw for this form, so the output is an unmodified copy.
    # NOTE(review): a canvas saved without any drawing operations may contain
    # no page at all, which would make ``overlay.pages[0]`` fail -- confirm
    # against reportlab before removing this guard.

    with output_path.open("wb") as handle:
        writer.write(handle)
|
||||
|
||||
|
||||
def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[str, Any]) -> dict[str, Any]:
    """Render every supported required form for a case and record a manifest.

    Rendered PDFs are written to ``<case_dir>/output/forms/<form_code>.pdf``
    and the manifest to ``<case_dir>/output/artifacts.json``. Entries of
    ``normalized["requiredForms"]`` that have no ``FORM_TEMPLATES`` mapping are
    skipped and do not appear in the manifest.

    Args:
        case_dir: Root directory of the case.
        corpus: Provides the IRS template directory for the tax year.
        normalized: Return data; must contain ``taxYear`` and ``requiredForms``
            plus whatever the overlay getters read.

    Returns:
        The manifest dict (``taxYear``, ``artifactCount``, ``artifacts``) that
        was also serialized to disk.
    """
    output_root = case_dir / "output"
    forms_dir = output_root / "forms"
    forms_dir.mkdir(parents=True, exist_ok=True)

    tax_year = normalized["taxYear"]
    templates_dir = corpus.paths_for_year(tax_year).irs_dir

    rendered: list[dict[str, Any]] = []
    for form_code in normalized["requiredForms"]:
        if form_code not in FORM_TEMPLATES:
            continue

        source_pdf = templates_dir / f"{FORM_TEMPLATES[form_code]}.pdf"
        target_pdf = forms_dir / f"{form_code}.pdf"
        _overlay_page(source_pdf, target_pdf, form_code, normalized)

        # Overlay output is always flagged so a human checks it.
        rendered.append(
            dict(
                formCode=form_code,
                templatePath=str(source_pdf),
                outputPath=str(target_pdf),
                renderMethod="overlay",
                reviewRequired=True,
            )
        )

    manifest = dict(
        taxYear=tax_year,
        artifactCount=len(rendered),
        artifacts=rendered,
    )
    manifest_path = output_root / "artifacts.json"
    manifest_path.write_text(json.dumps(manifest, indent=2))
    return manifest
|
||||
53
skills/us-cpa/tests/test_renderers.py
Normal file
53
skills/us-cpa/tests/test_renderers.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from us_cpa.renderers import render_case_forms
|
||||
from us_cpa.sources import TaxYearCorpus
|
||||
|
||||
|
||||
class RendererTests(unittest.TestCase):
    """Exercises ``render_case_forms`` end to end against a stub template."""

    @staticmethod
    def _stub_template_bytes() -> bytes:
        """Return the bytes of a one-page PDF standing in for an IRS template."""
        stream = BytesIO()
        sheet = canvas.Canvas(stream)
        sheet.drawString(72, 720, "Template")
        sheet.save()
        return stream.getvalue()

    def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None:
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            case_dir = root / "case"
            (case_dir / "output").mkdir(parents=True)

            # Point the corpus at an isolated cache holding only the stub f1040.
            corpus = TaxYearCorpus(cache_root=root / "cache")
            irs_dir = corpus.paths_for_year(2025).irs_dir
            irs_dir.mkdir(parents=True, exist_ok=True)
            (irs_dir / "f1040.pdf").write_bytes(self._stub_template_bytes())

            normalized = {
                "taxYear": 2025,
                "requiredForms": ["f1040"],
                "taxpayer": {"fullName": "Jane Doe"},
                "filingStatus": "single",
                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0},
                "deductions": {"standardDeduction": 15750.0},
                "taxes": {"totalTax": 3883.5},
                "payments": {"federalWithholding": 6000.0},
                "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
            }

            artifacts = render_case_forms(case_dir, corpus, normalized)
            first_artifact = artifacts["artifacts"][0]

            self.assertEqual(artifacts["artifactCount"], 1)
            self.assertEqual(first_artifact["renderMethod"], "overlay")
            self.assertTrue(first_artifact["reviewRequired"])
            self.assertTrue((case_dir / "output" / "forms" / "f1040.pdf").exists())

            # The manifest on disk must describe the same artifact.
            manifest_path = case_dir / "output" / "artifacts.json"
            manifest = json.loads(manifest_path.read_text())
            self.assertEqual(manifest["artifacts"][0]["formCode"], "f1040")


if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user