From decf3132d5ac3b71ec8912943f6a69220a14ff0b Mon Sep 17 00:00:00 2001
From: Stefano Fiorini
Date: Sun, 15 Mar 2026 01:26:29 -0500
Subject: [PATCH] feat: add us-cpa pdf renderer

---
 docs/us-cpa.md                        |  16 +++++
 skills/us-cpa/SKILL.md                |   1 +
 skills/us-cpa/src/us_cpa/renderers.py | 123 +++++++++++++++++++++++++++++++++++++++++++
 skills/us-cpa/tests/test_renderers.py |  64 ++++++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 skills/us-cpa/src/us_cpa/renderers.py
 create mode 100644 skills/us-cpa/tests/test_renderers.py

diff --git a/docs/us-cpa.md b/docs/us-cpa.md
index d500436..df8f9e1 100644
--- a/docs/us-cpa.md
+++ b/docs/us-cpa.md
@@ -122,6 +122,22 @@ Current implemented topics:
 - standard deduction
 - Schedule C / sole proprietorship reporting trigger
 
+## Form Rendering
+
+Current rendering path:
+
+- official IRS PDFs from the cached tax-year corpus
+- overlay rendering onto those official PDFs using `reportlab` + `pypdf`
+- artifact manifest written to `output/artifacts.json`
+
+Current rendered form support:
+
+- Form 1040 overlay artifact generation
+
+Current review rule:
+
+- overlay-rendered artifacts are marked `reviewRequired: true`
+
 ## Scope Rules
 
 - U.S. federal individual returns only in v1
diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md
index f14247a..8c39ebe 100644
--- a/skills/us-cpa/SKILL.md
+++ b/skills/us-cpa/SKILL.md
@@ -54,5 +54,6 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
 - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default
 - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
 - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
+- rendered form artifacts are currently generated by overlaying values onto the official IRS PDFs and are flagged for human review
 
 For operator details, limitations, and the planned case structure, see `docs/us-cpa.md`.
diff --git a/skills/us-cpa/src/us_cpa/renderers.py b/skills/us-cpa/src/us_cpa/renderers.py
new file mode 100644
index 0000000..440d8de
--- /dev/null
+++ b/skills/us-cpa/src/us_cpa/renderers.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import json
+from io import BytesIO
+from pathlib import Path
+from typing import Any
+
+from pypdf import PdfReader, PdfWriter
+from reportlab.pdfgen import canvas
+
+from us_cpa.sources import TaxYearCorpus
+
+
+# Form codes this renderer can produce, mapped to the file stem of the IRS PDF
+# template that is looked up in the tax-year corpus directory.
+FORM_TEMPLATES = {
+    "f1040": "f1040",
+    "f1040sb": "f1040sb",
+    "f1040sc": "f1040sc",
+    "f1040se": "f1040se",
+    "f1040s1": "f1040s1",
+}
+
+
+# Overlay lines per form code as (x, y, getter) tuples: x and y are passed to
+# canvas.drawString and getter builds the text from the normalized case data.
+OVERLAY_FIELDS = {
+    "f1040": [
+        (72, 725, lambda data: f"Taxpayer: {data['taxpayer']['fullName']}"),
+        (72, 705, lambda data: f"Filing status: {data['filingStatus']}"),
+        (72, 685, lambda data: f"Wages: {data['income']['wages']:.2f}"),
+        (72, 665, lambda data: f"Taxable interest: {data['income']['taxableInterest']:.2f}"),
+        (72, 645, lambda data: f"AGI: {data['totals']['adjustedGrossIncome']:.2f}"),
+        (72, 625, lambda data: f"Standard deduction: {data['deductions']['standardDeduction']:.2f}"),
+        (72, 605, lambda data: f"Taxable income: {data['totals']['taxableIncome']:.2f}"),
+        (72, 585, lambda data: f"Total tax: {data['taxes']['totalTax']:.2f}"),
+        (72, 565, lambda data: f"Withholding: {data['payments']['federalWithholding']:.2f}"),
+        (72, 545, lambda data: f"Refund: {data['totals']['refund']:.2f}"),
+        (72, 525, lambda data: f"Balance due: {data['totals']['balanceDue']:.2f}"),
+    ],
+}
+
+
+def _overlay_page(template_path: Path, output_path: Path, form_code: str, normalized: dict[str, Any]) -> None:
+    """Write a copy of the template PDF with the form's overlay text on page 1.
+
+    The lines listed in ``OVERLAY_FIELDS`` for ``form_code`` are drawn onto an
+    in-memory page sized like the template's first page, and that page is
+    merged onto the first page of the copy written to ``output_path``.
+    """
+    # The writer clone is the only parse of the template that is needed.
+    writer = PdfWriter(clone_from=str(template_path))
+
+    page = writer.pages[0]
+    width = float(page.mediabox.width)
+    height = float(page.mediabox.height)
+    buffer = BytesIO()
+    pdf = canvas.Canvas(buffer, pagesize=(width, height))
+    for x, y, getter in OVERLAY_FIELDS.get(form_code, []):
+        pdf.drawString(x, y, getter(normalized))
+    # End the page explicitly so the overlay has a first page to merge even
+    # when the form code has no overlay lines.
+    pdf.showPage()
+    pdf.save()
+    buffer.seek(0)
+    overlay = PdfReader(buffer)
+    page.merge_page(overlay.pages[0])
+    with output_path.open("wb") as handle:
+        writer.write(handle)
+
+
+def render_case_forms(case_dir: Path, corpus: TaxYearCorpus, normalized: dict[str, Any]) -> dict[str, Any]:
+    """Render the supported required forms and write the artifact manifest.
+
+    PDFs are written to ``<case_dir>/output/forms`` and the manifest to
+    ``<case_dir>/output/artifacts.json``. Entries of
+    ``normalized["requiredForms"]`` without a ``FORM_TEMPLATES`` mapping are
+    skipped and do not appear in the manifest.
+
+    Returns the manifest dict that was written.
+
+    Raises:
+        FileNotFoundError: a needed IRS template PDF is not in the corpus
+            directory; raised before any form is rendered.
+    """
+    output_dir = case_dir / "output" / "forms"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    irs_dir = corpus.paths_for_year(normalized["taxYear"]).irs_dir
+
+    jobs = [
+        (form_code, irs_dir / f"{FORM_TEMPLATES[form_code]}.pdf")
+        for form_code in normalized["requiredForms"]
+        if form_code in FORM_TEMPLATES
+    ]
+    # Fail up front so a missing template cannot leave some forms rendered
+    # while the manifest is never written.
+    missing = [str(template_path) for _, template_path in jobs if not template_path.is_file()]
+    if missing:
+        raise FileNotFoundError(f"Missing IRS template PDF(s): {', '.join(missing)}")
+
+    artifacts = []
+    for form_code, template_path in jobs:
+        output_path = output_dir / f"{form_code}.pdf"
+        _overlay_page(template_path, output_path, form_code, normalized)
+        artifacts.append(
+            {
+                "formCode": form_code,
+                "templatePath": str(template_path),
+                "outputPath": str(output_path),
+                "renderMethod": "overlay",
+                "reviewRequired": True,
+            }
+        )
+
+    artifact_manifest = {
+        "taxYear": normalized["taxYear"],
+        "artifactCount": len(artifacts),
+        "artifacts": artifacts,
+    }
+    # Be explicit about the encoding instead of relying on the locale default.
+    manifest_path = case_dir / "output" / "artifacts.json"
+    manifest_path.write_text(json.dumps(artifact_manifest, indent=2), encoding="utf-8")
+    return artifact_manifest
diff --git a/skills/us-cpa/tests/test_renderers.py b/skills/us-cpa/tests/test_renderers.py
new file mode 100644
index 0000000..33f5358
--- /dev/null
+++ b/skills/us-cpa/tests/test_renderers.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import json
+import tempfile
+import unittest
+from io import BytesIO
+from pathlib import Path
+
+from reportlab.pdfgen import canvas
+
+from us_cpa.renderers import render_case_forms
+from us_cpa.sources import TaxYearCorpus
+
+
+class RendererTests(unittest.TestCase):
+    def test_render_case_forms_writes_overlay_artifacts_and_flags_review(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir = Path(temp_dir) / "case"
+            (case_dir / "output").mkdir(parents=True)
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+            irs_dir = corpus.paths_for_year(2025).irs_dir
+            irs_dir.mkdir(parents=True, exist_ok=True)
+
+            buffer = BytesIO()
+            pdf = canvas.Canvas(buffer)
+            pdf.drawString(72, 720, "Template")
+            pdf.save()
+            (irs_dir / "f1040.pdf").write_bytes(buffer.getvalue())
+
+            normalized = {
+                "taxYear": 2025,
+                "requiredForms": ["f1040"],
+                "taxpayer": {"fullName": "Jane Doe"},
+                "filingStatus": "single",
+                "income": {"wages": 50000.0, "taxableInterest": 100.0, "businessIncome": 0.0},
+                "deductions": {"standardDeduction": 15750.0},
+                "taxes": {"totalTax": 3883.5},
+                "payments": {"federalWithholding": 6000.0},
+                "totals": {"adjustedGrossIncome": 50100.0, "taxableIncome": 34350.0, "refund": 2116.5, "balanceDue": 0.0},
+            }
+
+            artifacts = render_case_forms(case_dir, corpus, normalized)
+
+            self.assertEqual(artifacts["artifactCount"], 1)
+            self.assertEqual(artifacts["artifacts"][0]["renderMethod"], "overlay")
+            self.assertTrue(artifacts["artifacts"][0]["reviewRequired"])
+            self.assertTrue((case_dir / "output" / "forms" / "f1040.pdf").exists())
+            manifest = json.loads((case_dir / "output" / "artifacts.json").read_text(encoding="utf-8"))
+            self.assertEqual(manifest["artifacts"][0]["formCode"], "f1040")
+
+    def test_render_case_forms_fails_before_rendering_when_template_is_missing(self) -> None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            case_dir = Path(temp_dir) / "case"
+            corpus = TaxYearCorpus(cache_root=Path(temp_dir) / "cache")
+            normalized = {"taxYear": 2025, "requiredForms": ["f1040"]}
+
+            with self.assertRaises(FileNotFoundError):
+                render_case_forms(case_dir, corpus, normalized)
+
+            self.assertFalse((case_dir / "output" / "artifacts.json").exists())
+
+
+if __name__ == "__main__":
+    unittest.main()