Files
stef-openclaw-skills/skills/us-cpa/tests/test_sources.py
2026-03-15 03:01:16 -05:00

110 lines
4.3 KiB
Python

from __future__ import annotations
import json
import tempfile
import unittest
from pathlib import Path
from us_cpa.sources import (
AuthorityRank,
SourceDescriptor,
TaxYearCorpus,
authority_rank_for,
bootstrap_irs_catalog,
build_irs_prior_pdf_url,
build_primary_law_authorities,
)
class SourceCatalogTests(unittest.TestCase):
    """Checks for IRS URL construction, authority ranking, and catalog builders."""

    def test_build_irs_prior_pdf_url_uses_expected_pattern(self) -> None:
        # Each slug is expected under irs-prior as "<slug>--<year>.pdf".
        expected_by_slug = (
            ("f1040", "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf"),
            ("i1040gi", "https://www.irs.gov/pub/irs-prior/i1040gi--2025.pdf"),
        )
        for slug, expected_url in expected_by_slug:
            self.assertEqual(build_irs_prior_pdf_url(slug, 2025), expected_url)

    def test_authority_ranking_orders_irs_before_primary_law(self) -> None:
        # Source-class strings must resolve to their matching enum members.
        form_rank = authority_rank_for("irs_form")
        self.assertEqual(form_rank, AuthorityRank.IRS_FORM)

        regulation_rank = authority_rank_for("treasury_regulation")
        self.assertEqual(regulation_rank, AuthorityRank.TREASURY_REGULATION)

        # An IRS form must compare lower than the Internal Revenue Code.
        irs_rank = authority_rank_for("irs_form")
        code_rank = authority_rank_for("internal_revenue_code")
        self.assertLess(irs_rank, code_rank)

    def test_bootstrap_catalog_builds_tax_year_specific_urls(self) -> None:
        entries = bootstrap_irs_catalog(2025)

        self.assertGreaterEqual(len(entries), 5)
        # The first entry is expected to be Form 1040 for the requested year.
        self.assertEqual(
            entries[0].url,
            "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf",
        )
        for required_slug in ("i1040gi", "f1040sse"):
            self.assertTrue(any(entry.slug == required_slug for entry in entries))

    def test_primary_law_authorities_build_official_urls(self) -> None:
        question = "Does section 469 apply and what does Treas. Reg. 1.469-1 say?"
        found = build_primary_law_authorities(question)

        # Both a statute and a regulation authority must be present ...
        for source_class in ("internal_revenue_code", "treasury_regulation"):
            self.assertTrue(
                any(entry["sourceClass"] == source_class for entry in found)
            )
        # ... and their URLs must point at the expected official hosts.
        for host in ("uscode.house.gov", "ecfr.gov"):
            self.assertTrue(any(host in entry["url"] for entry in found))
class TaxYearCorpusTests(unittest.TestCase):
    """Checks for the on-disk layout and download manifest of TaxYearCorpus."""

    def test_tax_year_layout_is_deterministic(self) -> None:
        with tempfile.TemporaryDirectory() as temp_dir:
            cache_root = Path(temp_dir)
            layout = TaxYearCorpus(cache_root=cache_root).paths_for_year(2025)

            # Everything for a year hangs off <cache_root>/tax-years/<year>.
            self.assertEqual(layout.year_dir, cache_root / "tax-years" / "2025")
            self.assertEqual(layout.irs_dir, layout.year_dir / "irs")
            self.assertEqual(layout.manifest_path, layout.year_dir / "manifest.json")

    def test_download_catalog_writes_files_and_manifest(self) -> None:
        # (slug, title, source_class) for each descriptor to download.
        specs = (
            ("f1040", "Form 1040", "irs_form"),
            ("i1040gi", "Instructions for Form 1040", "irs_instructions"),
        )

        def stub_fetch(url: str) -> bytes:
            # Stand-in fetcher: returns bytes derived from the URL, no network.
            return f"downloaded:{url}".encode()

        # All assertions stay inside the context manager because they inspect
        # files that are removed once the temporary directory is cleaned up.
        with tempfile.TemporaryDirectory() as temp_dir:
            corpus = TaxYearCorpus(cache_root=Path(temp_dir))
            catalog = [
                SourceDescriptor(
                    slug=slug,
                    title=title,
                    source_class=source_class,
                    media_type="application/pdf",
                    url=build_irs_prior_pdf_url(slug, 2025),
                )
                for slug, title, source_class in specs
            ]

            manifest = corpus.download_catalog(2025, catalog, fetcher=stub_fetch)
            manifest_path = corpus.paths_for_year(2025).manifest_path

            # The returned manifest summarises the download.
            self.assertEqual(manifest["taxYear"], 2025)
            self.assertEqual(manifest["sourceCount"], 2)
            self.assertTrue(manifest_path.exists())

            # The first manifest entry mirrors the first descriptor and points
            # at a file that exists on disk.
            head = manifest["sources"][0]
            self.assertEqual(head["slug"], "f1040")
            self.assertEqual(head["authorityRank"], int(AuthorityRank.IRS_FORM))
            self.assertTrue(Path(head["localPath"]).exists())

            # The manifest persisted to disk carries the same data.
            persisted = json.loads(manifest_path.read_text())
            self.assertEqual(persisted["sourceCount"], 2)
            self.assertEqual(persisted["sources"][1]["slug"], "i1040gi")
if __name__ == "__main__":
    # Executed directly (not imported): discover and run this module's tests.
    unittest.main()