# Source listing metadata: 110 lines, 4.3 KiB, Python
from __future__ import annotations
|
|
|
|
import json
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
from us_cpa.sources import (
|
|
AuthorityRank,
|
|
SourceDescriptor,
|
|
TaxYearCorpus,
|
|
authority_rank_for,
|
|
bootstrap_irs_catalog,
|
|
build_irs_prior_pdf_url,
|
|
build_primary_law_authorities,
|
|
)
|
|
|
|
|
|
class SourceCatalogTests(unittest.TestCase):
    """Tests for IRS URL construction, authority ranks, and catalog builders."""

    def test_build_irs_prior_pdf_url_uses_expected_pattern(self) -> None:
        """Each slug combined with year 2025 yields the expected irs-prior URL."""
        expected_by_slug = (
            ("f1040", "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf"),
            ("i1040gi", "https://www.irs.gov/pub/irs-prior/i1040gi--2025.pdf"),
        )
        for slug, expected_url in expected_by_slug:
            self.assertEqual(build_irs_prior_pdf_url(slug, 2025), expected_url)

    def test_authority_ranking_orders_irs_before_primary_law(self) -> None:
        """Source classes map to their enum members; IRS forms compare lower."""
        form_rank = authority_rank_for("irs_form")
        self.assertEqual(form_rank, AuthorityRank.IRS_FORM)

        regulation_rank = authority_rank_for("treasury_regulation")
        self.assertEqual(regulation_rank, AuthorityRank.TREASURY_REGULATION)

        # The IRS form rank must compare strictly below the code's rank.
        self.assertLess(
            authority_rank_for("irs_form"),
            authority_rank_for("internal_revenue_code"),
        )

    def test_bootstrap_catalog_builds_tax_year_specific_urls(self) -> None:
        """The 2025 catalog has at least five entries, led by Form 1040."""
        entries = bootstrap_irs_catalog(2025)

        self.assertGreaterEqual(len(entries), 5)

        first_entry = entries[0]
        self.assertEqual(
            first_entry.url,
            "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf",
        )

        # These slugs must appear somewhere in the catalog, in any position.
        for required_slug in ("i1040gi", "f1040sse"):
            self.assertTrue(any(entry.slug == required_slug for entry in entries))

    def test_primary_law_authorities_build_official_urls(self) -> None:
        """A question citing a section and a regulation yields both classes."""
        question = "Does section 469 apply and what does Treas. Reg. 1.469-1 say?"
        found = build_primary_law_authorities(question)

        for source_class in ("internal_revenue_code", "treasury_regulation"):
            self.assertTrue(
                any(entry["sourceClass"] == source_class for entry in found)
            )

        # At least one URL must mention each of these hosts.
        for host in ("uscode.house.gov", "ecfr.gov"):
            self.assertTrue(any(host in entry["url"] for entry in found))
|
|
|
|
|
|
class TaxYearCorpusTests(unittest.TestCase):
    """Tests for TaxYearCorpus path layout and catalog download bookkeeping."""

    def test_tax_year_layout_is_deterministic(self) -> None:
        """Paths for a year are derived from the cache root in a fixed layout."""
        with tempfile.TemporaryDirectory() as scratch:
            cache_root = Path(scratch)
            layout = TaxYearCorpus(cache_root=cache_root).paths_for_year(2025)

            self.assertEqual(layout.year_dir, cache_root / "tax-years" / "2025")
            self.assertEqual(layout.irs_dir, layout.year_dir / "irs")
            self.assertEqual(layout.manifest_path, layout.year_dir / "manifest.json")

    def test_download_catalog_writes_files_and_manifest(self) -> None:
        """Downloading a two-item catalog writes files and a matching manifest."""
        tax_year = 2025
        # (slug, title, source_class) for each descriptor in the catalog.
        specs = (
            ("f1040", "Form 1040", "irs_form"),
            ("i1040gi", "Instructions for Form 1040", "irs_instructions"),
        )

        def stub_fetch(url: str) -> bytes:
            # Deterministic stand-in for the fetcher, derived from the URL.
            return ("downloaded:" + url).encode()

        with tempfile.TemporaryDirectory() as scratch:
            corpus = TaxYearCorpus(cache_root=Path(scratch))
            descriptors = [
                SourceDescriptor(
                    slug=slug,
                    title=title,
                    source_class=source_class,
                    media_type="application/pdf",
                    url=build_irs_prior_pdf_url(slug, tax_year),
                )
                for slug, title, source_class in specs
            ]

            result = corpus.download_catalog(tax_year, descriptors, fetcher=stub_fetch)
            manifest_file = corpus.paths_for_year(tax_year).manifest_path

            self.assertEqual(result["taxYear"], 2025)
            self.assertEqual(result["sourceCount"], 2)
            self.assertTrue(manifest_file.exists())

            head = result["sources"][0]
            self.assertEqual(head["slug"], "f1040")
            self.assertEqual(head["authorityRank"], int(AuthorityRank.IRS_FORM))
            self.assertTrue(Path(head["localPath"]).exists())

            # The manifest on disk must agree with the returned manifest.
            on_disk = json.loads(manifest_file.read_text())
            self.assertEqual(on_disk["sourceCount"], 2)
            self.assertEqual(on_disk["sources"][1]["slug"], "i1040gi")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every test case in this module when executed directly as a script.
    unittest.main()
|