feat: add us-cpa tax-year source corpus
This commit is contained in:
97
skills/us-cpa/tests/test_sources.py
Normal file
97
skills/us-cpa/tests/test_sources.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from us_cpa.sources import (
|
||||
AuthorityRank,
|
||||
SourceDescriptor,
|
||||
TaxYearCorpus,
|
||||
authority_rank_for,
|
||||
bootstrap_irs_catalog,
|
||||
build_irs_prior_pdf_url,
|
||||
)
|
||||
|
||||
|
||||
class SourceCatalogTests(unittest.TestCase):
    """Checks for IRS prior-year URL building, authority-rank lookup, and catalog bootstrap."""

    def test_build_irs_prior_pdf_url_uses_expected_pattern(self) -> None:
        # Each pair is (slug passed to the builder, exact URL it must return
        # for tax year 2025).
        expectations = (
            ("f1040", "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf"),
            ("i1040gi", "https://www.irs.gov/pub/irs-prior/i1040gi--2025.pdf"),
        )
        for slug, expected_url in expectations:
            actual_url = build_irs_prior_pdf_url(slug, 2025)
            self.assertEqual(actual_url, expected_url)

    def test_authority_ranking_orders_irs_before_primary_law(self) -> None:
        # Source-class strings must map onto the matching enum members.
        form_rank = authority_rank_for("irs_form")
        self.assertEqual(form_rank, AuthorityRank.IRS_FORM)

        regulation_rank = authority_rank_for("treasury_regulation")
        self.assertEqual(regulation_rank, AuthorityRank.TREASURY_REGULATION)

        # The rank for "irs_form" must compare strictly less than the rank
        # for "internal_revenue_code".
        irs_rank = authority_rank_for("irs_form")
        code_rank = authority_rank_for("internal_revenue_code")
        self.assertLess(irs_rank, code_rank)

    def test_bootstrap_catalog_builds_tax_year_specific_urls(self) -> None:
        entries = bootstrap_irs_catalog(2025)

        entry_count = len(entries)
        self.assertGreaterEqual(entry_count, 5)

        # The first entry is expected to carry the 2025 Form 1040 URL.
        leading_url = entries[0].url
        self.assertEqual(leading_url, "https://www.irs.gov/pub/irs-prior/f1040--2025.pdf")

        # At least one entry must use the "i1040gi" slug.
        has_instructions = any(entry.slug == "i1040gi" for entry in entries)
        self.assertTrue(has_instructions)
|
||||
|
||||
|
||||
class TaxYearCorpusTests(unittest.TestCase):
    """Checks for the per-year cache layout and for catalog download plus manifest output."""

    def test_tax_year_layout_is_deterministic(self) -> None:
        with tempfile.TemporaryDirectory() as temp_dir:
            cache_root = Path(temp_dir)
            layout = TaxYearCorpus(cache_root=cache_root).paths_for_year(2025)

            # The year directory sits under <cache_root>/tax-years/<year>;
            # the other two paths are compared relative to it.
            expected_year_dir = cache_root / "tax-years" / "2025"
            self.assertEqual(layout.year_dir, expected_year_dir)
            self.assertEqual(layout.irs_dir, layout.year_dir / "irs")
            self.assertEqual(layout.manifest_path, layout.year_dir / "manifest.json")

    def test_download_catalog_writes_files_and_manifest(self) -> None:
        with tempfile.TemporaryDirectory() as temp_dir:
            corpus = TaxYearCorpus(cache_root=Path(temp_dir))

            form_descriptor = SourceDescriptor(
                slug="f1040",
                title="Form 1040",
                source_class="irs_form",
                media_type="application/pdf",
                url=build_irs_prior_pdf_url("f1040", 2025),
            )
            instructions_descriptor = SourceDescriptor(
                slug="i1040gi",
                title="Instructions for Form 1040",
                source_class="irs_instructions",
                media_type="application/pdf",
                url=build_irs_prior_pdf_url("i1040gi", 2025),
            )
            descriptors = [form_descriptor, instructions_descriptor]

            def stub_fetch(url: str) -> bytes:
                # Stand-in for a real download: the payload is derived from
                # the requested URL, so no network access is needed.
                payload = f"downloaded:{url}"
                return payload.encode()

            result = corpus.download_catalog(2025, descriptors, fetcher=stub_fetch)

            # Checks on the manifest returned by download_catalog.
            self.assertEqual(result["taxYear"], 2025)
            self.assertEqual(result["sourceCount"], 2)

            # These checks must stay inside the `with` block: the temporary
            # directory (and everything written into it) is removed on exit.
            manifest_file = corpus.paths_for_year(2025).manifest_path
            self.assertTrue(manifest_file.exists())

            leading_source = result["sources"][0]
            self.assertEqual(leading_source["slug"], "f1040")
            self.assertEqual(leading_source["authorityRank"], int(AuthorityRank.IRS_FORM))
            local_copy = Path(leading_source["localPath"])
            self.assertTrue(local_copy.exists())

            # Checks on the manifest file as re-read from disk.
            persisted = json.loads(manifest_file.read_text())
            self.assertEqual(persisted["sourceCount"], 2)
            self.assertEqual(persisted["sources"][1]["slug"], "i1040gi")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every TestCase in this module when the file is executed directly.
    unittest.main()
|
||||
Reference in New Issue
Block a user