feat: make us-cpa questions retrieval-first

This commit is contained in:
Stefano Fiorini
2026-03-15 04:40:57 -05:00
parent b4f9666560
commit b2bb07fa90
6 changed files with 272 additions and 10 deletions

View File

@@ -57,6 +57,19 @@ def _load_json_file(path_value: str | None) -> dict[str, Any]:
return json.loads(Path(path_value).expanduser().resolve().read_text())
def _ensure_question_corpus(corpus: TaxYearCorpus, tax_year: int) -> None:
    """Ensure the corpus for ``tax_year`` lists every bootstrap catalog slug.

    The slugs produced by ``bootstrap_irs_catalog(tax_year)`` are compared
    with the ``"slug"`` values under the ``"sources"`` key of the year's
    manifest file. ``corpus.download_catalog`` is invoked with a freshly
    built catalog when the manifest file does not exist, or when at least
    one required slug is absent from it. Otherwise nothing is done.

    Args:
        corpus: Corpus object providing ``paths_for_year`` and
            ``download_catalog``.
        tax_year: Tax year whose corpus should be checked.
    """
    year_paths = corpus.paths_for_year(tax_year)
    wanted = {entry.slug for entry in bootstrap_irs_catalog(tax_year)}
    manifest_file = year_paths.manifest_path

    if manifest_file.exists():
        recorded = json.loads(manifest_file.read_text()).get("sources", [])
        present = {entry["slug"] for entry in recorded}
        if wanted <= present:
            # Every required source is already recorded; nothing to fetch.
            return

    # Reached when the manifest is missing or incomplete. The catalog is
    # rebuilt here rather than reused from the slug computation above.
    corpus.download_catalog(tax_year, bootstrap_irs_catalog(tax_year))
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="us-cpa",
@@ -110,6 +123,7 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "question":
corpus = TaxYearCorpus()
_ensure_question_corpus(corpus, args.tax_year)
engine = QuestionEngine(corpus=corpus)
case_facts: dict[str, Any] = {}
if args.case_dir: