diff --git a/docs/us-cpa.md b/docs/us-cpa.md index e50b598..d500436 100644 --- a/docs/us-cpa.md +++ b/docs/us-cpa.md @@ -17,6 +17,7 @@ Tax logic, case workflows, rendering, and review logic are still pending. ```bash skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025 @@ -98,9 +99,29 @@ Behavior: - JSON by default - markdown available with `--format markdown` +- `question` supports: + - `--style conversation` + - `--style memo` -- `question`, `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` +- `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"` - `fetch-year` emits a downloaded manifest location and source count +## Question Engine + +Current `question` implementation: + +- loads the cached tax-year corpus +- searches a small IRS-first topical rule set +- returns one canonical analysis object +- renders that analysis as: + - conversational output + - memo output +- marks questions outside the current topical rule set as requiring primary-law escalation + +Current implemented topics: + +- standard deduction +- Schedule C / sole proprietorship reporting trigger + ## Scope Rules - U.S. federal individual returns only in v1 diff --git a/skills/us-cpa/SKILL.md b/skills/us-cpa/SKILL.md index 6b9e841..f14247a 100644 --- a/skills/us-cpa/SKILL.md +++ b/skills/us-cpa/SKILL.md @@ -32,6 +32,7 @@ description: Use when answering U.S. federal individual tax questions, preparing ```bash skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" 
--tax-year 2025 +skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json @@ -49,6 +50,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases - JSON by default - markdown output available with `--format markdown` +- `question` supports `--style conversation|memo` - `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default - override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation - `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict diff --git a/skills/us-cpa/src/us_cpa/cli.py b/skills/us-cpa/src/us_cpa/cli.py index 6d5b46f..1249c51 100644 --- a/skills/us-cpa/src/us_cpa/cli.py +++ b/skills/us-cpa/src/us_cpa/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any from us_cpa.cases import CaseConflictError, CaseManager +from us_cpa.questions import QuestionEngine, render_analysis, render_memo from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog COMMANDS = ( @@ -64,6 +65,7 @@ def build_parser() -> argparse.ArgumentParser: question = subparsers.add_parser("question", help="Answer a tax question.") _add_common_arguments(question) question.add_argument("--question", required=True) + question.add_argument("--style", choices=("conversation", "memo"), default="conversation") prepare = subparsers.add_parser("prepare", help="Prepare a return case.") _add_common_arguments(prepare) @@ -104,14 +106,37 @@ def main(argv: list[str] | None = None) -> int: args = 
# Topical rule set consulted by ``QuestionEngine.answer``.
#
# Each rule carries the keywords that trigger it, the manifest slugs to cite as
# authorities, and either a fixed answer/summary pair or a per-filing-status
# answer table plus a summary template. A rule fires when ANY of its keywords
# occurs in the normalized question text.
#
# NOTE(review): the standard-deduction amounts are not keyed by tax year, so
# the same figures are returned for whichever year is requested -- confirm
# they are only meant to be used with the 2025 corpus.
TOPIC_RULES = [
    {
        "issue": "standard_deduction",
        "keywords": ("standard deduction",),
        "authority_slugs": ("i1040gi",),
        "answer_by_status": {
            "single": "$15,000",
            "married_filing_jointly": "$30,000",
            "head_of_household": "$22,500",
        },
        "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.",
        "confidence": "high",
    },
    {
        "issue": "schedule_c_required",
        "keywords": ("schedule c", "sole proprietor", "self-employment"),
        "authority_slugs": ("f1040sc", "i1040sc"),
        "answer": "Schedule C is generally required when a taxpayer reports sole proprietorship business income or expenses.",
        "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.",
        "confidence": "medium",
    },
]

# Filing status assumed when the case facts do not supply a recognised one.
_DEFAULT_FILING_STATUS = "single"

# Manifest fields copied verbatim into each authority entry of an analysis.
_AUTHORITY_FIELDS = ("slug", "title", "sourceClass", "url", "localPath", "authorityRank")


def _normalize_question(question: str) -> str:
    """Lower-case *question* and collapse every whitespace run to one space.

    Collapsing whitespace lets multi-word keywords such as
    ``"standard deduction"`` match even when the user typed extra spaces,
    tabs, or line breaks between the words.
    """
    return " ".join(question.split()).lower()


def _filing_status_label(status: str) -> str:
    """Turn a snake_case filing status into a title-cased display label."""
    return status.replace("_", " ").title()


@dataclass
class QuestionEngine:
    """Answer tax questions from ``TOPIC_RULES`` and a cached tax-year corpus.

    The engine only reads ``corpus.paths_for_year(tax_year).manifest_path``;
    the manifest found there supplies the authority metadata that is attached
    to each answer.
    """

    corpus: TaxYearCorpus

    def _manifest(self, tax_year: int) -> dict[str, Any]:
        """Load and return the parsed JSON manifest for *tax_year*.

        Raises:
            FileNotFoundError: if the manifest file does not exist.
        """
        path = self.corpus.paths_for_year(tax_year).manifest_path
        if not path.exists():
            raise FileNotFoundError(
                f"Tax year {tax_year} corpus not found at {path}. Run fetch-year first."
            )
        return json.loads(path.read_text())

    def _authorities_for(self, manifest: dict[str, Any], slugs: tuple[str, ...]) -> list[dict[str, Any]]:
        """Return authority entries for the *slugs* present in *manifest*.

        Entries follow the order of *slugs*; slugs that are absent from
        ``manifest["sources"]`` are skipped rather than treated as errors.
        """
        sources = {item["slug"]: item for item in manifest["sources"]}
        return [
            {field: sources[slug][field] for field in _AUTHORITY_FIELDS}
            for slug in slugs
            if slug in sources
        ]

    def answer(self, *, question: str, tax_year: int, case_facts: dict[str, Any]) -> dict[str, Any]:
        """Build the canonical analysis object for *question*.

        The first rule in ``TOPIC_RULES`` with at least one keyword contained
        in the normalized question wins. When no rule matches, the result is
        a low-confidence analysis flagged with ``primaryLawRequired: True``.

        For the standard-deduction rule the amount is selected by
        ``case_facts["filingStatus"]``. If that fact is missing or not in the
        rule's table, the single-filer amount is used, the summary is
        labelled as single, and the gap is reported through ``missingFacts``
        and ``followUpQuestions`` so the assumption is visible to the reader.

        Raises:
            FileNotFoundError: if the corpus manifest for *tax_year* is absent.
        """
        manifest = self._manifest(tax_year)
        normalized = _normalize_question(question)
        facts_used = [{"field": key, "value": value} for key, value in sorted(case_facts.items())]

        for rule in TOPIC_RULES:
            # Keywords are alternative triggers: one hit is enough.
            if not any(keyword in normalized for keyword in rule["keywords"]):
                continue

            authorities = self._authorities_for(manifest, rule["authority_slugs"])
            missing_facts: list[str] = []
            follow_ups: list[str] = []

            if rule["issue"] == "standard_deduction":
                amounts = rule["answer_by_status"]
                filing_status = case_facts.get("filingStatus")
                if filing_status not in amounts:
                    if filing_status is None:
                        missing_facts.append(
                            "filingStatus was not provided; the single-filer amount is shown by default."
                        )
                    else:
                        missing_facts.append(
                            f"filingStatus {filing_status!r} is not covered by the current rule set; "
                            "the single-filer amount is shown by default."
                        )
                    follow_ups.append("What is the taxpayer's filing status?")
                    # Label the summary with the status whose amount is
                    # actually reported, never with the unrecognised one.
                    filing_status = _DEFAULT_FILING_STATUS
                answer = amounts[filing_status]
                summary = rule["summary_template"].format(
                    filing_status_label=_filing_status_label(filing_status),
                    answer=answer,
                    tax_year=tax_year,
                )
            else:
                answer = rule["answer"]
                summary = rule["summary"]

            return {
                "issue": rule["issue"],
                "taxYear": tax_year,
                "factsUsed": facts_used,
                "missingFacts": missing_facts,
                "authorities": authorities,
                "conclusion": {"answer": answer, "summary": summary},
                "confidence": rule["confidence"],
                "followUpQuestions": follow_ups,
                "primaryLawRequired": False,
            }

        return {
            "issue": "requires_primary_law_escalation",
            "taxYear": tax_year,
            "factsUsed": facts_used,
            "missingFacts": [
                "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
            ],
            "authorities": [],
            "conclusion": {
                "answer": "Insufficient IRS-form and instruction support for a confident answer.",
                "summary": "This question needs primary-law analysis before a reliable answer can be given.",
            },
            "confidence": "low",
            "followUpQuestions": [
                "What facts drive the section-level issue?",
                "Is there an existing return position or drafted treatment to review?",
            ],
            "primaryLawRequired": True,
        }


def render_analysis(analysis: dict[str, Any]) -> str:
    """Render *analysis* as one conversational paragraph.

    The summary always comes first; facts, authority titles, and open items
    are appended only when the corresponding list is non-empty.
    """
    lines = [analysis["conclusion"]["summary"]]
    if analysis["factsUsed"]:
        facts = ", ".join(f"{item['field']}={item['value']}" for item in analysis["factsUsed"])
        lines.append(f"Facts used: {facts}.")
    if analysis["authorities"]:
        titles = "; ".join(item["title"] for item in analysis["authorities"])
        lines.append(f"Authorities: {titles}.")
    if analysis["missingFacts"]:
        lines.append(f"Open items: {' '.join(analysis['missingFacts'])}")
    return " ".join(lines)


def render_memo(analysis: dict[str, Any]) -> str:
    """Render *analysis* as a Markdown tax memo.

    Sections are emitted in the order Issue, Facts, Authorities, Analysis,
    Conclusion, followed by Open Items when ``missingFacts`` is non-empty.
    """
    lines = [
        "# Tax Memo",
        "",
        f"## Issue\n{analysis['issue']}",
        "",
        "## Facts",
    ]
    if analysis["factsUsed"]:
        lines.extend(f"- {item['field']}: {item['value']}" for item in analysis["factsUsed"])
    else:
        lines.append("- No case-specific facts supplied.")

    lines.extend(["", "## Authorities"])
    if analysis["authorities"]:
        lines.extend(f"- {authority['title']}" for authority in analysis["authorities"])
    elif analysis.get("primaryLawRequired", True):
        lines.append("- Primary-law escalation required.")
    else:
        # A rule matched but none of its sources are in the manifest; that is
        # a corpus gap, not a reason to claim primary-law escalation.
        lines.append("- No cached IRS authority was found for this topic.")

    lines.extend(
        [
            "",
            "## Analysis",
            analysis["conclusion"]["summary"],
            "",
            "## Conclusion",
            analysis["conclusion"]["answer"],
        ]
    )
    if analysis["missingFacts"]:
        lines.extend(["", "## Open Items"])
        lines.extend(f"- {item}" for item in analysis["missingFacts"])
    return "\n".join(lines)
class QuestionEngineTests(unittest.TestCase):
    """Exercise QuestionEngine.answer and the two analysis renderers."""

    def build_engine(self, temp_dir: str) -> QuestionEngine:
        """Return an engine over a 2025 corpus cached under *temp_dir*.

        The catalog is downloaded with a stub fetcher that returns a short
        byte string derived from the requested URL.
        """
        cache = TaxYearCorpus(cache_root=Path(temp_dir))
        cache.download_catalog(
            2025,
            bootstrap_irs_catalog(2025),
            fetcher=lambda url: f"source for {url}".encode(),
        )
        return QuestionEngine(corpus=cache)

    def test_standard_deduction_question_returns_structured_analysis(self) -> None:
        """A single filer's standard-deduction question yields a cited, high-confidence answer."""
        with tempfile.TemporaryDirectory() as workdir:
            result = self.build_engine(workdir).answer(
                question="What is the standard deduction for single filers?",
                tax_year=2025,
                case_facts={"filingStatus": "single"},
            )

        self.assertEqual(result["issue"], "standard_deduction")
        self.assertEqual(result["taxYear"], 2025)
        self.assertEqual(result["conclusion"]["answer"], "$15,000")
        self.assertEqual(result["confidence"], "high")
        cited = result["authorities"]
        self.assertTrue(cited)
        self.assertEqual(cited[0]["sourceClass"], "irs_instructions")

    def test_complex_question_flags_primary_law_escalation(self) -> None:
        """A question outside the topical rules is escalated with low confidence."""
        with tempfile.TemporaryDirectory() as workdir:
            result = self.build_engine(workdir).answer(
                question="Does section 469 passive activity loss limitation apply here?",
                tax_year=2025,
                case_facts={},
            )

        self.assertEqual(result["confidence"], "low")
        self.assertTrue(result["primaryLawRequired"])
        first_open_item = result["missingFacts"][0]
        self.assertIn("Internal Revenue Code", first_open_item)

    def test_renderers_produce_conversation_and_memo(self) -> None:
        """Both renderers accept a hand-built analysis and emit their key content."""
        conclusion = {
            "answer": "$15,000",
            "summary": "Single filers use a $15,000 standard deduction for tax year 2025.",
        }
        sample = {
            "issue": "standard_deduction",
            "taxYear": 2025,
            "factsUsed": [{"field": "filingStatus", "value": "single"}],
            "missingFacts": [],
            "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
            "conclusion": conclusion,
            "confidence": "high",
            "followUpQuestions": [],
            "primaryLawRequired": False,
        }

        spoken = render_analysis(sample)
        written = render_memo(sample)

        self.assertIn("$15,000", spoken)
        for heading in ("Issue", "Authorities"):
            self.assertIn(heading, written)


if __name__ == "__main__":
    unittest.main()