feat: add us-cpa question engine
This commit is contained in:
@@ -17,6 +17,7 @@ Tax logic, case workflows, rendering, and review logic are still pending.
|
||||
|
||||
```bash
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown
|
||||
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa fetch-year --tax-year 2025
|
||||
@@ -98,9 +99,29 @@ Behavior:
|
||||
|
||||
- JSON by default
|
||||
- markdown available with `--format markdown`
|
||||
- `question` supports:
|
||||
- `--style conversation`
|
||||
- `--style memo`
|
||||
- `prepare`, `review`, `extract-docs`, `render-forms`, and `export-efile-ready` still emit scaffold payloads with `status: "not_implemented"`
|
||||
- `fetch-year` emits a downloaded manifest location and source count
|
||||
|
||||
## Question Engine
|
||||
|
||||
Current `question` implementation:
|
||||
|
||||
- loads the cached tax-year corpus
|
||||
- searches a small IRS-first topical rule set
|
||||
- returns one canonical analysis object
|
||||
- renders that analysis as:
|
||||
- conversational output
|
||||
- memo output
|
||||
- marks questions outside the current topical rule set as requiring primary-law escalation
|
||||
|
||||
Current implemented topics:
|
||||
|
||||
- standard deduction
|
||||
- Schedule C / sole proprietorship reporting trigger
|
||||
|
||||
## Scope Rules
|
||||
|
||||
- U.S. federal individual returns only in v1
|
||||
|
||||
@@ -32,6 +32,7 @@ description: Use when answering U.S. federal individual tax questions, preparing
|
||||
|
||||
```bash
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025
|
||||
skills/us-cpa/scripts/us-cpa question --question "What is the standard deduction?" --tax-year 2025 --style memo --format markdown
|
||||
skills/us-cpa/scripts/us-cpa prepare --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa review --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe
|
||||
skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases/2025-jane-doe --create-case --case-label "Jane Doe" --facts-json ./facts.json
|
||||
@@ -49,6 +50,7 @@ skills/us-cpa/scripts/us-cpa extract-docs --tax-year 2025 --case-dir ~/tax-cases
|
||||
|
||||
- JSON by default
|
||||
- markdown output available with `--format markdown`
|
||||
- `question` supports `--style conversation|memo`
|
||||
- `fetch-year` downloads the bootstrap IRS form/instruction corpus into `~/.cache/us-cpa` by default
|
||||
- override the cache root with `US_CPA_CACHE_DIR` when you need an isolated run or fixture generation
|
||||
- `extract-docs` creates or opens a case, registers documents, stores facts, and stops with a structured issue if facts conflict
|
||||
|
||||
@@ -7,6 +7,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.cases import CaseConflictError, CaseManager
|
||||
from us_cpa.questions import QuestionEngine, render_analysis, render_memo
|
||||
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
|
||||
|
||||
COMMANDS = (
|
||||
@@ -64,6 +65,7 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
question = subparsers.add_parser("question", help="Answer a tax question.")
|
||||
_add_common_arguments(question)
|
||||
question.add_argument("--question", required=True)
|
||||
question.add_argument("--style", choices=("conversation", "memo"), default="conversation")
|
||||
|
||||
prepare = subparsers.add_parser("prepare", help="Prepare a return case.")
|
||||
_add_common_arguments(prepare)
|
||||
@@ -104,14 +106,37 @@ def main(argv: list[str] | None = None) -> int:
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
if args.command == "question":
|
||||
corpus = TaxYearCorpus()
|
||||
engine = QuestionEngine(corpus=corpus)
|
||||
case_facts: dict[str, Any] = {}
|
||||
if args.case_dir:
|
||||
manager = CaseManager(Path(args.case_dir))
|
||||
if manager.facts_path.exists():
|
||||
case_facts = {
|
||||
key: value["value"]
|
||||
for key, value in json.loads(manager.facts_path.read_text())["facts"].items()
|
||||
}
|
||||
analysis = engine.answer(
|
||||
question=args.question,
|
||||
tax_year=args.tax_year,
|
||||
case_facts=case_facts,
|
||||
)
|
||||
payload = {
|
||||
"command": "question",
|
||||
"format": args.format,
|
||||
"style": args.style,
|
||||
"taxYear": args.tax_year,
|
||||
"caseDir": args.case_dir,
|
||||
"question": args.question,
|
||||
"status": "not_implemented",
|
||||
"status": "answered",
|
||||
"analysis": analysis,
|
||||
}
|
||||
payload["rendered"] = (
|
||||
render_memo(analysis) if args.style == "memo" else render_analysis(analysis)
|
||||
)
|
||||
if args.format == "markdown":
|
||||
print(payload["rendered"])
|
||||
return 0
|
||||
return _emit(payload, args.format)
|
||||
|
||||
if args.command == "extract-docs":
|
||||
|
||||
172
skills/us-cpa/src/us_cpa/questions.py
Normal file
172
skills/us-cpa/src/us_cpa/questions.py
Normal file
@@ -0,0 +1,172 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from us_cpa.sources import TaxYearCorpus
|
||||
|
||||
|
||||
# Minimal topical rule set for the v1 question engine.
#
# Each rule is matched against a normalized (lowercased, trimmed) question.
# Two rule shapes exist:
#   * fact-dependent rules carry "answer_by_status" plus "summary_template"
#     and are resolved against case facts (currently only standard_deduction);
#   * static rules carry a fixed "answer" and "summary".
# "authority_slugs" name entries in the tax-year corpus manifest that back the
# answer; "confidence" is reported verbatim in the analysis payload.
TOPIC_RULES = [
    {
        "issue": "standard_deduction",
        "keywords": ("standard deduction",),
        "authority_slugs": ("i1040gi",),
        # Dollar amounts by filing status.  NOTE(review): these look like tax
        # year 2025 figures hard-coded regardless of the requested tax year —
        # confirm against the cached Form 1040 instructions for other years.
        "answer_by_status": {
            "single": "$15,000",
            "married_filing_jointly": "$30,000",
            "head_of_household": "$22,500",
        },
        "summary_template": "{filing_status_label} filers use a {answer} standard deduction for tax year {tax_year}.",
        "confidence": "high",
    },
    {
        "issue": "schedule_c_required",
        "keywords": ("schedule c", "sole proprietor", "self-employment"),
        "authority_slugs": ("f1040sc", "i1040sc"),
        "answer": "Schedule C is generally required when a taxpayer reports sole proprietorship business income or expenses.",
        "summary": "Business income and expenses from a sole proprietorship generally belong on Schedule C.",
        "confidence": "medium",
    },
]
|
||||
|
||||
|
||||
def _normalize_question(question: str) -> str:
|
||||
return question.strip().lower()
|
||||
|
||||
|
||||
def _filing_status_label(status: str) -> str:
|
||||
return status.replace("_", " ").title()
|
||||
|
||||
|
||||
@dataclass
class QuestionEngine:
    """Answers U.S. federal individual tax questions from the cached corpus.

    Matching is limited to the topics in ``TOPIC_RULES``.  A question that
    matches no rule is returned as a low-confidence analysis with
    ``primaryLawRequired`` set, signalling that IRC/Treasury-regulation
    analysis is needed before answering.
    """

    # Corpus used to locate the downloaded manifest for a tax year.
    corpus: TaxYearCorpus

    def _manifest(self, tax_year: int) -> dict[str, Any]:
        """Load the corpus manifest for *tax_year*.

        Raises:
            FileNotFoundError: if ``fetch-year`` has not populated the cache
                for this tax year.
        """
        path = self.corpus.paths_for_year(tax_year).manifest_path
        if not path.exists():
            raise FileNotFoundError(
                f"Tax year {tax_year} corpus not found at {path}. Run fetch-year first."
            )
        return json.loads(path.read_text())

    def _authorities_for(self, manifest: dict[str, Any], slugs: tuple[str, ...]) -> list[dict[str, Any]]:
        """Resolve authority *slugs* against the manifest's ``sources`` list.

        Slugs absent from the manifest are silently skipped, so the result
        preserves slug order but may be shorter than *slugs*.
        """
        sources = {item["slug"]: item for item in manifest["sources"]}
        found: list[dict[str, Any]] = []
        for slug in slugs:
            source = sources.get(slug)
            if source is None:
                continue
            found.append(
                {
                    "slug": source["slug"],
                    "title": source["title"],
                    "sourceClass": source["sourceClass"],
                    "url": source["url"],
                    "localPath": source["localPath"],
                    "authorityRank": source["authorityRank"],
                }
            )
        return found

    def answer(self, *, question: str, tax_year: int, case_facts: dict[str, Any]) -> dict[str, Any]:
        """Return one canonical analysis dict for *question*.

        Args:
            question: The natural-language tax question.
            tax_year: Tax year whose cached corpus backs the answer.
            case_facts: Flat fact map (e.g. ``{"filingStatus": "single"}``);
                every fact is echoed back in ``factsUsed``.

        Returns:
            An analysis dict with issue, authorities, conclusion, confidence,
            and escalation flags — see the escalation payload below for the
            full key set.
        """
        manifest = self._manifest(tax_year)
        normalized = _normalize_question(question)
        facts_used = [{"field": key, "value": value} for key, value in sorted(case_facts.items())]

        for rule in TOPIC_RULES:
            # BUG FIX: the original used all(...), requiring EVERY keyword to
            # appear in the question.  Multi-keyword rules such as
            # schedule_c_required ("schedule c", "sole proprietor",
            # "self-employment") could then essentially never fire, even
            # though the topic is documented as implemented.  A rule's
            # keywords are alternative triggers, so any one match suffices.
            if any(keyword in normalized for keyword in rule["keywords"]):
                authorities = self._authorities_for(manifest, rule["authority_slugs"])
                if rule["issue"] == "standard_deduction":
                    # Fall back to "single" when the case supplies no filing
                    # status or an unknown one.
                    filing_status = case_facts.get("filingStatus", "single")
                    answer = rule["answer_by_status"].get(filing_status, rule["answer_by_status"]["single"])
                    summary = rule["summary_template"].format(
                        filing_status_label=_filing_status_label(filing_status),
                        answer=answer,
                        tax_year=tax_year,
                    )
                else:
                    answer = rule["answer"]
                    summary = rule["summary"]

                return {
                    "issue": rule["issue"],
                    "taxYear": tax_year,
                    "factsUsed": facts_used,
                    "missingFacts": [],
                    "authorities": authorities,
                    "conclusion": {"answer": answer, "summary": summary},
                    "confidence": rule["confidence"],
                    "followUpQuestions": [],
                    "primaryLawRequired": False,
                }

        # No topical rule matched: escalate to primary-law analysis.
        return {
            "issue": "requires_primary_law_escalation",
            "taxYear": tax_year,
            "factsUsed": facts_used,
            "missingFacts": [
                "Internal Revenue Code or Treasury regulation analysis is required before answering this question confidently."
            ],
            "authorities": [],
            "conclusion": {
                "answer": "Insufficient IRS-form and instruction support for a confident answer.",
                "summary": "This question needs primary-law analysis before a reliable answer can be given.",
            },
            "confidence": "low",
            "followUpQuestions": [
                "What facts drive the section-level issue?",
                "Is there an existing return position or drafted treatment to review?",
            ],
            "primaryLawRequired": True,
        }
|
||||
|
||||
|
||||
def render_analysis(analysis: dict[str, Any]) -> str:
    """Render *analysis* as a single conversational paragraph.

    The summary always leads; facts used, supporting authorities, and open
    items are appended as sentences only when present.
    """
    parts = [analysis["conclusion"]["summary"]]
    facts_used = analysis["factsUsed"]
    if facts_used:
        fact_text = ", ".join(f"{entry['field']}={entry['value']}" for entry in facts_used)
        parts.append(f"Facts used: {fact_text}.")
    authorities = analysis["authorities"]
    if authorities:
        title_text = "; ".join(entry["title"] for entry in authorities)
        parts.append(f"Authorities: {title_text}.")
    missing = analysis["missingFacts"]
    if missing:
        parts.append(f"Open items: {' '.join(missing)}")
    return " ".join(parts)
|
||||
|
||||
|
||||
def render_memo(analysis: dict[str, Any]) -> str:
    """Render *analysis* as a markdown tax memo.

    Sections: Issue, Facts, Authorities, Analysis, Conclusion, and — only
    when the analysis carries open items — Open Items.  Empty Facts and
    Authorities sections get an explanatory placeholder bullet.
    """
    out: list[str] = [
        "# Tax Memo",
        "",
        f"## Issue\n{analysis['issue']}",
        "",
        "## Facts",
    ]
    facts = analysis["factsUsed"]
    if facts:
        out.extend(f"- {entry['field']}: {entry['value']}" for entry in facts)
    else:
        out.append("- No case-specific facts supplied.")
    out += ["", "## Authorities"]
    authorities = analysis["authorities"]
    if authorities:
        out.extend(f"- {entry['title']}" for entry in authorities)
    else:
        out.append("- Primary-law escalation required.")
    out += [
        "",
        "## Analysis",
        analysis["conclusion"]["summary"],
        "",
        "## Conclusion",
        analysis["conclusion"]["answer"],
    ]
    missing = analysis["missingFacts"]
    if missing:
        out += ["", "## Open Items"]
        out.extend(f"- {item}" for item in missing)
    return "\n".join(out)
|
||||
@@ -53,13 +53,40 @@ class UsCpaCliSmokeTests(unittest.TestCase):
|
||||
self.assertIn(command, result.stdout)
|
||||
|
||||
def test_question_command_emits_json_by_default(self) -> None:
|
||||
result = self.run_cli("question", "--question", "What is the standard deduction?")
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
env = os.environ.copy()
|
||||
env["PYTHONPATH"] = str(SRC_DIR)
|
||||
env["US_CPA_CACHE_DIR"] = temp_dir
|
||||
subprocess.run(
|
||||
[sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"],
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env=env,
|
||||
check=True,
|
||||
)
|
||||
result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
"-m",
|
||||
"us_cpa.cli",
|
||||
"question",
|
||||
"--tax-year",
|
||||
"2025",
|
||||
"--question",
|
||||
"What is the standard deduction?",
|
||||
],
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env=env,
|
||||
)
|
||||
|
||||
self.assertEqual(result.returncode, 0, result.stderr)
|
||||
payload = json.loads(result.stdout)
|
||||
self.assertEqual(payload["command"], "question")
|
||||
self.assertEqual(payload["format"], "json")
|
||||
self.assertEqual(payload["question"], "What is the standard deduction?")
|
||||
self.assertEqual(payload["status"], "answered")
|
||||
self.assertIn("analysis", payload)
|
||||
|
||||
def test_prepare_requires_case_dir(self) -> None:
|
||||
result = self.run_cli("prepare", "--tax-year", "2025")
|
||||
@@ -128,6 +155,42 @@ class UsCpaCliSmokeTests(unittest.TestCase):
|
||||
self.assertEqual(payload["status"], "needs_resolution")
|
||||
self.assertEqual(payload["issueType"], "fact_conflict")
|
||||
|
||||
def test_question_markdown_memo_mode_renders_tax_memo(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
env = os.environ.copy()
|
||||
env["PYTHONPATH"] = str(SRC_DIR)
|
||||
env["US_CPA_CACHE_DIR"] = temp_dir
|
||||
subprocess.run(
|
||||
[sys.executable, "-m", "us_cpa.cli", "fetch-year", "--tax-year", "2025"],
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env=env,
|
||||
check=True,
|
||||
)
|
||||
result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
"-m",
|
||||
"us_cpa.cli",
|
||||
"question",
|
||||
"--tax-year",
|
||||
"2025",
|
||||
"--format",
|
||||
"markdown",
|
||||
"--style",
|
||||
"memo",
|
||||
"--question",
|
||||
"What is the standard deduction?",
|
||||
],
|
||||
text=True,
|
||||
capture_output=True,
|
||||
env=env,
|
||||
)
|
||||
|
||||
self.assertEqual(result.returncode, 0, result.stderr)
|
||||
self.assertIn("# Tax Memo", result.stdout)
|
||||
self.assertIn("## Conclusion", result.stdout)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
75
skills/us-cpa/tests/test_questions.py
Normal file
75
skills/us-cpa/tests/test_questions.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from us_cpa.questions import QuestionEngine, render_analysis, render_memo
|
||||
from us_cpa.sources import TaxYearCorpus, bootstrap_irs_catalog
|
||||
|
||||
|
||||
class QuestionEngineTests(unittest.TestCase):
    """Unit tests for QuestionEngine matching and the two renderers."""

    def build_engine(self, temp_dir: str) -> QuestionEngine:
        """Build an engine over a corpus downloaded into *temp_dir*, using a
        stub fetcher so no network access happens."""
        corpus = TaxYearCorpus(cache_root=Path(temp_dir))

        def stub_fetch(url: str) -> bytes:
            return f"source for {url}".encode()

        corpus.download_catalog(2025, bootstrap_irs_catalog(2025), fetcher=stub_fetch)
        return QuestionEngine(corpus=corpus)

    def test_standard_deduction_question_returns_structured_analysis(self) -> None:
        with tempfile.TemporaryDirectory() as workdir:
            engine = self.build_engine(workdir)

            result = engine.answer(
                question="What is the standard deduction for single filers?",
                tax_year=2025,
                case_facts={"filingStatus": "single"},
            )

            self.assertEqual(result["issue"], "standard_deduction")
            self.assertEqual(result["taxYear"], 2025)
            self.assertEqual(result["conclusion"]["answer"], "$15,000")
            self.assertEqual(result["confidence"], "high")
            self.assertTrue(result["authorities"])
            self.assertEqual(result["authorities"][0]["sourceClass"], "irs_instructions")

    def test_complex_question_flags_primary_law_escalation(self) -> None:
        with tempfile.TemporaryDirectory() as workdir:
            engine = self.build_engine(workdir)

            result = engine.answer(
                question="Does section 469 passive activity loss limitation apply here?",
                tax_year=2025,
                case_facts={},
            )

            self.assertEqual(result["confidence"], "low")
            self.assertTrue(result["primaryLawRequired"])
            self.assertIn("Internal Revenue Code", result["missingFacts"][0])

    def test_renderers_produce_conversation_and_memo(self) -> None:
        sample = {
            "issue": "standard_deduction",
            "taxYear": 2025,
            "factsUsed": [{"field": "filingStatus", "value": "single"}],
            "missingFacts": [],
            "authorities": [{"title": "Instructions for Form 1040 and Schedules 1-3"}],
            "conclusion": {"answer": "$15,000", "summary": "Single filers use a $15,000 standard deduction for tax year 2025."},
            "confidence": "high",
            "followUpQuestions": [],
            "primaryLawRequired": False,
        }

        conversation_text = render_analysis(sample)
        memo_text = render_memo(sample)

        self.assertIn("$15,000", conversation_text)
        self.assertIn("Issue", memo_text)
        self.assertIn("Authorities", memo_text)
||||
|
||||
|
||||
# Allow running this test module directly: python tests/test_questions.py
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user