diff --git a/agents/autowebcompat-repro/hackbot_agents/autowebcompat_repro/agent.py b/agents/autowebcompat-repro/hackbot_agents/autowebcompat_repro/agent.py index 109c6917db..2cc6b49af2 100644 --- a/agents/autowebcompat-repro/hackbot_agents/autowebcompat_repro/agent.py +++ b/agents/autowebcompat-repro/hackbot_agents/autowebcompat_repro/agent.py @@ -131,7 +131,7 @@ async def run_autowebcompat_repro( user_prompt = build_user_prompt(bug_data, bug_id) result_msg: ResultMessage | None = None - with Reporter(verbose=verbose, log_path=log) as reporter: + with Reporter(verbose=verbose, log_path=log, max_turns=max_turns) as reporter: reporter.header(subject) async with ClaudeSDKClient(options=options) as client: await client.query(user_prompt) diff --git a/agents/bug-fix/hackbot_agents/bug_fix/agent.py b/agents/bug-fix/hackbot_agents/bug_fix/agent.py index d6868a92b0..c06a4d6abf 100644 --- a/agents/bug-fix/hackbot_agents/bug_fix/agent.py +++ b/agents/bug-fix/hackbot_agents/bug_fix/agent.py @@ -161,7 +161,7 @@ async def run_bug_fix( ) result_msg: ResultMessage | None = None - with Reporter(verbose=verbose, log_path=log) as reporter: + with Reporter(verbose=verbose, log_path=log, max_turns=max_turns) as reporter: reporter.header(f"bug {bug}") async with ClaudeSDKClient(options=options) as client: await client.query(user_prompt) diff --git a/agents/build-repair/hackbot_agents/build_repair/agent.py b/agents/build-repair/hackbot_agents/build_repair/agent.py index d32e908a22..1133545d38 100644 --- a/agents/build-repair/hackbot_agents/build_repair/agent.py +++ b/agents/build-repair/hackbot_agents/build_repair/agent.py @@ -204,7 +204,7 @@ async def run_build_repair( captured: dict[str, dict] = {} tracked = {BUILD_TOOL, *([TRY_PUSH_TOOL] if run_try_push else [])} - with Reporter(verbose=verbose, log_path=log) as reporter: + with Reporter(verbose=verbose, log_path=log, max_turns=max_turns) as reporter: # Stage 1: analysis (high effort, no source edits yet). reporter.header(f"{label}: analysis") analysis_opts = _build_options( diff --git a/agents/frontend-triage/hackbot_agents/frontend_triage/agent.py b/agents/frontend-triage/hackbot_agents/frontend_triage/agent.py index 4198f70afd..4a658fcfe1 100644 --- a/agents/frontend-triage/hackbot_agents/frontend_triage/agent.py +++ b/agents/frontend-triage/hackbot_agents/frontend_triage/agent.py @@ -231,7 +231,7 @@ async def run_frontend_triage( ) result_msg: ResultMessage | None = None - with Reporter(verbose=verbose, log_path=log) as reporter: + with Reporter(verbose=verbose, log_path=log, max_turns=max_turns) as reporter: reporter.header(f"bug {bug}") async with ClaudeSDKClient(options=options) as client: await client.query(user_prompt) diff --git a/libs/hackbot-runtime/hackbot_runtime/claude.py b/libs/hackbot-runtime/hackbot_runtime/claude.py index da1d6c2234..ac3fa6ee12 100644 --- a/libs/hackbot-runtime/hackbot_runtime/claude.py +++ b/libs/hackbot-runtime/hackbot_runtime/claude.py @@ -32,10 +32,14 @@ def _truncate(s: str, n: int = 500) -> str: class Reporter: """Routes streamed claude-agent-sdk messages to stdout and/or a log file.""" - def __init__(self, verbose: bool, log_path: Path | None): + def __init__( + self, verbose: bool, log_path: Path | None, max_turns: int | None = None + ): self.verbose = verbose self._log = log_path.open("w", encoding="utf-8") if log_path else None + self.max_turns = max_turns self._turn = 0 + self._last_msg_id: str | None = None def __enter__(self): return self @@ -47,6 +51,7 @@ def __exit__(self, *exc): def header(self, title: str) -> None: """Emit a section header (e.g. ``"bug 12345"``) and reset the turn count.""" self._turn = 0 + self._last_msg_id = None banner = f"\n{'#' * 60}\n# {title}\n{'#' * 60}" self._emit(banner, always=True) @@ -62,8 +67,19 @@ def message(self, msg) -> None: is_main = msg.parent_tool_use_id is None label = "agent" if is_main else "subagent" if is_main: - self._turn += 1 - self._emit(f"\n--- turn {self._turn} ---") + msg_id = msg.message_id + # The CLI streams one logical model response as several + # AssistantMessages (thinking / text / tool_use), all sharing + # one message_id. Count a turn only when a new id appears so the + # live marker matches ResultMessage.num_turns. If message_id is + # unavailable (older CLI), fall back to counting every message. + if msg_id is None or msg_id != self._last_msg_id: + self._turn += 1 + self._last_msg_id = msg_id + if self.max_turns: + self._emit(f"\n--- turn {self._turn}/{self.max_turns} ---") + else: + self._emit(f"\n--- turn {self._turn} ---") for block in msg.content: if isinstance(block, TextBlock): self._emit(f"\n[{label}] {block.text}", always=is_main) diff --git a/libs/hackbot-runtime/tests/test_runtime.py b/libs/hackbot-runtime/tests/test_runtime.py index 3469283b42..2f35e1efbe 100644 --- a/libs/hackbot-runtime/tests/test_runtime.py +++ b/libs/hackbot-runtime/tests/test_runtime.py @@ -215,3 +215,107 @@ def test_publish_file_copies_locally_without_uploader(tmp_path): assert key == "logs/agent.log" written = tmp_path / "artifacts" / "local-test" / "logs" / "agent.log" assert written.read_text() == "hello log" + + +# --- Reporter turn counting (requires the claude-sdk extra) ----------------- + + +def _asst(*, message_id, parent=None): + """A main- or sub-agent AssistantMessage carrying a single text block. + + The block content is irrelevant to turn counting; only ``message_id`` and + ``parent_tool_use_id`` (main vs subagent) matter here. + """ + from claude_agent_sdk import AssistantMessage, TextBlock + + return AssistantMessage( + content=[TextBlock(text="x")], + model="claude-test", + parent_tool_use_id=parent, + message_id=message_id, + ) + + +def _turn_lines(text): + return [ln for ln in text.splitlines() if ln.startswith("--- turn ")] + + +def test_reporter_counts_one_turn_per_message_id(capsys): + """Stream fragments sharing a message_id collapse into a single turn.""" + pytest.importorskip("claude_agent_sdk") + from hackbot_runtime.claude import Reporter + + with Reporter(verbose=True, log_path=None) as reporter: + # One logical response streamed as three fragments (id "m1"), then a + # response with two parallel tool fragments (id "m2"), then "m3". + for mid in ["m1", "m1", "m1", "m2", "m2", "m3"]: + reporter.message(_asst(message_id=mid)) + + assert _turn_lines(capsys.readouterr().out) == [ + "--- turn 1 ---", + "--- turn 2 ---", + "--- turn 3 ---", + ] + + +def test_reporter_ignores_subagent_messages(capsys): + pytest.importorskip("claude_agent_sdk") + from hackbot_runtime.claude import Reporter + + with Reporter(verbose=True, log_path=None) as reporter: + reporter.message(_asst(message_id="m1")) + # Subagent (parent_tool_use_id set) must not bump the turn counter, + # even with a fresh message_id. + reporter.message(_asst(message_id="s1", parent="tool_1")) + reporter.message(_asst(message_id="m2")) + + assert _turn_lines(capsys.readouterr().out) == [ + "--- turn 1 ---", + "--- turn 2 ---", + ] + + +def test_reporter_falls_back_when_message_id_missing(capsys): + """Older CLI without message_id: every main message counts as a turn.""" + pytest.importorskip("claude_agent_sdk") + from hackbot_runtime.claude import Reporter + + with Reporter(verbose=True, log_path=None) as reporter: + for _ in range(3): + reporter.message(_asst(message_id=None)) + + assert _turn_lines(capsys.readouterr().out) == [ + "--- turn 1 ---", + "--- turn 2 ---", + "--- turn 3 ---", + ] + + +def test_reporter_header_resets_turn_and_id(capsys): + pytest.importorskip("claude_agent_sdk") + from hackbot_runtime.claude import Reporter + + with Reporter(verbose=True, log_path=None) as reporter: + reporter.message(_asst(message_id="m1")) + reporter.header("bug 2") + # Same id as before the reset must still open a fresh turn 1. + reporter.message(_asst(message_id="m1")) + + assert _turn_lines(capsys.readouterr().out) == [ + "--- turn 1 ---", + "--- turn 1 ---", + ] + + +def test_reporter_shows_max_turns_budget(capsys): + pytest.importorskip("claude_agent_sdk") + from hackbot_runtime.claude import Reporter + + with Reporter(verbose=True, log_path=None, max_turns=30) as reporter: + reporter.message(_asst(message_id="m1")) + reporter.message(_asst(message_id="m2")) + + assert _turn_lines(capsys.readouterr().out) == [ + "--- turn 1/30 ---", + "--- turn 2/30 ---", + ]