diff --git a/packages/uipath/src/uipath/eval/mocks/_structured_output.py b/packages/uipath/src/uipath/eval/mocks/_structured_output.py index 599780353..92fbf9dc9 100644 --- a/packages/uipath/src/uipath/eval/mocks/_structured_output.py +++ b/packages/uipath/src/uipath/eval/mocks/_structured_output.py @@ -28,6 +28,31 @@ logger = logging.getLogger(__name__) +def _model_supports_only_default_temperature(model: str | None) -> bool: + """Return whether ``model`` rejects any non-default ``temperature``. + + The gpt-5 family and the o-series reasoning models only accept the default + temperature (1) on the LLM Gateway and return HTTP 400 for any explicit + value, including ``0``. + """ + name = (model or "").lower() + return name.startswith(("gpt-5", "o1", "o3", "o4")) + + +def _normalize_completion_kwargs(completion_kwargs: dict[str, Any]) -> dict[str, Any]: + """Strip parameters a model rejects before calling the gateway. + + Models that only accept the default temperature reject an explicit + ``temperature``, so it is dropped here rather than forwarded verbatim + (SRE-607465 / PC-4769). 
+ """ + if "temperature" not in completion_kwargs: + return completion_kwargs + if not _model_supports_only_default_temperature(completion_kwargs.get("model")): + return completion_kwargs + return {k: v for k, v in completion_kwargs.items() if k != "temperature"} + + def _inline_defs( schema: dict[str, Any], ) -> tuple[dict[str, Any], dict[str, Any]]: @@ -248,6 +273,7 @@ async def generate_structured_output( completion_kwargs: dict[str, Any], ) -> Any: """Generate structured output using the strategy for the requested model.""" + completion_kwargs = _normalize_completion_kwargs(completion_kwargs) strategy = _strategy_for_model(completion_kwargs.get("model")) return await strategy.generate( llm, diff --git a/packages/uipath/tests/cli/eval/mocks/test_structured_output.py b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py index 79ad31591..3b6b04826 100644 --- a/packages/uipath/tests/cli/eval/mocks/test_structured_output.py +++ b/packages/uipath/tests/cli/eval/mocks/test_structured_output.py @@ -293,3 +293,45 @@ async def test_openai_models_prefer_response_format(): assert result == {"a": 1} assert len(llm.calls) == 1 assert "response_format" in llm.calls[0] + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "model", + [ + "gpt-5-2025-08-07", + "gpt-5-mini", + "o1-2024-12-17", + "o3-mini-2025-01-31", + ], +) +async def test_default_only_temperature_models_drop_temperature(model: str): + # Models like gpt-5 / o-series only accept the default temperature (1); the + # LLM gateway returns HTTP 400 when temperature=0 is forwarded (SRE-607465 / + # PC-4769). The mock path must strip the restricted temperature before + # calling chat_completions. 
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model",
    ["gpt-5-2025-08-07", "gpt-5-mini", "o1-2024-12-17", "o3-mini-2025-01-31"],
)
async def test_default_only_temperature_models_drop_temperature(model: str):
    # gpt-5 and o-series models accept only the default temperature (1). The
    # LLM gateway answers HTTP 400 if temperature=0 is forwarded (SRE-607465 /
    # PC-4769), so the mock path has to remove it before chat_completions runs.
    canned_reply = _response(SimpleNamespace(content='{"a": 1}', tool_calls=None))
    llm = _FakeLLM([canned_reply])
    requested_kwargs = {"model": model, "temperature": 0}

    await generate_structured_output(
        llm,
        [{"role": "user", "content": "x"}],
        schema={"type": "object"},
        response_format_name="OutputSchema",
        description="d",
        completion_kwargs=requested_kwargs,
    )

    first_call = llm.calls[0]
    assert "temperature" not in first_call
    assert first_call["model"] == model


@pytest.mark.asyncio
async def test_standard_models_keep_temperature():
    # Models outside the restricted families must still receive the
    # temperature the caller asked for.
    canned_reply = _response(SimpleNamespace(content='{"a": 1}', tool_calls=None))
    llm = _FakeLLM([canned_reply])
    requested_kwargs = {"model": "gpt-4.1-mini-2025-04-14", "temperature": 0}

    await generate_structured_output(
        llm,
        [{"role": "user", "content": "x"}],
        schema={"type": "object"},
        response_format_name="OutputSchema",
        description="d",
        completion_kwargs=requested_kwargs,
    )

    first_call = llm.calls[0]
    assert first_call["temperature"] == 0