Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions packages/uipath/src/uipath/eval/mocks/_structured_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,31 @@
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


def _model_supports_only_default_temperature(model: str | None) -> bool:
"""Return whether ``model`` rejects any non-default ``temperature``.

The gpt-5 family and the o-series reasoning models only accept the default
temperature (1) on the LLM Gateway and return HTTP 400 for any explicit
value, including ``0``.
"""
name = (model or "").lower()
return name.startswith(("gpt-5", "o1", "o3", "o4"))


def _normalize_completion_kwargs(completion_kwargs: dict[str, Any]) -> dict[str, Any]:
    """Remove request parameters that the target model would reject.

    Some models accept only the default temperature and fail the request when
    an explicit ``temperature`` is present, so the key is removed here instead
    of being passed through as-is (SRE-607465 / PC-4769).

    Args:
        completion_kwargs: Keyword arguments destined for the completion call.
            The mapping itself is never mutated.

    Returns:
        The very same mapping object when nothing has to be removed; otherwise
        a new ``dict`` holding every entry except ``"temperature"``.
    """
    # Short-circuit keeps the model lookup from running when there is no
    # temperature to strip in the first place.
    must_drop_temperature = (
        "temperature" in completion_kwargs
        and _model_supports_only_default_temperature(completion_kwargs.get("model"))
    )
    if not must_drop_temperature:
        return completion_kwargs

    # Work on a copy so the caller's mapping stays untouched.
    trimmed = dict(completion_kwargs)
    del trimmed["temperature"]
    return trimmed


def _inline_defs(
schema: dict[str, Any],
) -> tuple[dict[str, Any], dict[str, Any]]:
Expand Down Expand Up @@ -248,6 +273,7 @@ async def generate_structured_output(
completion_kwargs: dict[str, Any],
) -> Any:
"""Generate structured output using the strategy for the requested model."""
completion_kwargs = _normalize_completion_kwargs(completion_kwargs)
strategy = _strategy_for_model(completion_kwargs.get("model"))
return await strategy.generate(
llm,
Expand Down
42 changes: 42 additions & 0 deletions packages/uipath/tests/cli/eval/mocks/test_structured_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,45 @@ async def test_openai_models_prefer_response_format():
assert result == {"a": 1}
assert len(llm.calls) == 1
assert "response_format" in llm.calls[0]


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model",
    [
        "gpt-5-2025-08-07",
        "gpt-5-mini",
        "o1-2024-12-17",
        "o3-mini-2025-01-31",
    ],
)
async def test_default_only_temperature_models_drop_temperature(model: str):
    """Temperature is stripped for models that only accept the default value.

    Models like gpt-5 / o-series only accept the default temperature (1); the
    LLM gateway returns HTTP 400 when temperature=0 is forwarded (SRE-607465 /
    PC-4769). The mock path must strip the restricted temperature before
    calling chat_completions.
    """
    # Arrange: a fake LLM primed with a single canned JSON reply.
    canned_message = SimpleNamespace(content='{"a": 1}', tool_calls=None)
    llm = _FakeLLM([_response(canned_message)])
    requested_kwargs = {"model": model, "temperature": 0}

    # Act
    await generate_structured_output(
        llm,
        [{"role": "user", "content": "x"}],
        schema={"type": "object"},
        response_format_name="OutputSchema",
        description="d",
        completion_kwargs=requested_kwargs,
    )

    # Assert: the first recorded call keeps the model but not the temperature.
    first_call = llm.calls[0]
    assert "temperature" not in first_call
    assert first_call["model"] == model


@pytest.mark.asyncio
async def test_standard_models_keep_temperature():
    """An explicit temperature is forwarded untouched for an unrestricted model."""
    # Arrange: a fake LLM primed with a single canned JSON reply.
    canned_message = SimpleNamespace(content='{"a": 1}', tool_calls=None)
    llm = _FakeLLM([_response(canned_message)])
    requested_kwargs = {"model": "gpt-4.1-mini-2025-04-14", "temperature": 0}

    # Act
    await generate_structured_output(
        llm,
        [{"role": "user", "content": "x"}],
        schema={"type": "object"},
        response_format_name="OutputSchema",
        description="d",
        completion_kwargs=requested_kwargs,
    )

    # Assert: the explicit temperature survives on the first recorded call.
    first_call = llm.calls[0]
    assert first_call["temperature"] == 0
Loading