UiPath · Chibionos · Jun 22, 2026
diff --git a/packages/uipath/src/uipath/eval/runtime/runtime.py b/packages/uipath/src/uipath/eval/runtime/runtime.py
@@ -1022,6 +1022,9 @@ async def run_evaluator(
                 agent_output=output_data,
                 agent_trace=execution_output.spans,
                 expected_agent_behavior=eval_item.expected_agent_behavior,
+                simulation_instructions=eval_item.mocking_strategy.prompt
+                if isinstance(eval_item.mocking_strategy, LLMMockingStrategy)
+                else "",
             )
 
             result = await evaluator.validate_and_evaluate_criteria(

diff --git a/packages/uipath/tests/cli/eval/test_eval_tracing_integration.py b/packages/uipath/tests/cli/eval/test_eval_tracing_integration.py
@@ -12,8 +12,10 @@
 import pytest
 
 from uipath.eval.evaluators import BaseEvaluator
+from uipath.eval.mocks._types import LLMMockingStrategy
 from uipath.eval.models import NumericEvaluationResult
 from uipath.eval.models.evaluation_set import EvaluationSet
+from uipath.eval.models.models import AgentExecution
 from uipath.eval.runtime import UiPathEvalContext, UiPathEvalRuntime
 from uipath.runtime.schema import UiPathRuntimeSchema
 
@@ -402,6 +404,64 @@ async def test_run_evaluator_creates_evaluator_span(
         assert span["attributes"]["evaluator_name"] == "AccuracyEvaluator"
         assert span["attributes"]["eval_item_id"] == "eval-item-456"
 
+    @pytest.mark.asyncio
+    async def test_run_evaluator_passes_simulation_instructions(
+        self,
+        mock_trace_manager: MagicMock,
+        mock_factory: MagicMock,
+        mock_event_bus: MagicMock,
+        mock_execution_output: MagicMock,
+    ) -> None:
+        """Test that the LLM mocking prompt is passed on as simulation instructions."""
+        simulation_prompt = "Return mocked API responses for the tool calls"
+        context = create_eval_context(eval_set="test.json", entrypoint="main.py:main")
+        runtime = UiPathEvalRuntime(
+            context=context,
+            factory=mock_factory,
+            trace_manager=mock_trace_manager,
+            event_bus=mock_event_bus,
+        )
+
+        eval_item = MagicMock()
+        eval_item.id = "eval-item-with-simulation"
+        eval_item.name = "Simulated item"
+        eval_item.inputs = {"input": "test"}
+        eval_item.expected_agent_behavior = "Agent should use the simulated tool"
+        eval_item.mocking_strategy = LLMMockingStrategy(
+            prompt=simulation_prompt,
+            tools_to_simulate=[],
+        )
+
+        evaluator = MagicMock(spec=BaseEvaluator)
+        evaluator.id = "trajectory-evaluator"
+        evaluator.name = "TrajectoryEvaluator"
+
+        # Record the execution instead of asserting inside the side effect: an
+        # AssertionError raised within the mocked evaluator could be swallowed by
+        # error handling in the code under test and leave this test passing.
+        received: list[AgentExecution] = []
+
+        async def record_agent_execution(
+            agent_execution: AgentExecution,
+            evaluation_criteria: object,
+        ) -> NumericEvaluationResult:
+            received.append(agent_execution)
+            return NumericEvaluationResult(score=1.0)
+
+        evaluator.validate_and_evaluate_criteria = AsyncMock(
+            side_effect=record_agent_execution
+        )
+
+        await runtime.run_evaluator(
+            evaluator=evaluator,
+            execution_output=mock_execution_output,
+            eval_item=eval_item,
+            evaluation_criteria=None,
+        )
+
+        evaluator.validate_and_evaluate_criteria.assert_awaited_once()
+        assert [run.simulation_instructions for run in received] == [simulation_prompt]
+
     @pytest.mark.asyncio
     async def test_multiple_evaluators_create_multiple_spans(
         self,

diff --git a/packages/uipath/tests/evaluators/test_evaluator_methods.py b/packages/uipath/tests/evaluators/test_evaluator_methods.py
@@ -1728,6 +1728,70 @@ async def mock_chat_completions(*args: Any, **kwargs: Any) -> Any:
         assert isinstance(result, NumericEvaluationResult)
         assert result.score == 0.9
 
+    @pytest.mark.asyncio
+    async def test_llm_trajectory_replaces_all_prompt_placeholders(
+        self, sample_agent_execution: AgentExecution, mocker: MockerFixture
+    ) -> None:
+        """Test that every built-in placeholder in a trajectory prompt is filled in."""
+        # Mutable holder that lets the fake LLM call hand back the prompt it was sent.
+        captured = {"prompt": ""}
+
+        tool_call = mocker.MagicMock()
+        tool_call.id = "call_1"
+        tool_call.name = "submit_evaluation"
+        tool_call.arguments = {
+            "score": 90,
+            "justification": "The agent followed the expected behavior",
+        }
+        judge_message = mocker.MagicMock(content=None, tool_calls=[tool_call])
+        llm_response = mocker.MagicMock()
+        llm_response.choices = [mocker.MagicMock(message=judge_message)]
+
+        async def fake_chat_completions(*args: Any, **kwargs: Any) -> Any:
+            captured["prompt"] = kwargs["messages"][-1]["content"]
+            return llm_response
+
+        llm_service = mocker.MagicMock()
+        llm_service.chat_completions = fake_chat_completions
+
+        mocker.patch("uipath.eval.evaluators.llm_as_judge_evaluator.UiPath")
+        mocker.patch(
+            "uipath.eval.evaluators.llm_as_judge_evaluator.UiPathLlmChatService",
+            return_value=llm_service,
+        )
+
+        # One line per placeholder; joined with newlines to form the prompt template.
+        placeholder_lines = [
+            "input={{UserOrSyntheticInput}}",
+            "instructions={{SimulationInstructions}}",
+            "expected={{ExpectedAgentBehavior}}",
+            "history={{AgentRunHistory}}",
+        ]
+        evaluator_payload = {
+            "evaluatorConfig": {
+                "name": "LlmTrajectoryTest",
+                "prompt": "\n".join(placeholder_lines),
+                "model": "gpt-4",
+            },
+            "id": str(uuid.uuid4()),
+        }
+        evaluator = LLMJudgeTrajectoryEvaluator.model_validate(evaluator_payload)
+        agent_execution = sample_agent_execution.model_copy(
+            update={"simulation_instructions": "Mock the backend API response"}
+        )
+        criteria = TrajectoryEvaluationCriteria(
+            expected_agent_behavior="Agent should respond helpfully"
+        )
+
+        result = await evaluator.evaluate(agent_execution, criteria)
+        rendered_prompt = captured["prompt"]
+
+        assert isinstance(result, NumericEvaluationResult)
+        assert "{{" not in rendered_prompt
+        assert "Agent should respond helpfully" in rendered_prompt
+        assert "Mock the backend API response" in rendered_prompt
+        assert "{'input': 'Test input'}" in rendered_prompt
+
     @pytest.mark.asyncio
     async def test_llm_trajectory_validate_and_evaluate_criteria(
         self, sample_agent_execution: AgentExecution, mocker: MockerFixture