diff --git a/src/uipath/runtime/governance/audit/__init__.py b/src/uipath/runtime/governance/audit/__init__.py
new file mode 100644
index 0000000..6f7ecc5
--- /dev/null
+++ b/src/uipath/runtime/governance/audit/__init__.py
@@ -0,0 +1,70 @@
+"""Audit sink framework for governance events.
+
+This module provides a pluggable audit system that supports multiple
+output destinations (sinks) for governance events. Events are emitted
+to all registered sinks, allowing flexible audit trail configuration.
+
+Usage::
+
+    from uipath.runtime.governance.audit import get_audit_manager, AuditEvent
+
+    # Get the global audit manager
+    manager = get_audit_manager()
+
+    # Emit an event (goes to all registered sinks)
+    manager.emit(AuditEvent(
+        event_type="rule_evaluation",
+        trace_id="abc-123",
+        agent_name="my-agent",
+        data={"rule_id": "ASI-01", "matched": True},
+    ))
+
+    # Register a custom sink
+    manager.register_sink(MyCustomSink())
+
+Built-in sinks:
+
+- :class:`TracesAuditSink`  – OpenTelemetry spans for Orchestrator Traces UI
+- :class:`ConsoleAuditSink` – stderr output for debugging
+
+Sink registration:
+
+- The ``traces`` sink (OpenTelemetry spans → Orchestrator audit UI) is
+  **platform-mandated** and always registered. It cannot be disabled by
+  a developer-side env var — governance is platform-owned.
+- The ``console`` sink is a developer aid for local debugging and is
+  opt-in via env var.
+
+Environment variables (developer-facing, console only):
+
+- ``UIPATH_AUDIT_VERBOSE`` – verbose console output.
+- ``UIPATH_GOVERNANCE_CONSOLE_LOG`` – enable the console sink.
+"""
+
+from .base import (
+    AuditEvent,
+    AuditManager,
+    AuditSink,
+    EventType,
+    get_audit_manager,
+    reset_audit_manager,
+)
+from .console import ConsoleAuditSink
+from .factory import create_sink
+from .traces import TracesAuditSink
+
+__all__ = [
+    # Core classes
+    "AuditEvent",
+    "AuditManager",
+    "AuditSink",
+    "EventType",
+    # Global manager
+    "get_audit_manager",
+    "reset_audit_manager",
+    # Factory
+    "create_sink",
+    # Built-in sinks
+    "ConsoleAuditSink",
+    "TracesAuditSink",
+]
diff --git a/src/uipath/runtime/governance/audit/base.py b/src/uipath/runtime/governance/audit/base.py
new file mode 100644
index 0000000..5fbdf0b
--- /dev/null
+++ b/src/uipath/runtime/governance/audit/base.py
@@ -0,0 +1,730 @@
+"""Base classes and models for the audit sink framework.
+
+This module provides the core abstractions for the governance audit system:
+- AuditEvent: The data model for audit events
+- EventType: Constants for common event types
+- AuditSink: Abstract base class for sink implementations
+- AuditManager: Central hub for routing events to sinks
+
+The AuditManager uses a background thread to process events asynchronously,
+avoiding blocking the main agent execution path during audit trace HTTP calls.
+"""
+
+from __future__ import annotations
+
+import atexit
+import json
+import logging
+import os
+import queue
+import threading
+from abc import ABC, abstractmethod
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    pass
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Audit Event Model
+# =============================================================================
+
+
+@dataclass
+class AuditEvent:
+    """A single governance audit record, deliverable to any sink.
+
+    Attributes:
+        event_type: Kind of event (e.g., "rule_evaluation", "hook_summary")
+        timestamp: Moment the event was created (defaults to now, in UTC)
+        trace_id: Identifier used to correlate events of one trace
+        agent_name: Name of the agent under governance
+        hook: Lifecycle hook that produced the event (optional)
+        data: Payload specific to the event type
+        metadata: Extra key/values for filtering or routing
+    """
+
+    event_type: str
+    trace_id: str = ""
+    agent_name: str = "unknown"
+    hook: str = ""
+    data: dict[str, Any] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the event as a plain dict with an ISO-8601 timestamp."""
+        # asdict() copies the nested containers; only the timestamp needs
+        # converting, and overriding the key keeps its original position.
+        return {**asdict(self), "timestamp": self.timestamp.isoformat()}
+
+    def to_json(self) -> str:
+        """Serialize :meth:`to_dict` to a JSON string."""
+        return json.dumps(self.to_dict())
+
+
+class EventType:
+    """String constants naming the common audit event types."""
+
+    RULE_EVALUATION = "rule_evaluation"  # AuditManager.emit_rule_evaluation
+    HOOK_START = "hook_start"
+    HOOK_END = "hook_end"  # AuditManager.emit_hook_summary
+    SESSION_START = "session_start"  # AuditManager.emit_session_start
+    SESSION_END = "session_end"  # AuditManager.emit_session_end
+    POLICY_VIOLATION = "policy_violation"
+    POLICY_ALLOW = "policy_allow"
+    PACKS_LOADED = "packs_loaded"
+
+
+# =============================================================================
+# Audit Sink Base Class
+# =============================================================================
+
+
+class AuditSink(ABC):
+    """Abstract base class for audit output destinations.
+
+    Subclass this to create custom audit sinks. The manager offers a sink
+    every event and delivers those for which ``accepts`` returns True.
+
+    Example:
+        class SlackAuditSink(AuditSink):
+            def __init__(self, webhook_url: str):
+                self.webhook_url = webhook_url
+                self._name = "slack"
+
+            @property
+            def name(self) -> str:
+                return self._name
+
+            def emit(self, event: AuditEvent) -> None:
+                if event.data.get("matched") and event.data.get("action") == "deny":
+                    # Send to Slack on violations (real code: timeout + error handling)
+                    requests.post(self.webhook_url, json=event.to_dict())
+
+            def flush(self) -> None:
+                pass
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Unique name for this sink (AuditManager keys its registry on it)."""
+        pass
+
+    @abstractmethod
+    def emit(self, event: AuditEvent) -> None:
+        """Emit an audit event to this sink.
+
+        Args:
+            event: The audit event to emit
+
+        Note:
+            Implementations should handle errors gracefully. AuditManager
+            catches anything raised here and counts it toward tripping the sink.
+        """
+        pass
+
+    def flush(self) -> None:
+        """Flush any buffered events (no-op by default).
+
+        Override if sink buffers events before writing.
+        """
+        return
+
+    def close(self) -> None:
+        """Clean up resources (no-op by default).
+
+        Override if sink holds resources that need cleanup.
+        """
+        return
+
+    def accepts(self, event: AuditEvent) -> bool:
+        """Check if this sink should receive the event.
+
+        Override to filter events. Default accepts all events.
+
+        Args:
+            event: The audit event to check
+
+        Returns:
+            True if sink should receive event, False to skip
+        """
+        return True
+
+
+# =============================================================================
+# Audit Manager
+# =============================================================================
+
+
+class AuditManager:
+    """Manages multiple audit sinks and routes events to them.
+
+    The AuditManager is the central hub for audit events. It maintains
+    a list of registered sinks and broadcasts events to all of them.
+
+    Thread Safety:
+        Events are queued and processed by a background thread, making
+        emit() non-blocking. This avoids blocking agent execution during
+        audit trace HTTP calls.
+    """
+
+    # Trip a sink after this many consecutive emit failures (circuit-breaker).
+    _SINK_FAILURE_THRESHOLD = 10
+    # Bound the async queue so a stuck sink can't grow memory without limit.
+    # Matches the order of magnitude of a long-running agent's per-session
+    # audit volume; on overflow the oldest event is dropped (loss visible
+    # via stats.events_dropped).
+    _DEFAULT_QUEUE_MAXSIZE = 10_000
+
+    def __init__(
+        self,
+        async_mode: bool = True,
+        queue_maxsize: int = _DEFAULT_QUEUE_MAXSIZE,
+    ) -> None:
+        """Initialize the audit manager and, in async mode, start its worker.
+
+        Args:
+            async_mode: If True (default), events are processed in a background
+                       thread. If False, events are processed synchronously.
+            queue_maxsize: Max queued events in async mode (<= 0 means
+                       unbounded); on overflow the oldest event is dropped.
+        """
+        self._sinks: list[AuditSink] = []
+        # Single lock guards _sinks, _sink_failures, _tripped_sinks,
+        # _event_count, _error_count, _dropped_count — every counter and
+        # collection that the worker thread and emit-caller mutate.
+        self._sinks_lock = threading.Lock()
+        # Per-sink consecutive-failure counter, keyed by sink name.
+        self._sink_failures: dict[str, int] = {}
+        self._tripped_sinks: set[str] = set()  # sinks skipped by the breaker
+        self._event_count = 0
+        self._error_count = 0
+        self._dropped_count = 0
+        self._async_mode = async_mode
+        self._pid = os.getpid()  # lets emit() detect a forked child
+
+        # Background processing; a queued None is the shutdown sentinel.
+        self._queue: queue.Queue[AuditEvent | None] = queue.Queue(maxsize=queue_maxsize)
+        self._worker_thread: threading.Thread | None = None
+        self._shutdown = threading.Event()  # set by close() to stop the worker
+
+        if self._async_mode:
+            self._start_worker()
+
+    def _start_worker(self) -> None:
+        """Start the background worker thread unless one is already alive."""
+        if self._worker_thread is not None and self._worker_thread.is_alive():
+            return
+
+        self._shutdown.clear()  # a set flag would end the new loop at once
+        self._worker_thread = threading.Thread(
+            target=self._worker_loop,
+            name="governance-audit-worker",
+            daemon=True,  # daemon thread: does not block interpreter exit
+        )
+        self._worker_thread.start()
+        logger.debug("Background audit worker started")
+
+    def _worker_loop(self) -> None:
+        """Process queued events until shutdown is signalled, then drain the rest."""
+        while not self._shutdown.is_set():
+            # Wait for an event with a timeout so we can re-check shutdown.
+            try:
+                event = self._queue.get(timeout=0.5)
+            except queue.Empty:
+                continue
+            # Every successful get() must be paired with exactly one
+            # task_done() — including the shutdown sentinel and the case
+            # where _emit_sync raises — otherwise unfinished_tasks never
+            # reaches zero and flush() can only time out.
+            try:
+                if event is None:
+                    # Shutdown sentinel enqueued by close()
+                    break
+                self._emit_sync(event)
+            except Exception as e:
+                logger.warning("Audit worker error: %s", e)
+            finally:
+                self._queue.task_done()
+
+        # Deliver whatever is still queued before the thread exits.
+        self._drain_queue()
+
+    def _drain_queue(self) -> None:
+        """Deliver events still queued, without blocking; sentinels are skipped."""
+        while True:
+            try:
+                event = self._queue.get_nowait()
+            except queue.Empty:
+                break
+            # As in _worker_loop: pair every get() with one task_done(),
+            # even when _emit_sync raises, so shutdown accounting is sound.
+            try:
+                if event is not None:
+                    self._emit_sync(event)
+            except Exception as e:
+                logger.warning("Audit drain error: %s", e)
+            finally:
+                self._queue.task_done()
+
+    def _emit_sync(self, event: AuditEvent) -> None:
+        """Deliver event to each live sink; only a real emit resets its streak."""
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+            tripped = set(self._tripped_sinks)
+        for sink in sinks:
+            if sink.name in tripped:
+                continue
+            try:
+                if not sink.accepts(event):
+                    continue  # filtered: not a success, keep the failure streak
+                sink.emit(event)
+                with self._sinks_lock:
+                    if self._sink_failures.get(sink.name):
+                        self._sink_failures[sink.name] = 0
+            except Exception as e:
+                with self._sinks_lock:
+                    self._error_count += 1
+                    fails = self._sink_failures.get(sink.name, 0) + 1
+                    self._sink_failures[sink.name] = fails
+                    tripped_now = fails >= self._SINK_FAILURE_THRESHOLD
+                    if tripped_now:
+                        self._tripped_sinks.add(sink.name)
+                if tripped_now:
+                    logger.error(
+                        "Audit sink '%s' tripped after %d consecutive failures; "
+                        "will be skipped for the rest of this process. Last error: %s",
+                        sink.name,
+                        fails,
+                        e,
+                    )
+                else:
+                    logger.warning(
+                        "Audit sink '%s' failed to emit event (%d/%d): %s",
+                        sink.name,
+                        fails,
+                        self._SINK_FAILURE_THRESHOLD,
+                        e,
+                    )
+
+    def register_sink(self, sink: AuditSink) -> None:
+        """Add ``sink`` to the destinations that receive events.
+
+        Args:
+            sink: The sink to register
+
+        Note:
+            A sink whose name is already registered is ignored. For a new
+            sink, leftover circuit-breaker state stored under the same name
+            is cleared, so it never inherits a previous instance's failure
+            count or tripped flag.
+        """
+        with self._sinks_lock:
+            duplicate = any(s.name == sink.name for s in self._sinks)
+            if not duplicate:
+                self._sinks.append(sink)
+                self._sink_failures.pop(sink.name, None)
+                self._tripped_sinks.discard(sink.name)
+        if duplicate:
+            logger.debug("Sink '%s' already registered, skipping", sink.name)
+            return
+        logger.info("Registered audit sink: %s", sink.name)
+
+    def unregister_sink(self, name: str) -> bool:
+        """Remove the sink called ``name`` and close it.
+
+        Args:
+            name: Name of the sink to remove
+
+        Returns:
+            True if sink was removed, False if not found
+        """
+        removed: AuditSink | None = None
+        with self._sinks_lock:
+            for index, candidate in enumerate(self._sinks):
+                if candidate.name != name:
+                    continue
+                removed = self._sinks.pop(index)
+                self._sink_failures.pop(name, None)
+                self._tripped_sinks.discard(name)
+                break
+        if removed is None:
+            return False
+        try:
+            removed.close()
+        except Exception as e:
+            logger.warning("Audit sink '%s' failed to close: %s", name, e)
+        logger.info("Unregistered audit sink: %s", name)
+        return True
+
+    def get_sink(self, name: str) -> AuditSink | None:
+        """Return the registered sink called ``name``, or None if absent."""
+        with self._sinks_lock:
+            # The first sink with this name is returned; the generator is
+            # consumed while the lock is still held.
+            matches = (sink for sink in self._sinks if sink.name == name)
+            return next(matches, None)
+
+    def list_sinks(self) -> list[str]:
+        """Return the names of the registered sinks, in registration order."""
+        with self._sinks_lock:
+            return [registered.name for registered in self._sinks]
+
+    def emit(self, event: AuditEvent) -> None:
+        """Emit an audit event to all registered sinks.
+
+        In async mode (default) the event is queued for the background
+        worker and this call returns immediately; in sync mode the sinks
+        are invoked inline.
+
+        In a forked child that inherited the parent's manager, the queue
+        and worker are rebuilt before enqueueing; otherwise events would
+        pile up in a queue no one is draining.
+
+        On overflow the oldest queued event is evicted to make room.
+        ``stats["events_dropped"]`` counts each event actually lost: the
+        evicted one, and the new one if the queue refilled first.
+
+        Args:
+            event: The audit event to emit
+        """
+        self._ensure_alive_after_fork()
+
+        with self._sinks_lock:
+            self._event_count += 1
+
+        if not self._async_mode:
+            self._emit_sync(event)
+            return
+
+        lost = 0
+        try:
+            self._queue.put_nowait(event)
+        except queue.Full:
+            try:
+                self._queue.get_nowait()
+                # Evicted event never reaches the worker: settle its task here.
+                self._queue.task_done()
+                lost += 1
+            except queue.Empty:
+                pass  # worker drained the queue meanwhile; nothing evicted
+            try:
+                self._queue.put_nowait(event)
+            except queue.Full:
+                lost += 1  # refilled by other emitters; never block here
+        if lost:
+            with self._sinks_lock:
+                self._dropped_count += lost
+
+    def _ensure_alive_after_fork(self) -> None:
+        """Rebuild lock, queue and worker if we're in a forked child."""
+        current_pid = os.getpid()
+        if current_pid == self._pid:
+            return
+        # Rebuild what the child cannot reuse: held lock, parent queue, dead worker.
+        self._pid = current_pid
+        self._sinks_lock = threading.Lock()
+        self._queue = queue.Queue(maxsize=self._queue.maxsize)
+        self._shutdown = threading.Event()
+        self._worker_thread = None
+        if self._async_mode:
+            self._start_worker()
+
+    def emit_rule_evaluation(
+        self,
+        rule_id: str,
+        rule_name: str,
+        pack_name: str,
+        hook: str,
+        matched: bool,
+        action: str,
+        detail: str = "",
+        agent_name: str = "agent",
+        trace_id: str = "",
+        description: str = "",
+    ) -> None:
+        """Build a RULE_EVALUATION event from the arguments and emit it."""
+        # "status" restates ``matched`` as a MATCHED/PASS label.
+        payload = {
+            "rule_id": rule_id,
+            "rule_name": rule_name,
+            "pack_name": pack_name,
+            "matched": matched,
+            "action": action,
+            "detail": detail,
+            "description": description,
+            "status": "MATCHED" if matched else "PASS",
+        }
+        event = AuditEvent(
+            event_type=EventType.RULE_EVALUATION,
+            trace_id=trace_id,
+            agent_name=agent_name,
+            hook=hook,
+            data=payload,
+        )
+        self.emit(event)
+    def emit_hook_summary(
+        self,
+        hook: str,
+        agent_name: str,
+        total_rules: int,
+        matched_rules: int,
+        final_action: str,
+        trace_id: str = "",
+        enforcement_mode: str = "audit",
+    ) -> None:
+        """Build a HOOK_END summary event from the arguments and emit it."""
+        payload = {
+            "total_rules": total_rules,
+            "matched_rules": matched_rules,
+            "final_action": final_action,
+            "enforcement_mode": enforcement_mode,
+        }
+        event = AuditEvent(
+            event_type=EventType.HOOK_END,
+            trace_id=trace_id,
+            agent_name=agent_name,
+            hook=hook,
+            data=payload,
+        )
+        self.emit(event)
+
+    def emit_session_start(
+        self,
+        session_id: str,
+        agent_name: str,
+        packs: list[str],
+        enforcement_mode: str = "audit",
+    ) -> None:
+        """Build a SESSION_START event (trace id = session id) and emit it."""
+        payload = {
+            "session_id": session_id,
+            "packs": packs,
+            "enforcement_mode": enforcement_mode,
+        }
+        event = AuditEvent(
+            event_type=EventType.SESSION_START,
+            trace_id=session_id,
+            agent_name=agent_name,
+            data=payload,
+        )
+        self.emit(event)
+
+    def emit_session_end(
+        self,
+        session_id: str,
+        agent_name: str,
+        total_evaluations: int,
+        rules_matched: int,
+        rules_denied: int,
+    ) -> None:
+        """Build a SESSION_END event (trace id = session id) and emit it."""
+        payload = {
+            "session_id": session_id,
+            "total_evaluations": total_evaluations,
+            "rules_matched": rules_matched,
+            "rules_denied": rules_denied,
+        }
+        event = AuditEvent(
+            event_type=EventType.SESSION_END,
+            trace_id=session_id,
+            agent_name=agent_name,
+            data=payload,
+        )
+        self.emit(event)
+
+    def flush(self, timeout: float = 5.0) -> None:
+        """Flush all pending events and sinks (sinks are flushed even on timeout).
+
+        In async mode, polls the queue until it drains or ``timeout``
+        seconds elapse, whichever comes first. ``queue.Queue.join`` has
+        no timeout argument — using it would block indefinitely on a
+        wedged sink, which defeats the bounded-shutdown contract that
+        :func:`_cleanup_audit_manager` relies on at process exit.
+
+        Args:
+            timeout: Maximum seconds to wait for queue to drain (default 5.0)
+        """
+        if self._async_mode:
+            import time
+
+            deadline = time.monotonic() + max(0.0, timeout)
+            poll_interval = min(0.05, timeout) if timeout > 0 else 0.0
+            while time.monotonic() < deadline:
+                try:
+                    if self._queue.unfinished_tasks == 0:
+                        break
+                except Exception:  # noqa: BLE001 - queue introspection is best-effort
+                    break
+                time.sleep(poll_interval)
+            else:
+                # while/else: reached only when the deadline passed without a
+                # break. Log so a wedged sink is surfaced rather than swallowed.
+                try:
+                    pending = self._queue.unfinished_tasks
+                except Exception:  # noqa: BLE001
+                    pending = -1
+                if pending:
+                    logger.warning(
+                        "Audit queue did not drain within %.2fs "
+                        "(unfinished tasks=%s); sink may be wedged",
+                        timeout, pending,
+                    )
+
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+        for sink in sinks:
+            try:
+                sink.flush()
+            except Exception as e:
+                logger.warning("Audit sink '%s' failed to flush: %s", sink.name, e)
+
+    def close(self) -> None:
+        """Stop the worker, then unregister and close every sink.
+
+        Stops the background worker thread and drains any remaining events.
+        Shutdown is bounded: ``_shutdown`` is the primary signal the
+        worker polls; the sentinel ``None`` enqueue is best-effort. If
+        the queue is full and the worker is wedged on a slow sink,
+        ``put_nowait`` fails fast rather than hanging process exit.
+        """
+        if self._async_mode and self._worker_thread is not None:
+            # Signal shutdown first so the worker's next queue.get() loop
+            # iteration exits even if we can't enqueue the sentinel.
+            self._shutdown.set()
+            try:
+                self._queue.put_nowait(None)  # Wake up worker
+            except queue.Full:
+                # Queue saturated by a stuck sink; the worker will see
+                # _shutdown on its next loop iteration once whatever it's
+                # blocked on completes (or the 2s join timeout fires).
+                logger.debug(
+                    "Audit queue full at shutdown; relying on _shutdown signal"
+                )
+
+            # Bounded wait: the worker may still be alive after the timeout.
+            if self._worker_thread.is_alive():
+                self._worker_thread.join(timeout=2.0)
+
+            logger.debug("Background audit worker stopped")
+
+        with self._sinks_lock:
+            sinks = list(self._sinks)
+            self._sinks.clear()
+            self._sink_failures.clear()
+            self._tripped_sinks.clear()
+        for sink in sinks:
+            try:
+                sink.close()
+            except Exception as e:
+                logger.warning("Audit sink '%s' failed to close: %s", sink.name, e)
+
+    @property
+    def stats(self) -> dict[str, Any]:
+        """Return a snapshot of the sink, event, drop and error counters."""
+        with self._sinks_lock:
+            names = [registered.name for registered in self._sinks]
+            counters = (self._event_count, self._dropped_count, self._error_count)
+        emitted, dropped, errors = counters
+        queued = self._queue.qsize() if self._async_mode else 0
+        return {
+            "sinks": len(names),
+            "sink_names": names,
+            "events_emitted": emitted,
+            "events_queued": queued,
+            "events_dropped": dropped,
+            "errors": errors,
+            "async_mode": self._async_mode,
+        }
+
+
+# =============================================================================
+# Global Audit Manager
+# =============================================================================
+
+_audit_manager: AuditManager | None = None
+_atexit_registered = False
+
+
+def _cleanup_audit_manager() -> None:
+    """Best-effort flush and close of the global manager at process exit."""
+    manager = _audit_manager
+    if manager is not None:
+        try:
+            manager.flush(timeout=2.0)
+            manager.close()
+        except Exception:  # never let audit teardown break interpreter exit
+            logger.debug("Audit manager cleanup failed", exc_info=True)
+
+
+def get_audit_manager() -> AuditManager:
+    """Get or create the global audit manager.
+
+    On first call, builds the manager (async unless ``UIPATH_AUDIT_SYNC=true``)
+    and its default sinks. The global is published only once sink setup has
+    succeeded, so a failed setup is retried instead of leaving a sink-less
+    manager behind. Returns the process-wide :class:`AuditManager`.
+    """
+    global _audit_manager, _atexit_registered
+
+    if _audit_manager is None:
+        async_mode = os.getenv("UIPATH_AUDIT_SYNC", "false").lower() != "true"
+        manager = AuditManager(async_mode=async_mode)
+        try:
+            _configure_default_sinks(manager)
+        except Exception:
+            manager.close()  # stop any worker thread started above
+            raise
+        _audit_manager = manager
+        if not _atexit_registered:
+            atexit.register(_cleanup_audit_manager)
+            _atexit_registered = True
+    return _audit_manager
+
+
+def _configure_default_sinks(manager: AuditManager) -> None:
+    """Register the default sinks on ``manager``.
+
+    The traces sink (OpenTelemetry spans to the Orchestrator audit UI)
+    is **platform-mandated** and is always registered — no developer-side
+    env var can disable it. Governance is platform-owned, so developers
+    cannot bypass the audit trail.
+
+    The console sink is a developer aid for local debugging and is
+    opt-in via ``UIPATH_GOVERNANCE_CONSOLE_LOG=true``.
+    """
+    from .factory import create_sink
+
+    console_on = os.getenv("UIPATH_GOVERNANCE_CONSOLE_LOG", "false").lower() == "true"
+    # "traces" is mandatory (platform-controlled); "console" is opt-in.
+    sink_names: list[str] = ["traces", "console"] if console_on else ["traces"]
+
+    for sink_name in sink_names:
+        sink = create_sink(sink_name)
+        if not sink:
+            continue
+        manager.register_sink(sink)
+        logger.info("Audit sink registered: %s", sink_name)
+
+    logger.info("Governance audit sinks configured: %s", ", ".join(sink_names))
+
+
+def reset_audit_manager() -> None:
+    """Reset the global audit manager (for testing).
+
+    Clears the global first, then flushes and closes the old manager.
+    """
+    global _audit_manager
+    manager, _audit_manager = _audit_manager, None
+    if manager:
+        try:
+            manager.flush(timeout=1.0)
+        except Exception:
+            pass  # best-effort: a failed flush must not prevent close()
+        manager.close()
diff --git a/src/uipath/runtime/governance/audit/console.py b/src/uipath/runtime/governance/audit/console.py
new file mode 100644
index 0000000..3d28a57
--- /dev/null
+++ b/src/uipath/runtime/governance/audit/console.py
@@ -0,0 +1,130 @@
+"""Console audit sink for human-readable output.
+
+This sink writes audit events to stderr in a human-readable format,
+useful for debugging and development.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+
+from .base import AuditEvent, AuditSink, EventType
+
+
+class ConsoleAuditSink(AuditSink):
+    """Audit sink that prints human-readable lines to stderr.
+
+    Meant for debugging and development.
+
+    Args:
+        verbose: If True, show all events. If False, only show matches.
+    """
+
+    def __init__(self, verbose: bool = False) -> None:
+        """Store the verbosity flag (verbose shows every event)."""
+        self._verbose = verbose
+
+    @property
+    def name(self) -> str:
+        """Fixed identifier of this sink: ``"console"``."""
+        return "console"
+
+    def accepts(self, event: AuditEvent) -> bool:
+        """Pass everything if verbose; otherwise matches and key lifecycle events."""
+        if self._verbose:
+            return True
+        kind = event.event_type
+        if kind == EventType.RULE_EVALUATION:
+            return event.data.get("matched", False)
+        return kind in (
+            EventType.SESSION_START,
+            EventType.SESSION_END,
+            EventType.HOOK_END,
+            EventType.POLICY_VIOLATION,
+        )
+
+    def emit(self, event: AuditEvent) -> None:
+        """Print the event to stderr via its type-specific formatter."""
+        formatters = (
+            (EventType.RULE_EVALUATION, self._emit_rule_evaluation),
+            (EventType.HOOK_END, self._emit_hook_summary),
+            (EventType.SESSION_START, self._emit_session_start),
+            (EventType.SESSION_END, self._emit_session_end),
+        )
+        for kind, formatter in formatters:
+            if event.event_type == kind:
+                return formatter(event)
+        return self._emit_generic(event)
+
+    def _emit_rule_evaluation(self, event: AuditEvent) -> None:
+        """Print a matched rule in full, or an unmatched one only if verbose."""
+        fields = event.data
+        is_match = fields.get("matched", False)
+        label = "MATCHED" if is_match else "PASS"
+        rule_id = fields.get("rule_id", "?")
+        rule_name = fields.get("rule_name", "?")
+        action = fields.get("action", "?").upper()
+        detail = fields.get("detail", "")
+
+        # Matches carry action and detail; passes get the short form.
+        if is_match:
+            line = (
+                f"[GOVERNANCE] [{label}] {rule_id} | {rule_name} | "
+                f"action={action} | {detail}"
+            )
+        elif self._verbose:
+            line = f"[GOVERNANCE] [{label}] {rule_id} | {rule_name}"
+        else:
+            # Unmatched and not verbose: nothing to show.
+            return
+        print(line, file=sys.stderr, flush=True)
+
+    def _emit_hook_summary(self, event: AuditEvent) -> None:
+        """Print the per-hook rule totals and the resulting action."""
+        summary = event.data
+        hook = event.hook
+        total = summary.get("total_rules", 0)
+        matched = summary.get("matched_rules", 0)
+        action = summary.get("final_action", "allow").upper()
+        mode = summary.get("enforcement_mode", "audit")
+        # Audit mode: a deny is displayed as "would deny".
+        if mode == "audit" and action == "DENY":
+            action = "AUDIT (would deny)"
+
+        line = (
+            f"[GOVERNANCE] HOOK: {hook} | rules={total} | matched={matched} | "
+            f"action={action}"
+        )
+        print(line, file=sys.stderr, flush=True)
+
+    def _emit_session_start(self, event: AuditEvent) -> None:
+        """Print the agent, packs and enforcement mode of a new session."""
+        data = event.data
+        pack_list = ",".join(data.get("packs", []))
+        mode = data.get("enforcement_mode", "audit")
+        line = (
+            f"[GOVERNANCE] Session started | agent={event.agent_name} | "
+            f"packs={pack_list} | mode={mode}"
+        )
+        print(line, file=sys.stderr, flush=True)
+
+    def _emit_session_end(self, event: AuditEvent) -> None:
+        """Print the evaluation, match and deny totals of a finished session."""
+        totals = event.data
+        evaluations = totals.get("total_evaluations", 0)
+        matched = totals.get("rules_matched", 0)
+        denied = totals.get("rules_denied", 0)
+        line = (
+            f"[GOVERNANCE] Session ended | evaluations={evaluations} | "
+            f"matched={matched} | denied={denied}"
+        )
+        print(line, file=sys.stderr, flush=True)
+
+    def _emit_generic(self, event: AuditEvent) -> None:
+        """Print any other event type with its data rendered as JSON."""
+        # json.dumps raises TypeError for data it cannot serialize; the
+        # error propagates to the caller of emit().
+        payload = json.dumps(event.data)
+        line = f"[GOVERNANCE] {event.event_type} | {event.agent_name} | {payload}"
+        print(line, file=sys.stderr, flush=True)
diff --git a/src/uipath/runtime/governance/audit/factory.py b/src/uipath/runtime/governance/audit/factory.py
new file mode 100644
index 0000000..1c8e248
--- /dev/null
+++ b/src/uipath/runtime/governance/audit/factory.py
@@ -0,0 +1,45 @@
+"""Factory function for creating audit sinks by name.
+
+This module provides the create_sink function used by the AuditManager
+to instantiate sinks based on environment configuration.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+
+from .base import AuditSink
+
+logger = logging.getLogger(__name__)
+
+
+def create_sink(name: str) -> AuditSink | None:
+    """Create an audit sink by name.
+
+    Args:
+        name: Sink name (``traces`` / ``console``), matched ignoring case/whitespace.
+
+    Returns:
+        The created sink, or ``None`` if the name is unknown.
+
+    Supported sinks:
+        - ``traces``: OpenTelemetry spans for Orchestrator Traces UI
+        - ``console``: human-readable stderr output; verbose when
+          ``UIPATH_AUDIT_VERBOSE`` is ``true``, ``1``, ``yes`` or ``on``
+    """
+    key = name.strip().lower()
+
+    if key == "traces":
+        from .traces import TracesAuditSink
+
+        return TracesAuditSink()
+
+    if key == "console":
+        from .console import ConsoleAuditSink
+
+        flag = os.getenv("UIPATH_AUDIT_VERBOSE", "").strip().lower()
+        return ConsoleAuditSink(verbose=flag in {"1", "true", "yes", "on"})
+
+    logger.warning("Unknown audit sink: %s", key)
+    return None
diff --git a/src/uipath/runtime/governance/audit/traces.py b/src/uipath/runtime/governance/audit/traces.py
new file mode 100644
index 0000000..81de1e4
--- /dev/null
+++ b/src/uipath/runtime/governance/audit/traces.py
@@ -0,0 +1,268 @@
+"""OpenTelemetry traces audit sink for Orchestrator integration.
+
+This sink creates OpenTelemetry spans for governance events, which
+appear in the UiPath Orchestrator Traces UI for observability.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any
+
+from uipath.runtime.governance.native.backend_client import (
+    ENV_FOLDER_KEY,
+    ENV_JOB_KEY,
+    ENV_ORGANIZATION_ID,
+    ENV_TENANT_ID,
+    ENV_TRACE_ID,
+)
+
+from .base import AuditEvent, AuditSink, EventType
+
+logger = logging.getLogger(__name__)
+
+# Value for the ``type`` / ``span_type`` span attributes on every
+# governance span. Matches ``SpanType.AGENT_RUN`` in uipath-agents-python
+# — we use the string literal here (not a cross-package import) to keep
+# uipath-runtime free of a uipath-agents dependency. If the agents-side
+# registry adds new values, this constant is the single place to update.
+SPAN_TYPE_AGENT_RUN = "agentRun"
+
+# Identifies this auditor on every governance span. Lets a downstream
+# consumer distinguish traces emitted by the Python in-runtime governance
+# checker from those produced by the governance-server (or any future
+# language-specific governance SDK). Set as the ``source`` span
+# attribute on every governance trace span.
+GOVERNANCE_SOURCE = "governance-checker-python"
+
+
+class TracesAuditSink(AuditSink):
+    """Audit sink that creates OpenTelemetry spans.
+
+    Spans appear in UiPath Orchestrator Traces UI, providing structured
+    data for each governance evaluation.
+    """
+
+    def __init__(self) -> None:
+        """Initialize the sink with a deferred tracer and zero span count."""
+        self._tracer: Any = None  # None = not resolved yet, False = OTel missing
+        self._spans_created = 0
+
+    @property
+    def name(self) -> str:
+        """Constant sink identifier."""
+        return "traces"
+
+    def _get_tracer(self) -> Any:
+        """Return the lazily created OTel tracer, or ``None`` if OTel is missing."""
+        if self._tracer is None:
+            try:
+                from opentelemetry import trace
+
+                self._tracer = trace.get_tracer("uipath.governance")
+                logger.info("OpenTelemetry tracer initialized for governance traces")
+            except ImportError:
+                # OpenTelemetry is supplied transitively by uipath-core; an
+                # ImportError here means the host install is broken or
+                # governance is running outside the UiPath SDK environment.
+                logger.warning(
+                    "OpenTelemetry not available - governance traces disabled. "
+                    "OTel is normally provided by uipath-core; reinstall the SDK."
+                )
+                self._tracer = False
+        return self._tracer if self._tracer else None
+
+    def _get_uipath_trace_id(self) -> str | None:
+        """Get the trace id from the environment."""
+        return os.environ.get(ENV_TRACE_ID)
+
+    def _get_uipath_context(self) -> dict[str, str]:
+        """Get UiPath context attributes from the environment (unset ones omitted)."""
+        context = {}
+        organization_id = os.environ.get(ENV_ORGANIZATION_ID)
+        if organization_id:
+            context["uipath.organization_id"] = organization_id
+        tenant_id = os.environ.get(ENV_TENANT_ID)
+        if tenant_id:
+            context["uipath.tenant_id"] = tenant_id
+        folder_key = os.environ.get(ENV_FOLDER_KEY)
+        if folder_key:
+            context["uipath.folder_key"] = folder_key
+        job_key = os.environ.get(ENV_JOB_KEY)
+        if job_key:
+            context["uipath.job_key"] = job_key
+        return context
+
+    def emit(self, event: AuditEvent) -> None:
+        """Create a span for RULE_EVALUATION or HOOK_END events; drop others."""
+        if event.event_type == EventType.RULE_EVALUATION:
+            self._emit_rule_span(event)
+        elif event.event_type == EventType.HOOK_END:
+            self._emit_hook_span(event)
+
+    def _emit_hook_span(self, event: AuditEvent) -> None:
+        """Create a span for a hook summary (always emitted for each governance check)."""
+        tracer = self._get_tracer()
+        if tracer is None:
+            return
+
+        try:
+            from opentelemetry import context
+
+            data = event.data
+            hook = event.hook or "unknown"
+            span_name = f"governance.{hook.lower()}"
+
+            # Use the current OTel context if one is active; otherwise start a
+            # root span. A previous version fabricated a random parent
+            # span_id when only a trace_id was known, which produced orphan
+            # parents the backend could never resolve. The governance span
+            # now correctly appears as a child of whichever span is current
+            # (e.g. the runtime's root span) or as a fresh root.
+            ctx = context.get_current()
+            uipath_trace_id = event.trace_id or self._get_uipath_trace_id()
+
+            with tracer.start_as_current_span(span_name, context=ctx) as span:
+                # Required for Orchestrator Traces
+                span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
+                # Identifies which governance producer emitted this span.
+                # Lets downstream consumers (Orchestrator Traces UI, audit
+                # dashboards) filter governance spans by producer when
+                # multiple SDKs / governance backends co-exist.
+                span.set_attribute("source", GOVERNANCE_SOURCE)
+                span.set_attribute("uipath.custom_instrumentation", True)
+                if uipath_trace_id:
+                    span.set_attribute("uipath.trace_id", uipath_trace_id)
+
+                # UiPath context
+                for key, value in self._get_uipath_context().items():
+                    span.set_attribute(key, value)
+
+                # Hook summary attributes
+                span.set_attribute("governance.hook", hook)
+                span.set_attribute("governance.total_rules", data.get("total_rules", 0))
+                span.set_attribute(
+                    "governance.matched_rules", data.get("matched_rules", 0)
+                )
+                span.set_attribute(
+                    "governance.final_action", data.get("final_action", "allow")
+                )
+                span.set_attribute(
+                    "governance.enforcement_mode", data.get("enforcement_mode", "audit")
+                )
+                span.set_attribute("governance.agent_name", event.agent_name)
+
+                # Hook spans are summary containers — they're left at
+                # Status.UNSET regardless of final_action. Severity is
+                # carried by the per-rule spans (see _emit_rule_span);
+                # marking the hook span as ERROR would falsely paint
+                # the entire lifecycle phase as failed when only a
+                # specific rule fired underneath.
+
+                self._spans_created += 1
+
+        except Exception as e:
+            logger.warning("Failed to create governance hook span: %s", e)
+
+    def _emit_rule_span(self, event: AuditEvent) -> None:
+        """Create a span for a rule evaluation."""
+        tracer = self._get_tracer()
+        if tracer is None:
+            return
+
+        try:
+            from opentelemetry import context
+
+            data = event.data
+            rule_id = data.get("rule_id", "unknown")
+            span_name = f"governance.rule.{rule_id}"
+
+            # See note in _emit_hook_span: rely on the current OTel context
+            # rather than fabricating a remote-parent span_id.
+            ctx = context.get_current()
+            uipath_trace_id = event.trace_id or self._get_uipath_trace_id()
+
+            with tracer.start_as_current_span(span_name, context=ctx) as span:
+                # Required for Orchestrator Traces
+                span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
+                span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
+                # Identifies which governance producer emitted this span.
+                # Lets downstream consumers (Orchestrator Traces UI, audit
+                # dashboards) filter governance spans by producer when
+                # multiple SDKs / governance backends co-exist.
+                span.set_attribute("source", GOVERNANCE_SOURCE)
+                span.set_attribute("uipath.custom_instrumentation", True)
+                if uipath_trace_id:
+                    span.set_attribute("uipath.trace_id", uipath_trace_id)
+
+                # UiPath context
+                for key, value in self._get_uipath_context().items():
+                    span.set_attribute(key, value)
+
+                # Governance attributes
+                span.set_attribute("governance.rule_id", rule_id)
+                span.set_attribute("governance.rule_name", data.get("rule_name", ""))
+                span.set_attribute("governance.pack_name", data.get("pack_name", ""))
+                span.set_attribute("governance.hook", event.hook or "unknown")
+                span.set_attribute("governance.matched", data.get("matched", False))
+                span.set_attribute("governance.action", data.get("action", "allow"))
+                span.set_attribute("governance.status", data.get("status", "PASS"))
+                span.set_attribute("governance.agent_name", event.agent_name)
+
+                detail = data.get("detail", "")
+                if detail:
+                    span.set_attribute("governance.detail", str(detail)[:500])
+
+                # Severity for matched non-allow rules is carried by the
+                # platform-standard ``verbosityLevel`` span field (UiPath
+                # Orchestrator log levels: 3=Warning, 4=Error). Default
+                # platform verbosity is 2 (Information), so we only set
+                # this attribute when there's a violation worth flagging.
+                #
+                # - Audit mode (and any audit-action rule even in
+                #   enforce mode): runtime did NOT block the agent →
+                #   verbosityLevel=3 (Warning), Status stays UNSET. The
+                #   agent's span shouldn't be marked failed just because
+                #   an advisory rule fired.
+                # - Enforce mode + deny / escalate: runtime actually
+                #   blocked → verbosityLevel=4 (Error) + Status.ERROR.
+                #   The agent span genuinely failed.
+                action_str = data.get("action", "allow").lower()
+                if data.get("matched") and action_str != "allow":
+                    from uipath.runtime.governance.config import (
+                        EnforcementMode,
+                        get_enforcement_mode,
+                    )
+
+                    mode = get_enforcement_mode()
+                    will_block = (
+                        mode == EnforcementMode.ENFORCE
+                        and action_str in {"deny", "escalate"}
+                    )
+                    span.set_attribute("verbosityLevel", 4 if will_block else 3)
+                    if will_block:
+                        try:
+                            from opentelemetry.trace import Status, StatusCode
+
+                            span.set_status(
+                                Status(
+                                    StatusCode.ERROR,
+                                    f"Policy violation: "
+                                    f"{data.get('rule_name', rule_id)} "
+                                    f"(action={action_str})",
+                                )
+                            )
+                        except ImportError:
+                            pass
+
+                self._spans_created += 1
+
+        except Exception as e:
+            logger.warning("Failed to create governance span: %s", e)
+
+    @property
+    def spans_created(self) -> int:
+        """Number of spans created."""
+        return self._spans_created
diff --git a/src/uipath/runtime/governance/delegation_guard.py b/src/uipath/runtime/governance/delegation_guard.py
new file mode 100644
index 0000000..18a4aa5
--- /dev/null
+++ b/src/uipath/runtime/governance/delegation_guard.py
@@ -0,0 +1,263 @@
+"""Delegation depth guard.
+
+Patches an agent's ``invoke`` method to track recursion depth and raise
+a ``GovernanceBlockException`` when the configured maximum is exceeded.
+This prevents runaway sub-agent chains.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import functools
+import logging
+import os
+from contextvars import ContextVar, Token
+from typing import Any
+
+from uipath.core.governance.exceptions import (
+    GovernanceBlockException,
+    GovernanceViolation,
+)
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_MAX_DELEGATION_DEPTH = 25
+_ENV_MAX_DELEGATION_DEPTH = "UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH"
+
+# Single module-level ContextVar holding per-agent delegation depths
+# keyed by ``id(agent)``. Each install / uninstall pair shares this one
+# ContextVar instead of allocating a new one per agent — Context objects
+# hold strong references to ContextVars, so ones created per agent are
+# not reliably garbage collected and every `install_delegation_guard`
+# call would leak one in a long-running host.
+#
+# Per-context isolation (asyncio task / thread) still works the standard
+# ContextVar way: each context sees its own copy of the depths dict, and
+# nested invokes use ``set`` / ``reset`` for LIFO depth tracking. The
+# dict itself is copied on every increment (copy-on-write) so concurrent
+# contexts don't share state through a mutable mapping.
+_DELEGATION_DEPTHS: ContextVar[dict[int, int]] = ContextVar(
+    "_uipath_delegation_depths"
+)
+
+
+def _current_depth(agent_key: int) -> int:
+    """Look up how deep ``agent_key`` currently is in this context (0 if unseen)."""
+    # ``get`` with a default stands in for the unset-ContextVar case.
+    # The throwaway dict is never stored, so no state is shared.
+    depths = _DELEGATION_DEPTHS.get({})
+    return depths.get(agent_key, 0)
+
+
+def _enter_depth_if_under(
+    agent_key: int, max_depth: int
+) -> tuple[int, Token[dict[int, int]] | None]:
+    """Try to go one level deeper for ``agent_key``.
+
+    Gives back ``(candidate_depth, token)``. ``token`` is ``None`` when
+    the candidate depth is above ``max_depth``; nothing was stored in
+    that case, so the caller just raises. Otherwise the caller has to
+    hand ``token`` to :func:`_exit_depth` when the invoke finishes.
+    """
+    snapshot = _DELEGATION_DEPTHS.get({})
+    candidate = snapshot.get(agent_key, 0) + 1
+    if candidate > max_depth:
+        # Over the limit: leave the ContextVar untouched.
+        return candidate, None
+    # Store a fresh dict rather than mutating the current one, so the
+    # previous value is what ``reset(token)`` restores.
+    updated = {**snapshot, agent_key: candidate}
+    token = _DELEGATION_DEPTHS.set(updated)
+    return candidate, token
+
+
+def _exit_depth(token: Token[dict[int, int]]) -> None:
+    """Roll back one successful :func:`_enter_depth_if_under` call.
+
+    A reset that fails because the token belongs to another context
+    (a child task awaited the agent invoke) is logged at debug level
+    and otherwise ignored, so dispose never crashes the agent.
+    """
+    try:
+        _DELEGATION_DEPTHS.reset(token)
+    except (LookupError, ValueError):
+        logger.debug("Delegation depth reset from foreign context")
+
+
+def _resolve_max_depth() -> int:
+    """Read max-depth from env at install time; invalid values use the default.
+
+    Invalid means non-integer or below 1 (that would block every invoke).
+    Resolved once per install: later env changes don't affect wrapped agents.
+    """
+    default = _DEFAULT_MAX_DELEGATION_DEPTH
+    raw = os.getenv(_ENV_MAX_DELEGATION_DEPTH)
+    if raw is None:
+        return default
+    try:
+        parsed = int(raw)
+    except ValueError:
+        parsed = 0  # not a number: fails the range check below
+    if parsed < 1:
+        logger.warning(
+            "Invalid %s=%r; using default %d", _ENV_MAX_DELEGATION_DEPTH, raw, default
+        )
+        return default
+    return parsed
+
+
+def _build_violation(current: int, resolved_max: int) -> GovernanceBlockException:
+    """Create the ASI-02 depth-exceeded exception for the sync and async guards."""
+    message = f"Delegation depth {current} exceeds max {resolved_max}"
+    violation = GovernanceViolation(
+        rule_id="ASI-02",
+        rule_name="Excessive Agency",
+        detail=message,
+    )
+    return GovernanceBlockException.from_violation(violation)
+
+
+def _wrap_invoke(original: Any, agent_key: int, resolved_max: int) -> Any:
+    """Return a depth-guarded wrapper matching the sync/async shape of ``original``.
+
+    Coroutine functions get an ``async def`` wrapper so the returned object
+    is itself an awaitable — wrapping with a sync function would return an
+    un-awaited coroutine and silently bypass the guard entirely.
+
+    Wrappers forward ``*args`` / ``**kwargs`` unchanged so any call style
+    the original accepts (e.g. ``invoke(input, config)``) keeps working.
+    Depth lives in the module-level :data:`_DELEGATION_DEPTHS` ContextVar
+    keyed by ``agent_key`` (``id(agent)``), shared by every guarded agent.
+    """
+    if asyncio.iscoroutinefunction(original):
+
+        @functools.wraps(original)
+        async def _guarded_async(*args: Any, **kwargs: Any) -> Any:
+            current, token = _enter_depth_if_under(agent_key, resolved_max)
+            if token is None:
+                raise _build_violation(current, resolved_max)
+            try:
+                return await original(*args, **kwargs)
+            finally:
+                _exit_depth(token)
+
+        return _guarded_async
+
+    @functools.wraps(original)
+    def _guarded_sync(*args: Any, **kwargs: Any) -> Any:
+        current, token = _enter_depth_if_under(agent_key, resolved_max)
+        if token is None:
+            raise _build_violation(current, resolved_max)
+        try:
+            return original(*args, **kwargs)
+        finally:
+            _exit_depth(token)
+
+    return _guarded_sync
+
+
+# Method names we guard on the agent. ``ainvoke`` is required because
+# LangChain / LangGraph / LlamaIndex agents expose it as the primary
+# async entrypoint; wrapping only ``invoke`` would let async callers
+# bypass the depth check entirely. A single ContextVar is shared across
+# both so an async call that internally falls through to sync ``invoke``
+# still increments the same counter.
+_GUARDED_METHODS = ("invoke", "ainvoke")
+
+
+def install_delegation_guard(agent: Any, max_depth: int | None = None) -> None:
+    """Patch the agent's invoke methods to enforce a maximum delegation depth.
+
+    Patches both ``invoke`` and ``ainvoke`` when present; each wrapper
+    matches the sync/async shape of the original so awaitables stay
+    awaitable. No-op when neither attribute exists or the agent has
+    already been guarded. A method that cannot be re-assigned is skipped.
+
+    Per-call-chain depth is tracked in a single :class:`contextvars.ContextVar`
+    shared across both methods so an ``ainvoke`` that internally calls
+    ``invoke`` still increments the same counter. Concurrent invokes on
+    the same agent (across threads or asyncio tasks) keep separate
+    counters because ContextVar values are per-context.
+
+    Originals are stashed on the agent under
+    ``_uipath_original_<method>`` so :func:`uninstall_delegation_guard`
+    can restore them on dispose.
+    """
+    if getattr(agent, "_delegation_wrapped", False):
+        return
+    if max_depth is None:
+        max_depth = _resolve_max_depth()
+
+    originals = {
+        name: getattr(agent, name, None)
+        for name in _GUARDED_METHODS
+        if callable(getattr(agent, name, None))
+    }
+    if not originals:
+        return
+
+    agent_key = id(agent)
+    resolved_max = max_depth
+
+    patched: list[str] = []
+    for name, original in originals.items():
+        try:
+            setattr(agent, name, _wrap_invoke(original, agent_key, resolved_max))
+            setattr(agent, f"_uipath_original_{name}", original)
+            patched.append(name)
+        except (AttributeError, TypeError, ValueError) as exc:
+            # `invoke` may come from __getattr__, a slot/descriptor, or e.g. a
+            # pydantic model (ValueError on non-field names) and so can't be
+            # re-assigned. Skip — partial coverage beats crashing the runtime.
+            logger.debug("Could not patch %s on agent: %s", name, exc)
+
+    if not patched:
+        # Nothing was actually wrapped — don't mark the agent as guarded,
+        # or a later retry / uninstall would wrongly assume methods were
+        # patched.
+        logger.debug("Delegation guard patched no methods; leaving agent unguarded")
+        return
+
+    agent._delegation_wrapped = True
+    logger.debug(
+        "Delegation guard installed (max=%d, methods=%s)",
+        resolved_max,
+        patched,
+    )
+
+
+def uninstall_delegation_guard(agent: Any) -> None:
+    """Undo :func:`install_delegation_guard` for ``agent``.
+
+    Does nothing for agents that are not marked as guarded. Each stashed
+    original is put back and its stash attribute removed, then the agent's
+    entry is dropped from this context's depth map — ``id(agent)`` can be
+    reused after GC, so a leftover entry could be charged to a new agent.
+    """
+    if not getattr(agent, "_delegation_wrapped", False):
+        return
+    # Put back whatever install stashed, then drop the stash attributes.
+    for method_name in _GUARDED_METHODS:
+        stash_attr = f"_uipath_original_{method_name}"
+        saved = getattr(agent, stash_attr, None)
+        if saved is not None:
+            try:
+                setattr(agent, method_name, saved)
+            except Exception as exc:  # noqa: BLE001 - dispose path; never raise
+                logger.debug("Could not restore original %s: %s", method_name, exc)
+        try:
+            delattr(agent, stash_attr)
+        except AttributeError:
+            pass
+    agent._delegation_wrapped = False
+    # Best-effort cleanup of the depth entry in the current context.
+    # If dispose runs from a different context than the one where the
+    # depth was set, that foreign context still owns its own copy and
+    # discards it when it ends.
+    key = id(agent)
+    current = _DELEGATION_DEPTHS.get(None)
+    if current is None or key not in current:
+        return
+    # Store a new dict rather than mutating the existing one.
+    remaining = {k: v for k, v in current.items() if k != key}
+    _DELEGATION_DEPTHS.set(remaining)
diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py
new file mode 100644
index 0000000..c7671b6
--- /dev/null
+++ b/src/uipath/runtime/governance/native/__init__.py
@@ -0,0 +1,51 @@
+"""Native UiPath governance policy evaluator.
+
+YAML-defined rules evaluated in-process at each agent lifecycle hook.
+Reads policies from the UiPath governance backend
+(``GET /api/v1/policy``) at startup and runs the deterministic
+detectors backing ISO 42001 controls.
+
+This subpackage owns:
+
+- :class:`GovernanceEvaluator` – the evaluator implementation.
+- The native policy model: :class:`Rule`, :class:`Check`,
+  :class:`Condition`, :class:`PolicyIndex`.
+- Policy fetch + YAML compilation plumbing.
+
+Shared output types (``Action``, ``AuditRecord``, …) live in
+:mod:`uipath.core.governance`.
+"""
+
+from .evaluator import GovernanceEvaluator
+from .loader import (
+    get_policy_index,
+    load_policy_index,
+    prefetch_policy_index,
+    reset_policy_index,
+)
+from .models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+    Severity,
+)
+
+__all__ = [
+    "GovernanceEvaluator",
+    # Loader
+    "get_policy_index",
+    "load_policy_index",
+    "prefetch_policy_index",
+    "reset_policy_index",
+    # Native policy model
+    "Check",
+    "CheckContext",
+    "Condition",
+    "PolicyIndex",
+    "PolicyPack",
+    "Rule",
+    "Severity",
+]
diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py
new file mode 100644
index 0000000..80f8394
--- /dev/null
+++ b/src/uipath/runtime/governance/native/evaluator.py
@@ -0,0 +1,1083 @@
+"""Governance rule evaluator."""
+
+from __future__ import annotations
+
+import logging
+import math
+import re
+from collections import Counter
+from datetime import datetime, timezone
+from functools import lru_cache
+from typing import Any
+
+from uipath.core.governance.exceptions import GovernanceBlockException
+from uipath.core.governance.models import (
+    Action,
+    AuditRecord,
+    LifecycleHook,
+    RuleEvaluation,
+)
+
+from uipath.runtime.governance.audit import get_audit_manager
+from uipath.runtime.governance.config import EnforcementMode, get_enforcement_mode
+from uipath.runtime.governance.native.guardrail_compensation import (
+    disabled_guardrails,
+    submit_compensation,
+)
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    Rule,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _compensation_data_for_hook(context: CheckContext) -> dict[str, Any]:
+    """Assemble the ``data`` payload for the /runtime/govern compensating call.
+
+    The server runs its guardrail check on the content the evaluator was
+    looking at, so only the :class:`CheckContext` fields that belong to
+    the active hook are forwarded. Everything else is left out to keep
+    the payload small.
+    """
+    hook = context.hook
+    if hook == LifecycleHook.BEFORE_AGENT:
+        return {"content": context.agent_input}
+    if hook == LifecycleHook.AFTER_AGENT:
+        return {"content": context.agent_output}
+    if hook == LifecycleHook.BEFORE_MODEL:
+        # Messages ride along only when there are any.
+        extra = {"messages": context.messages} if context.messages else {}
+        return {"content": context.model_input, **extra}
+    if hook == LifecycleHook.AFTER_MODEL:
+        return {"content": context.model_output}
+    if hook == LifecycleHook.TOOL_CALL:
+        return {"tool_name": context.tool_name, "tool_args": context.tool_args}
+    if hook == LifecycleHook.AFTER_TOOL:
+        return {"tool_name": context.tool_name, "tool_result": context.tool_result}
+    # Any other hook (memory writes, unknown values): send empty content
+    # so the server still receives a structurally-valid payload.
+    return {"content": ""}
+
+
+@lru_cache(maxsize=256)
+def _compile_regex(pattern: str) -> re.Pattern[str] | None:
+    """Compile ``pattern`` and memoise the outcome (``None`` included).
+
+    Args:
+        pattern: Regular-expression source text.
+
+    Returns:
+        The compiled pattern, or ``None`` after a warning if it is invalid.
+    """
+    try:
+        return re.compile(pattern)
+    except re.error as err:
+        logger.warning("Invalid regex pattern '%s': %s", pattern, err)
+    return None
+
+
+# --- vaderSentiment: lazy-imported singleton ---
+# Hard dependency, but lazy-loaded to keep import-time cost off the
+# critical path. The except branch is defence against a corrupted
+# install (file present in METADATA but module unimportable) — the
+# operator no-ops rather than crashing the agent.
+_VADER_UNINITIALIZED = object()
+_vader_analyzer: Any = _VADER_UNINITIALIZED
+
+
+def _get_vader_analyzer() -> Any:
+    """Return the shared SentimentIntensityAnalyzer, or None if import fails."""
+    global _vader_analyzer
+    if _vader_analyzer is not _VADER_UNINITIALIZED:
+        return _vader_analyzer  # resolved earlier: an analyzer or None
+    try:
+        from vaderSentiment.vaderSentiment import (  # type: ignore[import-untyped]
+            SentimentIntensityAnalyzer,
+        )
+        _vader_analyzer = SentimentIntensityAnalyzer()
+    except ImportError:
+        logger.error(
+            "vaderSentiment failed to import despite being a hard dependency; "
+            "sentiment_concern checks will not fire. Reinstall uipath-core."
+        )
+        _vader_analyzer = None
+    return _vader_analyzer
+
+
+# --- chardet: lazy-imported module for encoding integrity (A.7.4) ---
+# Hard dependency, lazy-loaded for symmetry with the other library
+# wrappers. The except branch covers corrupted installs only.
+_CHARDET_UNINITIALIZED = object()
+_chardet_module: Any = _CHARDET_UNINITIALIZED
+
+
+def _get_chardet() -> Any:
+    """Return the chardet module (imported on first use), or None if import fails."""
+    global _chardet_module
+    if _chardet_module is not _CHARDET_UNINITIALIZED:
+        return _chardet_module  # resolved earlier: the module or None
+    try:
+        import chardet as found
+    except ImportError:
+        logger.error(
+            "chardet failed to import despite being a hard dependency; "
+            "encoding_concern confidence check will not fire (stdlib "
+            "signals still apply). Reinstall uipath-core."
+        )
+        found = None  # type: ignore[assignment]
+    _chardet_module = found
+    return _chardet_module
+
+
+# --- Static patterns for encoding_concern (A.7.4) ---
+# Latin-1-as-UTF-8 mojibake bigrams — the visible artefacts when
+# UTF-8-encoded text is re-decoded as Latin-1 / Windows-1252.
+_MOJIBAKE_BIGRAMS: tuple[str, ...] = (
+    "Ã©",
+    "Ã¨",
+    "Ã¢",
+    "Ã ",
+    "Ã¹",
+    "Ã®",
+    "Ã´",
+    "Ã§",  # accented vowels
+    "Ã„",
+    "Ã–",
+    "Ãœ",
+    "ÃŸ",  # German umlauts / eszett
+    "â€™",
+    "â€œ",
+    "â€\x9d",
+    "â€“",
+    "â€”",
+    "â€¢",  # smart quotes / dashes
+    "Â£",
+    "Â°",
+    "Â§",
+    "Â¶",
+    "Â©",
+    "Â®",  # NBSP-leading symbols
+    "ï¿",
+    "¿½",  # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1)
+    "ï»",
+    "»¿",  # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1)
+)
+
+# Literal hex escape sequences ("\x80" as 4 source chars) indicate raw
+# bytes leaked through a string layer rather than being decoded.
+_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}")
+
+
+# --- Static patterns for incident_concern (A.8.4) ---
+# Stdlib-only categorical taxonomy. Mirrors sentry-sdk's incident shape
+# (categorical types over stack/status), but for string payloads from model
+# output / tool result, not exception objects. Matching ignores case.
+_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = {
+    "safety_refusal": [
+        re.compile(
+            r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)"
+            r"\s+(?:help|assist|provide|answer|do\s+that))\b"
+        ),
+        re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"),
+        re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"),
+    ],
+    "tool_failure": [
+        re.compile(
+            r"(?i)\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)"
+        ),
+        re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"),
+        re.compile(r"(?i)\b(timed?\s*out|timeout)\b"),
+    ],
+    "auth_failure": [
+        re.compile(r"(?i)\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"),
+        re.compile(
+            r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b"
+        ),
+    ],
+    "quota_exceeded": [
+        re.compile(r"\b(429)\b"),
+        re.compile(
+            r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b"
+        ),
+    ],
+    "hallucination": [
+        re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"),
+        re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"),
+    ],
+}
+
+# --- Static patterns for commitment_concern (A.10.4) ---
+# Commitment-language signals. The verb pattern covers both first-person
+# promise verbs ("we will refund") and formal-business commitment markers
+# common in proposal / SOW outputs ("Cost: $X", "fixed scope",
+# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and
+# deadline signals combine via OR semantics — see
+# :meth:`_check_commitment_concern`.
+_COMMITMENT_VERB_PATTERN = re.compile(
+    r"(?i)("
+    # First-person promise / liability verbs
+    r"\brefund\b|\breimburse\b|"
+    r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|"
+    r"\bsla\b|"
+    r"\bwaive[d]?\b|"
+    r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|"
+    r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|"
+    r"handover|hand\s+over|ship)\b|"
+    # Proposal / SOW commitment markers
+    r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|"
+    r"\bcost\s*:\s*\$?\d|"
+    r"\bquote\s*:\s*\$?\d|"
+    r"\bdeliverables?\b|"
+    r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|"
+    r"\bI\s+propose\b"
+    r")"
+)
+# Currency-anchored amount detection. Requires a currency marker adjacent
+# to the number so URL fragments (e.g. ``/667851``) don't false-positive.
+# Covers symbol-then-number ($780) and number-then-code (780 USD).
+# NOTE: unlike the verb / deadline patterns there is no ``(?i)`` here, so
+# codes and words match only in the listed casing (``780 usd`` does not).
+#
+# Bare percentages (``75%``, ``99.9%``) are deliberately NOT matched:
+# they fire on benign status text ("75% complete") under OR semantics.
+# They only fire via a verb-pattern term ("refund 100%" hits "refund").
+_COMMITMENT_AMOUNT_FALLBACK = re.compile(
+    r"(?:\$|€|£|¥|₹|USD|EUR|GBP|JPY|INR)\s*\d[\d,]*(?:\.\d+)?"
+    r"|\b\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|JPY|INR|"
+    r"dollars?|euros?|pounds?|yen|rupees?)\b"
+)
+_COMMITMENT_DEADLINE_PATTERN = re.compile(
+    r"(?i)\bwithin\s+\d+\s*(?:second|minute|hour|day|week|month|year)s?\b"
+    r"|\bby\s+(?:tomorrow|next\s+\w+|\d+/\d+(?:/\d+)?)\b"
+)
+
+
+class GovernanceEvaluator:
+    """Evaluate governance rules against check contexts in one of three modes.
+
+    - DISABLED: short-circuit; no rules evaluated, no audit emitted
+    - AUDIT: log all violations but never block (DENY/ESCALATE become AUDIT)
+    - ENFORCE: actions pass through unchanged; a final DENY raises (blocks)
+
+    Without an explicit mode, ``get_enforcement_mode()`` decides (AUDIT?).
+    """
+
+    def __init__(
+        self,
+        policy_index: PolicyIndex,
+        mode: EnforcementMode | None = None,
+    ) -> None:
+        """Store the policy index and an optional mode override (None = config)."""
+        self._policy_index = policy_index
+        self._mode = mode
+
+    @property
+    def policy_index(self) -> PolicyIndex:
+        """Return the policy index given at construction (read-only property)."""
+        return self._policy_index
+
+    @property
+    def mode(self) -> EnforcementMode:
+        """Return the explicit mode override, else the configured default."""
+        override = self._mode
+        # No caching: the config default is looked up on every access.
+        return get_enforcement_mode() if override is None else override
+
+    @mode.setter
+    def mode(self, value: EnforcementMode) -> None:
+        """Pin the enforcement mode; it then wins over the configured default."""
+        self._mode = value
+
+    def is_audit_mode(self) -> bool:
+        """Return True when the effective mode is AUDIT (log-only, never blocks)."""
+        return self.mode == EnforcementMode.AUDIT
+
+    def is_enforce_mode(self) -> bool:
+        """Return True when the effective mode is ENFORCE (a final DENY blocks)."""
+        return self.mode == EnforcementMode.ENFORCE
+
+    def evaluate(self, context: CheckContext) -> AuditRecord:
+        """Evaluate rules registered for ``context.hook`` against the context.
+
+        Only rules whose ``hook`` field matches the current lifecycle hook
+        are evaluated — a ``tool_call`` rule does not fire on
+        ``before_model``, and vice versa. This avoids running checks
+        against fields the context cannot provide and keeps the audit
+        stream scoped to the active phase.
+
+        The final action depends on the enforcement mode:
+        - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted.
+        - AUDIT mode: DENY / ESCALATE are downgraded to AUDIT (log only, don't block)
+        - ENFORCE mode: DENY rules result in DENY action AND a
+          :class:`GovernanceBlockException` is raised.
+
+        Audit events (per-rule + hook summary) are emitted via the global
+        :func:`get_audit_manager`, and compensation is dispatched, BEFORE
+        any block exception is raised — callers need not emit anything.
+
+        Args:
+            context: The check context with hook and content
+
+        Returns:
+            AuditRecord with all evaluations and final action.
+
+        Raises:
+            GovernanceBlockException: In ENFORCE mode when a DENY rule matches.
+        """
+        mode = self.mode
+        if mode == EnforcementMode.DISABLED:
+            return AuditRecord(
+                timestamp=datetime.now(timezone.utc),
+                agent_name=context.agent_name,
+                runtime_id=context.runtime_id,
+                trace_id=context.trace_id,
+                hook=context.hook,
+                evaluations=[],
+                final_action=Action.ALLOW,
+                metadata={**context.metadata, "enforcement_mode": mode.value},
+            )
+
+        rules = self._policy_index.get_rules_for_hook(context.hook)
+
+        evaluations: list[RuleEvaluation] = []
+        raw_action = Action.ALLOW  # The action before mode adjustment
+        deny_would_fire = False  # Track if DENY would have fired
+
+        for rule in rules:
+            if not rule.enabled:
+                continue
+
+            evaluation = self._evaluate_rule(rule, context)
+            evaluations.append(evaluation)
+
+            if evaluation.matched:
+                # Take the most restrictive action. Use evaluation.action
+                # (which already folds in per-check overrides), not
+                # rule.action, so check-level overrides are honored here too.
+                eval_action = evaluation.action
+                if eval_action == Action.DENY:
+                    raw_action = Action.DENY
+                    deny_would_fire = True
+                elif eval_action == Action.ESCALATE and raw_action != Action.DENY:
+                    raw_action = Action.ESCALATE
+                elif eval_action == Action.AUDIT and raw_action == Action.ALLOW:
+                    raw_action = Action.AUDIT
+
+        # Apply enforcement mode (AUDIT downgrades DENY / ESCALATE to AUDIT)
+        final_action = self._apply_enforcement_mode(raw_action)
+
+        # Copy caller metadata, then stamp the mode (and the would-deny flag)
+        record_metadata = dict(context.metadata)
+        record_metadata["enforcement_mode"] = mode.value
+        if deny_would_fire and self.is_audit_mode():
+            record_metadata["audit_mode_would_deny"] = True
+
+        audit = AuditRecord(
+            timestamp=datetime.now(timezone.utc),
+            agent_name=context.agent_name,
+            runtime_id=context.runtime_id,
+            trace_id=context.trace_id,
+            hook=context.hook,
+            evaluations=evaluations,
+            final_action=final_action,
+            metadata=record_metadata,
+        )
+
+        self._emit_audit(audit, mode)
+
+        # For any guardrail mapped to UiPath but currently disabled, hand
+        # the disabled guardrails to the governance-server's
+        # /runtime/govern endpoint. The SERVER runs the guardrail check
+        # AND writes the trace (the payload carries trace id / src_timestamp
+        # / hook / agent so it can correlate) — the agent does NOT emit a
+        # trace itself, to avoid double-writing. Submitted fire-and-forget
+        # via submit_compensation(); dispatch errors are logged, not raised,
+        # so a slow or unreachable endpoint never blocks the agent.
+        self._dispatch_compensation(audit, context)
+
+        if final_action == Action.DENY:
+            raise GovernanceBlockException.from_audit_record(audit)
+
+        return audit
+
+    def _dispatch_compensation(
+        self, audit: AuditRecord, context: CheckContext
+    ) -> None:
+        """Queue a compensating governance call for disabled guardrails.
+
+        Asks ``disabled_guardrails`` which rules of this audit record need
+        compensation, logs a one-line summary, and forwards them to
+        ``submit_compensation`` with the hook payload and the correlation
+        fields (trace id, source timestamp, agent name, runtime id).
+
+        Fail-open: any exception raised along the way is logged at WARNING
+        level and swallowed, so dispatch problems never reach the caller.
+        """
+        try:
+            fallback_rules = disabled_guardrails(audit, self._policy_index)
+            if not fallback_rules:
+                # Nothing to compensate for on this hook.
+                return
+
+            validator_names = [entry["validator"] for entry in fallback_rules]
+            hook_value = audit.hook.value
+
+            # One summary line per dispatch: how many rules triggered the
+            # call and which validators they map to, so an operator can
+            # see the volume + breakdown at a glance.
+            logger.info(
+                "Compensating governance triggered: hook=%s, count=%d, validators=[%s]",
+                hook_value,
+                len(fallback_rules),
+                ", ".join(validator_names),
+            )
+
+            submit_compensation(
+                rules=fallback_rules,
+                data=_compensation_data_for_hook(context),
+                hook=hook_value,
+                trace_id=audit.trace_id,
+                src_timestamp=audit.timestamp.isoformat(),
+                agent_name=audit.agent_name,
+                runtime_id=audit.runtime_id,
+            )
+        except Exception as exc:  # noqa: BLE001 - fail-open
+            logger.warning("Failed to dispatch compensating governance call: %s", exc)
+
+    def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None:
+        """Send per-rule events and one hook summary to the audit manager.
+
+        If the global manager cannot be obtained, the failure is logged at
+        DEBUG level and emission is skipped. Sink-level circuit breaking is
+        handled inside :class:`AuditManager`.
+
+        Rules carrying a ``guardrail_fallback`` condition are excluded from
+        both the per-rule events and the summary counts: the governance-server
+        traces them itself (see :meth:`_dispatch_compensation`).
+        """
+        try:
+            audit_manager = get_audit_manager()
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.debug("Audit manager unavailable; skipping emission: %s", exc)
+            return
+
+        hook_label = audit.hook.name
+        reportable = [
+            item
+            for item in audit.evaluations
+            if not self._is_guardrail_fallback_rule(item.rule_id)
+        ]
+
+        for item in reportable:
+            # Unmatched rules are always reported with the "allow" action.
+            reported_action = item.action.value if item.matched else "allow"
+            audit_manager.emit_rule_evaluation(
+                rule_id=item.rule_id,
+                rule_name=item.rule_name,
+                pack_name=item.pack_name,
+                hook=hook_label,
+                matched=item.matched,
+                action=reported_action,
+                detail=item.detail,
+                agent_name=audit.agent_name,
+                trace_id=audit.trace_id,
+                description=item.description,
+            )
+
+        audit_manager.emit_hook_summary(
+            hook=hook_label,
+            agent_name=audit.agent_name,
+            total_rules=len(reportable),
+            matched_rules=len([item for item in reportable if item.matched]),
+            final_action=audit.final_action.value,
+            trace_id=audit.trace_id,
+            enforcement_mode=mode.value,
+        )
+
+    def _is_guardrail_fallback_rule(self, rule_id: str) -> bool:
+        """Tell whether ``rule_id`` names a ``guardrail_fallback`` rule.
+
+        A rule qualifies when any condition of any of its checks uses the
+        ``guardrail_fallback`` operator. Unknown rule ids yield False.
+        The governance-server traces such rules after the
+        ``/runtime/govern`` POST, so no Python-side trace is emitted.
+        """
+        rule = self._policy_index.get_rule(rule_id)
+        if rule is None:
+            return False
+        return any(
+            condition.operator == "guardrail_fallback"
+            for check in rule.checks
+            for condition in check.conditions
+        )
+
+    def _apply_enforcement_mode(self, raw_action: Action) -> Action:
+        """Translate the aggregated action into the mode-adjusted final action.
+
+        AUDIT mode is log-only, so the two actions that would intervene are
+        downgraded:
+        - DENY -> AUDIT
+        - ESCALATE -> AUDIT
+        - AUDIT and ALLOW are returned as-is
+
+        In every other mode the action is returned unchanged.
+        """
+        intervening = (Action.DENY, Action.ESCALATE)
+        if self.mode == EnforcementMode.AUDIT and raw_action in intervening:
+            return Action.AUDIT
+        # Not AUDIT mode, or nothing to downgrade.
+        return raw_action
+
+    def evaluate_before_agent(
+        self,
+        agent_input: str,
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        model_name: str = "",
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate BEFORE_AGENT rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.BEFORE_AGENT,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            agent_input=agent_input,
+            model_name=model_name,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_agent(
+        self,
+        agent_output: str,
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_AGENT rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_AGENT,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            agent_output=agent_output,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_before_model(
+        self,
+        model_input: str,
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        messages: list[dict[str, Any]] | None = None,
+        model_name: str = "",
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate BEFORE_MODEL rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.BEFORE_MODEL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            model_input=model_input,
+            model_name=model_name,
+            messages=messages or [],
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_model(
+        self,
+        model_output: str,
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_MODEL rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_MODEL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            model_output=model_output,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_tool_call(
+        self,
+        tool_name: str,
+        tool_args: dict[str, Any],
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        session_state: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate TOOL_CALL rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.TOOL_CALL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            tool_name=tool_name,
+            tool_args=tool_args,
+            session_state=session_state or {},
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def evaluate_after_tool(
+        self,
+        tool_name: str,
+        tool_result: str,
+        agent_name: str,
+        runtime_id: str,
+        trace_id: str,
+        **kwargs: Any,
+    ) -> AuditRecord:
+        """Evaluate AFTER_TOOL rules (kwargs: only ``metadata``; None -> {})."""
+        context = CheckContext(
+            hook=LifecycleHook.AFTER_TOOL,
+            agent_name=agent_name,
+            runtime_id=runtime_id,
+            trace_id=trace_id,
+            tool_name=tool_name,
+            tool_result=tool_result,
+            metadata=kwargs.get("metadata") or {},
+        )
+        return self.evaluate(context)
+
+    def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation:
+        """Run every check of ``rule`` and fold the results into one evaluation.
+
+        A rule without checks always matches and reports ``rule.action``.
+        Otherwise the rule matches when at least one check matches, and its
+        action is the most restrictive action among the MATCHED checks
+        (DENY > ESCALATE > AUDIT > ALLOW), which is how per-check ``action``
+        overrides take effect. ``detail`` carries the first matched check's
+        message, falling back to the first check's detail (an empty string
+        when nothing matched).
+
+        Args:
+            rule: The rule whose checks are evaluated.
+            context: The check context the conditions read from.
+        """
+        if not rule.checks:
+            # No checks = always matches (for audit-only rules)
+            return RuleEvaluation(
+                rule_id=rule.rule_id,
+                rule_name=rule.name,
+                matched=True,
+                detail="Rule has no conditions (always matches)",
+                pack_name=rule.pack_name,
+                action=rule.action,
+                description=rule.description,
+            )
+
+        # Least to most restrictive; a matched check only ever moves the
+        # winning action towards the end of this tuple.
+        severity = (Action.ALLOW, Action.AUDIT, Action.ESCALATE, Action.DENY)
+        winning_action = Action.ALLOW
+        outcomes: list[dict[str, Any]] = []
+        matched_details: list[str] = []
+
+        for check in rule.checks:
+            hit, message = self._evaluate_check(check, context)
+            # Record every check, matched or not, for check_results.
+            outcomes.append(
+                {"matched": hit, "detail": message, "action": check.action.value}
+            )
+            if not hit:
+                continue
+            matched_details.append(message)
+            # Actions outside the ladder never change the winner.
+            if check.action in severity:
+                rank = severity.index(check.action)
+                if rank > severity.index(winning_action):
+                    winning_action = severity[rank]
+
+        # Surface the FIRST matched check's message; otherwise fall back to
+        # the first check's detail ("" when nothing matched).
+        if matched_details:
+            surfaced_detail = matched_details[0]
+        else:
+            surfaced_detail = outcomes[0]["detail"] if outcomes else ""
+
+        return RuleEvaluation(
+            rule_id=rule.rule_id,
+            rule_name=rule.name,
+            matched=bool(matched_details),
+            detail=surfaced_detail,
+            pack_name=rule.pack_name,
+            action=winning_action,
+            description=rule.description,
+            check_results=outcomes,
+        )
+
+    def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]:
+        """Evaluate one check and return ``(matched, detail)``.
+
+        A check without conditions always matches. Otherwise every condition
+        is evaluated (no short-circuiting) and combined with ``any`` when
+        ``check.logic`` is "any", else ``all``. ``detail`` is the check's
+        message on a match and an empty string otherwise.
+        """
+        if not check.conditions:
+            return True, "No conditions (always matches)"
+
+        verdicts = [
+            self._evaluate_condition(condition, context)
+            for condition in check.conditions
+        ]
+        matched = any(verdicts) if check.logic == "any" else all(verdicts)
+        return matched, (check.message if matched else "")
+
+    def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool:
+        """Resolve the condition's field, apply its operator, honour ``negate``."""
+        resolved = self._get_field_value(condition.field, context)
+        outcome = self._apply_operator(condition.operator, resolved, condition.value)
+
+        # ``negate`` flips the operator verdict; otherwise it passes through.
+        if condition.negate:
+            return not outcome
+        return outcome
+
+    def _get_field_value(self, field: str, context: CheckContext) -> Any:
+        """Resolve a dotted ``field`` path against ``context`` (None if absent).
+
+        Each segment is looked up as a dict key when the current value is a
+        dict holding that key, otherwise as an attribute. Keys win so that a
+        key such as ``items`` or ``get`` is not shadowed by the dict method.
+        """
+        value: Any = context
+        for part in field.split("."):
+            if isinstance(value, dict) and part in value:
+                value = value[part]
+            elif hasattr(value, part):
+                value = getattr(value, part)
+            else:
+                return None
+        return value
+
+    def _apply_operator(
+        self, operator: str, field_value: Any, check_value: Any
+    ) -> bool:
+        """Apply ``operator`` to ``field_value`` / ``check_value``; return the verdict.
+
+        ``exists`` / ``not_exists`` / ``guardrail_fallback`` are decided even
+        for a None field; every other operator returns False when the field
+        is None. Unknown operators return False (logged at DEBUG).
+        """
+        # Handle existence checks before the None check
+        if operator == "exists":
+            return field_value is not None
+        if operator == "not_exists":
+            return field_value is None
+
+        # guardrail_fallback fires only when the guardrail is mapped to UiPath
+        # but its policy is disabled. Config travels in ``check_value``; the
+        # ``field`` is unused (None), hence the placement before the None guard.
+        if operator == "guardrail_fallback":
+            cfg = check_value if isinstance(check_value, dict) else {}
+            return bool(cfg.get("mapped_to_uipath", False)) and not bool(
+                cfg.get("policy_enabled", True)
+            )
+
+        if field_value is None:
+            return False
+
+        # Numeric operators skip `str(field_value)` (costly for large payloads).
+        if operator in ("gt", "gte", "lt", "lte"):
+            try:
+                lhs = float(field_value)
+                rhs = float(check_value)
+            except (ValueError, TypeError):
+                return False
+            if operator == "gt":
+                return lhs > rhs
+            if operator == "gte":
+                return lhs >= rhs
+            if operator == "lt":
+                return lhs < rhs
+            return lhs <= rhs
+
+        field_str = str(field_value)
+
+        match operator:
+            case "equals" | "eq":
+                return field_str == str(check_value)
+
+            case "not_equals" | "ne":
+                return field_str != str(check_value)
+
+            case "contains":
+                return str(check_value).lower() in field_str.lower()
+
+            case "not_contains":
+                return str(check_value).lower() not in field_str.lower()
+
+            case "regex" | "matches":
+                compiled = _compile_regex(str(check_value))
+                if compiled is None:
+                    return False
+                return bool(compiled.search(field_str))
+
+            case "in_list":
+                # Stringify items like `equals` does, so YAML ints/bools match.
+                if isinstance(check_value, list):
+                    return any(field_str == str(item) for item in check_value)
+                return False
+
+            case "not_in_list":
+                if isinstance(check_value, list):
+                    return all(field_str != str(item) for item in check_value)
+                return True
+
+            case "vader_concern":
+                # VADER compound score <= threshold.
+                # check_value: dict like {"threshold": -0.3} (default -0.3)
+                return self._check_vader_concern(field_str, check_value)
+
+            case "encoding_concern":
+                # chardet-backed encoding integrity check (A.7.4).
+                # check_value: dict with optional `min_confidence` (default 0.5)
+                # and `max_replacement_ratio` (default 0.05).
+                return self._check_encoding_concern(field_str, check_value)
+
+            case "entropy_concern":
+                # Shannon entropy outside expected range (A.7.4).
+                # check_value: dict with optional `min` (default 1.5) and
+                # `max` (default 7.5) bits/byte. Stdlib only.
+                return self._check_entropy_concern(field_str, check_value)
+
+            case "incident_concern":
+                # Categorical incident detection (A.8.4). check_value: dict with
+                # optional `categories` (subset of safety_refusal/tool_failure/
+                # auth_failure/quota_exceeded/hallucination). Default: all.
+                return self._check_incident_concern(field_str, check_value)
+
+            case "commitment_concern":
+                # Customer commitment language detection (A.10.4).
+                # check_value: dict with optional `require_amount` (default
+                # True) / `require_deadline` (default False); OR semantics.
+                return self._check_commitment_concern(field_str, check_value)
+
+            case _:
+                logger.debug("Unknown operator: %s", operator)
+                return False
+
+    @staticmethod
+    def _check_vader_concern(text: str, params: Any) -> bool:
+        """Return True if VADER compound score on `text` is <= threshold.
+
+        Args:
+            text: Text to analyse.
+            params: Either a dict with `threshold` key, or a numeric threshold
+                directly. A missing, None, or non-numeric threshold falls
+                back to the default of -0.3 (clearly-negative).
+
+        Returns:
+            True iff vaderSentiment is available AND compound score <= threshold.
+            Returns False on empty input or if the library is not installed —
+            sentiment checks no-op rather than crash.
+        """
+        if not text or not text.strip():
+            return False
+
+        analyzer = _get_vader_analyzer()
+        if analyzer is None:
+            return False
+
+        raw = params.get("threshold", -0.3) if isinstance(params, dict) else params
+        try:
+            threshold = float(raw)
+        except (TypeError, ValueError):
+            # Bad config must not crash evaluation; use the default instead.
+            threshold = -0.3
+
+        try:
+            compound = float(analyzer.polarity_scores(text)["compound"])
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.debug("VADER analysis failed: %s", exc)
+            return False
+
+        return compound <= threshold
+
+    @staticmethod
+    def _check_encoding_concern(text: str, params: Any) -> bool:
+        r"""Return True if `text` shows encoding integrity issues.
+
+        Counts deterministic corruption signals:
+          - U+FFFD replacement characters (already-decoded lossy text)
+          - Literal ``\ufffd`` escape sequences carried through a JSON
+            / repr layer rather than being decoded
+          - Literal ``\xHH`` hex escapes (raw bytes leaked into a string)
+          - UTF-8-read-as-Latin-1 mojibake bigrams (e.g. ``Ã©``, ``â€™``)
+        Fires when (1) the event count reaches ``min_corruption_events``
+        (default 2), or (2) the corrupted-character share of the text
+        exceeds ``max_replacement_ratio`` (default 0.05), or (3) chardet
+        is importable and its confidence on the UTF-8 bytes is below
+        ``min_confidence`` (default 0.5). Non-dict ``params`` are treated
+        as ``{}``; empty or whitespace-only text returns False.
+        """
+        if not text or not text.strip():
+            return False
+
+        if not isinstance(params, dict):
+            params = {}
+        min_confidence = float(params.get("min_confidence", 0.5))
+        max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05))
+        min_corruption_events = int(params.get("min_corruption_events", 2))
+
+        length = max(len(text), 1)
+
+        replacement_chars = text.count("\ufffd")
+        literal_ufffd_escapes = text.count("\\ufffd")
+        hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text))
+        mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS)
+
+        # Absolute count of corruption *events* (one per U+FFFD, literal
+        # escape sequence, or mojibake bigram). Even diluted by a lot of
+        # clean text, a few of these in production output is a strong signal.
+        corruption_events = (
+            replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams
+        )
+        if corruption_events >= min_corruption_events:
+            return True
+
+        # Ratio-based fallback below the absolute floor: catches very short
+        # payloads where a single corruption is disproportionate. Each event
+        # is weighted by its source-char span so denser corruption trips the
+        # ratio sooner: U+FFFD = 1 char, "\ufffd" = 6 chars, "\xHH" = 4
+        # chars, mojibake bigram = 2 chars.
+        corruption_chars = (
+            replacement_chars
+            + 6 * literal_ufffd_escapes
+            + 4 * hex_escapes
+            + 2 * mojibake_bigrams
+        )
+        if corruption_chars / length > max_replacement_ratio:
+            return True
+
+        # Secondary: chardet on the encoded bytes. For pure str input this
+        # almost always reports high UTF-8/ASCII confidence (the branch is
+        # intentionally permissive), but it can flag `repr()` / `__str__` of
+        # a `bytes` object that chardet scores with low confidence.
+        chardet = _get_chardet()
+        if chardet is None:
+            return False
+        try:
+            detection = chardet.detect(text.encode("utf-8", errors="replace"))
+            confidence = float(detection.get("confidence") or 0.0)
+        except Exception as exc:  # pragma: no cover - defensive
+            logger.debug("chardet detection failed: %s", exc)
+            return False
+
+        return confidence < min_confidence
+
+    @staticmethod
+    def _check_entropy_concern(text: str, params: Any) -> bool:
+        """Flag `text` whose byte-level Shannon entropy leaves [min, max].
+
+        Entropy is measured in bits per byte over the UTF-8 encoding of
+        `text`. Bounds come from ``params["min"]`` (default 1.5) and
+        ``params["max"]`` (default 7.5); non-dict params use both defaults.
+        Empty or whitespace-only text is never flagged. Stdlib only.
+        """
+        if not text or not text.strip():
+            return False
+
+        bounds = params if isinstance(params, dict) else {}
+        lower = float(bounds.get("min", 1.5))
+        upper = float(bounds.get("max", 7.5))
+
+        payload = text.encode("utf-8", errors="replace")
+        size = len(payload)
+        if size == 0:
+            return False
+
+        # H = -sum(p * log2(p)) over the observed byte values.
+        entropy = 0.0
+        for occurrences in Counter(payload).values():
+            share = occurrences / size
+            entropy -= share * math.log2(share)
+
+        # Too uniform (below lower) or too noisy (above upper).
+        return entropy > upper or entropy < lower
+
+    @staticmethod
+    def _check_incident_concern(text: str, params: Any) -> bool:
+        """Return True if `text` matches any configured incident pattern (A.8.4).
+
+        Categories: safety_refusal, tool_failure, auth_failure,
+        quota_exceeded, hallucination. Pass ``{"categories": [...]}`` (or a
+        single category name as a string) to restrict; unknown names are
+        ignored, and a missing / empty value scans all categories.
+        """
+        if not text or not text.strip():
+            return False
+
+        requested = params.get("categories") if isinstance(params, dict) else None
+        if requested and isinstance(requested, str):
+            # A bare name is one category, not an iterable of characters.
+            requested = [requested]
+
+        if requested:
+            categories = [c for c in requested if c in _INCIDENT_PATTERNS]
+        else:
+            categories = list(_INCIDENT_PATTERNS)
+        return any(
+            pattern.search(text)
+            for category in categories
+            for pattern in _INCIDENT_PATTERNS[category]
+        )
+
+    @staticmethod
+    def _check_commitment_concern(text: str, params: Any) -> bool:
+        """Detect customer-commitment language in `text` (A.10.4).
+
+        The signals are OR-ed together:
+          - a commitment-verb match always fires (first-person promise
+            verbs plus proposal / SOW markers such as "Cost: $X",
+            "fixed scope", "Deliverables", "Timeline: N days", "I propose");
+          - with ``require_amount`` (default True) a currency-anchored
+            amount on its own also fires — the currency marker must sit
+            next to the number so URL fragments don't false-positive;
+          - with ``require_deadline`` (default False) a deadline phrase on
+            its own also fires.
+
+        With both flags false only the verb signal is consulted. Empty or
+        whitespace-only text never fires; non-dict ``params`` means defaults.
+        """
+        if not text or not text.strip():
+            return False
+
+        options = params if isinstance(params, dict) else {}
+        amount_enabled = bool(options.get("require_amount", True))
+        deadline_enabled = bool(options.get("require_deadline", False))
+
+        # Verb signal: sufficient on its own in every configuration.
+        if _COMMITMENT_VERB_PATTERN.search(text):
+            return True
+
+        # Supporting signals only count when their flag is switched on.
+        if amount_enabled and _COMMITMENT_AMOUNT_FALLBACK.search(text):
+            return True
+
+        # Deadline phrases ("within 3 days", "by tomorrow") come last.
+        if deadline_enabled and _COMMITMENT_DEADLINE_PATTERN.search(text):
+            return True
+        # No enabled signal is present in the text.
+        return False
diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py
new file mode 100644
index 0000000..fca63c6
--- /dev/null
+++ b/src/uipath/runtime/governance/native/guardrail_compensation.py
@@ -0,0 +1,433 @@
+"""Compensating governance for disabled centralized guardrails.
+
+When a ``guardrail_fallback`` rule fires (the guardrail is mapped to
+UiPath but the centralized policy is disabled), the framework asks the
+governance-server to run the real guardrail check via its
+``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint.
+
+This call is **fire-and-forget**: the server runs the guardrail AND
+writes the audit trace from its side. The agent doesn't inspect the
+response — it only cares about whether the call reached the server.
+
+The call also runs on a **bounded background pool** so even an agent
+that fires hundreds of compensation events in a session can't pile up
+threads or memory. :data:`COMPENSATION_MAX_WORKERS` workers process
+the queue, and an in-flight semaphore drops submissions when the pool
+is genuinely saturated — at that point the next call is logged and
+skipped rather than queued indefinitely.
+
+URL composition, request headers, org/tenant resolution, and the
+request timeout all come from
+:mod:`uipath.runtime.governance.native.backend_client` so the policy
+fetch and the compensating call share one definition of every
+operator-tunable.
+"""
+
+from __future__ import annotations
+
+import atexit
+import json
+import logging
+import os
+import threading
+import urllib.error
+import urllib.request
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, TypedDict
+
+from uipath.runtime.governance.native.backend_client import (
+    BACKEND_REQUEST_TIMEOUT_SECONDS,
+    COMPENSATION_MAX_WORKERS,
+    ENV_ACCESS_TOKEN,
+    ENV_ORGANIZATION_ID,
+    ENV_TENANT_ID,
+    ENV_TRACE_ID,
+    GOVERN_API_PATH,
+    TENANT_HEADER,
+    build_governance_url,
+    governance_request_headers,
+    resolve_job_context,
+    resolve_organization_id,
+    resolve_tenant_id,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ----------------------------------------------------------------------------
+# Bounded thread pool — caps both concurrent threads AND queued work.
+#
+# ThreadPoolExecutor alone caps concurrent worker threads, but its internal
+# queue is unbounded — a misbehaving agent that fires compensation faster than
+# the server can absorb would queue indefinitely (memory pressure). The
+# semaphore caps total in-flight submissions (running + queued) at a
+# multiple of the worker count. Saturated submissions are dropped with a
+# warning. Process exit cancels queued work and lets running tasks finish
+# (bounded by their HTTP timeout) via the atexit handler.
+# ----------------------------------------------------------------------------
+
+_INFLIGHT_OVERSUBSCRIPTION = 4  # in-flight (running + queued) slots per worker
+_INFLIGHT_CAP = COMPENSATION_MAX_WORKERS * _INFLIGHT_OVERSUBSCRIPTION
+# Worker threads are capped here; the semaphore below caps total submissions.
+_pool = ThreadPoolExecutor(
+    max_workers=COMPENSATION_MAX_WORKERS,
+    thread_name_prefix="governance-compensation",
+)
+_inflight = threading.BoundedSemaphore(_INFLIGHT_CAP)  # one slot per submission
+
+
+@atexit.register
+def _shutdown_pool() -> None:
+    """Shut the compensation pool down without blocking at process exit.
+
+    ``wait=False`` returns without joining workers; ``cancel_futures=True``
+    (Python 3.9+) cancels work that has not started. Any error is swallowed.
+    NOTE(review): ``concurrent.futures`` installs its own interpreter-exit
+    hook that joins pool threads — confirm it does not drain the queue first.
+    """
+    try:
+        _pool.shutdown(wait=False, cancel_futures=True)
+    except Exception:  # noqa: BLE001 - shutdown must never raise from atexit
+        pass
+
+
+# ----------------------------------------------------------------------------
+# Public API
+# ----------------------------------------------------------------------------
+
+
+class FiredRule(TypedDict):
+    """Per-rule metadata carried in the /runtime/govern payload.
+
+    :func:`disabled_guardrails` emits one entry per qualifying
+    ``guardrail_fallback`` condition (in practice one per rule). The
+    server uses these to write per-rule LLMOps trace records (Doc-2
+    audit structure).
+    """
+
+    ruleId: str  # ``rule_id`` of the matched evaluation
+    ruleName: str  # ``rule_name`` of the matched evaluation
+    packName: str  # the rule's ``pack_name``, or "" when absent
+    validator: str  # guardrail validator name, e.g. ``pii_detection``
+
+
+def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]:
+    """Return per-rule metadata for each fired guardrail-fallback rule.
+
+    Walks ``audit.evaluations`` and, for every matched evaluation whose
+    rule is known to ``policy_index``, emits one :class:`FiredRule` per
+    ``guardrail_fallback`` condition that is mapped to UiPath
+    (``mapped_to_uipath`` true) but disabled (``policy_enabled`` false).
+    A multi-condition rule can therefore yield several entries sharing
+    one ``ruleId``. The validator name (e.g. ``pii_detection``) comes
+    from the condition's config; conditions without one are skipped.
+
+    Each entry has this shape::
+
+        {
+          "ruleId": "...",
+          "ruleName": "...",
+          "packName": "...",
+          "validator": "pii_detection",
+        }
+
+    ``audit`` must expose ``evaluations`` whose items carry ``matched``,
+    ``rule_id`` and ``rule_name``; ``policy_index.get_rule(rule_id)``
+    returns the rule (``checks`` -> ``conditions``) or ``None``. Entries
+    are returned in evaluation order.
+    """
+    fired: list[FiredRule] = []
+    for evaluation in audit.evaluations:
+        if not evaluation.matched:
+            continue
+        rule = policy_index.get_rule(evaluation.rule_id)
+        if rule is None:
+            continue
+        for check in rule.checks:
+            for cond in check.conditions:
+                if cond.operator != "guardrail_fallback":
+                    continue
+                config = cond.value
+                if not isinstance(config, dict):
+                    continue
+                # Re-check the operator's own match criteria so a rule
+                # that fired on a sibling condition cannot trigger a
+                # compensation call for a guardrail that is not disabled.
+                if not bool(config.get("mapped_to_uipath", False)):
+                    continue
+                if bool(config.get("policy_enabled", True)):
+                    continue
+                # A missing / null / empty validator is skipped; str() on
+                # ``None`` would otherwise emit the bogus name "None".
+                raw_validator = config.get("validator")
+                if raw_validator is None or raw_validator == "":
+                    continue
+                fired.append(
+                    {
+                        "ruleId": evaluation.rule_id,
+                        "ruleName": evaluation.rule_name,
+                        "packName": getattr(rule, "pack_name", "") or "",
+                        "validator": str(raw_validator),
+                    }
+                )
+    return fired
+
+
+def _validators(rules: list[FiredRule]) -> list[str]:
+    """Return the unique non-empty validator names in first-seen order."""
+    return list({rule["validator"]: None for rule in rules if rule.get("validator")})
+
+
+def _resolve_trace_id(fallback: str) -> str:
+    """Pick the trace id for a compensation call on the calling thread.
+
+    Sources are tried in order; the first usable one wins:
+
+    1. the environment variable named by ``ENV_TRACE_ID`` (non-empty);
+    2. the current OpenTelemetry span's trace id, rendered as 32
+       lowercase hex digits, when its span context is valid;
+    3. the caller-supplied ``fallback``.
+
+    The environment variable outranks the live span because the native
+    governance audit spans are exported under that id, and compensation
+    records must land on the same trace.
+
+    Call this before the background-pool hop in
+    :func:`submit_compensation`: the worker thread has no OpenTelemetry
+    context, so resolving there would detach the server-written records
+    from the agent's trace.
+
+    Any OpenTelemetry import or lookup failure falls back to ``fallback``.
+    """
+    from_env = os.environ.get(ENV_TRACE_ID)
+    if from_env:
+        return from_env
+
+    try:
+        from opentelemetry import trace as otel_trace
+
+        span_context = otel_trace.get_current_span().get_span_context()
+        if not span_context.is_valid:
+            return fallback
+        return f"{span_context.trace_id:032x}"
+    except Exception:  # noqa: BLE001 - tracing is best-effort
+        return fallback
+
+
+def submit_compensation(
+    rules: list[FiredRule],
+    data: dict[str, Any],
+    hook: str,
+    trace_id: str,
+    src_timestamp: str,
+    agent_name: str,
+    runtime_id: str,
+) -> None:
+    """Schedule a /runtime/govern call on the bounded background pool.
+
+    Fire-and-forget: returns immediately and never raises. Nothing is
+    scheduled when ``rules`` yields no validator. When every in-flight
+    slot (``_INFLIGHT_CAP``, running + queued) is taken, or the pool has
+    already been shut down, the call is dropped with a warning.
+
+    ``rules`` is the per-rule metadata from :func:`disabled_guardrails`.
+    ``trace_id`` is only a fallback: :func:`_resolve_trace_id` runs here,
+    on the caller's thread, and its result is what the worker sends. All
+    other arguments are forwarded unchanged to :func:`request_governance`.
+    """
+    if not rules:
+        return
+
+    validators = _validators(rules)
+    if not validators:
+        return
+    validator_list = ", ".join(validators)
+
+    # Resolve on the caller (hook) thread: the worker that issues the
+    # /govern call has no OTel context of its own, so the value is
+    # captured now and carried into the closure below.
+    trace_id = _resolve_trace_id(trace_id)
+
+    if not _inflight.acquire(blocking=False):
+        logger.warning(
+            "Compensation pool saturated (>%d in flight); dropping call "
+            "(validators=[%s])",
+            _INFLIGHT_CAP,
+            validator_list,
+        )
+        return
+
+    def _run() -> None:
+        try:
+            request_governance(
+                rules=rules,
+                data=data,
+                hook=hook,
+                trace_id=trace_id,
+                src_timestamp=src_timestamp,
+                agent_name=agent_name,
+                runtime_id=runtime_id,
+            )
+        except Exception as exc:  # noqa: BLE001 - fail-open by contract
+            logger.warning(
+                "Compensation worker failed (validators=[%s]): %s",
+                validator_list,
+                exc,
+            )
+
+    try:
+        future = _pool.submit(_run)
+    except RuntimeError as exc:
+        # Pool already shut down: no future exists, so free the slot here.
+        _inflight.release()
+        logger.warning(
+            "Compensation pool unavailable (validators=[%s]): %s",
+            validator_list,
+            exc,
+        )
+        return
+
+    # Free the slot when the future finishes OR is cancelled. A
+    # ``finally`` inside ``_run`` never runs for work cancelled while
+    # still queued (``shutdown(cancel_futures=True)``), leaking the slot.
+    future.add_done_callback(lambda _done: _inflight.release())
+
+
+def request_governance(
+    rules: list[FiredRule],
+    data: dict[str, Any],
+    hook: str,
+    trace_id: str,
+    src_timestamp: str,
+    agent_name: str,
+    runtime_id: str,
+) -> None:
+    """Synchronous POST to the org-scoped ``/runtime/govern`` endpoint.
+
+    Most callers should use :func:`submit_compensation` to run this on
+    the bounded background pool. ``request_governance`` is exposed
+    directly only for callers that already manage their own
+    concurrency (and for tests).
+
+    POSTs::
+
+        {
+          "type": ["pii_detection", "harmful_content"],
+          "rules": [
+            {"ruleId": "...", "ruleName": "...",
+             "packName": "...", "validator": "pii_detection"}
+          ],
+          "data": {...},
+          "hook": "before_model",
+          "traceId": "...",
+          "src_timestamp": "...",
+          "agentName": "...",
+          "runtimeId": "...",
+          "folderKey": "...", "jobKey": "...", "processKey": "...",
+          "referenceId": "...", "agentVersion": "..."
+        }
+
+    ``type`` (the distinct validators) drives the guardrail API call;
+    ``rules`` + the job-context fields let the server write one LLMOps
+    trace record per rule (Doc-2 audit structure). The job-context keys
+    are included only when resolvable from the environment.
+
+    Skipped (with a warning) if the org id, tenant id or access token
+    can't be resolved. The server runs the disabled guardrails AND
+    writes the audit trace itself — the agent does not consume or parse
+    the response body. The only thing this function reports back is
+    *whether the call landed*:
+
+    - **Success** → ``INFO`` log ``Govern call has been made``.
+    - **Failure** → ``WARNING`` log; returns ``None``.
+
+    Never raises: payload, URL/header and transport errors are logged.
+    """
+    if not rules:
+        return
+
+    validators = _validators(rules)
+    if not validators:
+        return
+
+    org_id = resolve_organization_id()
+    if not org_id:
+        logger.warning(
+            "Govern call skipped: organization id is not available "
+            "(set %s). validators=[%s]",
+            ENV_ORGANIZATION_ID,
+            ", ".join(validators),
+        )
+        return
+
+    tenant_id = resolve_tenant_id()
+    if not tenant_id:
+        logger.warning(
+            "Govern call skipped: tenant id is not available "
+            "(set %s). validators=[%s]",
+            ENV_TENANT_ID,
+            ", ".join(validators),
+        )
+        return
+
+    # Bearer token is required by the backend; sending without one
+    # produces a 401 per call and pollutes logs. Skip cleanly when the
+    # token isn't present (e.g. local dev, missing host bootstrap)
+    # rather than burning quota on guaranteed auth failures.
+    if not os.environ.get(ENV_ACCESS_TOKEN):
+        logger.warning(
+            "Govern call skipped: %s is not set in the environment; "
+            "compensation requires a bearer token. validators=[%s]",
+            ENV_ACCESS_TOKEN,
+            ", ".join(validators),
+        )
+        return
+
+    try:
+        payload = json.dumps(
+            {
+                "type": validators,
+                "rules": rules,
+                "data": data,
+                "hook": hook,
+                "traceId": trace_id,
+                "src_timestamp": src_timestamp,
+                "agentName": agent_name,
+                "runtimeId": runtime_id,
+                **resolve_job_context(),
+            },
+            default=str,  # coerce any non-JSON-native value safely
+        ).encode("utf-8")
+    except Exception as exc:  # noqa: BLE001 - fail-open
+        logger.warning(
+            "Govern call payload serialization failed (validators=[%s]): %s",
+            ", ".join(validators),
+            exc,
+        )
+        return
+
+    # Build the request inside the ``try`` so helper/URL errors are logged.
+    try:
+        url = build_governance_url(org_id, GOVERN_API_PATH)
+        headers = governance_request_headers(json_body=True)
+        headers[TENANT_HEADER] = tenant_id
+        request = urllib.request.Request(
+            url,
+            data=payload,
+            headers=headers,
+            method="POST",
+        )
+        with urllib.request.urlopen(  # noqa: S310 - URL is built from config
+            request, timeout=BACKEND_REQUEST_TIMEOUT_SECONDS
+        ) as response:
+            logger.info(
+                "Govern call has been made (status=%s, validators=[%s])",
+                getattr(response, "status", "?"),
+                ", ".join(validators),
+            )
+    except Exception as exc:  # noqa: BLE001 - fail-and-log
+        logger.warning(
+            "Govern call failed (validators=[%s]): %s",
+            ", ".join(validators),
+            exc,
+        )
diff --git a/tests/test_audit_console.py b/tests/test_audit_console.py
new file mode 100644
index 0000000..8a8cd52
--- /dev/null
+++ b/tests/test_audit_console.py
@@ -0,0 +1,275 @@
+"""Tests for ``ConsoleAuditSink``.
+
+The console sink is a developer-aid that writes governance events to
+stderr in a human-readable format. Filtering and per-event-type
+formatting are the things worth pinning so a non-verbose run doesn't
+spam unmatched evaluations.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from uipath.runtime.governance.audit.base import AuditEvent, EventType
+from uipath.runtime.governance.audit.console import ConsoleAuditSink
+
+# ---------------------------------------------------------------------------
+# Basic surface
+# ---------------------------------------------------------------------------
+
+
+def test_sink_name_is_console() -> None:
+    assert ConsoleAuditSink().name == "console"  # default-constructed sink
+
+
+def test_default_is_non_verbose() -> None:
+    """A sink built with no arguments rejects an unmatched rule evaluation."""
+    payload = {"matched": False, "rule_id": "A", "rule_name": "n"}
+    event = AuditEvent(event_type=EventType.RULE_EVALUATION, data=payload)
+    default_sink = ConsoleAuditSink()
+    accepted = default_sink.accepts(event)
+
+    assert accepted is False
+
+
+# ---------------------------------------------------------------------------
+# accepts() — filtering behavior
+# ---------------------------------------------------------------------------
+
+
+def test_accepts_verbose_passes_everything() -> None:
+    """With ``verbose=True`` the sink accepts each sampled event type."""
+    sink = ConsoleAuditSink(verbose=True)
+    for kind in (EventType.RULE_EVALUATION, EventType.HOOK_END, EventType.PACKS_LOADED):
+        assert sink.accepts(AuditEvent(event_type=kind)) is True
+
+
+def test_accepts_non_verbose_filters_unmatched_rule_eval() -> None:
+    """Non-verbose: a matched rule evaluation passes, an unmatched one doesn't."""
+    quiet_sink = ConsoleAuditSink(verbose=False)
+    def rule_eval(matched: bool) -> AuditEvent:
+        return AuditEvent(
+            event_type=EventType.RULE_EVALUATION, data={"matched": matched}
+        )
+
+    assert quiet_sink.accepts(rule_eval(True)) is True
+    assert quiet_sink.accepts(rule_eval(False)) is False
+
+
+@pytest.mark.parametrize(
+    "event_type",
+    (
+        EventType.SESSION_START,
+        EventType.SESSION_END,
+        EventType.HOOK_END,
+        EventType.POLICY_VIOLATION,
+    ),
+)
+def test_accepts_non_verbose_passes_lifecycle_events(event_type: str) -> None:
+    """Each parametrized lifecycle event type is accepted with verbose off."""
+    lifecycle_event = AuditEvent(event_type=event_type)
+    assert ConsoleAuditSink(verbose=False).accepts(lifecycle_event) is True
+
+
+def test_accepts_non_verbose_drops_other_event_types() -> None:
+    """PACKS_LOADED is rejected when verbose is off."""
+    packs_loaded = AuditEvent(event_type=EventType.PACKS_LOADED)
+    assert ConsoleAuditSink(verbose=False).accepts(packs_loaded) is False
+
+
+# ---------------------------------------------------------------------------
+# _emit_rule_evaluation
+# ---------------------------------------------------------------------------
+
+
+def test_emit_matched_rule_writes_full_line(capsys: pytest.CaptureFixture[str]) -> None:
+    """A matched rule evaluation prints marker, id, name, action and detail."""
+    details = {
+        "matched": True,
+        "rule_id": "A.10.4",
+        "rule_name": "commitment-language",
+        "action": "audit",
+        "detail": "Customer commitment detected.",
+    }
+    event = AuditEvent(event_type=EventType.RULE_EVALUATION, data=details)
+    ConsoleAuditSink(verbose=False).emit(event)
+    stderr = capsys.readouterr().err
+    for expected in (
+        "MATCHED",
+        "A.10.4",
+        "commitment-language",
+        "action=AUDIT",
+        "Customer commitment detected.",
+    ):
+        assert expected in stderr
+
+
+def test_emit_unmatched_rule_silent_when_non_verbose(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """With verbose off, emitting an unmatched rule evaluation writes no stderr."""
+    unmatched = AuditEvent(
+        event_type=EventType.RULE_EVALUATION,
+        data={"matched": False, "rule_id": "A", "rule_name": "n"},
+    )
+    ConsoleAuditSink(verbose=False).emit(unmatched)
+    stderr = capsys.readouterr().err
+    assert stderr == ""
+
+
+def test_emit_unmatched_rule_prints_pass_when_verbose(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """With verbose on, an unmatched rule evaluation is printed as PASS."""
+    unmatched = AuditEvent(
+        event_type=EventType.RULE_EVALUATION,
+        data={"matched": False, "rule_id": "A.1", "rule_name": "rule-one"},
+    )
+    ConsoleAuditSink(verbose=True).emit(unmatched)
+
+    stderr = capsys.readouterr().err
+    assert "PASS" in stderr
+    assert "A.1" in stderr
+    assert "rule-one" in stderr
+
+
+# ---------------------------------------------------------------------------
+# _emit_hook_summary
+# ---------------------------------------------------------------------------
+
+
+def test_emit_hook_summary_basic(capsys: pytest.CaptureFixture[str]) -> None:
+    """A HOOK_END event prints the hook name, rule counts and final action."""
+    summary = {
+        "total_rules": 5,
+        "matched_rules": 1,
+        "final_action": "allow",
+        "enforcement_mode": "audit",
+    }
+    hook_end = AuditEvent(
+        event_type=EventType.HOOK_END,
+        hook="after_model",
+        data=summary,
+    )
+    ConsoleAuditSink(verbose=False).emit(hook_end)
+    stderr = capsys.readouterr().err
+    assert "HOOK: after_model" in stderr
+    assert "rules=5" in stderr
+    assert "matched=1" in stderr
+    assert "action=ALLOW" in stderr
+
+
+def test_emit_hook_summary_audit_mode_would_deny_marker(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """A deny outcome under audit enforcement is printed as 'would deny'.
+
+    The annotation tells operators reading the console that the deny
+    was only audited, not applied.
+    """
+    summary = {
+        "total_rules": 1,
+        "matched_rules": 1,
+        "final_action": "deny",
+        "enforcement_mode": "audit",
+    }
+    hook_end = AuditEvent(
+        event_type=EventType.HOOK_END,
+        hook="before_model",
+        data=summary,
+    )
+    ConsoleAuditSink(verbose=False).emit(hook_end)
+
+    stderr = capsys.readouterr().err
+    assert "AUDIT (would deny)" in stderr
+
+
+def test_emit_hook_summary_enforce_mode_deny_not_annotated(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Under enforce mode a deny is printed plainly, without 'would deny'."""
+    summary = {
+        "total_rules": 1,
+        "matched_rules": 1,
+        "final_action": "deny",
+        "enforcement_mode": "enforce",
+    }
+    hook_end = AuditEvent(
+        event_type=EventType.HOOK_END,
+        hook="before_model",
+        data=summary,
+    )
+    ConsoleAuditSink(verbose=False).emit(hook_end)
+
+    stderr = capsys.readouterr().err
+    assert "would deny" not in stderr
+    assert "action=DENY" in stderr
+
+
+# ---------------------------------------------------------------------------
+# Session start / end
+# ---------------------------------------------------------------------------
+
+
+def test_emit_session_start_includes_packs_and_mode(
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """SESSION_START output names the agent, the packs and the mode."""
+    session_start = AuditEvent(
+        event_type=EventType.SESSION_START,
+        agent_name="my-agent",
+        data={"packs": ["iso42001", "owasp"], "enforcement_mode": "audit"},
+    )
+    ConsoleAuditSink(verbose=False).emit(session_start)
+
+    stderr = capsys.readouterr().err
+    assert "Session started" in stderr
+    assert "agent=my-agent" in stderr
+    assert "iso42001,owasp" in stderr
+    assert "mode=audit" in stderr
+
+
+def test_emit_session_end_counters(capsys: pytest.CaptureFixture[str]) -> None:
+    """SESSION_END output reports the evaluation, match and deny counters."""
+    counters = {
+        "total_evaluations": 12,
+        "rules_matched": 3,
+        "rules_denied": 1,
+    }
+    session_end = AuditEvent(
+        event_type=EventType.SESSION_END,
+        trace_id="trace-abc",
+        data=counters,
+    )
+    ConsoleAuditSink(verbose=False).emit(session_end)
+    stderr = capsys.readouterr().err
+    assert "Session ended" in stderr
+    assert "evaluations=12" in stderr
+    assert "matched=3" in stderr
+    assert "denied=1" in stderr
+
+
+# ---------------------------------------------------------------------------
+# Generic / fallback
+# ---------------------------------------------------------------------------
+
+
+def test_emit_generic_unknown_event_type(capsys: pytest.CaptureFixture[str]) -> None:
+    """An event type the sink has no formatter for is still printed.
+
+    The output is expected to carry the event type, the agent name and
+    the ``data`` payload rendered as JSON key/value pairs.
+    """
+    custom = AuditEvent(
+        event_type="custom_event",
+        agent_name="x",
+        data={"foo": "bar", "n": 1},
+    )
+    ConsoleAuditSink(verbose=True).emit(custom)
+
+    stderr = capsys.readouterr().err
+    assert "custom_event" in stderr
+    # NOTE(review): a one-character agent name makes the next check weak.
+    assert "x" in stderr
+    assert '"foo": "bar"' in stderr
+    assert '"n": 1' in stderr
diff --git a/tests/test_audit_register_sink.py b/tests/test_audit_register_sink.py
new file mode 100644
index 0000000..ff03710
--- /dev/null
+++ b/tests/test_audit_register_sink.py
@@ -0,0 +1,103 @@
+"""Tests for ``AuditManager.register_sink`` failure-counter semantics.
+
+A re-registered same-name sink must NOT inherit the previous instance's
+tripped circuit-breaker state. ``unregister_sink`` already clears these
+counters, but ``register_sink`` also clears them on a successful add as
+defense-in-depth (covers tests / external callers that touch the
+internal counter dicts directly).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from uipath.runtime.governance.audit.base import (
+    AuditEvent,
+    AuditManager,
+    AuditSink,
+    EventType,
+)
+
+
+class _NoopSink(AuditSink):
+    """Test double: stores every emitted event and never raises."""
+
+    def __init__(self, name: str = "test-sink") -> None:
+        self._name = name  # value returned by the ``name`` property
+        self.events: list[AuditEvent] = []  # events passed to emit(), in order
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def emit(self, event: AuditEvent) -> None:
+        self.events.append(event)
+
+
+def _event() -> AuditEvent:  # minimal RULE_EVALUATION event for emit() checks
+    return AuditEvent(event_type=EventType.RULE_EVALUATION, agent_name="a")
+
+
+@pytest.fixture
+def manager() -> Any:
+    """Return a new AuditManager built with ``async_mode=False``."""
+    return AuditManager(async_mode=False)
+
+
+def test_register_clears_stale_failure_counter(manager: AuditManager) -> None:
+    """Registering a sink resets stale breaker state stored under its name."""
+    sink_name = "test-sink"
+    # Pre-seed tripped state directly, as if a previous instance had
+    # failed and was never unregistered.
+    manager._sink_failures[sink_name] = manager._SINK_FAILURE_THRESHOLD
+    manager._tripped_sinks.add(sink_name)
+
+    fresh = _NoopSink(name=sink_name)
+    manager.register_sink(fresh)
+
+    # Both the failure count and the tripped flag must be gone.
+    assert manager._sink_failures.get(sink_name, 0) == 0
+    assert sink_name not in manager._tripped_sinks
+
+    # A still-tripped sink would be skipped, so delivery of exactly one
+    # event shows the reset took effect.
+    manager.emit(_event())
+    assert len(fresh.events) == 1
+
+
+def test_register_does_not_clear_for_duplicate(manager: AuditManager) -> None:
+    """A second registration under a taken name leaves the failure count alone."""
+    sink_name = "test-sink"
+    manager.register_sink(_NoopSink(name=sink_name))
+
+    # Pretend the registered sink has already failed three times.
+    manager._sink_failures[sink_name] = 3
+
+    # Registering another instance with the same name must not reset
+    # that count.
+    manager.register_sink(_NoopSink(name=sink_name))
+
+    failures = manager._sink_failures[sink_name]
+    assert failures == 3
+
+
+def test_unregister_then_register_starts_fresh(manager: AuditManager) -> None:
+    """Register, trip, unregister, then re-register under the same name."""
+    sink_name = "test-sink"
+    manager.register_sink(_NoopSink(name=sink_name))
+    manager._sink_failures[sink_name] = manager._SINK_FAILURE_THRESHOLD
+    manager._tripped_sinks.add(sink_name)
+
+    # Unregistering alone must already drop the tripped flag.
+    manager.unregister_sink(sink_name)
+    assert sink_name not in manager._tripped_sinks
+
+    replacement = _NoopSink(name=sink_name)
+    manager.register_sink(replacement)
+    assert manager._sink_failures.get(sink_name, 0) == 0
+    assert sink_name not in manager._tripped_sinks
+
+    manager.emit(_event())
+    assert len(replacement.events) == 1
diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py
new file mode 100644
index 0000000..a46149b
--- /dev/null
+++ b/tests/test_commitment_concern.py
@@ -0,0 +1,205 @@
+"""Tests for the commitment_concern check (A.10.4).
+
+The check now uses OR semantics: a verb match, an amount match, or a
+deadline match is each sufficient when its enabling flag is on. With
+both flags false the rule matches verb-only.
+
+The verb pattern also covers proposal / SOW style commitment markers
+("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose")
+so formal-business commitments without first-person verbs still fire.
+
+Amount detection requires a currency marker adjacent to the number so
+that URL fragments (forum-post IDs, image dimensions, etc.) do not
+produce false positives.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+
+# ---------------------------------------------------------------------------
+# The proposal-style sample that originally slipped through the rule.
+# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables",
+# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID.
+# Triple-quoted so we keep the line breaks the model produced.
+# ---------------------------------------------------------------------------
+SAMPLE_PROPOSAL = """To address your concerns, I reviewed the official UiPath site you referenced and relevant resources on uipath.com to inform a fast stabilization plan. Notable findings include: a community CI/CD sample for UiPath projects (https://forum.uipath.com/t/announcement-ci-cd-pipeline-sample-implementation-s-for-uipath-projects-alpha/667851).
+
+Here's how I propose we turn your software around quickly:
+
+Plan
+- Triage (logs + reproduce)
+- Quick stabilization
+
+Deliverables
+- Defect triage report
+
+Timeline: 4 days total
+- Day 1: Triage + reproduction
+
+Cost: $780 (fixed for the above scope)
+"""
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "Cost: $780 (fixed for the above scope)",
+        "Deliverables: a, b, c",
+        "Timeline: 4 days total for the whole engagement",
+        "I propose we turn this around in a week",
+        "We will refund the difference",
+        "I'll deliver the report by Friday",
+        "the warranty covers parts only",
+        "fixed price of one hundred dollars",
+    ],
+)
+def test_verb_match_alone_fires(text: str) -> None:
+    """With both flags off, a lone verb-style marker is enough to fire."""
+    verb_only = {"require_amount": False, "require_deadline": False}
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, verb_only)
+
+    # Identity, not truthiness: the check must return the bool ``True``.
+    assert fired is True
+
+
+def test_full_proposal_sample_fires() -> None:
+    """Verb-only mode flags the proposal sample that was originally missed."""
+    verb_only = {"require_amount": False, "require_deadline": False}
+
+    fired = GovernanceEvaluator._check_commitment_concern(SAMPLE_PROPOSAL, verb_only)
+
+    # Identity, not truthiness: the check must return the bool ``True``
+    # for the whole multi-line sample.
+    assert fired is True
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "$780",
+        "We charge USD 1,200 per seat",
+        "The fee is 500 EUR",
+    ],
+)
+def test_amount_alone_fires_when_require_amount_true(text: str) -> None:
+    """With require_amount on, a currency-anchored amount fires by itself."""
+    amount_only = {"require_amount": True, "require_deadline": False}
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, amount_only)
+
+    # Identity, not truthiness: the check must return the bool ``True``.
+    assert fired is True
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "Task is 75% complete.",
+        "We maintain 99.9% uptime.",
+        "Battery at 50%.",
+        "Score: 12%.",
+    ],
+)
+def test_bare_percentage_does_not_fire(text: str) -> None:
+    """Percentages that only report status must leave the check silent.
+
+    Guards against the return of the ``\\d{1,3}\\s*%`` alternative the
+    amount regex used to carry, which fired on harmless progress and
+    status text. A percentage inside a real commitment ("we'll give a
+    20% discount") is still caught, but through the verb pattern.
+    """
+    amount_only = {"require_amount": True, "require_deadline": False}
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, amount_only)
+
+    # Identity, not falsiness: the check must return the bool ``False``.
+    assert fired is False
+
+
+def test_percentage_with_verb_still_fires() -> None:
+    """A percentage next to a commitment verb keeps firing."""
+    amount_only = {"require_amount": True, "require_deadline": False}
+    text = "We will refund 100% of the purchase price."
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, amount_only)
+
+    # Identity, not truthiness: the check must return the bool ``True``.
+    assert fired is True
+
+
+def test_amount_alone_does_not_fire_when_require_amount_false() -> None:
+    """With require_amount off and no verb, a bare amount stays silent."""
+    verb_only = {"require_amount": False, "require_deadline": False}
+    text = "The list price is $780."
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, verb_only)
+
+    # Identity, not falsiness: the check must return the bool ``False``.
+    assert fired is False
+
+
+def test_deadline_alone_fires_when_require_deadline_true() -> None:
+    """With require_deadline on, a deadline phrase fires by itself."""
+    deadline_only = {"require_amount": False, "require_deadline": True}
+    text = "Will be done within 5 days."
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, deadline_only)
+
+    # Identity, not truthiness: the check must return the bool ``True``.
+    assert fired is True
+
+
+def test_url_fragment_digits_do_not_false_positive() -> None:
+    """Digits buried in a URL are not mistaken for a commitment.
+
+    Pins the fix for the earlier price parser, where Price.fromstring()
+    latched onto forum-post IDs (e.g. ``667851``) and paired them with
+    unrelated currency symbols found elsewhere in the text.
+    """
+    text = (
+        "See https://forum.example.com/t/topic/667851 for details — "
+        "no commitment language here."
+    )
+    all_flags = {"require_amount": True, "require_deadline": True}
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, all_flags)
+
+    # Even with both optional detectors enabled nothing may match.
+    assert fired is False
+
+
+@pytest.mark.parametrize(
+    "text",
+    [
+        "",
+        "   ",
+        "Just chatting about the weather today.",
+        "The product is durable and well-made.",
+    ],
+)
+def test_no_signal_does_not_fire(text: str) -> None:
+    """Without any commitment signal the check is silent under all flags."""
+    all_flags = {"require_amount": True, "require_deadline": True}
+
+    fired = GovernanceEvaluator._check_commitment_concern(text, all_flags)
+
+    # Identity, not falsiness: the check must return the bool ``False``.
+    assert fired is False
+
+
+def test_non_dict_params_treated_as_defaults() -> None:
+    """Non-dict ``params`` fall back to the defaults rather than raising."""
+    check = GovernanceEvaluator._check_commitment_concern
+
+    # ``None`` in place of a dict: the verb in the text still fires.
+    with_none = check("we will refund", None)
+    assert with_none is True
+
+    # A stray string in place of a dict: the text carries no verb, so
+    # the check stays silent.
+    with_string = check("no verbs here", "garbage")
+    assert with_string is False
diff --git a/tests/test_delegation_guard.py b/tests/test_delegation_guard.py
new file mode 100644
index 0000000..a1ba432
--- /dev/null
+++ b/tests/test_delegation_guard.py
@@ -0,0 +1,320 @@
+"""Tests for the async-aware delegation depth guard.
+
+The guard wraps an agent's ``invoke`` and ``ainvoke`` so a single
+ContextVar tracks delegation depth across both sync and async call
+chains. The async wrapper must itself be a coroutine — wrapping with a
+sync function would return an un-awaited coroutine and silently bypass
+the depth check.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from types import SimpleNamespace
+
+import pytest
+from uipath.core.governance.exceptions import GovernanceBlockException
+
+from uipath.runtime.governance.delegation_guard import (
+    install_delegation_guard,
+    uninstall_delegation_guard,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers — minimal agent shapes the guard might encounter in the wild.
+# ---------------------------------------------------------------------------
+
+
+def _make_sync_agent() -> SimpleNamespace:
+    """Return an agent exposing only a sync ``invoke`` that echoes its input."""
+    # Extra keyword arguments are accepted and ignored.
+    return SimpleNamespace(invoke=lambda payload, **_: {"sync": payload})
+
+
+def _make_async_agent() -> SimpleNamespace:
+    """Return an agent exposing only an async ``ainvoke`` that echoes its input."""
+
+    async def _ainvoke(payload, **_):
+        # Extra keyword arguments are accepted and ignored.
+        return {"async": payload}
+
+    return SimpleNamespace(ainvoke=_ainvoke)
+
+
+def _make_dual_agent() -> SimpleNamespace:
+    """Build an agent with both sync ``invoke`` and async ``ainvoke``.
+
+    This is the LangGraph React agent shape.
+    """
+    dual = _make_sync_agent()
+    # Take the echoing coroutine from the async-only helper.
+    dual.ainvoke = _make_async_agent().ainvoke
+    return dual
+
+
+# ---------------------------------------------------------------------------
+# Sync path — preserves the original behaviour the guard always had.
+# ---------------------------------------------------------------------------
+
+
+def test_sync_invoke_passes_through_under_limit() -> None:
+    """A single guarded sync call returns the agent's own result."""
+    install_delegation_guard(agent := _make_sync_agent(), max_depth=3)
+    assert agent.invoke({"x": 1}) == {"sync": {"x": 1}}
+
+
+def test_sync_invoke_raises_when_depth_exceeded() -> None:
+    """Unbounded sync self-recursion is cut off once the limit is hit."""
+    depth_limit = 3
+    entered: list[int] = []
+
+    def _invoke(_payload, **_):
+        entered.append(1)
+        # Re-enter via the attribute so the call goes through the guard.
+        return agent.invoke({})
+
+    agent = SimpleNamespace(invoke=_invoke)
+    install_delegation_guard(agent, max_depth=depth_limit)
+
+    with pytest.raises(GovernanceBlockException):
+        agent.invoke({})
+    # The guard checks depth before delegating to the original, so the
+    # body ran once per permitted level and never for the rejected call.
+    assert len(entered) == depth_limit
+
+
+# ---------------------------------------------------------------------------
+# Async path — the new shape this change unlocks.
+# ---------------------------------------------------------------------------
+
+
+def test_async_wrapper_is_a_coroutine_function() -> None:
+    """The wrapped ainvoke must itself be a coroutine function.
+
+    Regression test: a sync wrapper around an async method returned an
+    un-awaited coroutine, silently bypassing the depth check.
+    """
+    import inspect  # asyncio.iscoroutinefunction is deprecated since 3.14
+    agent = _make_async_agent()
+    install_delegation_guard(agent, max_depth=3)
+    assert inspect.iscoroutinefunction(agent.ainvoke)
+
+
+def test_async_invoke_passes_through_under_limit() -> None:
+    """A single guarded async call returns the agent's own result."""
+    guarded = _make_async_agent()
+    install_delegation_guard(guarded, max_depth=3)
+    assert asyncio.run(guarded.ainvoke({"x": 1})) == {"async": {"x": 1}}
+
+
+def test_async_invoke_raises_when_depth_exceeded() -> None:
+    """Unbounded async self-recursion is cut off once the limit is hit."""
+    depth_limit = 3
+    entered: list[int] = []
+
+    async def _ainvoke(_payload, **_):
+        entered.append(1)
+        return await agent.ainvoke({})
+
+    agent = SimpleNamespace(ainvoke=_ainvoke)
+    install_delegation_guard(agent, max_depth=depth_limit)
+    with pytest.raises(GovernanceBlockException):
+        asyncio.run(agent.ainvoke({}))
+    assert len(entered) == depth_limit
+
+
+def test_sync_and_async_share_one_depth_counter() -> None:
+    """Async entry that continues through sync ``invoke`` shares one counter."""
+    depth_limit = 2
+    entered: list[str] = []
+
+    def _invoke(_payload, **_):
+        entered.append("sync")
+        # Keep recursing synchronously through the guarded attribute.
+        return agent.invoke({})
+
+    async def _ainvoke(_payload, **_):
+        entered.append("async")
+        # Switch modes: the coroutine hands off to the sync path.
+        return agent.invoke({})
+
+    agent = SimpleNamespace(invoke=_invoke, ainvoke=_ainvoke)
+    install_delegation_guard(agent, max_depth=depth_limit)
+
+    with pytest.raises(GovernanceBlockException):
+        asyncio.run(agent.ainvoke({}))
+    # Depth 1 is the ainvoke entry and depth 2 the first invoke; the next
+    # invoke would be depth 3, which the guard rejects before _invoke
+    # runs, so only two bodies executed.
+    assert len(entered) == depth_limit
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle — install / uninstall semantics.
+# ---------------------------------------------------------------------------
+
+
+def test_install_is_idempotent() -> None:
+    """Installing twice leaves the first wrapper in place."""
+    install_delegation_guard(agent := _make_sync_agent(), max_depth=5)
+    first_wrapper = agent.invoke
+    install_delegation_guard(agent, max_depth=5)
+    assert agent.invoke is first_wrapper, "second install must not re-wrap"
+
+
+def test_uninstall_restores_originals_for_both_methods() -> None:
+    """Uninstall puts back the exact ``invoke`` / ``ainvoke`` objects."""
+    agent = _make_dual_agent()
+    sync_before, async_before = agent.invoke, agent.ainvoke
+    install_delegation_guard(agent, max_depth=5)
+    assert agent.invoke is not sync_before
+    assert agent.ainvoke is not async_before
+
+    uninstall_delegation_guard(agent)
+    assert agent.invoke is sync_before
+    assert agent.ainvoke is async_before
+    assert not getattr(agent, "_delegation_wrapped", False)
+
+
+def test_uninstall_safe_on_unguarded_agent() -> None:
+    agent = _make_sync_agent()
+    original_invoke = agent.invoke
+    uninstall_delegation_guard(agent)  # must not raise on an unguarded agent
+    assert agent.invoke is original_invoke  # and must leave it untouched
+
+
+# ---------------------------------------------------------------------------
+# Edge cases.
+# ---------------------------------------------------------------------------
+
+
+def test_agent_without_invoke_methods_is_noop() -> None:
+    """Installing on an object with nothing to wrap neither fails nor marks it."""
+    bare = SimpleNamespace(unrelated="value")
+    install_delegation_guard(bare, max_depth=5)
+    assert not getattr(bare, "_delegation_wrapped", False)
+
+
+def test_env_var_max_depth_override(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Without an explicit ``max_depth`` the env var sets the limit."""
+    monkeypatch.setenv("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH", "1")
+    entered: list[int] = []
+
+    def _invoke(_payload, **_):
+        entered.append(1)
+        return agent.invoke({})
+
+    agent = SimpleNamespace(invoke=_invoke)
+    # No max_depth argument: the limit has to come from the environment.
+    install_delegation_guard(agent)
+
+    with pytest.raises(GovernanceBlockException):
+        agent.invoke({})
+    assert len(entered) == 1, "max_depth=1 should allow exactly one call"
+
+
+def test_invalid_env_var_falls_back_to_default(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """An unparsable depth override is ignored and the guard stays usable."""
+    monkeypatch.setenv("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH", "not-a-number")
+    agent = _make_sync_agent()
+    install_delegation_guard(agent)  # must not raise on the bad value
+    assert os.environ.get("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH") == "not-a-number"
+    assert agent.invoke({"x": 1}) == {"sync": {"x": 1}}
+
+
+# ---------------------------------------------------------------------------
+# Leak / scaling — pins the shared-ContextVar design.
+# ---------------------------------------------------------------------------
+
+
+def test_install_does_not_allocate_per_agent_contextvars() -> None:
+    """N installs must not grow the module's ContextVar registry by N.
+
+    The old implementation allocated a ``ContextVar`` per agent. Contexts
+    keep strong references to their ContextVars, so those were never
+    reclaimed — an unbounded leak. The current design holds a single
+    module-level ContextVar of ``dict[id(agent), int]``.
+    """
+    from uipath.runtime.governance import delegation_guard as dg
+
+    # Snapshot the single shared ContextVar.
+    shared_var = dg._DELEGATION_DEPTHS
+
+    for _ in range(100):
+        agent = _make_sync_agent()
+        install_delegation_guard(agent, max_depth=3)
+        uninstall_delegation_guard(agent)
+
+    # The module attribute still refers to the same ContextVar instance,
+    # i.e. install/uninstall never rebound it to a fresh variable.
+    assert dg._DELEGATION_DEPTHS is shared_var
+
+
+def test_two_agents_have_independent_depth_counters() -> None:
+    """Hitting the limit on one agent leaves another agent's budget intact.
+
+    The two agents go through the same module-level ContextVar; the dict
+    stored in it keeps them apart by ``id(agent)``.
+    """
+    from uipath.runtime.governance import delegation_guard as dg
+
+    depth_limit = 2
+    entered_a: list[int] = []
+
+    def _invoke_a(_payload, **_):
+        entered_a.append(1)
+        # Recurse through the guard until it refuses.
+        return agent_a.invoke({})
+
+    agent_a = SimpleNamespace(invoke=_invoke_a)
+    agent_b = _make_sync_agent()
+
+    for agent in (agent_a, agent_b):
+        install_delegation_guard(agent, max_depth=depth_limit)
+
+    # Exhaust agent_a's allowance first.
+    with pytest.raises(GovernanceBlockException):
+        agent_a.invoke({})
+    assert len(entered_a) == depth_limit
+
+    # agent_b now starts its own chain in the very same context. Its
+    # counter lives under id(agent_b), so what agent_a used up must not
+    # count against it; with a counter shared across agents this call
+    # would start from whatever depth the context last held.
+    assert agent_b.invoke({"x": 1}) == {"sync": {"x": 1}}
+
+    # Each guarded call is expected to undo its own bookkeeping, so the
+    # ContextVar ends up unset (LookupError) or with no live depth for
+    # either agent.
+    try:
+        depths = dg._DELEGATION_DEPTHS.get()
+    except LookupError:
+        depths = {}
+    for agent in (agent_a, agent_b):
+        assert depths.get(id(agent), 0) == 0
+
+
+def test_uninstall_clears_agent_depth_entry() -> None:
+    """After uninstall, the agent's id is no longer in the depths dict.
+
+    Prevents ``id(agent)`` reuse — Python recycles ids after GC — from
+    mis-attributing a future agent's count to this one.
+    """
+    from uipath.runtime.governance import delegation_guard as dg
+
+    agent = _make_sync_agent()
+    install_delegation_guard(agent, max_depth=5)
+    agent.invoke({})  # one completed guarded call before planting state
+    agent_key = id(agent)
+    # Plant a non-zero entry by hand, keeping the token so the shared
+    # ContextVar is restored and cannot leak into later tests.
+    token = dg._DELEGATION_DEPTHS.set({agent_key: 3})
+    try:
+        assert dg._DELEGATION_DEPTHS.get().get(agent_key) == 3
+        uninstall_delegation_guard(agent)
+        assert agent_key not in dg._DELEGATION_DEPTHS.get()
+    finally:
+        dg._DELEGATION_DEPTHS.reset(token)
diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py
new file mode 100644
index 0000000..e3e6b88
--- /dev/null
+++ b/tests/test_evaluator.py
@@ -0,0 +1,401 @@
+"""Tests for the audit + enforcement behavior of GovernanceEvaluator.
+
+The evaluator owns three responsibilities that used to be scattered
+across wrapper.py and adapter callbacks:
+
+1. DISABLED enforcement mode short-circuits — no rules evaluated, no
+   audit events emitted, no exceptions raised.
+2. AUDIT mode evaluates rules and emits audit events, but transforms
+   matched DENY actions into AUDIT so execution continues.
+3. ENFORCE mode evaluates, emits audit, and raises
+   :class:`GovernanceBlockException` when a DENY rule matches.
+
+Plus a fail-safe contract: a misbehaving audit sink must not stop
+evaluation from completing or propagate as an exception.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import patch
+
+import pytest
+from uipath.core.governance.exceptions import GovernanceBlockException
+from uipath.core.governance.models import Action, LifecycleHook
+
+from tests._helpers import reset_enforcement_mode
+from uipath.runtime.governance.audit import (
+    AuditEvent,
+    AuditSink,
+    EventType,
+    get_audit_manager,
+    reset_audit_manager,
+)
+from uipath.runtime.governance.config import (
+    EnforcementMode,
+    set_enforcement_mode,
+)
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+)
+
+# ---------------------------------------------------------------------------
+# Test helpers
+# ---------------------------------------------------------------------------
+
+
+class _CapturingSink(AuditSink):
+    """In-memory audit sink that keeps every emitted event for assertions."""
+
+    def __init__(self) -> None:
+        self.events: list[AuditEvent] = []  # emitted events, oldest first
+
+    @property
+    def name(self) -> str:
+        return "capturing"  # fixed identifier exposed via ``name``
+
+    def emit(self, event: AuditEvent) -> None:
+        self.events.append(event)  # record only; nothing else is done
+
+
+def _deny_rule_on_input_contains(needle: str) -> Rule:
+    """Return a BEFORE_AGENT rule that denies input containing ``needle``."""
+    needle_in_input = Condition(
+        operator="contains",
+        field="agent_input",
+        value=needle,
+    )
+    deny_check = Check(
+        conditions=[needle_in_input],
+        action=Action.DENY,
+        message=f"Input must not contain {needle!r}",
+    )
+
+    # The rule and its single check carry the same DENY action.
+    return Rule(
+        rule_id="TEST-01",
+        name="Test deny on input",
+        clause="A.1.1",
+        hook=LifecycleHook.BEFORE_AGENT,
+        action=Action.DENY,
+        checks=[deny_check],
+    )
+
+
+def _build_index_with(rule: Rule) -> PolicyIndex:
+    """Return a PolicyIndex holding one pack that contains only ``rule``."""
+    single_rule_pack = PolicyPack(
+        name="test_pack",
+        version="1.0",
+        description="test",
+        rules=[rule],
+    )
+
+    index = PolicyIndex()
+    index.add_pack(single_rule_pack)
+    return index
+
+
+def _ctx(agent_input: str) -> CheckContext:
+    return CheckContext(  # BEFORE_AGENT context with fixed test identifiers
+        hook=LifecycleHook.BEFORE_AGENT,
+        agent_name="test-agent",
+        runtime_id="run-1",
+        trace_id="trace-1",
+        agent_input=agent_input,  # the only value that varies per call
+    )
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def capturing_audit():
+    """Provide a fresh global audit manager whose only sink captures events.
+
+    The yielded sink exposes everything emitted during the test. The
+    global manager is reset again on teardown.
+    """
+    reset_audit_manager()
+    audit_manager = get_audit_manager()
+    # Strip the default sinks (traces / console); they only add noise.
+    for registered in list(audit_manager.list_sinks()):
+        audit_manager.unregister_sink(registered)
+    captured = _CapturingSink()
+    audit_manager.register_sink(captured)
+    # Emit inline so assertions never race the worker thread.
+    audit_manager._async_mode = False
+    yield captured
+    reset_audit_manager()
+
+
+@pytest.fixture(autouse=True)
+def _reset_enforcement_mode():
+    """Reset the enforcement mode before and after every test in this module."""
+    reset_enforcement_mode()  # setup: start from a clean mode
+    yield
+    reset_enforcement_mode()  # teardown: do not leak this test's mode
+
+
+# ---------------------------------------------------------------------------
+# DISABLED mode
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_mode_short_circuits_with_empty_record(capturing_audit):
+    """In DISABLED mode evaluate() yields an empty record and no events."""
+    set_enforcement_mode(EnforcementMode.DISABLED)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    # The input would match the DENY rule if rules were evaluated at all.
+    record = engine.evaluate(_ctx("definitely contains secret"))
+
+    assert record.evaluations == []
+    assert record.final_action == Action.ALLOW
+    assert record.metadata["enforcement_mode"] == "disabled"
+    assert capturing_audit.events == []
+
+
+def test_disabled_mode_does_not_raise_on_deny_match(capturing_audit):
+    """A DENY rule that would match still cannot raise in DISABLED mode."""
+    set_enforcement_mode(EnforcementMode.DISABLED)
+    index = _build_index_with(_deny_rule_on_input_contains("blocked"))
+    engine = GovernanceEvaluator(index)
+
+    # Passing means this call returns; any exception fails the test.
+    matching_input = _ctx("this is blocked")
+    engine.evaluate(matching_input)
+
+
+# ---------------------------------------------------------------------------
+# AUDIT mode
+# ---------------------------------------------------------------------------
+
+
+def test_audit_mode_transforms_deny_to_audit(capturing_audit):
+    """AUDIT mode evaluates the rules yet never ends on a DENY final_action."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    record = engine.evaluate(_ctx("contains secret data"))
+
+    assert len(record.evaluations) == 1
+    evaluation = record.evaluations[0]
+    assert evaluation.matched is True
+    assert evaluation.action == Action.DENY  # the rule's own action is kept
+    assert record.final_action == Action.AUDIT  # adjusted by the mode
+    assert record.metadata["audit_mode_would_deny"] is True
+
+
+def test_audit_mode_does_not_raise_on_deny_match(capturing_audit):
+    """A DENY hit in AUDIT mode never raises GovernanceBlockException."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("blocked"))
+    engine = GovernanceEvaluator(index)
+
+    # Passing means this call returns; any exception fails the test.
+    engine.evaluate(_ctx("this is blocked"))
+
+
+def test_audit_mode_emits_per_rule_and_summary_events(capturing_audit):
+    """evaluate() emits one event per rule plus a single hook summary."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    engine.evaluate(_ctx("contains secret"))
+
+    # Split the captured events by type.
+    events = capturing_audit.events
+    rule_events = [e for e in events if e.event_type == EventType.RULE_EVALUATION]
+    summary_events = [e for e in events if e.event_type == EventType.HOOK_END]
+
+    assert len(rule_events) == 1
+    assert rule_events[0].hook == "BEFORE_AGENT"
+    rule_data = rule_events[0].data
+    assert rule_data["rule_id"] == "TEST-01"
+    assert rule_data["matched"] is True
+    assert rule_data["action"] == "deny"
+
+    assert len(summary_events) == 1
+    summary = summary_events[0].data
+    assert summary["matched_rules"] == 1
+    assert summary["final_action"] == "audit"
+    assert summary["enforcement_mode"] == "audit"
+
+
+def test_audit_mode_unmatched_rule_logged_as_allow(capturing_audit):
+    """A rule that does not match is still reported, with action='allow'."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    engine.evaluate(_ctx("benign user query"))
+
+    # Only the per-rule events matter here.
+    events = capturing_audit.events
+    rule_events = [e for e in events if e.event_type == EventType.RULE_EVALUATION]
+    assert len(rule_events) == 1
+    rule_data = rule_events[0].data
+    assert rule_data["matched"] is False
+    assert rule_data["action"] == "allow"
+
+
+# ---------------------------------------------------------------------------
+# ENFORCE mode
+# ---------------------------------------------------------------------------
+
+
+def test_enforce_mode_raises_on_deny_match(capturing_audit):
+    """A matching DENY rule makes ENFORCE mode raise GovernanceBlockException."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    index = _build_index_with(_deny_rule_on_input_contains("blocked"))
+    engine = GovernanceEvaluator(index)
+
+    with pytest.raises(GovernanceBlockException) as raised:
+        engine.evaluate(_ctx("input is blocked"))
+
+    # The exception identifies the rule and carries the audit record.
+    blocked = raised.value
+    assert blocked.rule_id == "TEST-01"
+    assert blocked.rule_name == "Test deny on input"
+    assert blocked.audit_record is not None
+    assert blocked.audit_record.final_action == Action.DENY
+
+
+def test_enforce_mode_emits_audit_before_raising(capturing_audit):
+    """The audit trail must be emitted even when the call raises."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    evaluator = GovernanceEvaluator(
+        _build_index_with(_deny_rule_on_input_contains("blocked"))
+    )
+
+    with pytest.raises(GovernanceBlockException):
+        evaluator.evaluate(_ctx("contains blocked"))
+
+    events = capturing_audit.events
+    rule_events = [e for e in events if e.event_type == EventType.RULE_EVALUATION]
+    summary_events = [e for e in events if e.event_type == EventType.HOOK_END]
+    assert len(rule_events) == 1
+    # Check the count first so a missing summary fails as an assertion
+    # instead of an IndexError on the lookups below.
+    assert len(summary_events) == 1
+    assert summary_events[0].data["final_action"] == "deny"
+    assert summary_events[0].data["enforcement_mode"] == "enforce"
+
+
+def test_enforce_mode_returns_record_when_no_rule_matches(capturing_audit):
+    """Without a DENY hit ENFORCE mode just returns the AuditRecord."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    index = _build_index_with(_deny_rule_on_input_contains("blocked"))
+    engine = GovernanceEvaluator(index)
+
+    record = engine.evaluate(_ctx("benign query"))
+
+    # Nothing matched, so the overall outcome stays ALLOW.
+    assert record.final_action == Action.ALLOW
+    assert record.evaluations[0].matched is False
+
+
+# ---------------------------------------------------------------------------
+# Sink-failure isolation
+# ---------------------------------------------------------------------------
+
+
+def test_sink_failure_does_not_propagate_or_block_evaluation(capturing_audit):
+    """A sink that raises must not break evaluate() or its return value.
+
+    AuditManager is expected to guard each sink's emit() with try/except
+    and a per-sink failure counter (circuit-breaker), so the error stays
+    inside the manager and never reaches the evaluator.
+    """
+
+    class _BrokenSink(AuditSink):
+        @property
+        def name(self) -> str:
+            return "broken"
+
+        def emit(self, event: AuditEvent) -> None:
+            raise RuntimeError("sink broke")
+
+    # Add the failing sink next to the capturing one from the fixture.
+    get_audit_manager().register_sink(_BrokenSink())
+
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    # evaluate() has to finish normally despite the failing sink.
+    record = engine.evaluate(_ctx("contains secret"))
+
+    assert record.final_action == Action.AUDIT
+    # The healthy capturing sink still received the rule event.
+    assert any(
+        event.event_type == EventType.RULE_EVALUATION
+        for event in capturing_audit.events
+    )
+
+
+def test_unavailable_audit_manager_is_swallowed():
+    """evaluate() survives get_audit_manager() itself raising."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    index = _build_index_with(_deny_rule_on_input_contains("secret"))
+    engine = GovernanceEvaluator(index)
+
+    # Patch the name as the evaluator module sees it, so the lookup made
+    # during evaluate() is the one that fails.
+    manager_lookup = "uipath.runtime.governance.native.evaluator.get_audit_manager"
+    failure = RuntimeError("manager unavailable")
+    with patch(manager_lookup, side_effect=failure):
+        # The call must still finish and hand back its record.
+        record = engine.evaluate(_ctx("contains secret"))
+
+    assert record.final_action == Action.AUDIT
+    assert record.evaluations[0].matched is True
+
+
+# ---------------------------------------------------------------------------
+# Protocol conformance smoke test
+# ---------------------------------------------------------------------------
+
+
+def test_governance_evaluator_satisfies_evaluator_protocol():
+    """GovernanceEvaluator can stand in wherever EvaluatorProtocol is required.
+
+    Same approach as test_detached_bridge_satisfies_debug_protocol: the
+    assignment to a protocol-annotated name states the structural
+    contract explicitly.
+    """
+    from uipath.core.adapters import EvaluatorProtocol
+
+    as_protocol: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex())
+    assert isinstance(as_protocol, EvaluatorProtocol)
+
+
+def test_evaluator_protocol_methods_resolvable_on_concrete():
+    """Every method the protocol declares must be callable on the concrete impl."""
+    from uipath.core.adapters import EvaluatorProtocol
+
+    evaluator: Any = GovernanceEvaluator(PolicyIndex())
+    for method_name in (
+        "evaluate_before_agent",
+        "evaluate_after_agent",
+        "evaluate_before_model",
+        "evaluate_after_model",
+        "evaluate_tool_call",
+        "evaluate_after_tool",
+    ):
+        assert callable(getattr(evaluator, method_name))
+    # EvaluatorProtocol is @runtime_checkable, so isinstance() checks at
+    # runtime that the members exist; the ``Any`` annotation checks nothing.
+    assert isinstance(evaluator, EvaluatorProtocol)
diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py
new file mode 100644
index 0000000..f4021db
--- /dev/null
+++ b/tests/test_evaluator_operators.py
@@ -0,0 +1,680 @@
+"""Tests for ``GovernanceEvaluator`` operators and field resolution.
+
+Covers each operator implemented in :meth:`_apply_operator` plus the
+``_check_*`` helper functions (vader, encoding, entropy, incident),
+the ``evaluate_*`` dispatchers, and the DISABLED-mode short-circuit.
+"""
+
+from __future__ import annotations
+
+import pytest
+from uipath.core.governance.models import Action, LifecycleHook
+
+from tests._helpers import reset_enforcement_mode
+from uipath.runtime.governance.config import (
+    EnforcementMode,
+    set_enforcement_mode,
+)
+from uipath.runtime.governance.native.evaluator import (
+    _INCIDENT_PATTERNS,
+    GovernanceEvaluator,
+)
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _evaluator() -> GovernanceEvaluator:
+    """Return a GovernanceEvaluator over an empty PolicyIndex, for operator-level tests."""
+    return GovernanceEvaluator(policy_index=PolicyIndex())
+
+
+def _ctx(**fields) -> CheckContext:
+    """Construct a CheckContext with sensible defaults plus overrides."""
+    defaults = dict(
+        hook=LifecycleHook.AFTER_MODEL,
+        agent_name="agent",
+        runtime_id="rt-1",
+        trace_id="tr-1",
+    )
+    defaults.update(fields)
+    return CheckContext(**defaults)
+
+
+def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule:
+    return Rule(
+        rule_id="r1",
+        name="r1",
+        clause="",
+        hook=LifecycleHook.AFTER_MODEL,
+        action=Action.AUDIT,
+        checks=[
+            Check(
+                conditions=[
+                    Condition(operator=operator, field=field, value=value, negate=negate)
+                ],
+            )
+        ],
+    )
+
+
+@pytest.fixture(autouse=True)
+def _isolate_mode() -> None:  # NOTE(review): generator fixture — Iterator[None] would be the precise return type
+    reset_enforcement_mode()  # presumably clears any mode left by an earlier test — confirm in tests._helpers
+    set_enforcement_mode(EnforcementMode.AUDIT)  # every test in this module starts in AUDIT mode
+    yield  # the test body runs here
+    reset_enforcement_mode()  # teardown: do not leak this module's mode into other tests
+
+
+# ---------------------------------------------------------------------------
+# Field resolution — _get_field_value
+# ---------------------------------------------------------------------------
+
+
+def test_get_field_value_top_level_attr() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="hello")
+    assert ev._get_field_value("model_output", ctx) == "hello"
+
+
+def test_get_field_value_dotted_path_into_dict() -> None:
+    ev = _evaluator()
+    ctx = _ctx(session_state={"tool_calls": 7})
+    assert ev._get_field_value("session_state.tool_calls", ctx) == 7
+
+
+def test_get_field_value_missing_segment_returns_none() -> None:
+    ev = _evaluator()
+    ctx = _ctx()
+    assert ev._get_field_value("nonexistent", ctx) is None
+    assert ev._get_field_value("session_state.absent", ctx) is None
+
+
+# ---------------------------------------------------------------------------
+# Existence / guardrail_fallback (special-cased before the None check)
+# ---------------------------------------------------------------------------
+
+
+def test_exists_true_when_value_present() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="x")
+    assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True
+
+
+def test_exists_false_when_missing() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("exists", None, None) is False
+
+
+def test_not_exists_inverse() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("not_exists", None, None) is True
+    assert ev._apply_operator("not_exists", "x", None) is False
+
+
+def test_guardrail_fallback_mapped_and_disabled_fires() -> None:
+    ev = _evaluator()
+    result = ev._apply_operator(
+        "guardrail_fallback",
+        None,
+        {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"},
+    )
+    assert result is True
+
+
+@pytest.mark.parametrize(
+    "cfg",
+    [
+        {"mapped_to_uipath": False, "policy_enabled": False},
+        {"mapped_to_uipath": True, "policy_enabled": True},
+        {"mapped_to_uipath": False, "policy_enabled": True},
+    ],
+)
+def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("guardrail_fallback", None, cfg) is False
+
+
+def test_guardrail_fallback_non_dict_value_silent() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("guardrail_fallback", None, "string") is False
+
+
+# ---------------------------------------------------------------------------
+# None-field short-circuit (everything except exists / guardrail_fallback)
+# ---------------------------------------------------------------------------
+
+
+def test_other_operators_short_circuit_when_field_is_none() -> None:
+    ev = _evaluator()
+    for op in ("contains", "regex", "in_list", "gt"):
+        assert ev._apply_operator(op, None, "anything") is False, op
+
+
+# ---------------------------------------------------------------------------
+# Numeric operators
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "op,lhs,rhs,expected",
+    [
+        ("gt", 5, 3, True),
+        ("gt", 3, 5, False),
+        ("gt", 3, 3, False),  # strict comparison: equal operands fail
+        ("gte", 3, 3, True),  # inclusive comparison: equal operands pass
+        ("gte", 2, 3, False),
+        ("lt", 1, 3, True),
+        ("lt", 3, 3, False),  # strict comparison: equal operands fail
+        ("lte", 3, 3, True),  # inclusive comparison: equal operands pass
+        ("lte", 4, 3, False),
+    ],
+)
+def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None:
+    assert _evaluator()._apply_operator(op, lhs, rhs) is expected  # lhs is the field value, rhs the rule value
+
+
+def test_numeric_operators_handle_string_coercion() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("gt", "5", "3") is True
+
+
+def test_numeric_operators_return_false_on_uncoercible() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("gt", "not-a-number", 3) is False
+    assert ev._apply_operator("gt", 3, "not-a-number") is False
+
+
+# ---------------------------------------------------------------------------
+# String operators
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "op,lhs,rhs,expected",
+    [
+        ("equals", "abc", "abc", True),
+        ("equals", "abc", "ABC", False),  # equals is case-sensitive
+        ("eq", "x", "x", True),
+        ("not_equals", "abc", "xyz", True),
+        ("ne", "x", "x", False),
+        ("contains", "Hello World", "world", True),  # case-insensitive
+        ("contains", "Hello", "xyz", False),
+        ("not_contains", "Hello", "xyz", True),
+        ("not_contains", "Hello", "hello", False),  # case-insensitive, like contains
+    ],
+)
+def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None:
+    assert _evaluator()._apply_operator(op, lhs, rhs) is expected  # lhs is the field value, rhs the rule value
+
+
+def test_regex_matches_pattern() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True
+
+
+def test_regex_matches_alias() -> None:
+    """``matches`` is documented as a synonym for ``regex``."""
+    ev = _evaluator()
+    assert ev._apply_operator("matches", "abc-123", r"\d+") is True
+
+
+def test_regex_invalid_pattern_returns_false() -> None:
+    """Malformed regex is logged and silently returns False."""
+    ev = _evaluator()
+    assert ev._apply_operator("regex", "anything", "(unclosed") is False
+
+
+# ---------------------------------------------------------------------------
+# List operators
+# ---------------------------------------------------------------------------
+
+
+def test_in_list_membership() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True
+    assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False
+
+
+def test_in_list_non_list_value_returns_false() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("in_list", "x", "not a list") is False
+
+
+def test_not_in_list_inverse() -> None:
+    ev = _evaluator()
+    assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True
+    assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False
+
+
+def test_not_in_list_non_list_value_returns_true() -> None:
+    """``not_in_list`` against a non-list value safely returns True
+    (nothing is in a non-list)."""
+    ev = _evaluator()
+    assert ev._apply_operator("not_in_list", "x", "not a list") is True
+
+
+# ---------------------------------------------------------------------------
+# Unknown operator
+# ---------------------------------------------------------------------------
+
+
+def test_unknown_operator_returns_false() -> None:
+    """Unknown operator strings log a debug message and return False."""
+    ev = _evaluator()
+    assert ev._apply_operator("never_heard_of_this", "x", "y") is False
+
+
+# ---------------------------------------------------------------------------
+# Negate flag — flips the result
+# ---------------------------------------------------------------------------
+
+
+def test_condition_negate_flips_result() -> None:
+    ev = _evaluator()
+    ctx = _ctx(model_output="hello")
+    # contains "hello" → matches; negate inverts to False.
+    cond = Condition(
+        operator="contains", field="model_output", value="hello", negate=True,
+    )
+    assert ev._evaluate_condition(cond, ctx) is False
+    cond2 = Condition(
+        operator="contains", field="model_output", value="world", negate=True,
+    )
+    assert ev._evaluate_condition(cond2, ctx) is True
+
+
+# ---------------------------------------------------------------------------
+# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions
+# ---------------------------------------------------------------------------
+
+
+def test_empty_check_conditions_always_match() -> None:
+    """A check with no conditions trivially matches — surfaces rule shape bugs."""
+    ev = _evaluator()
+    check = Check(conditions=[], logic="all")
+    matched, _ = ev._evaluate_check(check, _ctx())
+    assert matched is True
+
+
+def test_check_logic_all_requires_every_condition() -> None:
+    ev = _evaluator()
+    check = Check(
+        conditions=[
+            Condition(operator="contains", field="model_output", value="a"),
+            Condition(operator="contains", field="model_output", value="missing"),
+        ],
+        logic="all",
+    )
+    matched, _ = ev._evaluate_check(check, _ctx(model_output="a only"))
+    assert matched is False
+
+
+def test_check_logic_any_requires_one_condition() -> None:
+    ev = _evaluator()
+    check = Check(
+        conditions=[
+            Condition(operator="contains", field="model_output", value="present"),
+            Condition(operator="contains", field="model_output", value="absent"),
+        ],
+        logic="any",
+    )
+    matched, detail = ev._evaluate_check(check, _ctx(model_output="present text"))
+    assert matched is True
+    # detail is the check's message on match; empty by default in our builder.
+    assert detail == ""
+
+
+# ---------------------------------------------------------------------------
+# VADER sentiment
+# ---------------------------------------------------------------------------
+
+
+def test_vader_concern_negative_text_fires() -> None:
+    """A clearly-negative sentence trips the default threshold of -0.3."""
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "I absolutely hate this terrible, awful product.", {"threshold": -0.3}
+        )
+        is True
+    )
+
+
+def test_vader_concern_positive_text_does_not_fire() -> None:
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "This is wonderful and I love it!", {"threshold": -0.3}
+        )
+        is False
+    )
+
+
+def test_vader_concern_empty_text_silent() -> None:
+    assert GovernanceEvaluator._check_vader_concern("", {}) is False
+    assert GovernanceEvaluator._check_vader_concern("   ", {}) is False
+
+
+def test_vader_concern_threshold_as_scalar() -> None:
+    """``params`` may be a bare number; the operator coerces."""
+    assert (
+        GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True
+    )
+
+
+def test_vader_concern_invalid_threshold_falls_back() -> None:
+    """Non-numeric scalar params fall back to the documented default."""
+    # "garbage" -> default -0.3 → should still classify clear negative
+    assert (
+        GovernanceEvaluator._check_vader_concern(
+            "I hate this awful, terrible thing", "garbage"
+        )
+        is True
+    )
+
+
+# ---------------------------------------------------------------------------
+# Encoding integrity
+# ---------------------------------------------------------------------------
+
+
+def test_encoding_concern_clean_text_silent() -> None:
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            "Just a normal English sentence with no corruption.", {}
+        )
+        is False
+    )
+
+
+def test_encoding_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_encoding_concern("", {}) is False  # empty text raises no concern
+
+
+def test_encoding_concern_replacement_chars_fire() -> None:
+    """U+FFFD replacement chars are a strong corruption signal."""
+    text = "Hello � � world"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+def test_encoding_concern_mojibake_bigrams_fire() -> None:
+    """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape."""
+    text = "Ã© Ã© hello Ã©"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+def test_encoding_concern_hex_escape_literals_fire() -> None:
+    """Literal ``\\xHH`` sequences mean raw bytes leaked into a string."""
+    text = r"Hello \x80 \x81 \x82 world"
+    assert (
+        GovernanceEvaluator._check_encoding_concern(
+            text, {"min_corruption_events": 2}
+        )
+        is True
+    )
+
+
+# ---------------------------------------------------------------------------
+# Entropy (stdlib only — deterministic)
+# ---------------------------------------------------------------------------
+
+
+def test_entropy_concern_normal_english_does_not_fire() -> None:
+    """English prose entropy lands ~3.5–4.5 bits/byte — inside default range."""
+    text = "The quick brown fox jumps over the lazy dog." * 5
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5})
+        is False
+    )
+
+
+def test_entropy_concern_low_entropy_fires() -> None:
+    """Highly repetitive text approaches 0 bits/byte."""
+    text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5})
+        is True
+    )
+
+
+def test_entropy_concern_high_entropy_fires() -> None:
+    """Random-ish bytes approach 8 bits/byte."""
+    # Build text with many distinct chars to push entropy high.
+    text = "".join(chr(c) for c in range(32, 127)) * 5
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0})
+        is True
+    )
+
+
+def test_entropy_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_entropy_concern("", {}) is False  # empty text raises no concern
+
+
+def test_entropy_concern_non_dict_params_uses_defaults() -> None:
+    """Non-dict params don't crash; defaults apply."""
+    # Normal English prose still won't trip the default min=1.5, max=7.5 range.
+    text = "The quick brown fox jumps over the lazy dog."
+    assert (
+        GovernanceEvaluator._check_entropy_concern(text, "garbage") is False
+    )
+
+
+# ---------------------------------------------------------------------------
+# Incident taxonomy (regex-based, deterministic)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "text,expected_category",
+    [
+        ("I cannot help with that.", "safety_refusal"),
+        ("I'm sorry, but I cannot answer.", "safety_refusal"),
+        ("500 internal server error", "tool_failure"),
+        ("Connection refused", "tool_failure"),
+        ("timed out", "tool_failure"),
+        ("401 unauthorized", "auth_failure"),
+        ("authentication failed", "auth_failure"),
+        ("429", "quota_exceeded"),
+        ("rate limit exceeded", "quota_exceeded"),
+        ("I made that up", "hallucination"),
+        ("I don't actually know", "hallucination"),
+    ],
+)
+def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None:
+    """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar."""
+    assert expected_category in _INCIDENT_PATTERNS
+    assert GovernanceEvaluator._check_incident_concern(text, {}) is True
+
+
+def test_incident_concern_unmatched_silent() -> None:
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "All systems operating normally.", {}
+        )
+        is False
+    )
+
+
+def test_incident_concern_empty_silent() -> None:
+    assert GovernanceEvaluator._check_incident_concern("", {}) is False  # empty text raises no concern
+
+
+def test_incident_concern_category_filter() -> None:
+    """Limit scanning to a subset of categories via ``categories`` param."""
+    # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled,
+    # the scanner should miss it.
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "401 unauthorized", {"categories": ["quota_exceeded"]}
+        )
+        is False
+    )
+    # With auth_failure enabled, it fires.
+    assert (
+        GovernanceEvaluator._check_incident_concern(
+            "401 unauthorized", {"categories": ["auth_failure"]}
+        )
+        is True
+    )
+
+
+def test_incident_concern_unknown_category_silently_dropped() -> None:
+    """Categories the system doesn't know about are silently ignored."""
+    # Only the unknown category is requested — falls back to no categories,
+    # so even matching text doesn't fire.
+    result = GovernanceEvaluator._check_incident_concern(
+        "401 unauthorized", {"categories": ["unknown_cat_xyz"]}
+    )
+    assert result is False
+
+
+# ---------------------------------------------------------------------------
+# evaluate_* dispatchers — verify they build the right CheckContext
+# ---------------------------------------------------------------------------
+
+
+def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]:
+    """Patch evaluate() to capture the context it receives instead of running rules."""
+    captured: dict = {}
+    ev = _evaluator()
+
+    def _fake_evaluate(ctx):  # type: ignore[no-untyped-def]
+        captured["ctx"] = ctx
+        from datetime import datetime, timezone
+
+        from uipath.core.governance.models import AuditRecord
+
+        return AuditRecord(
+            timestamp=datetime.now(timezone.utc),
+            agent_name=ctx.agent_name,
+            runtime_id=ctx.runtime_id,
+            trace_id=ctx.trace_id,
+            hook=ctx.hook,
+            evaluations=[],
+            final_action=Action.ALLOW,
+        )
+
+    ev.evaluate = _fake_evaluate  # type: ignore[assignment]
+    return ev, captured
+
+
+def test_evaluate_before_agent_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_before_agent(
+        agent_input="user-text",
+        agent_name="a",
+        runtime_id="r",
+        trace_id="t",
+        model_name="gpt-5",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.BEFORE_AGENT
+    assert ctx.agent_input == "user-text"
+    assert ctx.model_name == "gpt-5"
+
+
+def test_evaluate_after_agent_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_agent(
+        agent_output="reply", agent_name="a", runtime_id="r", trace_id="t",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_AGENT
+    assert ctx.agent_output == "reply"
+
+
+def test_evaluate_before_model_carries_messages() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_before_model(
+        model_input="prompt",
+        agent_name="a",
+        runtime_id="r",
+        trace_id="t",
+        messages=[{"role": "user", "content": "hi"}],
+        model_name="gpt-5",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.BEFORE_MODEL
+    assert ctx.model_input == "prompt"
+    assert ctx.messages == [{"role": "user", "content": "hi"}]
+
+
+def test_evaluate_after_model_builds_context() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_model(
+        model_output="resp", agent_name="a", runtime_id="r", trace_id="t",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_MODEL
+    assert ctx.model_output == "resp"
+
+
+def test_evaluate_tool_call_carries_args() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_tool_call(
+        tool_name="search",
+        tool_args={"q": "x"},
+        agent_name="a",
+        runtime_id="r",
+        trace_id="t",
+        session_state={"tool_calls": 1},
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.TOOL_CALL
+    assert ctx.tool_name == "search"
+    assert ctx.tool_args == {"q": "x"}
+    assert ctx.session_state == {"tool_calls": 1}
+
+
+def test_evaluate_after_tool_carries_result() -> None:
+    ev, captured = _record_context_evaluator()
+    ev.evaluate_after_tool(
+        tool_name="search",
+        tool_result="some-data",
+        agent_name="a",
+        runtime_id="r",
+        trace_id="t",
+    )
+    ctx = captured["ctx"]
+    assert ctx.hook == LifecycleHook.AFTER_TOOL
+    assert ctx.tool_name == "search"
+    assert ctx.tool_result == "some-data"
+
+
+# ---------------------------------------------------------------------------
+# DISABLED mode — evaluate() short-circuits without emitting audit
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_mode_returns_empty_audit_record() -> None:
+    """DISABLED mode short-circuits the rule loop and audit emission."""
+    set_enforcement_mode(EnforcementMode.DISABLED)
+
+    rule = _rule_with_condition("contains", "model_output", "anything")
+    pack = PolicyPack(name="p", version="1", description="", rules=[rule])
+    idx = PolicyIndex()
+    idx.add_pack(pack)
+    ev = GovernanceEvaluator(policy_index=idx)
+
+    audit = ev.evaluate(_ctx(model_output="contains anything"))
+    assert audit.final_action == Action.ALLOW
+    assert audit.evaluations == []
diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py
new file mode 100644
index 0000000..79e8971
--- /dev/null
+++ b/tests/test_guardrail_compensation.py
@@ -0,0 +1,870 @@
+"""Tests for compensating governance calls to /runtime/govern.
+
+The compensating call is fire-and-forget: the server runs the disabled
+guardrail AND writes the audit trace itself, so we don't parse the
+response. These tests cover:
+
+- payload + header composition,
+- URL resolution off the shared backend base URL,
+- error swallowing (no exception escapes, warning is logged),
+- evaluator integration (a fired ``guardrail_fallback`` rule kicks off
+  the call on a background daemon thread).
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import time
+from types import SimpleNamespace
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+from uipath.core.governance.models import Action, LifecycleHook
+
+from tests._helpers import reset_enforcement_mode
+from uipath.runtime.governance.config import (
+    EnforcementMode,
+    set_enforcement_mode,
+)
+from uipath.runtime.governance.native import guardrail_compensation
+from uipath.runtime.governance.native.backend_client import (
+    USER_AGENT,
+    governance_request_headers,
+)
+from uipath.runtime.governance.native.evaluator import GovernanceEvaluator
+from uipath.runtime.governance.native.guardrail_compensation import (
+    _resolve_trace_id,
+    disabled_guardrails,
+    request_governance,
+)
+from uipath.runtime.governance.native.models import (
+    Check,
+    CheckContext,
+    Condition,
+    PolicyIndex,
+    PolicyPack,
+    Rule,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _mock_response(status: int = 200) -> MagicMock:
+    """urlopen()-compatible context manager mock."""
+    response = MagicMock()
+    response.status = status
+    response.read.return_value = b""  # body is not consumed by fire-and-forget
+    response.__enter__.return_value = response
+    response.__exit__.return_value = False
+    return response
+
+
+def _rules(*validators: str, rule_id: str = "R1", rule_name: str = "n", pack: str = "p"):
+    """Build the per-rule metadata list the compensation API now takes."""
+    return [
+        {
+            "ruleId": rule_id,
+            "ruleName": rule_name,
+            "packName": pack,
+            "validator": v,
+        }
+        for v in validators
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _reset_enforcement_mode():
+    reset_enforcement_mode()  # setup: each test starts from the reset enforcement mode
+    yield  # the test body runs here
+    reset_enforcement_mode()  # teardown: undo any mode change the test made
+
+
+@pytest.fixture
+def _govern_env(monkeypatch):
+    """Provide the env vars that request_governance requires.
+
+    The compensating call mirrors the policy fetch — it skips when
+    ``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` /
+    ``UIPATH_ACCESS_TOKEN`` are missing (sending without a bearer
+    token would generate a guaranteed 401 per call). Tests that need
+    the network path to actually fire must opt into this fixture.
+    """
+    monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev")
+    monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz")
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")
+    yield
+
+
+# ---------------------------------------------------------------------------
+# Shared header helper (lives in backend_client; covered here because it's
+# the wire shape both the compensation POST and the policy GET share)
+# ---------------------------------------------------------------------------
+
+
+def test_governance_request_headers_get_shape(monkeypatch):
+    monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
+    headers = governance_request_headers()
+    assert headers == {"Accept": "application/json", "User-Agent": USER_AGENT}
+
+
+def test_governance_request_headers_post_shape(monkeypatch):
+    monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
+    headers = governance_request_headers(json_body=True)
+    assert headers == {
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "User-Agent": USER_AGENT,
+    }
+
+
+def test_governance_request_headers_includes_authorization_when_token_set(
+    monkeypatch,
+):
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "abc.def.ghi")
+    headers = governance_request_headers(json_body=True)
+    assert headers["Authorization"] == "Bearer abc.def.ghi"
+
+
+def test_governance_request_headers_user_agent_is_browser_shaped(monkeypatch):
+    monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
+    headers = governance_request_headers()
+    assert headers["User-Agent"].startswith("Mozilla/5.0")
+    assert "Chrome/" in headers["User-Agent"]
+
+
+# ---------------------------------------------------------------------------
+# request_governance — fire-and-forget contract
+# ---------------------------------------------------------------------------
+
+
+def test_request_governance_empty_types_short_circuits_without_call():
+    with patch.object(
+        guardrail_compensation.urllib.request, "urlopen"
+    ) as mock_urlopen:
+        result = request_governance(
+            [], {}, "before_model", "t1", "2026-06-06T00:00:00Z", "agent", "rt"
+        )
+    assert result is None
+    mock_urlopen.assert_not_called()
+
+
+def test_request_governance_posts_expected_payload_and_returns_none(
+    monkeypatch, _govern_env
+):
+    rules = [
+        {
+            "ruleId": "R-PII",
+            "ruleName": "PII guardrail",
+            "packName": "AITL",
+            "validator": "pii_detection",
+        },
+        {
+            "ruleId": "R-HARM",
+            "ruleName": "Harmful content",
+            "packName": "AITL",
+            "validator": "harmful_content",
+        },
+    ]
+    # Job context is resolved from the environment at call time; pin it so
+    # the assertion is deterministic and exercises the new payload keys.
+    monkeypatch.setattr(
+        guardrail_compensation,
+        "resolve_job_context",
+        lambda: {"folderKey": "folder-1", "jobKey": "job-1"},
+    )
+    with patch.object(
+        guardrail_compensation.urllib.request,
+        "urlopen",
+        return_value=_mock_response(),
+    ) as mock_urlopen:
+        result = request_governance(
+            rules,
+            {"content": "hello"},
+            "before_model",
+            "trace-1",
+            "2026-06-06T00:00:00Z",
+            "langchain",
+            "patch-langchain",
+        )
+
+    assert result is None  # fire-and-forget
+
+    request_arg = mock_urlopen.call_args.args[0]
+    assert request_arg.get_method() == "POST"
+
+    sent = json.loads(request_arg.data.decode("utf-8"))
+    assert sent == {
+        # distinct validators drive the guardrail API call
+        "type": ["pii_detection", "harmful_content"],
+        # per-rule metadata drives one trace record per rule
+        "rules": rules,
+        "data": {"content": "hello"},
+        "hook": "before_model",
+        "traceId": "trace-1",
+        "src_timestamp": "2026-06-06T00:00:00Z",
+        "agentName": "langchain",
+        "runtimeId": "patch-langchain",
+        "folderKey": "folder-1",
+        "jobKey": "job-1",
+    }
+
+
+def test_request_governance_sends_shared_headers(_govern_env):
+    """Headers must come from the shared helper — UA + Accept + Content-Type + Auth."""
+    with patch.object(
+        guardrail_compensation.urllib.request,
+        "urlopen",
+        return_value=_mock_response(),
+    ) as mock_urlopen:
+        request_governance(
+            _rules("x"), {}, "before_model", "t", "ts", "a", "r"
+        )
+
+    request_arg = mock_urlopen.call_args.args[0]
+    # urllib title-cases header keys on the Request object.
+    assert request_arg.get_header("Accept") == "application/json"
+    assert request_arg.get_header("Content-type") == "application/json"
+    assert request_arg.get_header("User-agent") == USER_AGENT
+    # Bearer is required (see ``test_request_governance_skipped_when_token_missing``).
+    assert request_arg.get_header("Authorization") == "Bearer test-token"
+    # Tenant header must travel on the compensating POST (same as the
+    # policy GET) — the agenticgovernance ingress validates it.
+    assert request_arg.get_header("X-uipath-internal-tenantid") == "tenant-xyz"
+
+
+def test_request_governance_includes_bearer_token_when_set(monkeypatch, _govern_env):
+    """An explicitly configured access token is sent as the Bearer credential."""
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "the-token")
+    urllib_request = guardrail_compensation.urllib.request
+    with patch.object(
+        urllib_request, "urlopen", return_value=_mock_response()
+    ) as mock_urlopen:
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+
+    auth_header = mock_urlopen.call_args.args[0].get_header("Authorization")
+    assert auth_header == "Bearer the-token"
+
+
+def test_request_governance_skipped_when_token_missing(monkeypatch):
+    """No bearer token → no POST at all, rather than a guaranteed 401.
+
+    An unauthenticated request would come back 401 on every compensation
+    event and pollute logs. Mirrors the org-id / tenant-id skip tests below.
+    """
+    monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz")
+    monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev")
+    monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
+    urllib_request = guardrail_compensation.urllib.request
+    with patch.object(urllib_request, "urlopen") as mock_urlopen:
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+
+    assert not mock_urlopen.called, (
+        "request_governance must NOT POST when bearer token is missing"
+    )
+
+
+def test_request_governance_skipped_when_org_id_missing(monkeypatch):
+    """Without an org id, we cannot build the URL — skip the call entirely."""
+    monkeypatch.delenv("UIPATH_ORGANIZATION_ID", raising=False)
+    monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz")
+    # Token is set so the skip can only come from the missing org id.
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")
+    with patch.object(guardrail_compensation.urllib.request, "urlopen") as urlopen:
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+    urlopen.assert_not_called()
+
+
+def test_request_governance_skipped_when_tenant_id_missing(monkeypatch):
+    """Without a tenant id, the server's tenant header would be invalid."""
+    monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev")
+    monkeypatch.delenv("UIPATH_TENANT_ID", raising=False)
+    # Token is set so the skip can only come from the missing tenant id.
+    monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")
+    with patch.object(guardrail_compensation.urllib.request, "urlopen") as urlopen:
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+    urlopen.assert_not_called()
+
+
+def test_request_governance_swallows_network_error(_govern_env):
+    """A failing ``urlopen`` (``OSError``) yields ``None`` instead of raising.
+
+    Log emission depends on logger configuration and is verified manually:
+    pytest's ``caplog`` isolation conflicts with the runtime's log
+    interceptor.
+    """
+    urllib_request = guardrail_compensation.urllib.request
+    refused = OSError("connection refused")
+    call_args = (
+        _rules("pii_detection"),
+        {},
+        "before_model",
+        "t",
+        "ts",
+        "langchain",
+        "patch-langchain",
+    )
+    with patch.object(urllib_request, "urlopen", side_effect=refused):
+        assert request_governance(*call_args) is None
+
+
+def test_request_governance_swallows_unexpected_exception(_govern_env):
+    """A non-network error (``RuntimeError``) from urlopen must not propagate."""
+    urllib_request = guardrail_compensation.urllib.request
+    boom = RuntimeError("boom")
+    with patch.object(urllib_request, "urlopen", side_effect=boom):
+        result = request_governance(
+            _rules("x"), {}, "before_model", "t", "ts", "a", "r"
+        )
+
+    # Fire-and-forget contract: the failure surfaces only as ``None``.
+    assert result is None
+
+
+def test_request_governance_does_not_read_response_body(_govern_env):
+    """Fire-and-forget: the response body must never be consumed."""
+    fake_response = _mock_response()
+    urllib_request = guardrail_compensation.urllib.request
+    with patch.object(urllib_request, "urlopen", return_value=fake_response):
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+
+    fake_response.read.assert_not_called()
+
+
+def test_request_governance_url_is_org_scoped(monkeypatch, _govern_env):
+    """The POST targets the org-scoped ``agenticgovernance_`` route.
+
+    Same shape as the policy fetch URL: the agenticgovernance ingress
+    needs both the org segment and the service prefix, otherwise the
+    request lands on a route that doesn't exist (404 / wrong service).
+    """
+    monkeypatch.delenv("UIPATH_GOVERNANCE_BACKEND_URL", raising=False)
+    monkeypatch.setenv("UIPATH_URL", "https://cloud.uipath.com/my-org/my-tenant")
+    urllib_request = guardrail_compensation.urllib.request
+    with patch.object(
+        urllib_request, "urlopen", return_value=_mock_response()
+    ) as mock_urlopen:
+        request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
+
+    # org_id="appsdev" comes from the _govern_env fixture (UIPATH_ORGANIZATION_ID),
+    # not from UIPATH_URL — same env source as the policy fetch.
+    expected_url = (
+        "https://cloud.uipath.com/appsdev/agenticgovernance_/api/v1/runtime/govern"
+    )
+    posted_to = mock_urlopen.call_args.args[0].full_url
+    assert posted_to == expected_url
+
+
+# ---------------------------------------------------------------------------
+# submit_compensation — bounded background pool
+# ---------------------------------------------------------------------------
+
+
+def test_submit_compensation_empty_types_short_circuits():
+    """With an empty rules list nothing is handed to the pool."""
+    submit = guardrail_compensation.submit_compensation
+
+    # ``_pool`` becomes a MagicMock so any stray ``submit`` call is recorded.
+    with patch.object(guardrail_compensation, "_pool") as fake_pool:
+        submit([], {}, "before_model", "t", "ts", "a", "r")
+
+    # A recorded ``submit`` would mean work was queued for an empty list.
+    assert fake_pool.submit.call_count == 0
+
+
+def test_submit_compensation_routes_through_pool(monkeypatch):
+    """A non-empty rules list submits a single task to the pool."""
+    # Fresh semaphore so a permit taken here can't leak into other tests.
+    monkeypatch.setattr(
+        guardrail_compensation, "_inflight", threading.BoundedSemaphore(4)
+    )
+    with patch.object(guardrail_compensation, "_pool") as mock_pool:
+        guardrail_compensation.submit_compensation(
+            _rules("pii_detection"),
+            {"content": "x"},
+            "before_model",
+            "trace-1",
+            "ts",
+            "agent",
+            "run",
+        )
+    mock_pool.submit.assert_called_once()
+
+
+def test_submit_compensation_drops_when_pool_saturated(monkeypatch):
+    """With no in-flight permits left, the call is dropped instead of queued."""
+    # A one-permit semaphore whose only permit is already taken: any
+    # non-blocking acquire on it now returns False.
+    exhausted = threading.BoundedSemaphore(1)
+    exhausted.acquire()
+    monkeypatch.setattr(guardrail_compensation, "_inflight", exhausted)
+
+    call_args = (
+        _rules("pii_detection"),
+        {},
+        "before_model",
+        "trace-1",
+        "ts",
+        "agent",
+        "run",
+    )
+
+    with patch.object(guardrail_compensation, "_pool") as fake_pool:
+        guardrail_compensation.submit_compensation(*call_args)
+
+    # Nothing may reach the executor while the semaphore is exhausted.
+    assert fake_pool.submit.call_count == 0
+
+
+def test_submit_compensation_swallows_pool_shutdown_runtimeerror(monkeypatch):
+    """A pool that rejects new work after shutdown must not make submit raise."""
+    from uipath.runtime.governance.native.guardrail_compensation import (
+        submit_compensation,
+    )
+
+    def _refuse(fn, *args, **kwargs):  # noqa: ARG001
+        raise RuntimeError("cannot schedule new futures after shutdown")
+
+    # Stand-ins for the semaphore and the pool, so other tests stay untainted.
+    replacements = {
+        "_inflight": threading.BoundedSemaphore(4),
+        "_pool": SimpleNamespace(submit=_refuse),
+    }
+    for attr, stand_in in replacements.items():
+        monkeypatch.setattr(guardrail_compensation, attr, stand_in)
+
+    # Completing this call without an exception is the whole assertion.
+    submit_compensation(
+        _rules("x"), {}, "before_model", "t", "ts", "a", "r"
+    )
+
+
+# ---------------------------------------------------------------------------
+# disabled_guardrails
+# ---------------------------------------------------------------------------
+
+
+def test_disabled_guardrails_extracts_validators_for_fired_rules():
+    """A fired rule with a disabled, UiPath-mapped guardrail yields one entry."""
+    fallback_value = {
+        "validator": "pii_detection",
+        "mapped_to_uipath": True,
+        "policy_enabled": False,
+    }
+    condition = SimpleNamespace(operator="guardrail_fallback", value=fallback_value)
+
+    # Only "R1" is known to the index; any other id resolves to ``None``.
+    rules_by_id = {
+        "R1": SimpleNamespace(checks=[SimpleNamespace(conditions=[condition])])
+    }
+    policy_index = SimpleNamespace(get_rule=rules_by_id.get)
+
+    fired = SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail")
+    audit = SimpleNamespace(evaluations=[fired])
+
+    expected = [
+        {
+            "ruleId": "R1",
+            "ruleName": "PII guardrail",
+            "packName": "",
+            "validator": "pii_detection",
+        }
+    ]
+    assert disabled_guardrails(audit, policy_index) == expected
+
+
+def test_disabled_guardrails_skips_unmatched_evaluations():
+    """Evaluations with ``matched=False`` contribute no entries."""
+    unmatched = SimpleNamespace(matched=False, rule_id="R1", rule_name="x")
+    audit = SimpleNamespace(evaluations=[unmatched])
+    policy_index = SimpleNamespace(get_rule=lambda _rule_id: None)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+def test_disabled_guardrails_skips_non_guardrail_conditions():
+    """A matched rule whose conditions use another operator yields nothing."""
+    non_guardrail = SimpleNamespace(operator="regex", value="some-pattern")
+    rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[non_guardrail])])
+    fired = SimpleNamespace(matched=True, rule_id="R1", rule_name="x")
+    audit = SimpleNamespace(evaluations=[fired])
+    policy_index = SimpleNamespace(get_rule=lambda _rule_id: rule)
+    assert disabled_guardrails(audit, policy_index) == []
+
+
+# ---------------------------------------------------------------------------
+# Evaluator integration: a guardrail_fallback rule kicks off the compensation
+# ---------------------------------------------------------------------------
+
+
+def _guardrail_fallback_rule() -> Rule:
+    """Build a rule whose single check holds one ``guardrail_fallback`` condition.
+
+    Mirrors what ``_build_check`` produces for a YAML
+    ``type: guardrail_fallback`` entry with the guardrail mapped to
+    UiPath but disabled.
+    """
+    disabled_pii = Condition(
+        operator="guardrail_fallback",
+        field="",
+        value={
+            "validator": "pii_detection",
+            "mapped_to_uipath": True,
+            "policy_enabled": False,
+        },
+    )
+
+    only_check = Check(
+        conditions=[disabled_pii],
+        action=Action.AUDIT,
+        message="PII guardrail disabled",
+    )
+
+    return Rule(
+        rule_id="UIP-GR-01",
+        name="PII guardrail (UiPath-mapped, disabled)",
+        clause="UiPath-Mapped Guardrail",
+        hook=LifecycleHook.BEFORE_MODEL,
+        action=Action.AUDIT,
+        checks=[only_check],
+    )
+
+
+def _build_index_with(rule: Rule) -> PolicyIndex:
+    """Wrap ``rule`` in a one-rule pack named ``test_pack`` and index it."""
+    pack = PolicyPack(
+        name="test_pack",
+        version="1.0",
+        description="test",
+        rules=[rule],
+    )
+    index = PolicyIndex()
+    index.add_pack(pack)
+    return index
+
+
+def test_evaluator_dispatches_compensation_for_fired_guardrail():
+    """A matched guardrail_fallback rule must trigger ``submit_compensation``."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule()))
+
+    called = threading.Event()
+    captured: dict[str, Any] = {}
+
+    def _spy(**kwargs: Any) -> None:
+        captured.update(kwargs)
+        called.set()
+
+    ctx = CheckContext(
+        hook=LifecycleHook.BEFORE_MODEL,
+        agent_name="agent-x",
+        runtime_id="run-1",
+        trace_id="trace-1",
+        model_input="contact jane@acme.com",
+    )
+
+    # Patch the name the evaluator module looks up, not the defining module.
+    with patch(
+        "uipath.runtime.governance.native.evaluator.submit_compensation", _spy
+    ):
+        audit = evaluator.evaluate(ctx)
+        assert called.wait(timeout=1.0), (
+            "Expected submit_compensation to be called for the fired guardrail"
+        )
+
+    assert audit.final_action == Action.AUDIT
+    assert audit.rules_matched == 1
+    assert captured["rules"] == [
+        {
+            "ruleId": "UIP-GR-01",
+            "ruleName": "PII guardrail (UiPath-mapped, disabled)",
+            "packName": "test_pack",
+            "validator": "pii_detection",
+        }
+    ]
+    assert captured["data"] == {"content": "contact jane@acme.com"}
+    assert captured["hook"] == "before_model"
+    assert captured["trace_id"] == "trace-1"
+    assert captured["agent_name"] == "agent-x"
+    assert captured["runtime_id"] == "run-1"
+    assert isinstance(captured["src_timestamp"], str)
+    assert "T" in captured["src_timestamp"]
+
+
+def test_evaluator_does_not_dispatch_when_guardrail_is_enabled():
+    """An enabled guardrail neither matches the rule nor dispatches anything."""
+    rule = _guardrail_fallback_rule()
+    rule.checks[0].conditions[0].value["policy_enabled"] = True  # type: ignore[index]
+
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    evaluator = GovernanceEvaluator(_build_index_with(rule))
+
+    dispatched: list[dict[str, Any]] = []
+
+    def _spy(**kwargs: Any) -> None:
+        dispatched.append(kwargs)
+
+    ctx = CheckContext(
+        hook=LifecycleHook.BEFORE_MODEL,
+        agent_name="agent-x",
+        runtime_id="run-1",
+        trace_id="trace-1",
+        model_input="hi",
+    )
+
+    target = "uipath.runtime.governance.native.evaluator.submit_compensation"
+    with patch(target, _spy):
+        audit = evaluator.evaluate(ctx)
+        time.sleep(0.05)  # grace period for any stray asynchronous dispatch
+
+    assert dispatched == []
+    assert audit.rules_matched == 0
+
+
+def test_evaluator_does_not_dispatch_when_not_mapped_to_uipath():
+    """A guardrail that is not UiPath-mapped never dispatches compensation."""
+    rule = _guardrail_fallback_rule()
+    fallback_value = rule.checks[0].conditions[0].value
+    fallback_value["mapped_to_uipath"] = False  # type: ignore[index]
+    fallback_value["policy_enabled"] = False  # type: ignore[index]
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    evaluator = GovernanceEvaluator(_build_index_with(rule))
+
+    dispatched: list[dict[str, Any]] = []
+
+    def _spy(**kwargs: Any) -> None:
+        dispatched.append(kwargs)
+
+    ctx = CheckContext(
+        hook=LifecycleHook.BEFORE_MODEL,
+        agent_name="agent-x",
+        runtime_id="run-1",
+        trace_id="trace-1",
+        model_input="hi",
+    )
+
+    target = "uipath.runtime.governance.native.evaluator.submit_compensation"
+    with patch(target, _spy):
+        evaluator.evaluate(ctx)
+        time.sleep(0.05)  # grace period for any stray asynchronous dispatch
+
+    assert dispatched == []
+
+
+def test_evaluator_compensation_dispatch_swallows_thread_errors():
+    """A raising ``submit_compensation`` must not break ``evaluate``."""
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule()))
+
+    def _raising_spy(**kwargs: Any) -> None:
+        raise RuntimeError("network down")
+
+    ctx = CheckContext(
+        hook=LifecycleHook.BEFORE_MODEL,
+        agent_name="agent-x",
+        runtime_id="run-1",
+        trace_id="trace-1",
+        model_input="hi",
+    )
+
+    # Patch the name the evaluator module looks up, not the defining module.
+    target = "uipath.runtime.governance.native.evaluator.submit_compensation"
+    with patch(target, _raising_spy):
+        audit = evaluator.evaluate(ctx)
+        time.sleep(0.05)
+
+    # The dispatch failure is absorbed: the evaluation result is unaffected.
+    observed = (audit.final_action, audit.rules_matched)
+    assert observed == (Action.AUDIT, 1)
+
+
+def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule():
+    """Python must not emit a per-rule audit trace for ``guardrail_fallback``.
+
+    The governance-server emits the trace in response to the
+    ``/runtime/govern`` POST; emitting one here too would produce a
+    duplicate. The rule still appears in the AuditRecord (so
+    ``disabled_guardrails`` can find it) and compensation is still
+    dispatched — only the per-rule ``rule_evaluation`` event is
+    suppressed, and the hook summary's counts exclude it.
+    """
+    from uipath.runtime.governance.audit import (
+        AuditEvent,
+        AuditSink,
+        EventType,
+        get_audit_manager,
+        reset_audit_manager,
+    )
+
+    class _CapturingSink(AuditSink):
+        def __init__(self) -> None:
+            self.events: list[AuditEvent] = []
+
+        @property
+        def name(self) -> str:
+            return "capturing"
+
+        def emit(self, event: AuditEvent) -> None:
+            self.events.append(event)
+
+    reset_audit_manager()
+    try:
+        manager = get_audit_manager()
+        for existing in list(manager.list_sinks()):
+            manager.unregister_sink(existing)
+        sink = _CapturingSink()
+        manager.register_sink(sink)
+        manager._async_mode = False  # synchronous emission for assertions
+
+        set_enforcement_mode(EnforcementMode.AUDIT)
+        evaluator = GovernanceEvaluator(
+            _build_index_with(_guardrail_fallback_rule())
+        )
+
+        ctx = CheckContext(
+            hook=LifecycleHook.BEFORE_MODEL,
+            agent_name="agent-x",
+            runtime_id="run-1",
+            trace_id="trace-1",
+            model_input="hi",
+        )
+
+        # Stub the compensation dispatch so nothing is posted; the
+        # assertions below concern the Python-emitted trace events, not
+        # whether /runtime/govern was reached.
+        with patch(
+            "uipath.runtime.governance.native.evaluator.submit_compensation",
+            lambda **kwargs: None,
+        ):
+            audit = evaluator.evaluate(ctx)
+            time.sleep(0.05)  # grace period for any asynchronous work
+
+        # The rule still matched and is in the audit record …
+        assert audit.rules_matched == 1
+        assert any(
+            ev.matched and ev.rule_id == "UIP-GR-01" for ev in audit.evaluations
+        )
+
+        # … but NO rule_evaluation event for it was emitted by Python.
+        rule_events = [
+            e for e in sink.events if e.event_type == EventType.RULE_EVALUATION
+        ]
+        assert not any(
+            e.data.get("rule_id") == "UIP-GR-01" for e in rule_events
+        ), "guardrail_fallback rule must not emit a Python-side audit trace"
+
+        # The hook summary's counts must also exclude the fallback rule
+        # (so total_rules / matched_rules match what was actually emitted).
+        summaries = [
+            e for e in sink.events if e.event_type == EventType.HOOK_END
+        ]
+        assert len(summaries) == 1
+        assert summaries[0].data["total_rules"] == 0
+        assert summaries[0].data["matched_rules"] == 0
+    finally:
+        reset_audit_manager()
+
+
+# ---------------------------------------------------------------------------
+# _resolve_trace_id — must capture the live trace on the caller thread
+# (the /govern call later runs on a worker thread with no OTel context).
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_trace_id_prefers_env_over_active_span(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """``UIPATH_TRACE_ID`` beats a live span — this is the binding fix.
+
+    Native audit spans are exported under ``UIPATH_TRACE_ID`` (the platform
+    rebinds spans to the agent's run trace), so the compensation records
+    written by the server have to land on that same id rather than on the
+    id of the live OTel span.
+    """
+    from opentelemetry.sdk.trace import TracerProvider
+
+    monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001")
+    with TracerProvider().get_tracer("test").start_as_current_span("root"):
+        resolved = _resolve_trace_id("fallback-id")
+    assert resolved == "env-trace-0001"
+
+
+def test_resolve_trace_id_falls_back_to_active_span_when_env_unset(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """Without ``UIPATH_TRACE_ID`` the active span supplies the trace id."""
+    from opentelemetry.sdk.trace import TracerProvider
+
+    monkeypatch.delenv("UIPATH_TRACE_ID", raising=False)
+    with TracerProvider().get_tracer("test").start_as_current_span("root") as span:
+        live_trace_id = span.get_span_context().trace_id
+        resolved = _resolve_trace_id("fallback-id")
+
+    assert resolved == format(live_trace_id, "032x")
+    assert len(resolved) == 32  # dashless OTel hex, not a dashed uuid
+
+
+def test_resolve_trace_id_uses_fallback_without_context(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    """No env trace id and no active span → the fallback is returned."""
+    # Runs outside any span; with the env var cleared only the fallback remains.
+    monkeypatch.delenv("UIPATH_TRACE_ID", raising=False)
+    resolved = _resolve_trace_id("fallback-id")
+    assert resolved == "fallback-id"
+
+
+def test_submit_compensation_captures_live_trace_before_thread_hop(monkeypatch):
+    """End-to-end thread-boundary proof for the binding fix.
+
+    ``submit_compensation`` runs on the caller (hook) thread, then hands the
+    ``/govern`` call to a background worker pool. This test asserts BOTH
+    halves of why the resolve must happen at the entry:
+
+    1. On the **worker thread**, the OTel context is gone — resolving there
+       would miss the live span (so the early capture is mandatory).
+    2. Despite that, ``request_governance`` (on the worker) receives the
+       **live span's** trace id, not the stale fallback we passed in —
+       proving it was captured on the caller thread before the hop.
+    """
+    from opentelemetry.sdk.trace import TracerProvider
+
+    # UIPATH_TRACE_ID outranks a live span, so an inherited value would win.
+    monkeypatch.delenv("UIPATH_TRACE_ID", raising=False)
+    tracer = TracerProvider().get_tracer("test")
+    done = threading.Event()
+    captured: dict[str, Any] = {}
+
+    def _spy(**kwargs: Any) -> None:
+        # This runs on the background worker thread.
+        captured["trace_id"] = kwargs["trace_id"]
+        # With no live context on the worker, the sentinel comes back untouched.
+        captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS")
+        done.set()
+
+    with patch.object(guardrail_compensation, "request_governance", _spy):
+        with tracer.start_as_current_span("agent-run") as span:
+            expected = format(span.get_span_context().trace_id, "032x")
+            guardrail_compensation.submit_compensation(
+                rules=_rules("pii_detection"),
+                data={"content": "contact jane@acme.com"},
+                hook="before_model",
+                trace_id="stale-fallback",  # must be overridden by the live trace
+                src_timestamp="2026-06-06T00:00:00Z",
+                agent_name="agent",
+                runtime_id="rt",
+            )
+        assert done.wait(timeout=2.0), "compensation worker never ran"
+
+    # (1) worker thread could not see the span — fell back to the sentinel
+    assert captured["worker_resolves_to"] == "WORKER-MISS"
+    # (2) but the value it received is the live span trace, captured pre-hop
+    assert captured["trace_id"] == expected
+    assert captured["trace_id"] != "stale-fallback"
diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py
new file mode 100644
index 0000000..e163932
--- /dev/null
+++ b/tests/test_text_extraction.py
@@ -0,0 +1,307 @@
+"""Tests for ``_extract_governable_text`` content extraction.
+
+Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent``
+and ``_check_after_agent``. Pulls clean text out of structured shapes
+(dicts, list-of-blocks, pydantic models) instead of letting dict-repr
+noise leak into the regex-scanned blob.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import pytest
+
+# The wrapper module ships in a later slice of the governance stack, so
+# skip this whole file (rather than erroring at collection) when absent.
+_wrapper = pytest.importorskip(
+    "uipath.runtime.governance.wrapper",
+    reason="governance wrapper not yet present in this slice",
+)
+_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP
+_extract_governable_text = _wrapper._extract_governable_text
+
+
+def test_plain_string_passes_through() -> None:
+    assert _extract_governable_text("hello world") == "hello world"  # unchanged
+
+
+def test_none_returns_empty() -> None:
+    assert _extract_governable_text(None) == ""  # None yields the empty string
+
+
+def test_dict_with_content_key_extracts_content_first() -> None:
+    """Classic coded-agent output: the ``content`` value comes through clean."""
+    payload = {"content": "Estimated cost: $780", "_meta": {"id": "abc"}}
+    extracted = _extract_governable_text(payload)
+
+    assert extracted.startswith("Estimated cost: $780")
+    # The old ``str(...)`` path leaked dict repr such as ``{'content': '...'}``.
+    for repr_noise in ("{'content'", "'_meta'"):
+        assert repr_noise not in extracted
+
+
+def test_dict_priority_keys_lead() -> None:
+    """Priority keys such as ``content`` are emitted before the other keys."""
+    payload = {"trailing_meta": "noise-meta", "content": "primary-text"}
+    extracted = _extract_governable_text(payload)
+    positions = [extracted.index(s) for s in ("primary-text", "noise-meta")]
+    assert positions[0] < positions[1]
+
+
+def test_list_of_text_blocks_concatenates() -> None:
+    """Anthropic-style content blocks: every text block is extracted."""
+    blocks = [
+        {"type": "text", "text": "first part"},
+        {"type": "image", "source": {"data": "..."}},
+        {"type": "text", "text": "second part"},
+    ]
+    extracted = _extract_governable_text(blocks)
+    # Both text blocks must survive despite the image block between them.
+    for part in ("first part", "second part"):
+        assert part in extracted
+
+
+def test_openai_function_call_shape_extracts_arguments() -> None:
+    """The ``arguments`` field of an OpenAI-style function call is extracted."""
+    function_call = {
+        "type": "function_call",
+        "name": "end_execution",
+        "arguments": '{"content":"Cost: $1,200"}',
+        "id": "fc_abc",
+    }
+    extracted = _extract_governable_text([function_call])
+
+    # The governable text lives inside the JSON-encoded ``arguments``
+    # string rather than under a ``content`` / ``text`` key of the block.
+    assert "Cost: $1,200" in extracted
+
+
+def test_numeric_scalars_are_skipped() -> None:
+    """Numbers and booleans are not governance text and must not pad the blob."""
+    payload = {"content": "hello", "count": 42, "ok": True, "rate": 3.14}
+    extracted = _extract_governable_text(payload)
+    # Exact equality: nothing derived from 42 / True / 3.14 may be appended.
+    assert extracted == "hello"
+
+
+def test_pydantic_like_model_dump_is_walked() -> None:
+    """An object exposing ``model_dump()`` is extracted via the dict it returns."""
+
+    class FakeModel:
+        def model_dump(self) -> dict:
+            return {"content": "from pydantic"}
+
+    assert _extract_governable_text(FakeModel()) == "from pydantic"
+
+
+def test_dataclass_via_dict_method() -> None:
+    """An object exposing a callable ``dict()`` is extracted via its result."""
+
+    class LegacyModel:
+        def dict(self) -> dict:
+            return {"content": "from dict"}
+
+    assert _extract_governable_text(LegacyModel()) == "from dict"
+
+
+def test_plain_object_attribute_fallback() -> None:
+    """Opaque objects contribute their public attributes, not ``_private`` ones."""
+
+    @dataclass
+    class Outcome:
+        content: str
+        _private: str = "ignored"
+
+    extracted = _extract_governable_text(Outcome(content="visible"))
+    assert "visible" in extracted
+    assert "ignored" not in extracted
+
+
+def test_cycle_in_structure_does_not_recurse_forever() -> None:
+    """Mutually referencing dicts must not cause unbounded recursion."""
+    outer: dict = {"content": "outer"}
+    outer["loop"] = {"loop": outer}
+    # outer -> inner -> outer: extraction must still terminate.
+    extracted = _extract_governable_text(outer)
+    assert "outer" in extracted
+
+
+def test_text_is_capped_at_budget() -> None:
+    """Input longer than the cap is truncated to exactly the cap length."""
+    oversized = "x" * (_GOVERNANCE_TEXT_CAP + 1000)
+    extracted = _extract_governable_text(oversized)
+    assert len(extracted) == _GOVERNANCE_TEXT_CAP
+
+
+def test_nested_dict_content_extracted() -> None:
+    """LangGraph-style state: message content nested under ``messages``."""
+    state = {
+        "messages": [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "Cost: $50"},
+        ]
+    }
+    extracted = _extract_governable_text(state)
+    # The assistant text sits two levels down (dict -> list -> dict).
+    assert "Cost: $50" in extracted
+
+
+def test_unknown_block_type_with_no_text_returns_empty() -> None:
+    """An image-only block must not leak Python dict syntax into the output."""
+    image_block = {"type": "image", "source": {"type": "base64", "data": "..."}}
+    extracted = _extract_governable_text([image_block])
+    # The result may be empty or hold just the base64 payload; the only
+    # requirement is that it contains none of the dict-repr syntax the old
+    # ``str(...)`` path emitted.
+    assert "{'type'" not in extracted
+
+
+# ---------------------------------------------------------------------------
+# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat).
+# ---------------------------------------------------------------------------
+
+
+def test_budget_cap_is_64k() -> None:
+    """Pin the cap at 64,000 characters so a later reduction fails loudly."""
+    assert _GOVERNANCE_TEXT_CAP == 64000
+
+
+# ---------------------------------------------------------------------------
+# Reverse list iteration — latest entry gets the budget first.
+# ---------------------------------------------------------------------------
+
+
+def test_lists_are_walked_in_reverse() -> None:
+    """The last list entry comes first in the extracted blob.
+
+    This matters for chat history: the newest user message sits at the
+    end of the messages list and has to stay visible even when earlier
+    turns would otherwise consume the budget before it is reached.
+    """
+    history = [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}]
+    extracted = _extract_governable_text(history)
+    order = [extracted.index(word) for word in ("latest", "middle", "earliest")]
+    assert order[0] < order[1] < order[2]
+
+
+def test_long_chat_history_keeps_latest_user_message() -> None:
+    """A long history must not push the latest message out of the budget.
+
+    Regression for the prior 8K-cap + forward-walk combination, which
+    silently dropped the latest user message past ~7,800 chars of history.
+    Each prior turn is a distinct dict (see the comment on ``messages``).
+    """
+    bulky_prior = "x" * 2000
+    # ``[turn] * 40`` aliases ONE dict; an identity-based cycle guard may skip it.
+    messages = [{"role": "user", "content": bulky_prior} for _ in range(40)]
+    messages.append({"role": "user", "content": "Cost: $1,200 — latest"})
+    out = _extract_governable_text({"messages": messages})
+    assert "Cost: $1,200 — latest" in out
+
+
+# ---------------------------------------------------------------------------
+# latest_only — BEFORE_AGENT in a conversational agent
+# ---------------------------------------------------------------------------
+
+
+def test_latest_only_extracts_just_the_last_list_item() -> None:
+    """With ``latest_only=True`` only the final list entry is extracted."""
+    history = [
+        {"role": "user", "content": "old message"},
+        {"role": "assistant", "content": "old response"},
+        {"role": "user", "content": "Cost: $1,200"},
+    ]
+    extracted = _extract_governable_text({"messages": history}, latest_only=True)
+
+    # The newest message is kept ...
+    assert "Cost: $1,200" in extracted
+
+    # ... while both earlier turns are dropped.
+    for stale in ("old message", "old response"):
+        assert stale not in extracted
+
+
+def test_latest_only_resets_inside_chosen_item() -> None:
+    """Every block inside the latest message is still extracted.
+
+    ``latest_only`` trims the OUTER list (the chat history) to its last
+    entry. Multi-block content (text + tool_call + thinking) within that
+    entry must still be walked in full — otherwise answer text arriving
+    in a non-final block would be lost.
+    """
+    # Content blocks of the newest (assistant) message.
+    latest_blocks = [
+        {"type": "text", "text": "part A"},
+        {
+            "type": "function_call",
+            "arguments": '{"answer":"part B"}',
+        },
+    ]
+
+    history = [
+        {"role": "user", "content": "old"},
+        {"role": "assistant", "content": latest_blocks},
+    ]
+
+    extracted = _extract_governable_text({"messages": history}, latest_only=True)
+
+    # Both the text block and the function-call arguments must be present.
+    for part in ("part A", "part B"):
+        assert part in extracted
+    # The earlier user turn is outside the latest entry and must be gone.
+    assert "old" not in extracted
+
+
+def test_latest_only_top_level_list() -> None:
+    """``latest_only`` also trims a list passed directly as the input."""
+    items = [
+        {"content": "history item 1"},
+        {"content": "history item 2"},
+        {"content": "latest input"},
+    ]
+    extracted = _extract_governable_text(items, latest_only=True)
+
+    assert "latest input" in extracted
+    # Everything before the last element is discarded.
+    for stale in ("history item 1", "history item 2"):
+        assert stale not in extracted
+
+
+def test_latest_only_default_false_still_walks_all() -> None:
+    """Without ``latest_only`` every list entry is extracted (default path)."""
+    history = [
+        {"role": "user", "content": "first"},
+        {"role": "user", "content": "second"},
+    ]
+    extracted = _extract_governable_text({"messages": history})
+
+    # ``latest_only`` is left at its default here, so neither turn
+    # may be dropped.
+    for turn in ("first", "second"):
+        assert turn in extracted
+
+
+def test_latest_only_empty_list_is_empty() -> None:
+    """An empty ``messages`` list with ``latest_only=True`` yields ``""``."""
+    assert _extract_governable_text({"messages": []}, latest_only=True) == ""
+
+
+def test_messages_is_a_priority_content_key() -> None:
+    """``messages`` (plural) is extracted ahead of non-priority keys.
+
+    If ``messages`` were not a priority key, siblings such as
+    ``thread_id`` / ``metadata`` could consume budget before the chat
+    history itself is walked.
+    """
+    payload = {
+        "thread_id": "abc-xyz",
+        "metadata": {"foo": "bar"},
+        "messages": [{"role": "user", "content": "primary content"}],
+    }
+    extracted = _extract_governable_text(payload)
+    assert "primary content" in extracted
+    # ``thread_id`` may be absent from the output; if present it must come later.
+    thread_at = extracted.find("abc-xyz")
+    upper_bound = len(extracted) if thread_at == -1 else thread_at
+    assert extracted.index("primary content") < upper_bound
diff --git a/tests/test_traces_severity.py b/tests/test_traces_severity.py
new file mode 100644
index 0000000..9dfc676
--- /dev/null
+++ b/tests/test_traces_severity.py
@@ -0,0 +1,227 @@
+"""Tests for trace-span verbosity / status semantics.
+
+``TracesAuditSink`` emits an OpenTelemetry span for every governance
+hook end and every rule evaluation. The contract:
+
+- Matched non-allow rules carry a ``verbosityLevel`` span attribute
+  (UiPath Orchestrator log levels: 3=Warning, 4=Error). Platform default
+  is 2 (Information); we only emit this attribute when a violation
+  warrants Warning or Error. OTel ``StatusCode`` only has OK / ERROR /
+  UNSET, so verbosityLevel is the channel that distinguishes
+  "audit-mode advisory violation" from "actually blocked the agent".
+- ``verbosityLevel = 4`` (Error) and ``StatusCode.ERROR`` fire **only**
+  when the runtime actually blocked the agent — enforce mode AND the
+  rule's action is ``deny`` or ``escalate``.
+- ``verbosityLevel = 3`` (Warning) and ``StatusCode.UNSET`` for advisory
+  violations — audit mode (any non-allow action), or audit-action rules
+  even in enforce mode. The agent didn't fail; surfacing ``StatusCode.ERROR``
+  would falsely paint a successful run as a failure.
+- Hook spans never set Status, regardless of enforcement mode or
+  final_action. They're summary containers; verbosityLevel belongs on
+  the individual rule span that fired.
+- ``allow`` actions and unmatched evaluations leave Status at UNSET and
+  do not emit a verbosityLevel attribute (platform default applies).
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from tests._helpers import reset_enforcement_mode
+from uipath.runtime.governance.audit.base import AuditEvent, EventType
+from uipath.runtime.governance.audit.traces import TracesAuditSink
+from uipath.runtime.governance.config import (
+    EnforcementMode,
+    set_enforcement_mode,
+)
+
+
+@pytest.fixture
+def captured_span(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
+    """Route ``TracesAuditSink`` through a mock tracer and return its span mock."""
+    fake_span, fake_tracer = MagicMock(name="span"), MagicMock(name="tracer")
+    span_cm = fake_tracer.start_as_current_span.return_value
+    span_cm.__enter__.return_value = fake_span
+    span_cm.__exit__.return_value = False
+    monkeypatch.setattr(TracesAuditSink, "_get_tracer", lambda self: fake_tracer)
+    return fake_span
+
+
+@pytest.fixture(autouse=True)
+def _reset_mode() -> None:
+    """Reset the enforcement mode around every test; tests pick their own mode."""
+    reset_enforcement_mode()  # setup: runs before the test body
+    yield  # NOTE(review): generator, so Iterator[None] is the accurate return type
+    reset_enforcement_mode()  # teardown: runs after the test body
+
+
+def _hook_event(final_action: str, mode: str = "audit") -> AuditEvent:
+    """Build a ``HOOK_END`` event for a one-rule hook ending in ``final_action``."""
+    kind = EventType.HOOK_END
+    payload = {
+        "total_rules": 1,
+        "matched_rules": 0 if final_action == "allow" else 1,
+        "final_action": final_action,
+        "enforcement_mode": mode,
+    }
+    return AuditEvent(
+        event_type=kind, agent_name="agent", hook="after_model", data=payload
+    )
+
+
+def _rule_event(matched: bool, action: str) -> AuditEvent:
+    """Build a ``RULE_EVALUATION`` event for the fixed sample rule ``A.10.4``."""
+    kind = EventType.RULE_EVALUATION
+    payload = {
+        "rule_id": "A.10.4",
+        "rule_name": "commitment-language",
+        "pack_name": "iso42001",
+        "matched": matched,
+        "action": action,
+        "status": "MATCHED" if matched else "PASS",
+        "detail": "Customer-binding commitment detected.",
+    }
+    return AuditEvent(
+        event_type=kind, agent_name="agent", hook="after_model", data=payload
+    )
+
+
+def _span_attrs(span: MagicMock) -> dict[str, object]:
+    """Collect every ``set_attribute(name, value)`` call into a dict.
+
+    If a name was set more than once, the value from the last call wins.
+    """
+    recorded = span.set_attribute.call_args_list
+    return dict(call.args for call in recorded)
+
+
+# ---------------------------------------------------------------------------
+# Hook span — never marked ERROR
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    ("final_action", "mode"),
+    [
+        ("deny", "enforce"),
+        ("deny", "audit"),
+        ("audit", "audit"),
+        ("escalate", "audit"),
+        ("allow", "audit"),
+    ],
+)
+def test_hook_span_never_sets_error(
+    captured_span: MagicMock, final_action: str, mode: str
+) -> None:
+    """A hook span never calls ``set_status``, whatever the action or mode."""
+    TracesAuditSink().emit(_hook_event(final_action=final_action, mode=mode))
+
+    assert not captured_span.set_status.called, (
+        "Hook span should never set_status; called with "
+        f"final_action={final_action!r}, mode={mode!r}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Rule span — enforce-mode actually-blocking violations
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("action", ["deny", "escalate"])
+def test_enforce_mode_blocking_violation_is_error(
+    captured_span: MagicMock, action: str
+) -> None:
+    """Enforce mode with deny/escalate: ``verbosityLevel`` 4 and an ERROR status."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(_rule_event(matched=True, action=action))
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 4
+    for absent_key in ("severity", "governance.severity"):
+        assert absent_key not in recorded
+
+    set_status = captured_span.set_status
+    assert set_status.called, (
+        f"Status.ERROR must fire for enforce-mode {action} violation"
+    )
+    [status] = set_status.call_args.args  # exactly one positional argument
+    from opentelemetry.trace import Status, StatusCode
+
+    assert isinstance(status, Status)
+    assert status.status_code is StatusCode.ERROR
+    for fragment in ("commitment-language", action):
+        assert fragment in status.description
+
+
+# ---------------------------------------------------------------------------
+# Rule span — advisory violations (audit mode, or audit-action rules)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("action", ["deny", "audit", "escalate"])
+def test_audit_mode_violation_is_warning(
+    captured_span: MagicMock, action: str
+) -> None:
+    """Audit-mode matches are advisory: ``verbosityLevel`` 3 and no status set.
+
+    The runtime lets the agent through in audit mode, so reporting an
+    ERROR status here would wrongly present a successful run as a
+    failure.
+    """
+    set_enforcement_mode(EnforcementMode.AUDIT)
+    TracesAuditSink().emit(_rule_event(matched=True, action=action))
+
+    recorded = _span_attrs(captured_span)
+    assert recorded.get("verbosityLevel") == 3
+    for absent_key in ("severity", "governance.severity"):
+        assert absent_key not in recorded
+
+    status_was_set = captured_span.set_status.called
+    assert not status_was_set, (
+        f"Audit-mode {action} violation must NOT set Status.ERROR"
+    )
+
+
+def test_enforce_mode_audit_action_is_warning(captured_span: MagicMock) -> None:
+    """An ``audit`` action stays advisory in enforce mode: ``verbosityLevel`` 3.
+
+    ``audit`` means "log this match but don't block", and that holds
+    under enforce mode too, so the span is a Warning with no status
+    set.
+    """
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(_rule_event(matched=True, action="audit"))
+
+    recorded = _span_attrs(captured_span)
+    status_was_set = captured_span.set_status.called
+    assert recorded.get("verbosityLevel") == 3
+    assert not status_was_set
+
+
+# ---------------------------------------------------------------------------
+# Rule span — no violation, no verbosityLevel attribute (platform default = 2)
+# ---------------------------------------------------------------------------
+
+
+def test_unmatched_rule_no_verbosity_no_error(captured_span: MagicMock) -> None:
+    """A rule that did not match emits neither ``verbosityLevel`` nor a status."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(_rule_event(matched=False, action="deny"))
+
+    recorded = _span_attrs(captured_span)
+    status_was_set = captured_span.set_status.called
+    assert "verbosityLevel" not in recorded
+    assert not status_was_set
+
+
+def test_matched_allow_action_no_verbosity(captured_span: MagicMock) -> None:
+    """A matched ``allow`` rule is not a violation: no verbosity, no status."""
+    set_enforcement_mode(EnforcementMode.ENFORCE)
+    TracesAuditSink().emit(_rule_event(matched=True, action="allow"))
+
+    recorded = _span_attrs(captured_span)
+    status_was_set = captured_span.set_status.called
+    assert "verbosityLevel" not in recorded
+    assert not status_was_set