diff --git a/src/uipath/runtime/governance/audit/__init__.py b/src/uipath/runtime/governance/audit/__init__.py new file mode 100644 index 0000000..6f7ecc5 --- /dev/null +++ b/src/uipath/runtime/governance/audit/__init__.py @@ -0,0 +1,70 @@ +"""Audit sink framework for governance events. + +This module provides a pluggable audit system that supports multiple +output destinations (sinks) for governance events. Events are emitted +to all registered sinks, allowing flexible audit trail configuration. + +Usage:: + + from uipath.runtime.governance.audit import get_audit_manager, AuditEvent + + # Get the global audit manager + manager = get_audit_manager() + + # Emit an event (goes to all registered sinks) + manager.emit(AuditEvent( + event_type="rule_evaluation", + trace_id="abc-123", + agent_name="my-agent", + data={"rule_id": "ASI-01", "matched": True}, + )) + + # Register a custom sink + manager.register_sink(MyCustomSink()) + +Built-in sinks: + +- :class:`TracesAuditSink` – OpenTelemetry spans for Orchestrator Traces UI +- :class:`ConsoleAuditSink` – stderr output for debugging + +Sink registration: + +- The ``traces`` sink (OpenTelemetry spans → Orchestrator audit UI) is + **platform-mandated** and always registered. It cannot be disabled by + a developer-side env var — governance is platform-owned. +- The ``console`` sink is a developer aid for local debugging and is + opt-in via env var. + +Environment variables (developer-facing, console only): + +- ``UIPATH_AUDIT_VERBOSE`` – verbose console output. +- ``UIPATH_GOVERNANCE_CONSOLE_LOG`` – enable the console sink. 
@dataclass
class AuditEvent:
    """A single governance audit record, independent of any particular sink.

    Only ``event_type`` is required; every other field has a default so
    callers can populate just what they know.

    Attributes:
        event_type: Kind of event (e.g., "rule_evaluation", "hook_summary").
        trace_id: Correlation identifier; empty string when not supplied.
        agent_name: Name of the governed agent; defaults to "unknown".
        hook: Lifecycle hook the event belongs to; empty when not applicable.
        data: Event-specific payload (a fresh dict per instance).
        metadata: Extra key/values for filtering or routing (fresh dict
            per instance).
        timestamp: Creation time; defaults to the current UTC time.
    """

    event_type: str
    trace_id: str = ""
    agent_name: str = "unknown"
    hook: str = ""
    data: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=lambda: datetime.now(tz=timezone.utc))

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict with an ISO-8601 timestamp string."""
        # Re-assigning an existing key keeps its position, so "timestamp"
        # stays last, exactly where asdict() put it.
        return {**asdict(self), "timestamp": self.timestamp.isoformat()}

    def to_json(self) -> str:
        """Serialize the ``to_dict()`` representation to a JSON string."""
        payload = self.to_dict()
        return json.dumps(payload)
class AuditManager:
    """Manages multiple audit sinks and routes events to them.

    The AuditManager is the central hub for audit events. It maintains
    a list of registered sinks and broadcasts events to all of them.

    Thread Safety:
        Events are queued and processed by a background thread, making
        emit() non-blocking. This avoids blocking agent execution during
        audit trace HTTP calls. A single lock guards the sink list, the
        circuit-breaker state and the counters.

    Backpressure:
        The async queue is bounded. On overflow the oldest queued event
        is evicted to make room; every event that is actually lost
        (evicted, or rejected because the queue refilled) is counted in
        ``stats["events_dropped"]``.
    """

    # Trip a sink after this many consecutive emit failures (circuit-breaker).
    _SINK_FAILURE_THRESHOLD = 10
    # Bound the async queue so a stuck sink can't grow memory without limit.
    # Matches the order of magnitude of a long-running agent's per-session
    # audit volume; on overflow the oldest event is dropped (loss visible
    # via stats.events_dropped).
    _DEFAULT_QUEUE_MAXSIZE = 10_000

    def __init__(
        self,
        async_mode: bool = True,
        queue_maxsize: int = _DEFAULT_QUEUE_MAXSIZE,
    ) -> None:
        """Initialize the audit manager.

        Args:
            async_mode: If True (default), events are processed in a background
                thread. If False, events are processed synchronously.
            queue_maxsize: Max queued events in async mode. On overflow the
                oldest queued event is dropped to make room.
        """
        self._sinks: list[AuditSink] = []
        # Single lock guards _sinks, _sink_failures, _tripped_sinks,
        # _event_count, _error_count, _dropped_count — every counter and
        # collection that the worker thread and emit-caller mutate.
        self._sinks_lock = threading.Lock()
        # Per-sink consecutive-failure counter, keyed by sink name.
        self._sink_failures: dict[str, int] = {}
        self._tripped_sinks: set[str] = set()
        self._event_count = 0
        self._error_count = 0
        self._dropped_count = 0
        self._async_mode = async_mode
        # Remembered so emit() can detect that it is running in a forked child.
        self._pid = os.getpid()

        # Background processing
        self._queue: queue.Queue[AuditEvent | None] = queue.Queue(maxsize=queue_maxsize)
        self._worker_thread: threading.Thread | None = None
        self._shutdown = threading.Event()

        if self._async_mode:
            self._start_worker()

    def _start_worker(self) -> None:
        """Start the background worker thread (no-op if one is already alive)."""
        if self._worker_thread is not None and self._worker_thread.is_alive():
            return

        self._shutdown.clear()
        self._worker_thread = threading.Thread(
            target=self._worker_loop,
            name="governance-audit-worker",
            daemon=True,
        )
        self._worker_thread.start()
        logger.debug("Background audit worker started")

    def _worker_loop(self) -> None:
        """Background worker loop that processes queued events."""
        while not self._shutdown.is_set():
            # Wait for an event with a timeout so we can re-check shutdown.
            try:
                event = self._queue.get(timeout=0.5)
            except queue.Empty:
                continue
            # Every successful get() must be paired with exactly one
            # task_done() — including the shutdown sentinel and the case
            # where _emit_sync raises — otherwise unfinished_tasks never
            # drains and flush()/join() hangs.
            try:
                if event is None:
                    # Shutdown signal
                    break
                self._emit_sync(event)
            except Exception as e:
                logger.warning("Audit worker error: %s", e)
            finally:
                self._queue.task_done()

        # Drain remaining events on shutdown
        self._drain_queue()

    def _drain_queue(self) -> None:
        """Process any remaining events in the queue."""
        while True:
            try:
                event = self._queue.get_nowait()
            except queue.Empty:
                break
            # As in _worker_loop: pair every get() with one task_done(),
            # even when _emit_sync raises, so shutdown accounting is sound.
            try:
                if event is not None:
                    self._emit_sync(event)
            except Exception as e:
                logger.warning("Audit drain error: %s", e)
            finally:
                self._queue.task_done()

    def _emit_sync(self, event: AuditEvent) -> None:
        """Emit event synchronously to all sinks (called from worker thread).

        Sink exceptions never propagate: each failure bumps the error
        count and the sink's consecutive-failure counter, and a sink that
        reaches ``_SINK_FAILURE_THRESHOLD`` is skipped from then on.
        """
        # Snapshot under the lock, then call sinks without holding it so a
        # slow sink cannot block register/unregister/stats.
        with self._sinks_lock:
            sinks = list(self._sinks)
            tripped = set(self._tripped_sinks)
        for sink in sinks:
            if sink.name in tripped:
                continue
            try:
                if sink.accepts(event):
                    sink.emit(event)
                    # Success — reset failure counter for this sink. Only
                    # a delivered event proves the sink has recovered; a
                    # filtered-out event says nothing about its health.
                    with self._sinks_lock:
                        if self._sink_failures.get(sink.name):
                            self._sink_failures[sink.name] = 0
            except Exception as e:
                with self._sinks_lock:
                    self._error_count += 1
                    fails = self._sink_failures.get(sink.name, 0) + 1
                    self._sink_failures[sink.name] = fails
                    tripped_now = fails >= self._SINK_FAILURE_THRESHOLD
                    if tripped_now:
                        self._tripped_sinks.add(sink.name)
                # Log outside the lock: handlers may be slow.
                if tripped_now:
                    logger.error(
                        "Audit sink '%s' tripped after %d consecutive failures; "
                        "will be skipped for the rest of this process. Last error: %s",
                        sink.name,
                        fails,
                        e,
                    )
                else:
                    logger.warning(
                        "Audit sink '%s' failed to emit event (%d/%d): %s",
                        sink.name,
                        fails,
                        self._SINK_FAILURE_THRESHOLD,
                        e,
                    )

    def register_sink(self, sink: AuditSink) -> None:
        """Register an audit sink.

        Args:
            sink: The sink to register

        Note:
            Duplicate sinks (same name) are ignored.
            The circuit-breaker failure counter is cleared so a freshly
            registered sink doesn't inherit a previous instance's tripped
            state. ``unregister_sink`` already clears these, but the
            defensive reset here guards against external manipulation
            of the internal counters (tests, future callers).
        """
        with self._sinks_lock:
            if any(s.name == sink.name for s in self._sinks):
                logger.debug("Sink '%s' already registered, skipping", sink.name)
                return
            self._sinks.append(sink)
            self._sink_failures.pop(sink.name, None)
            self._tripped_sinks.discard(sink.name)
        logger.info("Registered audit sink: %s", sink.name)

    def unregister_sink(self, name: str) -> bool:
        """Unregister an audit sink by name.

        The sink is removed under the lock and closed afterwards, so a
        slow or failing ``close()`` cannot block other callers.

        Args:
            name: Name of the sink to remove

        Returns:
            True if sink was removed, False if not found
        """
        sink_to_close: AuditSink | None = None
        with self._sinks_lock:
            for i, sink in enumerate(self._sinks):
                if sink.name == name:
                    sink_to_close = sink
                    del self._sinks[i]
                    self._sink_failures.pop(name, None)
                    self._tripped_sinks.discard(name)
                    break
        if sink_to_close is not None:
            try:
                sink_to_close.close()
            except Exception as e:
                logger.warning("Audit sink '%s' failed to close: %s", name, e)
            logger.info("Unregistered audit sink: %s", name)
            return True
        return False

    def get_sink(self, name: str) -> AuditSink | None:
        """Get a registered sink by name, or None if no sink has that name."""
        with self._sinks_lock:
            for sink in self._sinks:
                if sink.name == name:
                    return sink
        return None

    def list_sinks(self) -> list[str]:
        """Get names of all registered sinks, in registration order."""
        with self._sinks_lock:
            return [s.name for s in self._sinks]

    def emit(self, event: AuditEvent) -> None:
        """Emit an audit event to all registered sinks.

        In async mode (default), this queues the event for background
        processing and returns immediately. This avoids blocking the
        main agent execution path during audit trace HTTP calls.

        On post-fork callers (worker process inheriting the parent's
        manager), the queue is reinitialized and the worker thread
        re-spawned before enqueue — otherwise events would silently
        accumulate in a queue no one is draining.

        Args:
            event: The audit event to emit
        """
        self._ensure_alive_after_fork()

        with self._sinks_lock:
            self._event_count += 1

        if not self._async_mode:
            # Synchronous processing
            self._emit_sync(event)
            return

        # Non-blocking enqueue with drop-oldest backpressure: if the
        # worker is wedged on a slow sink, this keeps memory bounded
        # rather than growing without limit. The dropped count is
        # surfaced via ``stats``.
        try:
            self._queue.put_nowait(event)
            return
        except queue.Full:
            pass

        lost = 0
        try:
            evicted = self._queue.get_nowait()
        except queue.Empty:
            # The worker emptied the queue between the failed put and
            # this get: nothing was evicted, so nothing is counted.
            pass
        else:
            # Keep get()/task_done() paired so flush() accounting holds.
            self._queue.task_done()
            # The shutdown sentinel is not an audit event; losing it is
            # harmless because the worker also polls ``_shutdown``.
            if evicted is not None:
                lost += 1

        try:
            self._queue.put_nowait(event)
        except queue.Full:
            # Worker is so far behind that the queue refilled
            # between get_nowait and put_nowait — give up on
            # this event rather than block, and count it as lost.
            lost += 1

        if lost:
            with self._sinks_lock:
                self._dropped_count += lost

    def _ensure_alive_after_fork(self) -> None:
        """Reset lock, queue and worker if we're in a forked child."""
        current_pid = os.getpid()
        if current_pid == self._pid:
            return
        # A fork copies locks in whatever state they were in. If the
        # parent's worker thread held ``_sinks_lock`` at that instant, no
        # thread in the child will ever release it and the first emit()
        # would deadlock — so replace it before anything acquires it.
        self._sinks_lock = threading.Lock()
        # Child process inherited a dead worker_thread reference and a
        # queue the parent owned. Rebuild both so child events drain.
        self._pid = current_pid
        self._queue = queue.Queue(maxsize=self._queue.maxsize)
        self._shutdown = threading.Event()
        self._worker_thread = None
        if self._async_mode:
            self._start_worker()

    def emit_rule_evaluation(
        self,
        rule_id: str,
        rule_name: str,
        pack_name: str,
        hook: str,
        matched: bool,
        action: str,
        detail: str = "",
        agent_name: str = "agent",
        trace_id: str = "",
        description: str = "",
    ) -> None:
        """Convenience method to emit a rule evaluation event.

        Builds a ``RULE_EVALUATION`` event whose ``data`` carries the
        rule fields plus ``status`` ("MATCHED" or "PASS").
        """
        self.emit(
            AuditEvent(
                event_type=EventType.RULE_EVALUATION,
                trace_id=trace_id,
                agent_name=agent_name,
                hook=hook,
                data={
                    "rule_id": rule_id,
                    "rule_name": rule_name,
                    "pack_name": pack_name,
                    "matched": matched,
                    "action": action,
                    "detail": detail,
                    "description": description,
                    "status": "MATCHED" if matched else "PASS",
                },
            )
        )

    def emit_hook_summary(
        self,
        hook: str,
        agent_name: str,
        total_rules: int,
        matched_rules: int,
        final_action: str,
        trace_id: str = "",
        enforcement_mode: str = "audit",
    ) -> None:
        """Convenience method to emit a hook summary event (type ``HOOK_END``)."""
        self.emit(
            AuditEvent(
                event_type=EventType.HOOK_END,
                trace_id=trace_id,
                agent_name=agent_name,
                hook=hook,
                data={
                    "total_rules": total_rules,
                    "matched_rules": matched_rules,
                    "final_action": final_action,
                    "enforcement_mode": enforcement_mode,
                },
            )
        )

    def emit_session_start(
        self,
        session_id: str,
        agent_name: str,
        packs: list[str],
        enforcement_mode: str = "audit",
    ) -> None:
        """Convenience method to emit a session start event.

        The session id doubles as the event's ``trace_id``.
        """
        self.emit(
            AuditEvent(
                event_type=EventType.SESSION_START,
                trace_id=session_id,
                agent_name=agent_name,
                data={
                    "session_id": session_id,
                    "packs": packs,
                    "enforcement_mode": enforcement_mode,
                },
            )
        )

    def emit_session_end(
        self,
        session_id: str,
        agent_name: str,
        total_evaluations: int,
        rules_matched: int,
        rules_denied: int,
    ) -> None:
        """Convenience method to emit a session end event.

        The session id doubles as the event's ``trace_id``.
        """
        self.emit(
            AuditEvent(
                event_type=EventType.SESSION_END,
                trace_id=session_id,
                agent_name=agent_name,
                data={
                    "session_id": session_id,
                    "total_evaluations": total_evaluations,
                    "rules_matched": rules_matched,
                    "rules_denied": rules_denied,
                },
            )
        )

    def flush(self, timeout: float = 5.0) -> None:
        """Flush all pending events and sinks.

        In async mode, polls the queue until it drains or ``timeout``
        seconds elapse, whichever comes first. ``queue.Queue.join`` has
        no timeout argument — using it would block indefinitely on a
        wedged sink, which defeats the bounded-shutdown contract that
        :func:`_cleanup_audit_manager` relies on at process exit.

        Afterwards every registered sink's ``flush()`` is called; sink
        errors are logged, not raised.

        Args:
            timeout: Maximum seconds to wait for queue to drain (default 5.0)
        """
        if self._async_mode:
            import time

            deadline = time.monotonic() + max(0.0, timeout)
            poll_interval = min(0.05, timeout) if timeout > 0 else 0.0
            while time.monotonic() < deadline:
                try:
                    if self._queue.unfinished_tasks == 0:
                        break
                except Exception:  # noqa: BLE001 - queue introspection is best-effort
                    break
                time.sleep(poll_interval)
            else:
                # Loop didn't break — drain timed out. Log so a wedged
                # sink is surfaced rather than swallowed.
                try:
                    pending = self._queue.unfinished_tasks
                except Exception:  # noqa: BLE001
                    pending = -1
                if pending:
                    logger.warning(
                        "Audit queue did not drain within %.2fs "
                        "(unfinished tasks=%s); sink may be wedged",
                        timeout,
                        pending,
                    )

        with self._sinks_lock:
            sinks = list(self._sinks)
        for sink in sinks:
            try:
                sink.flush()
            except Exception as e:
                logger.warning("Audit sink '%s' failed to flush: %s", sink.name, e)

    def close(self) -> None:
        """Close all sinks and release resources.

        Stops the background worker thread and drains any remaining events.
        Shutdown is bounded: ``_shutdown`` is the primary signal the
        worker polls; the sentinel ``None`` enqueue is best-effort. If
        the queue is full and the worker is wedged on a slow sink,
        ``put_nowait`` fails fast rather than hanging process exit.
        """
        if self._async_mode and self._worker_thread is not None:
            # Signal shutdown first so the worker's next queue.get() loop
            # iteration exits even if we can't enqueue the sentinel.
            self._shutdown.set()
            try:
                self._queue.put_nowait(None)  # Wake up worker
            except queue.Full:
                # Queue saturated by a stuck sink; the worker will see
                # _shutdown on its next loop iteration once whatever it's
                # blocked on completes (or the 2s join timeout fires).
                logger.debug(
                    "Audit queue full at shutdown; relying on _shutdown signal"
                )

            # Wait for worker to finish (with timeout)
            if self._worker_thread.is_alive():
                self._worker_thread.join(timeout=2.0)

            logger.debug("Background audit worker stopped")

        # Detach the sinks under the lock, close them outside it.
        with self._sinks_lock:
            sinks = list(self._sinks)
            self._sinks.clear()
            self._sink_failures.clear()
            self._tripped_sinks.clear()
        for sink in sinks:
            try:
                sink.close()
            except Exception as e:
                logger.warning("Audit sink '%s' failed to close: %s", sink.name, e)

    @property
    def stats(self) -> dict[str, Any]:
        """Get audit statistics.

        Returns:
            A dict with ``sinks`` (count), ``sink_names``,
            ``events_emitted``, ``events_queued`` (0 in sync mode),
            ``events_dropped``, ``errors`` and ``async_mode``.
        """
        with self._sinks_lock:
            sink_names = [s.name for s in self._sinks]
            event_count = self._event_count
            error_count = self._error_count
            dropped_count = self._dropped_count
        return {
            "sinks": len(sink_names),
            "sink_names": sink_names,
            "events_emitted": event_count,
            "events_queued": self._queue.qsize() if self._async_mode else 0,
            "events_dropped": dropped_count,
            "errors": error_count,
            "async_mode": self._async_mode,
        }
def _configure_default_sinks(manager: AuditManager) -> None:
    """Register the built-in sinks on ``manager``.

    ``traces`` is always requested: it is the platform-mandated audit
    trail and no developer-side environment variable can switch it off.
    ``console`` is a local debugging aid and is requested only when
    ``UIPATH_GOVERNANCE_CONSOLE_LOG=true`` (case-insensitive).

    Args:
        manager: The manager that receives the created sinks.
    """
    from .factory import create_sink

    console_enabled = (
        os.getenv("UIPATH_GOVERNANCE_CONSOLE_LOG", "false").lower() == "true"
    )
    # "traces" first: mandatory and platform-controlled.
    sink_names: list[str] = ["traces", "console"] if console_enabled else ["traces"]

    for requested in sink_names:
        created = create_sink(requested)
        if not created:
            # The factory could not build this sink; nothing to register.
            continue
        manager.register_sink(created)
        logger.info("Audit sink registered: %s", requested)

    logger.info("Governance audit sinks configured: %s", ", ".join(sink_names))
class ConsoleAuditSink(AuditSink):
    """Audit sink that writes to console (stderr).

    Useful for debugging and development. Output is human-readable, one
    ``[GOVERNANCE] ...`` line per event.

    Formatting is tolerant of hand-built events: non-string actions and
    pack names are stringified and non-JSON payload values are rendered
    with ``str()``, so a malformed event produces a line instead of an
    exception from ``emit``.

    Args:
        verbose: If True, show all events. If False, only show matches.
    """

    def __init__(self, verbose: bool = False) -> None:
        """Configure the sink's verbosity (verbose shows every event)."""
        self._verbose = verbose

    @property
    def name(self) -> str:
        """Constant sink identifier."""
        return "console"

    @staticmethod
    def _write(line: str) -> None:
        """Print one line to stderr, flushing immediately."""
        print(line, file=sys.stderr, flush=True)

    def accepts(self, event: AuditEvent) -> bool:
        """Filter to matched rules and lifecycle events unless verbose."""
        if self._verbose:
            return True
        # Only show matched rules and important events
        if event.event_type == EventType.RULE_EVALUATION:
            return bool(event.data.get("matched", False))
        return event.event_type in (
            EventType.SESSION_START,
            EventType.SESSION_END,
            EventType.HOOK_END,
            EventType.POLICY_VIOLATION,
        )

    def emit(self, event: AuditEvent) -> None:
        """Write the event to stderr using the appropriate formatter."""
        if event.event_type == EventType.RULE_EVALUATION:
            self._emit_rule_evaluation(event)
        elif event.event_type == EventType.HOOK_END:
            self._emit_hook_summary(event)
        elif event.event_type == EventType.SESSION_START:
            self._emit_session_start(event)
        elif event.event_type == EventType.SESSION_END:
            self._emit_session_end(event)
        else:
            self._emit_generic(event)

    def _emit_rule_evaluation(self, event: AuditEvent) -> None:
        """Print a rule result; unmatched rules only appear in verbose mode."""
        data = event.data
        matched = data.get("matched", False)
        status = "MATCHED" if matched else "PASS"
        rule_id = data.get("rule_id", "?")
        rule_name = data.get("rule_name", "?")
        # str() first: a non-string action must not make .upper() raise.
        action = str(data.get("action", "?")).upper()
        detail = data.get("detail", "")

        if matched:
            self._write(
                f"[GOVERNANCE] [{status}] {rule_id} | {rule_name} | "
                f"action={action} | {detail}"
            )
        elif self._verbose:
            self._write(f"[GOVERNANCE] [{status}] {rule_id} | {rule_name}")

    def _emit_hook_summary(self, event: AuditEvent) -> None:
        """Print the per-hook summary line."""
        data = event.data
        hook = event.hook
        total = data.get("total_rules", 0)
        matched = data.get("matched_rules", 0)
        action = str(data.get("final_action", "allow")).upper()
        mode = data.get("enforcement_mode", "audit")

        # In audit mode nothing is blocked, so say what would have happened.
        if mode == "audit" and action == "DENY":
            action = "AUDIT (would deny)"

        self._write(
            f"[GOVERNANCE] HOOK: {hook} | rules={total} | matched={matched} | "
            f"action={action}"
        )

    def _emit_session_start(self, event: AuditEvent) -> None:
        """Print the session-start line with the loaded packs and mode."""
        data = event.data
        # "or []" also covers an explicit None; str() covers non-string names.
        packs = data.get("packs") or []
        mode = data.get("enforcement_mode", "audit")
        pack_list = ",".join(str(pack) for pack in packs)
        self._write(
            f"[GOVERNANCE] Session started | agent={event.agent_name} | "
            f"packs={pack_list} | mode={mode}"
        )

    def _emit_session_end(self, event: AuditEvent) -> None:
        """Print the session-end totals."""
        data = event.data
        total = data.get("total_evaluations", 0)
        matched = data.get("rules_matched", 0)
        denied = data.get("rules_denied", 0)
        self._write(
            f"[GOVERNANCE] Session ended | evaluations={total} | "
            f"matched={matched} | denied={denied}"
        )

    def _emit_generic(self, event: AuditEvent) -> None:
        """Print any other event type with its payload as JSON."""
        # default=str is deliberate: this is debug output, so a value JSON
        # cannot encode (datetime, set, ...) is shown via str() rather than
        # raising TypeError out of emit().
        payload = json.dumps(event.data, default=str)
        self._write(
            f"[GOVERNANCE] {event.event_type} | {event.agent_name} | {payload}"
        )
+ + Supported sinks: + - ``traces``: OpenTelemetry spans for Orchestrator Traces UI + - ``console``: human-readable stderr output + """ + name = name.lower() + + if name == "traces": + from .traces import TracesAuditSink + + return TracesAuditSink() + + elif name == "console": + from .console import ConsoleAuditSink + + verbose = os.getenv("UIPATH_AUDIT_VERBOSE", "false").lower() == "true" + return ConsoleAuditSink(verbose=verbose) + + else: + logger.warning("Unknown audit sink: %s", name) + return None diff --git a/src/uipath/runtime/governance/audit/traces.py b/src/uipath/runtime/governance/audit/traces.py new file mode 100644 index 0000000..81de1e4 --- /dev/null +++ b/src/uipath/runtime/governance/audit/traces.py @@ -0,0 +1,268 @@ +"""OpenTelemetry traces audit sink for Orchestrator integration. + +This sink creates OpenTelemetry spans for governance events, which +appear in the UiPath Orchestrator Traces UI for observability. +""" + +from __future__ import annotations + +import logging +import os +from typing import Any + +from uipath.runtime.governance.native.backend_client import ( + ENV_FOLDER_KEY, + ENV_JOB_KEY, + ENV_ORGANIZATION_ID, + ENV_TENANT_ID, + ENV_TRACE_ID, +) + +from .base import AuditEvent, AuditSink, EventType + +logger = logging.getLogger(__name__) + +# Value for the ``type`` / ``span_type`` span attributes on every +# governance span. Matches ``SpanType.AGENT_RUN`` in uipath-agents-python +# — we use the string literal here (not a cross-package import) to keep +# uipath-runtime free of a uipath-agents dependency. If the agents-side +# registry adds new values, this constant is the single place to update. +SPAN_TYPE_AGENT_RUN = "agentRun" + +# Identifies this auditor on every governance span. Lets a downstream +# consumer distinguish traces emitted by the Python in-runtime governance +# checker from those produced by the governance-server (or any future +# language-specific governance SDK). 
# Set as the ``source`` span
# attribute on every governance trace span.
GOVERNANCE_SOURCE = "governance-checker-python"


class TracesAuditSink(AuditSink):
    """Audit sink that creates OpenTelemetry spans.

    Spans appear in UiPath Orchestrator Traces UI, providing structured
    data for each governance evaluation.

    Only ``RULE_EVALUATION`` and ``HOOK_END`` events produce spans (see
    :meth:`emit`); every other event type is dropped. Span creation is
    best-effort: failures are logged and swallowed so that auditing can
    never break the caller.
    """

    def __init__(self) -> None:
        """Initialize the sink with a deferred tracer and zero span count."""
        # Tri-state cache: ``None`` = not resolved yet, ``False`` = the
        # OpenTelemetry import failed (never retried), otherwise a tracer.
        self._tracer: Any = None
        self._spans_created = 0

    @property
    def name(self) -> str:
        """Constant sink identifier."""
        return "traces"

    def _get_tracer(self) -> Any:
        """Return the lazily created tracer, or ``None`` if OTel is missing."""
        if self._tracer is None:
            try:
                from opentelemetry import trace

                self._tracer = trace.get_tracer("uipath.governance")
                logger.info("OpenTelemetry tracer initialized for governance traces")
            except ImportError:
                # OpenTelemetry is supplied transitively by uipath-core; an
                # ImportError here means the host install is broken or
                # governance is running outside the UiPath SDK environment.
                logger.warning(
                    "OpenTelemetry not available - governance traces disabled. "
                    "OTel is normally provided by uipath-core; reinstall the SDK."
                )
                self._tracer = False
        return self._tracer if self._tracer else None

    def _get_uipath_trace_id(self) -> str | None:
        """Get the trace id from the environment."""
        return os.environ.get(ENV_TRACE_ID)

    def _get_uipath_context(self) -> dict[str, str]:
        """Get UiPath context attributes from the environment.

        Returns:
            Span-attribute name -> value, containing only the variables
            that are set to a non-empty value.
        """
        env_by_attribute = (
            ("uipath.organization_id", ENV_ORGANIZATION_ID),
            ("uipath.tenant_id", ENV_TENANT_ID),
            ("uipath.folder_key", ENV_FOLDER_KEY),
            ("uipath.job_key", ENV_JOB_KEY),
        )
        context: dict[str, str] = {}
        for attribute, env_name in env_by_attribute:
            value = os.environ.get(env_name)
            if value:
                context[attribute] = value
        return context

    def emit(self, event: AuditEvent) -> None:
        """Create a span for RULE_EVALUATION or HOOK_END events; drop others."""
        if event.event_type == EventType.RULE_EVALUATION:
            self._emit_rule_span(event)
        elif event.event_type == EventType.HOOK_END:
            self._emit_hook_span(event)

    def _set_common_attributes(self, span: Any, event: AuditEvent) -> None:
        """Stamp the attributes shared by hook and rule spans onto ``span``."""
        # Required for Orchestrator Traces
        span.set_attribute("type", SPAN_TYPE_AGENT_RUN)
        span.set_attribute("span_type", SPAN_TYPE_AGENT_RUN)
        # Identifies which agent emitted this audit trace. Lets
        # downstream consumers (Orchestrator Traces UI, audit
        # dashboards) filter governance spans by producer when
        # multiple SDKs / governance backends co-exist.
        span.set_attribute("source", GOVERNANCE_SOURCE)
        span.set_attribute("uipath.custom_instrumentation", True)

        # The event's own trace id wins; the environment is the fallback.
        uipath_trace_id = event.trace_id or self._get_uipath_trace_id()
        if uipath_trace_id:
            span.set_attribute("uipath.trace_id", uipath_trace_id)

        # UiPath context
        for key, value in self._get_uipath_context().items():
            span.set_attribute(key, value)

    def _emit_hook_span(self, event: AuditEvent) -> None:
        """Create a span for a hook summary (always emitted for each governance check)."""
        tracer = self._get_tracer()
        if tracer is None:
            return

        try:
            from opentelemetry import context

            data = event.data
            hook = event.hook or "unknown"
            span_name = f"governance.{hook.lower()}"

            # Use the current OTel context if one is active; otherwise start a
            # root span. A previous version fabricated a random parent
            # span_id when only a trace_id was known, which produced orphan
            # parents the backend could never resolve. The governance span
            # now correctly appears as a child of whichever span is current
            # (e.g. the runtime's root span) or as a fresh root.
            ctx = context.get_current()

            with tracer.start_as_current_span(span_name, context=ctx) as span:
                self._set_common_attributes(span, event)

                # Hook summary attributes
                span.set_attribute("governance.hook", hook)
                span.set_attribute("governance.total_rules", data.get("total_rules", 0))
                span.set_attribute(
                    "governance.matched_rules", data.get("matched_rules", 0)
                )
                span.set_attribute(
                    "governance.final_action", data.get("final_action", "allow")
                )
                span.set_attribute(
                    "governance.enforcement_mode", data.get("enforcement_mode", "audit")
                )
                span.set_attribute("governance.agent_name", event.agent_name)

                # Hook spans are summary containers — they're left at
                # Status.UNSET regardless of final_action. Severity is
                # carried by the per-rule spans (see _emit_rule_span);
                # marking the hook span as ERROR would falsely paint
                # the entire lifecycle phase as failed when only a
                # specific rule fired underneath.

            self._spans_created += 1

        except Exception as e:
            logger.warning("Failed to create governance hook span: %s", e)

    def _emit_rule_span(self, event: AuditEvent) -> None:
        """Create a span for a rule evaluation.

        Attribute values are normalised *before* they are used inside the
        span: by default ``start_as_current_span`` records any exception
        raised in its ``with`` block and marks the span ERROR, so a ``None``
        action or a non-string detail would otherwise show up in the trace
        as a failed rule evaluation.
        """
        tracer = self._get_tracer()
        if tracer is None:
            return

        try:
            from opentelemetry import context

            data = event.data
            rule_id = data.get("rule_id", "unknown")
            span_name = f"governance.rule.{rule_id}"

            # Missing / None values fall back to the same defaults the
            # attribute calls below would have used for an absent key.
            action = data.get("action") or "allow"
            action_str = str(action).lower()
            detail = data.get("detail", "")

            # Rely on the current OTel context rather than fabricating a
            # remote-parent span_id: the span becomes a child of whatever
            # span is current, or a fresh root when none is active.
            ctx = context.get_current()

            with tracer.start_as_current_span(span_name, context=ctx) as span:
                self._set_common_attributes(span, event)

                # Governance attributes
                span.set_attribute("governance.rule_id", rule_id)
                span.set_attribute("governance.rule_name", data.get("rule_name", ""))
                span.set_attribute("governance.pack_name", data.get("pack_name", ""))
                # Same fallback as the hook span, so the attribute is always
                # present instead of being rejected when hook is None.
                span.set_attribute("governance.hook", event.hook or "unknown")
                span.set_attribute("governance.matched", data.get("matched", False))
                span.set_attribute("governance.action", action)
                span.set_attribute("governance.status", data.get("status", "PASS"))
                span.set_attribute("governance.agent_name", event.agent_name)

                if detail:
                    # Truncated to keep the attribute payload small.
                    span.set_attribute("governance.detail", str(detail)[:500])

                # Severity for matched non-allow rules is carried by the
                # platform-standard ``verbosityLevel`` span field (UiPath
                # Orchestrator log levels: 3=Warning, 4=Error). Default
                # platform verbosity is 2 (Information), so we only set
                # this attribute when there's a violation worth flagging.
                #
                # - Audit mode (and any audit-action rule even in
                #   enforce mode): runtime did NOT block the agent →
                #   verbosityLevel=3 (Warning), Status stays UNSET. The
                #   agent's span shouldn't be marked failed just because
                #   an advisory rule fired.
                # - Enforce mode + deny / escalate: runtime actually
                #   blocked → verbosityLevel=4 (Error) + Status.ERROR.
                #   The agent span genuinely failed.
                if data.get("matched") and action_str != "allow":
                    # Imported lazily, as in the rest of this method.
                    from uipath.runtime.governance.config import (
                        EnforcementMode,
                        get_enforcement_mode,
                    )

                    will_block = (
                        get_enforcement_mode() == EnforcementMode.ENFORCE
                        and action_str in {"deny", "escalate"}
                    )
                    span.set_attribute("verbosityLevel", 4 if will_block else 3)
                    if will_block:
                        try:
                            from opentelemetry.trace import Status, StatusCode

                            span.set_status(
                                Status(
                                    StatusCode.ERROR,
                                    f"Policy violation: "
                                    f"{data.get('rule_name', rule_id)} "
                                    f"(action={action_str})",
                                )
                            )
                        except ImportError:
                            # Best-effort: the span keeps verbosityLevel=4
                            # even if the status classes cannot be imported.
                            logger.debug(
                                "opentelemetry.trace.Status unavailable; "
                                "span status left unset"
                            )

            self._spans_created += 1

        except Exception as e:
            logger.warning("Failed to create governance span: %s", e)

    @property
    def spans_created(self) -> int:
        """Number of spans created."""
        return self._spans_created


# ---------------------------------------------------------------------------
# Patch boundary — the next file of this diff starts here:
#   diff --git a/src/uipath/runtime/governance/delegation_guard.py b/src/uipath/runtime/governance/delegation_guard.py
#   new file mode 100644
#   index 0000000..18a4aa5
#   --- /dev/null
#   +++ b/src/uipath/runtime/governance/delegation_guard.py
#   @@ -0,0 +1,263 @@
# ---------------------------------------------------------------------------
"""Delegation depth guard.

Patches an agent's ``invoke`` method to track recursion depth and raise
a ``GovernanceBlockException`` when the configured maximum is exceeded.
This prevents runaway sub-agent chains.
"""

# NOTE: ``from __future__ import annotations`` is the first statement of
# delegation_guard.py; it is shown as a comment here only because a
# __future__ import cannot legally appear in the middle of this excerpt.
# from __future__ import annotations

import asyncio
import functools
import logging
import os
from contextvars import ContextVar, Token
from typing import Any

from uipath.core.governance.exceptions import (
    GovernanceBlockException,
    GovernanceViolation,
)

logger = logging.getLogger(__name__)

_DEFAULT_MAX_DELEGATION_DEPTH = 25
_ENV_MAX_DELEGATION_DEPTH = "UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH"

# Single module-level ContextVar holding per-agent delegation depths
# keyed by ``id(agent)``.
# Each install / uninstall pair shares this one
# ContextVar instead of allocating a new one per agent. Every Context in
# which a ContextVar has been set keeps a strong reference to it, so
# ContextVars created per agent are effectively never garbage-collected;
# per-agent allocation was an unbounded leak in long-running hosts (every
# `install_delegation_guard` call permanently added another ContextVar).
#
# Per-context isolation (asyncio task / thread) still works the standard
# ContextVar way: each context sees its own copy of the depths dict, and
# nested invokes use ``set`` / ``reset`` for LIFO depth tracking. The
# dict itself is copied on every increment (copy-on-write) so concurrent
# contexts don't share state through a mutable mapping.
_DELEGATION_DEPTHS: ContextVar[dict[int, int]] = ContextVar(
    "_uipath_delegation_depths"
)


def _current_depth(agent_key: int) -> int:
    """Return the current depth for ``agent_key`` in this context.

    Returns ``0`` when the context has no depth map yet or the agent has
    no entry in it.
    """
    return _DELEGATION_DEPTHS.get({}).get(agent_key, 0)


def _enter_depth_if_under(
    agent_key: int, max_depth: int
) -> tuple[int, Token[dict[int, int]] | None]:
    """Attempt to increment depth for ``agent_key``.

    Returns ``(new_depth, token)`` where ``token`` is ``None`` if the
    new depth would exceed ``max_depth`` — caller raises and does not
    need to clean up. On success, caller must reset via ``token``.
    """
    depths = _DELEGATION_DEPTHS.get({})
    new_depth = depths.get(agent_key, 0) + 1
    if new_depth > max_depth:
        # Nothing was stored, so there is nothing to undo.
        return new_depth, None
    # Copy-on-write: never mutate a mapping another context may still see.
    new_depths = dict(depths)
    new_depths[agent_key] = new_depth
    return new_depth, _DELEGATION_DEPTHS.set(new_depths)


def _exit_depth(token: Token[dict[int, int]]) -> None:
    """Undo a successful :func:`_enter_depth_if_under` call.

    Tolerates cross-context resets (token created in a different
    context — happens when a child task awaits an agent invoke) by
    accepting the leak rather than crashing the agent on dispose.
    """
    try:
        _DELEGATION_DEPTHS.reset(token)
    except (ValueError, LookupError):
        # ``reset`` raises ValueError for a token from another Context.
        logger.debug("Delegation depth reset from foreign context")


def _resolve_max_depth() -> int:
    """Read max-depth from env at install time, falling back to the default.

    Called once from :func:`install_delegation_guard`; the resolved value is
    captured per agent, so changing the env var after the guard is installed
    has no effect on already-wrapped agents.

    Values that are not integers, or that are smaller than 1, are rejected
    with a warning: a limit of 0 or less would make even the outermost
    invoke exceed the maximum and block every call.
    """
    raw = os.getenv(_ENV_MAX_DELEGATION_DEPTH)
    if raw is None:
        return _DEFAULT_MAX_DELEGATION_DEPTH
    try:
        parsed: int | None = int(raw)
    except ValueError:
        parsed = None
    if parsed is None or parsed < 1:
        logger.warning(
            "Invalid %s=%r; using default %d",
            _ENV_MAX_DELEGATION_DEPTH,
            raw,
            _DEFAULT_MAX_DELEGATION_DEPTH,
        )
        return _DEFAULT_MAX_DELEGATION_DEPTH
    return parsed


def _build_violation(current: int, resolved_max: int) -> GovernanceBlockException:
    """Build the depth-exceeded exception (shared by sync and async guards)."""
    violation = GovernanceViolation(
        rule_id="ASI-02",
        rule_name="Excessive Agency",
        detail=f"Delegation depth {current} exceeds max {resolved_max}",
    )
    return GovernanceBlockException.from_violation(violation)


def _wrap_invoke(original: Any, agent_key: int, resolved_max: int) -> Any:
    """Return a depth-guarded wrapper matching the sync/async shape of ``original``.

    Coroutine functions get an ``async def`` wrapper so the returned object
    is itself an awaitable — wrapping with a sync function would return an
    un-awaited coroutine and silently bypass the guard entirely.

    The wrapper forwards *all* positional and keyword arguments unchanged.
    Declaring a fixed ``(input_data, **kwargs)`` signature would reject
    calls that pass a second positional argument (for example a config
    object) or that pass the input by keyword, even though ``original``
    accepts them.

    Depth lives in the module-level :data:`_DELEGATION_DEPTHS` ContextVar
    keyed by ``agent_key`` (``id(agent)``), so every guarded agent shares
    the same ContextVar instance.

    Args:
        original: The callable being guarded.
        agent_key: Key of the owning agent in the depth map.
        resolved_max: Maximum permitted nesting depth.

    Returns:
        A wrapper that raises the depth-exceeded exception built by
        :func:`_build_violation` instead of calling ``original`` when the
        limit would be exceeded.
    """
    if asyncio.iscoroutinefunction(original):

        @functools.wraps(original)
        async def _guarded_async(*args: Any, **kwargs: Any) -> Any:
            current, token = _enter_depth_if_under(agent_key, resolved_max)
            if token is None:
                raise _build_violation(current, resolved_max)
            try:
                return await original(*args, **kwargs)
            finally:
                _exit_depth(token)

        return _guarded_async

    @functools.wraps(original)
    def _guarded_sync(*args: Any, **kwargs: Any) -> Any:
        current, token = _enter_depth_if_under(agent_key, resolved_max)
        if token is None:
            raise _build_violation(current, resolved_max)
        try:
            return original(*args, **kwargs)
        finally:
            _exit_depth(token)

    return _guarded_sync


# Method names we guard on the agent. ``ainvoke`` is required because
# LangChain / LangGraph / LlamaIndex agents expose it as the primary
# async entrypoint; wrapping only ``invoke`` would let async callers
# bypass the depth check entirely. A single ContextVar is shared across
# both so an async call that internally falls through to sync ``invoke``
# still increments the same counter.
_GUARDED_METHODS = ("invoke", "ainvoke")


def install_delegation_guard(agent: Any, max_depth: int | None = None) -> None:
    """Patch the agent's invoke methods to enforce a maximum delegation depth.

    Patches both ``invoke`` and ``ainvoke`` when present; each wrapper
    matches the sync/async shape of the original so awaitables stay
    awaitable. No-op when neither attribute exists or the agent has
    already been guarded.

    Per-call-chain depth is tracked in a single :class:`contextvars.ContextVar`
    shared across both methods so an ``ainvoke`` that internally calls
    ``invoke`` still increments the same counter. Concurrent invokes on
    the same agent (across threads or asyncio tasks) keep separate
    counters because ContextVar values are per-context.

    Originals are stashed on the agent under ``_uipath_original_<name>``
    so :func:`uninstall_delegation_guard` can restore them on dispose.
    The stash is written *before* the method is replaced, and removed
    again if the replacement fails, so a method is never left wrapped
    without a recorded original.

    Args:
        agent: Object whose ``invoke`` / ``ainvoke`` attributes are patched.
        max_depth: Maximum nesting depth; when ``None`` it is resolved from
            the environment via :func:`_resolve_max_depth`.
    """
    if getattr(agent, "_delegation_wrapped", False):
        return
    resolved_max = _resolve_max_depth() if max_depth is None else max_depth

    originals: dict[str, Any] = {}
    for name in _GUARDED_METHODS:
        candidate = getattr(agent, name, None)
        if callable(candidate):
            originals[name] = candidate
    if not originals:
        return

    agent_key = id(agent)
    patched: list[str] = []
    for name, original in originals.items():
        stash = f"_uipath_original_{name}"
        try:
            setattr(agent, stash, original)
            setattr(agent, name, _wrap_invoke(original, agent_key, resolved_max))
        except (AttributeError, TypeError, ValueError) as exc:
            # Some agent objects expose `invoke` via __getattr__ or via a
            # slot/descriptor that can't be re-assigned, and some model
            # classes reject unknown attribute names with ValueError. Skip
            # those — better to guard partial coverage than to crash the
            # runtime — and drop the stash so no stale original lingers.
            try:
                delattr(agent, stash)
            except (AttributeError, TypeError, ValueError):
                pass
            logger.debug("Could not patch %s on agent: %s", name, exc)
            continue
        patched.append(name)

    if not patched:
        # Nothing was actually wrapped — don't mark the agent as guarded,
        # or a later retry / uninstall would wrongly assume methods were
        # patched.
        logger.debug("Delegation guard patched no methods; leaving agent unguarded")
        return

    agent._delegation_wrapped = True
    logger.debug(
        "Delegation guard installed (max=%d, methods=%s)",
        resolved_max,
        patched,
    )


def uninstall_delegation_guard(agent: Any) -> None:
    """Restore the agent's invoke methods if a delegation guard was installed.

    Safe to call on agents that were never guarded. Also clears the
    agent's entry from the current context's depth map — ``id(agent)``
    is reused by Python after GC, so a stale entry could mis-attribute
    a future agent's count to this one.
    """
    if not getattr(agent, "_delegation_wrapped", False):
        return

    for name in _GUARDED_METHODS:
        stash = f"_uipath_original_{name}"
        original = getattr(agent, stash, None)
        if original is None:
            continue
        try:
            setattr(agent, name, original)
        except Exception as exc:  # noqa: BLE001 - dispose path; never raise
            logger.debug("Could not restore original %s: %s", name, exc)
        try:
            delattr(agent, stash)
        except AttributeError:
            pass
    agent._delegation_wrapped = False

    # Drop the agent's depth entry in the current context. Best-effort
    # — if dispose runs from a different context than where the depth
    # was set, the foreign context still owns its own copy and will
    # discard it when it ends.
    agent_key = id(agent)
    depths = _DELEGATION_DEPTHS.get(None)
    if depths is not None and agent_key in depths:
        _DELEGATION_DEPTHS.set(
            {key: depth for key, depth in depths.items() if key != agent_key}
        )


# ---------------------------------------------------------------------------
# Patch boundary — the next file of this diff starts here:
#   diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py
#   new file mode 100644
#   index 0000000..c7671b6
#   --- /dev/null
#   +++ b/src/uipath/runtime/governance/native/__init__.py
#   @@ -0,0 +1,51 @@
#
# Its module docstring opens here and is closed on the following line of
# the patch; the text up to this point reads:
#
#   """Native UiPath governance policy evaluator.
#
#   YAML-defined rules evaluated in-process at each agent lifecycle hook.
#   Reads policies from the UiPath governance backend
#   (``GET /api/v1/policy``) at startup and runs the deterministic
#   detectors backing ISO 42001 controls.
#
#   This subpackage owns:
#
#   - :class:`GovernanceEvaluator` – the evaluator implementation.
#   - The native policy model: :class:`Rule`, :class:`Check`,
#     :class:`Condition`, :class:`PolicyIndex`.
#   - Policy fetch + YAML compilation plumbing.
#
#   Shared output types (``Action``, ``AuditRecord``, …) live in
#   :mod:`uipath.core.governance`.
# ---------------------------------------------------------------------------
# (The closing ``"""`` of the native/__init__.py module docstring sits at
# this point of the patch; the package's public re-exports follow.)

from .evaluator import GovernanceEvaluator
from .loader import (
    get_policy_index,
    load_policy_index,
    prefetch_policy_index,
    reset_policy_index,
)
from .models import (
    Check,
    CheckContext,
    Condition,
    PolicyIndex,
    PolicyPack,
    Rule,
    Severity,
)

__all__ = [
    "GovernanceEvaluator",
    # Loader
    "get_policy_index",
    "load_policy_index",
    "prefetch_policy_index",
    "reset_policy_index",
    # Native policy model
    "Check",
    "CheckContext",
    "Condition",
    "PolicyIndex",
    "PolicyPack",
    "Rule",
    "Severity",
]

# ---------------------------------------------------------------------------
# Patch boundary — the next file of this diff starts here:
#   diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py
#   new file mode 100644
#   index 0000000..80f8394
#   --- /dev/null
#   +++ b/src/uipath/runtime/governance/native/evaluator.py
#   @@ -0,0 +1,1083 @@
# ---------------------------------------------------------------------------
"""Governance rule evaluator."""

# NOTE: ``from __future__ import annotations`` is the first statement of
# evaluator.py; it is shown as a comment here only because a __future__
# import cannot legally appear in the middle of this excerpt.
# from __future__ import annotations

import logging
import math
import re
from collections import Counter
from datetime import datetime, timezone
from functools import lru_cache
from typing import Any

from uipath.core.governance.exceptions import GovernanceBlockException
from uipath.core.governance.models import (
    Action,
    AuditRecord,
    LifecycleHook,
    RuleEvaluation,
)

from uipath.runtime.governance.audit import get_audit_manager
from uipath.runtime.governance.config import EnforcementMode, get_enforcement_mode
from uipath.runtime.governance.native.guardrail_compensation import (
    disabled_guardrails,
    submit_compensation,
)
from uipath.runtime.governance.native.models import (
    Check,
    CheckContext,
    Condition,
    PolicyIndex,
    Rule,
)

logger = logging.getLogger(__name__)


def _compensation_data_for_hook(context: CheckContext) -> dict[str, Any]:
    """Build the ``data`` payload for the /runtime/govern compensating call.

    Only the :class:`CheckContext` fields that belong to the active hook
    are forwarded, so the payload carries the content the evaluator was
    looking at and nothing else:

    - before/after agent and after model: ``{"content": ...}``
    - before model: ``{"content": ...}`` plus ``"messages"`` when non-empty
    - tool call: ``{"tool_name": ..., "tool_args": ...}``
    - after tool: ``{"tool_name": ..., "tool_result": ...}``
    - any other hook: ``{"content": ""}``
    """
    hook = context.hook

    if hook == LifecycleHook.BEFORE_AGENT:
        return {"content": context.agent_input}

    if hook == LifecycleHook.AFTER_AGENT:
        return {"content": context.agent_output}

    if hook == LifecycleHook.BEFORE_MODEL:
        model_payload: dict[str, Any] = {"content": context.model_input}
        if context.messages:
            model_payload["messages"] = context.messages
        return model_payload

    if hook == LifecycleHook.AFTER_MODEL:
        return {"content": context.model_output}

    if hook == LifecycleHook.TOOL_CALL:
        return {"tool_name": context.tool_name, "tool_args": context.tool_args}

    if hook == LifecycleHook.AFTER_TOOL:
        return {"tool_name": context.tool_name, "tool_result": context.tool_result}

    # Memory-write and unknown hooks: pass an empty content so the
    # server still receives a structurally-valid payload.
    return {"content": ""}


@lru_cache(maxsize=256)
def _compile_regex(pattern: str) -> re.Pattern[str] | None:
    """Compile ``pattern`` once and memoise the outcome.

    Args:
        pattern: The regex pattern string

    Returns:
        The compiled pattern, or ``None`` when ``pattern`` is not a valid
        regular expression. The ``None`` result is cached as well, so an
        invalid pattern is logged once while it stays in the cache.
    """
    try:
        compiled = re.compile(pattern)
    except re.error as e:
        logger.warning("Invalid regex pattern '%s': %s", pattern, e)
        return None
    return compiled


# --- vaderSentiment: lazy-imported singleton ---
# Hard dependency, but lazy-loaded to keep import-time cost off the
# critical path. The except branch is defence against a corrupted
# install (file present in METADATA but module unimportable) — the
# operator no-ops rather than crashing the agent.
+_VADER_UNINITIALIZED = object() +_vader_analyzer: Any = _VADER_UNINITIALIZED + + +def _get_vader_analyzer() -> Any: + """Return a cached SentimentIntensityAnalyzer, or None if unavailable.""" + global _vader_analyzer + if _vader_analyzer is _VADER_UNINITIALIZED: + try: + from vaderSentiment.vaderSentiment import ( # type: ignore[import-untyped] + SentimentIntensityAnalyzer, + ) + + _vader_analyzer = SentimentIntensityAnalyzer() + except ImportError: + logger.error( + "vaderSentiment failed to import despite being a hard dependency; " + "sentiment_concern checks will not fire. Reinstall uipath-core." + ) + _vader_analyzer = None + return _vader_analyzer + + +# --- chardet: lazy-imported module for encoding integrity (A.7.4) --- +# Hard dependency, lazy-loaded for symmetry with the other library +# wrappers. The except branch covers corrupted installs only. +_CHARDET_UNINITIALIZED = object() +_chardet_module: Any = _CHARDET_UNINITIALIZED + + +def _get_chardet() -> Any: + """Return the chardet module, or None if unavailable.""" + global _chardet_module + if _chardet_module is _CHARDET_UNINITIALIZED: + try: + import chardet + + _chardet_module = chardet + except ImportError: + logger.error( + "chardet failed to import despite being a hard dependency; " + "encoding_concern confidence check will not fire (stdlib " + "signals still apply). Reinstall uipath-core." + ) + _chardet_module = None + return _chardet_module + + +# --- Static patterns for encoding_concern (A.7.4) --- +# Latin-1-as-UTF-8 mojibake bigrams — the visible artefacts when +# UTF-8-encoded text is re-decoded as Latin-1 / Windows-1252. +_MOJIBAKE_BIGRAMS: tuple[str, ...] 
= ( + "é", + "è", + "â", + "à ", + "ù", + "î", + "ô", + "ç", # accented vowels + "Ä", + "Ö", + "Ü", + "ß", # German umlauts / eszett + "’", + "“", + "â€\x9d", + "–", + "—", + "•", # smart quotes / dashes + "£", + "°", + "§", + "¶", + "©", + "®", # NBSP-leading symbols + "ï¿", + "¿½", # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1) + "ï»", + "»¿", # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1) +) + +# Literal hex escape sequences ("\x80" as 4 source chars) indicate raw +# bytes leaked through a string layer rather than being decoded. +_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}") + + +# --- Static patterns for incident_concern (A.8.4) --- +# Stdlib-only categorical taxonomy. Mirrors sentry-sdk's incident shape +# (categorical types over stack/status), but for string payloads from +# model output / tool result rather than exception objects. +_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "safety_refusal": [ + re.compile( + r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)" + r"\s+(?:help|assist|provide|answer|do\s+that))\b" + ), + re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"), + re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"), + ], + "tool_failure": [ + re.compile( + r"\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)" + ), + re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"), + re.compile(r"(?i)\b(timed?\s*out|timeout)\b"), + ], + "auth_failure": [ + re.compile(r"\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"), + re.compile( + r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b" + ), + ], + "quota_exceeded": [ + re.compile(r"\b(429)\b"), + re.compile( + r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b" + ), + ], + "hallucination": [ + re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"), + re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"), + ], 
+} + +# --- Static patterns for commitment_concern (A.10.4) --- +# Commitment-language signals. The verb pattern covers both first-person +# promise verbs ("we will refund") and formal-business commitment markers +# common in proposal / SOW outputs ("Cost: $X", "fixed scope", +# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and +# deadline signals combine via OR semantics — see +# :meth:`_check_commitment_concern`. +_COMMITMENT_VERB_PATTERN = re.compile( + r"(?i)(" + # First-person promise / liability verbs + r"\brefund\b|\breimburse\b|" + r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|" + r"\bsla\b|" + r"\bwaive[d]?\b|" + r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|" + r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|" + r"handover|hand\s+over|ship)\b|" + # Proposal / SOW commitment markers + r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|" + r"\bcost\s*:\s*\$?\d|" + r"\bquote\s*:\s*\$?\d|" + r"\bdeliverables?\b|" + r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|" + r"\bI\s+propose\b" + r")" +) +# Currency-anchored amount detection. Requires a currency marker adjacent +# to the number so URL fragments (e.g. ``/667851``) don't false-positive. +# Covers symbol-then-number ($780) and number-then-code (780 USD). +# +# Bare percentages (``75%``, ``99.9%``) are deliberately NOT matched +# here — they fire on benign status / progress text ("75% complete", +# "99.9% uptime") under OR semantics. Real percentage-bearing +# commitments ("we'll give you a 20% discount", "refund 100%") still +# fire via the verb pattern. +_COMMITMENT_AMOUNT_FALLBACK = re.compile( + r"(?:\$|€|£|¥|₹|USD|EUR|GBP|JPY|INR)\s*\d[\d,]*(?:\.\d+)?" 
+ r"|\b\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|JPY|INR|" + r"dollars?|euros?|pounds?|yen|rupees?)\b" +) +_COMMITMENT_DEADLINE_PATTERN = re.compile( + r"(?i)\bwithin\s+\d+\s*(?:second|minute|hour|day|week|month|year)s?\b" + r"|\bby\s+(?:tomorrow|next\s+\w+|\d+/\d+(?:/\d+)?)\b" +) + + +class GovernanceEvaluator: + """Evaluates governance rules against check contexts. + + Supports two enforcement modes: + - AUDIT: Log all violations but never block (DENY becomes AUDIT in final action) + - ENFORCE: Actually block on DENY rules + + Default mode is AUDIT for safety. + """ + + def __init__( + self, + policy_index: PolicyIndex, + mode: EnforcementMode | None = None, + ) -> None: + """Initialize with a compiled policy index and optional mode override.""" + self._policy_index = policy_index + self._mode = mode + + @property + def policy_index(self) -> PolicyIndex: + """Return the compiled policy index this evaluator runs against.""" + return self._policy_index + + @property + def mode(self) -> EnforcementMode: + """Get the enforcement mode (uses config default if not set).""" + if self._mode is not None: + return self._mode + return get_enforcement_mode() + + @mode.setter + def mode(self, value: EnforcementMode) -> None: + """Set the enforcement mode.""" + self._mode = value + + def is_audit_mode(self) -> bool: + """Check if running in audit-only mode.""" + return self.mode == EnforcementMode.AUDIT + + def is_enforce_mode(self) -> bool: + """Check if running in enforce mode (will block on DENY).""" + return self.mode == EnforcementMode.ENFORCE + + def evaluate(self, context: CheckContext) -> AuditRecord: + """Evaluate rules registered for ``context.hook`` against the context. + + Only rules whose ``hook`` field matches the current lifecycle hook + are evaluated — a ``tool_call`` rule does not fire on + ``before_model``, and vice versa. This avoids running checks + against fields the context cannot provide and keeps the audit + stream scoped to the active phase. 
+ + The final action depends on the enforcement mode: + - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted. + - AUDIT mode: Even DENY rules result in AUDIT action (log only, don't block) + - ENFORCE mode: DENY rules result in DENY action AND a + :class:`GovernanceBlockException` is raised. + + Audit events (per-rule + hook summary) are emitted via the + global :func:`get_audit_manager` so callers do not need to do + any emission themselves. + + Args: + context: The check context with hook and content + + Returns: + AuditRecord with all evaluations and final action. + + Raises: + GovernanceBlockException: In ENFORCE mode when a DENY rule matches. + """ + mode = self.mode + if mode == EnforcementMode.DISABLED: + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + trace_id=context.trace_id, + hook=context.hook, + evaluations=[], + final_action=Action.ALLOW, + metadata={**context.metadata, "enforcement_mode": mode.value}, + ) + + rules = self._policy_index.get_rules_for_hook(context.hook) + + evaluations: list[RuleEvaluation] = [] + raw_action = Action.ALLOW # The action before mode adjustment + deny_would_fire = False # Track if DENY would have fired + + for rule in rules: + if not rule.enabled: + continue + + evaluation = self._evaluate_rule(rule, context) + evaluations.append(evaluation) + + if evaluation.matched: + # Take the most restrictive action. Use evaluation.action + # (which already folds in per-check overrides), not + # rule.action, so check-level overrides are honored here too. 
+ eval_action = evaluation.action + if eval_action == Action.DENY: + raw_action = Action.DENY + deny_would_fire = True + elif eval_action == Action.ESCALATE and raw_action != Action.DENY: + raw_action = Action.ESCALATE + elif eval_action == Action.AUDIT and raw_action == Action.ALLOW: + raw_action = Action.AUDIT + + # Apply enforcement mode + final_action = self._apply_enforcement_mode(raw_action) + + # Build metadata with mode info + record_metadata = dict(context.metadata) + record_metadata["enforcement_mode"] = mode.value + if deny_would_fire and self.is_audit_mode(): + record_metadata["audit_mode_would_deny"] = True + + audit = AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + trace_id=context.trace_id, + hook=context.hook, + evaluations=evaluations, + final_action=final_action, + metadata=record_metadata, + ) + + self._emit_audit(audit, mode) + + # For any guardrail mapped to UiPath but currently disabled, hand + # the disabled guardrails to the governance-server's + # /runtime/govern endpoint. The SERVER runs the guardrail check + # AND writes the trace (the payload carries traceId / src_timestamp + # / hook / agent so it can correlate) — the agent does NOT emit a + # trace itself, to avoid double-writing. Fire-and-forget on a + # daemon thread so a slow or unreachable endpoint never blocks + # the agent. + self._dispatch_compensation(audit, context) + + if final_action == Action.DENY: + raise GovernanceBlockException.from_audit_record(audit) + + return audit + + def _dispatch_compensation( + self, audit: AuditRecord, context: CheckContext + ) -> None: + """Schedule compensating governance for any matched fallback rules. + + Hands the call to the bounded background pool in + :func:`uipath.runtime.governance.native.guardrail_compensation.submit_compensation`. 
+ That helper owns concurrency, queue caps, exception isolation, + and graceful process-exit cancellation — this method just + builds the payload, logs the summary, and submits. + """ + try: + disabled = disabled_guardrails(audit, self._policy_index) + if not disabled: + return + + validators = [rule["validator"] for rule in disabled] + + # Surface the disabled-guardrail fire-up: how many rules + # triggered the compensating call, and which validators + # they map to (e.g. pii_detection / prompt_injection / + # harmful_content). One line per dispatch so an operator + # can see the volume + breakdown at a glance. + logger.info( + "Compensating governance triggered: hook=%s, count=%d, validators=[%s]", + audit.hook.value, + len(disabled), + ", ".join(validators), + ) + + submit_compensation( + rules=disabled, + data=_compensation_data_for_hook(context), + hook=audit.hook.value, + trace_id=audit.trace_id, + src_timestamp=audit.timestamp.isoformat(), + agent_name=audit.agent_name, + runtime_id=audit.runtime_id, + ) + except Exception as exc: # noqa: BLE001 - fail-open + logger.warning( + "Failed to dispatch compensating governance call: %s", exc + ) + + def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: + """Emit per-rule and hook-summary events to the global audit manager. + + Failure-isolated: audit-sink errors must never break evaluation. + Sink-level circuit breaking is handled inside :class:`AuditManager`. + """ + try: + manager = get_audit_manager() + except Exception as exc: # pragma: no cover - defensive + logger.debug("Audit manager unavailable; skipping emission: %s", exc) + return + + hook_name = audit.hook.name + + # ``guardrail_fallback`` rules are server-traced: the agent POSTs + # to ``/runtime/govern`` (see :meth:`_dispatch_compensation`) and + # the governance-server emits the audit event with the actual + # validator verdict. 
Emitting a Python-side ``rule_evaluation`` + # event here would produce a duplicate trace carrying no + # verdict, so filter these rules out of every event the Python + # evaluator emits (per-rule AND the hook summary's counts). + emittable = [ + ev for ev in audit.evaluations + if not self._is_guardrail_fallback_rule(ev.rule_id) + ] + + for evaluation in emittable: + manager.emit_rule_evaluation( + rule_id=evaluation.rule_id, + rule_name=evaluation.rule_name, + pack_name=evaluation.pack_name, + hook=hook_name, + matched=evaluation.matched, + action=evaluation.action.value if evaluation.matched else "allow", + detail=evaluation.detail, + agent_name=audit.agent_name, + trace_id=audit.trace_id, + description=evaluation.description, + ) + + manager.emit_hook_summary( + hook=hook_name, + agent_name=audit.agent_name, + total_rules=len(emittable), + matched_rules=sum(1 for ev in emittable if ev.matched), + final_action=audit.final_action.value, + trace_id=audit.trace_id, + enforcement_mode=mode.value, + ) + + def _is_guardrail_fallback_rule(self, rule_id: str) -> bool: + """Return True if the rule is a UiPath-compensating fallback rule. + + Such rules carry a ``guardrail_fallback`` condition; their audit + trace is emitted by the governance-server in response to the + ``/runtime/govern`` POST, so the Python evaluator must not emit + a duplicate trace for them. + """ + rule = self._policy_index.get_rule(rule_id) + if rule is None: + return False + for check in rule.checks: + for cond in check.conditions: + if cond.operator == "guardrail_fallback": + return True + return False + + def _apply_enforcement_mode(self, raw_action: Action) -> Action: + """Apply enforcement mode to the raw action. 
def evaluate_before_agent(
    self,
    agent_input: str,
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    model_name: str = "",
    **kwargs: Any,
) -> AuditRecord:
    """Run the BEFORE_AGENT rules against the agent's input.

    Packs the arguments into a ``CheckContext`` for
    ``LifecycleHook.BEFORE_AGENT`` and delegates to :meth:`evaluate`.
    Of the extra keyword arguments only ``metadata`` is read (``{}``
    when absent); any other extra keyword is ignored.
    """
    extra_metadata = kwargs.get("metadata", {})
    before_agent_context = CheckContext(
        hook=LifecycleHook.BEFORE_AGENT,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        agent_input=agent_input,
        model_name=model_name,
        metadata=extra_metadata,
    )
    return self.evaluate(before_agent_context)
def evaluate_tool_call(
    self,
    tool_name: str,
    tool_args: dict[str, Any],
    agent_name: str,
    runtime_id: str,
    trace_id: str,
    session_state: dict[str, Any] | None = None,
    **kwargs: Any,
) -> AuditRecord:
    """Run the TOOL_CALL rules for a tool invocation.

    Builds a ``CheckContext`` for ``LifecycleHook.TOOL_CALL`` and hands
    it to :meth:`evaluate`. A falsy ``session_state`` (``None`` or an
    empty dict) is replaced by a fresh empty dict. Of the extra keyword
    arguments only ``metadata`` is read (``{}`` when absent).
    """
    state_snapshot = session_state or {}
    extra_metadata = kwargs.get("metadata", {})
    tool_call_context = CheckContext(
        hook=LifecycleHook.TOOL_CALL,
        agent_name=agent_name,
        runtime_id=runtime_id,
        trace_id=trace_id,
        tool_name=tool_name,
        tool_args=tool_args,
        session_state=state_snapshot,
        metadata=extra_metadata,
    )
    return self.evaluate(tool_call_context)
Take the most + # restrictive matched action (DENY > ESCALATE > AUDIT > ALLOW), + # mirroring evaluate()'s cross-rule aggregation. + matched_action = Action.ALLOW + + for check in rule.checks: + matched, detail = self._evaluate_check(check, context) + check_results.append( + { + "matched": matched, + "detail": detail, + "action": check.action.value, + } + ) + if matched: + any_check_matched = True + if check.action == Action.DENY: + matched_action = Action.DENY + elif ( + check.action == Action.ESCALATE + and matched_action != Action.DENY + ): + matched_action = Action.ESCALATE + elif ( + check.action == Action.AUDIT + and matched_action == Action.ALLOW + ): + matched_action = Action.AUDIT + + # Surface the FIRST matched check's message; falls back to the + # first check's detail (empty string when none matched) for + # backward compatibility with rules that have a single check. + first_matched_detail = next( + (cr["detail"] for cr in check_results if cr["matched"]), + check_results[0]["detail"] if check_results else "", + ) + + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=any_check_matched, + detail=first_matched_detail, + pack_name=rule.pack_name, + action=matched_action if any_check_matched else Action.ALLOW, + description=rule.description, + check_results=check_results, + ) + + def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]: + """Evaluate a single check against the context.""" + if not check.conditions: + return True, "No conditions (always matches)" + + results = [] + for condition in check.conditions: + matched = self._evaluate_condition(condition, context) + results.append(matched) + + if check.logic == "any": + final_match = any(results) + else: # "all" is default + final_match = all(results) + + detail = check.message if final_match else "" + return final_match, detail + + def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool: + """Evaluate a single 
condition against the context.""" + field_value = self._get_field_value(condition.field, context) + result = self._apply_operator(condition.operator, field_value, condition.value) + + if condition.negate: + result = not result + + return result + + def _get_field_value(self, field: str, context: CheckContext) -> Any: + """Get a field value from the context.""" + parts = field.split(".") + + # Start with context + value: Any = context + + for part in parts: + if hasattr(value, part): + value = getattr(value, part) + elif isinstance(value, dict) and part in value: + value = value[part] + else: + return None + + return value + + def _apply_operator( + self, operator: str, field_value: Any, check_value: Any + ) -> bool: + """Apply an operator to compare field value against check value.""" + # Handle existence checks before the None check + if operator == "exists": + return field_value is not None + if operator == "not_exists": + return field_value is None + + # guardrail_fallback fires only when the guardrail is mapped to + # UiPath but its policy is disabled. Config travels in + # ``check_value``; the rule's ``field`` is unused (so + # ``field_value`` is ``None`` here, which is expected — we must + # special-case this before the generic ``None`` short-circuit + # below). + if operator == "guardrail_fallback": + cfg = check_value if isinstance(check_value, dict) else {} + return bool(cfg.get("mapped_to_uipath", False)) and not bool( + cfg.get("policy_enabled", True) + ) + + if field_value is None: + return False + + # Numeric operators don't need stringification — short-circuit + # before `str(field_value)` (expensive for dict / large payloads). 
+ if operator in ("gt", "gte", "lt", "lte"): + try: + lhs = float(field_value) + rhs = float(check_value) + except (ValueError, TypeError): + return False + if operator == "gt": + return lhs > rhs + if operator == "gte": + return lhs >= rhs + if operator == "lt": + return lhs < rhs + return lhs <= rhs + + field_str = str(field_value) + + match operator: + case "equals" | "eq": + return field_str == str(check_value) + + case "not_equals" | "ne": + return field_str != str(check_value) + + case "contains": + return str(check_value).lower() in field_str.lower() + + case "not_contains": + return str(check_value).lower() not in field_str.lower() + + case "regex" | "matches": + compiled = _compile_regex(str(check_value)) + if compiled is None: + return False + return bool(compiled.search(field_str)) + + case "in_list": + if isinstance(check_value, list): + return field_str in check_value + return False + + case "not_in_list": + if isinstance(check_value, list): + return field_str not in check_value + return True + + case "vader_concern": + # VADER compound score <= threshold. + # check_value: dict like {"threshold": -0.3} (default -0.3) + return self._check_vader_concern(field_str, check_value) + + case "encoding_concern": + # chardet-backed encoding integrity check (A.7.4). + # check_value: dict with optional `min_confidence` (default 0.5) + # and `max_replacement_ratio` (default 0.05). + return self._check_encoding_concern(field_str, check_value) + + case "entropy_concern": + # Shannon entropy outside expected range (A.7.4). + # check_value: dict with optional `min` (default 1.5) and + # `max` (default 7.5) bits/byte. Stdlib only. + return self._check_entropy_concern(field_str, check_value) + + case "incident_concern": + # Categorical incident detection (A.8.4). + # check_value: dict with optional `categories` list + # (subset of safety_refusal/tool_failure/auth_failure/ + # quota_exceeded/hallucination). Default: all categories. 
+ return self._check_incident_concern(field_str, check_value) + + case "commitment_concern": + # Customer commitment language detection (A.10.4). + # check_value: dict with optional `require_amount` (default + # True) and `require_deadline` (default False). Fires when + # a commitment verb co-occurs with the configured signals. + return self._check_commitment_concern(field_str, check_value) + + case _: + logger.debug("Unknown operator: %s", operator) + return False + + @staticmethod + def _check_vader_concern(text: str, params: Any) -> bool: + """Return True if VADER compound score on `text` is <= threshold. + + Args: + text: Text to analyse. + params: Either a dict with `threshold` key, or a numeric threshold + directly. Default threshold is -0.3 (clearly-negative). + + Returns: + True iff vaderSentiment is available AND compound score <= threshold. + Returns False on empty input or if the library is not installed — + sentiment checks no-op rather than crash. + """ + if not text or not text.strip(): + return False + + analyzer = _get_vader_analyzer() + if analyzer is None: + return False + + if isinstance(params, dict): + threshold = float(params.get("threshold", -0.3)) + else: + try: + threshold = float(params) + except (TypeError, ValueError): + threshold = -0.3 + + try: + compound = float(analyzer.polarity_scores(text)["compound"]) + except Exception as exc: # pragma: no cover - defensive + logger.debug("VADER analysis failed: %s", exc) + return False + + return compound <= threshold + + @staticmethod + def _check_encoding_concern(text: str, params: Any) -> bool: + r"""Return True if `text` shows encoding integrity issues. + + Sums multiple deterministic corruption signals against text length: + - U+FFFD replacement characters (already-decoded lossy text) + - Literal ``�`` escape sequences carried through a JSON + / repr layer rather than being decoded + - Literal ``\xHH`` hex escapes (raw bytes leaked into a string) + - Latin-1-as-UTF-8 mojibake bigrams (e.g. 
``é``, ``’``) + If the corruption ratio exceeds ``max_replacement_ratio`` the + check fires. chardet (when installed) is consulted as a + secondary low-confidence signal. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + min_confidence = float(params.get("min_confidence", 0.5)) + max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05)) + min_corruption_events = int(params.get("min_corruption_events", 2)) + + length = max(len(text), 1) + + replacement_chars = text.count("�") + literal_ufffd_escapes = text.count("\\ufffd") + hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text)) + mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS) + + # Absolute count of distinct corruption *events* (one per + # U+FFFD, one per literal escape sequence, one per mojibake + # bigram). Even diluted by a lot of clean text, a few of these + # in production output is a strong signal. + corruption_events = ( + replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams + ) + if corruption_events >= min_corruption_events: + return True + + # Ratio-based fallback for cases below the absolute floor: still + # catches very short payloads where a single corruption char is + # disproportionate. + # Weight each event by its source-char span so denser corruption + # in shorter text trips the ratio sooner: + # U+FFFD = 1 char, "�" = 6 chars, "\xHH" = 4 chars, + # mojibake bigram = 2 chars. + corruption_chars = ( + replacement_chars + + 6 * literal_ufffd_escapes + + 4 * hex_escapes + + 2 * mojibake_bigrams + ) + if corruption_chars / length > max_replacement_ratio: + return True + + # Secondary: chardet on the encoded bytes. 
For pure str input + # this almost always reports high UTF-8/ASCII confidence (the + # branch is intentionally permissive), but it does catch bytes + # routed through `repr()` or `__str__` of a `bytes` object that + # chardet recognises as a non-UTF8 encoding with low confidence. + chardet = _get_chardet() + if chardet is None: + return False + try: + detection = chardet.detect(text.encode("utf-8", errors="replace")) + confidence = float(detection.get("confidence") or 0.0) + except Exception as exc: # pragma: no cover - defensive + logger.debug("chardet detection failed: %s", exc) + return False + + return confidence < min_confidence + + @staticmethod + def _check_entropy_concern(text: str, params: Any) -> bool: + """Return True if Shannon entropy of `text` is outside an expected range. + + Stdlib-only. Entropy is computed in bits per symbol over byte + frequencies. English prose typically lands ~3.5–4.5 bits/byte; + binary noise approaches 8 bits/byte; constant/repetitive text + approaches 0. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + lo = float(params.get("min", 1.5)) + hi = float(params.get("max", 7.5)) + + data = text.encode("utf-8", errors="replace") + total = len(data) + if total == 0: + return False + + counts = Counter(data) + entropy = 0.0 + for c in counts.values(): + p = c / total + entropy -= p * math.log2(p) + + return entropy < lo or entropy > hi + + @staticmethod + def _check_incident_concern(text: str, params: Any) -> bool: + """Return True if `text` matches any configured incident pattern (A.8.4). + + Categories: safety_refusal, tool_failure, auth_failure, + quota_exceeded, hallucination. Pass ``{"categories": [...]}`` to + restrict; default scans all categories. 
+ """ + if not text or not text.strip(): + return False + + if isinstance(params, dict): + requested = params.get("categories") + else: + requested = None + + if not requested: + categories = list(_INCIDENT_PATTERNS.keys()) + else: + categories = [c for c in requested if c in _INCIDENT_PATTERNS] + + for category in categories: + for pattern in _INCIDENT_PATTERNS[category]: + if pattern.search(text): + return True + return False + + @staticmethod + def _check_commitment_concern(text: str, params: Any) -> bool: + """Return True if `text` carries customer-commitment language (A.10.4). + + OR semantics: a commitment-verb match always fires; when + ``require_amount`` is true, a currency-anchored amount alone also + fires; when ``require_deadline`` is true, a deadline phrase alone + also fires. With both flags false the rule matches on verb only + (verb-only mode). + + The verb pattern covers first-person promise verbs *and* proposal + / SOW commitment markers ("Cost: $X", "fixed scope", + "Deliverables", "Timeline: N days", "I propose"). The amount + pattern requires a currency marker adjacent to the number so URL + fragments don't false-positive. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + require_amount = bool(params.get("require_amount", True)) + require_deadline = bool(params.get("require_deadline", False)) + + verb_match = bool(_COMMITMENT_VERB_PATTERN.search(text)) + + # Verb-only mode: neither supporting signal is enabled. 
+ if not require_amount and not require_deadline: + return verb_match + + amount_match = require_amount and bool( + _COMMITMENT_AMOUNT_FALLBACK.search(text) + ) + deadline_match = require_deadline and bool( + _COMMITMENT_DEADLINE_PATTERN.search(text) + ) + return verb_match or amount_match or deadline_match diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py new file mode 100644 index 0000000..fca63c6 --- /dev/null +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -0,0 +1,433 @@ +"""Compensating governance for disabled centralized guardrails. + +When a ``guardrail_fallback`` rule fires (the guardrail is mapped to +UiPath but the centralized policy is disabled), the framework asks the +governance-server to run the real guardrail check via its +``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint. + +This call is **fire-and-forget**: the server runs the guardrail AND +writes the audit trace from its side. The agent doesn't inspect the +response — it only cares about whether the call reached the server. + +The call also runs on a **bounded background pool** so even an agent +that fires hundreds of compensation events in a session can't pile up +threads or memory. :data:`COMPENSATION_MAX_WORKERS` workers process +the queue, and an in-flight semaphore drops submissions when the pool +is genuinely saturated — at that point the next call is logged and +skipped rather than queued indefinitely. + +URL composition, request headers, org/tenant resolution, and the +request timeout all come from +:mod:`uipath.runtime.governance.native.backend_client` so the policy +fetch and the compensating call share one definition of every +operator-tunable. 
+""" + +from __future__ import annotations + +import atexit +import json +import logging +import os +import threading +import urllib.error +import urllib.request +from concurrent.futures import ThreadPoolExecutor +from typing import Any, TypedDict + +from uipath.runtime.governance.native.backend_client import ( + BACKEND_REQUEST_TIMEOUT_SECONDS, + COMPENSATION_MAX_WORKERS, + ENV_ACCESS_TOKEN, + ENV_ORGANIZATION_ID, + ENV_TENANT_ID, + ENV_TRACE_ID, + GOVERN_API_PATH, + TENANT_HEADER, + build_governance_url, + governance_request_headers, + resolve_job_context, + resolve_organization_id, + resolve_tenant_id, +) + +logger = logging.getLogger(__name__) + + +# ---------------------------------------------------------------------------- +# Bounded thread pool — caps both concurrent threads AND queued work. +# +# ThreadPoolExecutor alone caps concurrent worker threads, but its internal +# queue is unbounded — a misbehaving agent that fires compensation faster than +# the server can absorb would queue indefinitely (memory pressure). The +# semaphore caps total in-flight submissions (running + queued) at a +# multiple of the worker count. Saturated submissions are dropped with a +# warning. Process exit cancels queued work and lets running tasks finish +# (bounded by their HTTP timeout) via the atexit handler. +# ---------------------------------------------------------------------------- + +_INFLIGHT_OVERSUBSCRIPTION = 4 # queue up to (workers × this many) before dropping +_INFLIGHT_CAP = COMPENSATION_MAX_WORKERS * _INFLIGHT_OVERSUBSCRIPTION + +_pool = ThreadPoolExecutor( + max_workers=COMPENSATION_MAX_WORKERS, + thread_name_prefix="governance-compensation", +) +_inflight = threading.BoundedSemaphore(_INFLIGHT_CAP) + + +@atexit.register +def _shutdown_pool() -> None: + """Cancel queued compensation tasks at process exit. + + ``wait=False`` returns immediately so process shutdown isn't held + up; ``cancel_futures=True`` (Python 3.9+) drops anything not yet + running. 
class FiredRule(TypedDict):
    """Per-rule metadata carried in the /runtime/govern payload.

    One entry per matching ``guardrail_fallback`` condition (in practice
    one per rule, since each fallback-rule typically declares a single
    such condition). The server uses these to write per-rule LLMOps
    trace records (Doc-2 audit structure).

    Entries are built by ``disabled_guardrails``; keys are camelCase
    because the dict is serialized as-is into the request body.
    """

    # Identifier of the fired rule (the evaluation's ``rule_id``).
    ruleId: str
    # Name of the fired rule (the evaluation's ``rule_name``).
    ruleName: str
    # Pack the rule belongs to; ``""`` when the rule has no pack name.
    packName: str
    # Validator read from the ``guardrail_fallback`` condition config,
    # e.g. ``pii_detection``; always non-empty in emitted entries.
    validator: str
+ + Each entry carries the metadata the server needs to write one + per-rule LLMOps trace record:: + + { + "ruleId": "...", + "ruleName": "...", + "packName": "...", + "validator": "pii_detection", + } + """ + out: list[FiredRule] = [] + for ev in audit.evaluations: + if not ev.matched: + continue + rule = policy_index.get_rule(ev.rule_id) + if rule is None: + continue + for check in rule.checks: + for cond in check.conditions: + if cond.operator != "guardrail_fallback": + continue + if not isinstance(cond.value, dict): + continue + # The ``guardrail_fallback`` operator at evaluation time + # only matches when ``mapped_to_uipath=True`` AND + # ``policy_enabled=False``. We re-check here defensively + # so a future code path that bypasses the evaluator (or + # a multi-condition rule that fired on a sibling check) + # can't trigger a compensation call for a guardrail + # that isn't actually disabled. + if not bool(cond.value.get("mapped_to_uipath", False)): + continue + if bool(cond.value.get("policy_enabled", True)): + continue + validator = str(cond.value.get("validator", "")) + if validator: + out.append( + { + "ruleId": ev.rule_id, + "ruleName": ev.rule_name, + "packName": getattr(rule, "pack_name", "") or "", + "validator": validator, + } + ) + return out + + +def _validators(rules: list[FiredRule]) -> list[str]: + """Distinct validator names from the fired rules, preserving order.""" + return list(dict.fromkeys(r["validator"] for r in rules if r.get("validator"))) + + +def _resolve_trace_id(fallback: str) -> str: + """Resolve the agent's trace id while still on the caller thread. + + MUST be called before the background-pool hop in + :func:`submit_compensation`: the worker thread that issues the + ``/govern`` call has no OpenTelemetry context, so resolving there would + fall back to a detached id — orphaning the server-written compensation + records from the agent's real trace. 
def submit_compensation(
    rules: list[FiredRule],
    data: dict[str, Any],
    hook: str,
    trace_id: str,
    src_timestamp: str,
    agent_name: str,
    runtime_id: str,
) -> None:
    """Queue a fire-and-forget ``/runtime/govern`` call on the background pool.

    Steps, in order:

    1. Return at once when ``rules`` is empty or yields no validators.
    2. Resolve the trace id through ``_resolve_trace_id`` on the
       calling thread, before the hop to a worker, so the worker sends
       the value resolved here.
    3. Take an in-flight slot without blocking; when none is free the
       call is dropped with a warning.
    4. Submit a worker that calls :func:`request_governance`, logs a
       warning if it raises, and always gives the slot back.

    A ``RuntimeError`` from the pool (for instance after shutdown) is
    logged and the slot is released. The function returns ``None`` on
    every path handled here.
    """
    if not rules:
        return

    validators = _validators(rules)
    if not validators:
        return

    # Joined once; the list is not mutated afterwards, so every log
    # line below sees the same text.
    validator_csv = ", ".join(validators)

    # Must happen on the caller thread - see step 2 of the docstring.
    caller_trace_id = _resolve_trace_id(trace_id)

    slot_taken = _inflight.acquire(blocking=False)
    if not slot_taken:
        logger.warning(
            "Compensation pool saturated (>%d in flight); dropping call "
            "(validators=[%s])",
            _INFLIGHT_CAP,
            validator_csv,
        )
        return

    def _govern_then_release() -> None:
        try:
            request_governance(
                rules=rules,
                data=data,
                hook=hook,
                trace_id=caller_trace_id,
                src_timestamp=src_timestamp,
                agent_name=agent_name,
                runtime_id=runtime_id,
            )
        except Exception as exc:  # noqa: BLE001 - fail-open by contract
            logger.warning(
                "Compensation worker failed (validators=[%s]): %s",
                validator_csv,
                exc,
            )
        finally:
            _inflight.release()

    try:
        _pool.submit(_govern_then_release)
    except RuntimeError as exc:
        # The worker will never run, so hand the slot back here.
        _inflight.release()
        logger.warning(
            "Compensation pool unavailable (validators=[%s]): %s",
            validator_csv,
            exc,
        )
+ + POSTs:: + + { + "type": ["pii_detection", "harmful_content"], + "rules": [ + {"ruleId": "...", "ruleName": "...", + "packName": "...", "validator": "pii_detection"} + ], + "data": {...}, + "hook": "before_model", + "traceId": "...", + "src_timestamp": "...", + "agentName": "...", + "runtimeId": "...", + "folderKey": "...", "jobKey": "...", "processKey": "...", + "referenceId": "...", "agentVersion": "..." + } + + ``type`` (the distinct validators) drives the guardrail API call; + ``rules`` + the job-context fields let the server write one LLMOps + trace record per rule (Doc-2 audit structure). The job-context keys + are included only when resolvable from the environment. + + Skipped if the org or tenant id can't be resolved (no URL / no + header). The server runs the disabled guardrails AND writes the + audit trace itself — the agent does not consume or parse the + response body. The only thing this function reports back is + *whether the call landed*: + + - **Success** → ``INFO`` log ``Govern call has been made``. + - **Failure** → ``WARNING`` log; returns ``None``. + + Never raises. + """ + if not rules: + return + + validators = _validators(rules) + if not validators: + return + + org_id = resolve_organization_id() + if not org_id: + logger.warning( + "Govern call skipped: organization id is not available " + "(set %s). validators=[%s]", + ENV_ORGANIZATION_ID, + ", ".join(validators), + ) + return + + tenant_id = resolve_tenant_id() + if not tenant_id: + logger.warning( + "Govern call skipped: tenant id is not available " + "(set %s). validators=[%s]", + ENV_TENANT_ID, + ", ".join(validators), + ) + return + + # Bearer token is required by the backend; sending without one + # produces a 401 per call and pollutes logs. Skip cleanly when the + # token isn't present (e.g. local dev, missing host bootstrap) + # rather than burning quota on guaranteed auth failures. 
+ if not os.environ.get(ENV_ACCESS_TOKEN): + logger.warning( + "Govern call skipped: %s is not set in the environment; " + "compensation requires a bearer token. validators=[%s]", + ENV_ACCESS_TOKEN, + ", ".join(validators), + ) + return + + try: + payload = json.dumps( + { + "type": validators, + "rules": rules, + "data": data, + "hook": hook, + "traceId": trace_id, + "src_timestamp": src_timestamp, + "agentName": agent_name, + "runtimeId": runtime_id, + **resolve_job_context(), + }, + default=str, # coerce any non-JSON-native value safely + ).encode("utf-8") + except Exception as exc: # noqa: BLE001 - fail-open + logger.warning( + "Govern call payload serialization failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) + return + + url = build_governance_url(org_id, GOVERN_API_PATH) + headers = governance_request_headers(json_body=True) + headers[TENANT_HEADER] = tenant_id + + request = urllib.request.Request( + url, + data=payload, + headers=headers, + method="POST", + ) + try: + with urllib.request.urlopen( # noqa: S310 - URL is built from config + request, timeout=BACKEND_REQUEST_TIMEOUT_SECONDS + ) as response: + logger.info( + "Govern call has been made (status=%s, validators=[%s])", + getattr(response, "status", "?"), + ", ".join(validators), + ) + except Exception as exc: # noqa: BLE001 - fail-and-log + logger.warning( + "Govern call failed (validators=[%s]): %s", + ", ".join(validators), + exc, + ) diff --git a/tests/test_audit_console.py b/tests/test_audit_console.py new file mode 100644 index 0000000..8a8cd52 --- /dev/null +++ b/tests/test_audit_console.py @@ -0,0 +1,275 @@ +"""Tests for ``ConsoleAuditSink``. + +The console sink is a developer-aid that writes governance events to +stderr in a human-readable format. Filtering and per-event-type +formatting are the things worth pinning so a non-verbose run doesn't +spam unmatched evaluations. 
def test_accepts_non_verbose_filters_unmatched_rule_eval() -> None:
    """A non-verbose sink accepts matched rule evaluations and rejects unmatched ones."""
    quiet_sink = ConsoleAuditSink(verbose=False)

    def rule_evaluation(matched: bool) -> AuditEvent:
        return AuditEvent(
            event_type=EventType.RULE_EVALUATION, data={"matched": matched}
        )

    matched_event = rule_evaluation(True)
    unmatched_event = rule_evaluation(False)

    assert quiet_sink.accepts(matched_event) is True
    assert quiet_sink.accepts(unmatched_event) is False
+ sink = ConsoleAuditSink(verbose=False) + assert sink.accepts(AuditEvent(event_type=event_type)) is True + + +def test_accepts_non_verbose_drops_other_event_types() -> None: + sink = ConsoleAuditSink(verbose=False) + # PACKS_LOADED isn't in the lifecycle allowlist for non-verbose. + assert sink.accepts(AuditEvent(event_type=EventType.PACKS_LOADED)) is False + + +# --------------------------------------------------------------------------- +# _emit_rule_evaluation +# --------------------------------------------------------------------------- + + +def test_emit_matched_rule_writes_full_line(capsys: pytest.CaptureFixture[str]) -> None: + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.RULE_EVALUATION, + data={ + "matched": True, + "rule_id": "A.10.4", + "rule_name": "commitment-language", + "action": "audit", + "detail": "Customer commitment detected.", + }, + ) + ) + out = capsys.readouterr().err + assert "MATCHED" in out + assert "A.10.4" in out + assert "commitment-language" in out + assert "action=AUDIT" in out + assert "Customer commitment detected." 
in out + + +def test_emit_unmatched_rule_silent_when_non_verbose( + capsys: pytest.CaptureFixture[str], +) -> None: + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.RULE_EVALUATION, + data={"matched": False, "rule_id": "A", "rule_name": "n"}, + ) + ) + assert capsys.readouterr().err == "" + + +def test_emit_unmatched_rule_prints_pass_when_verbose( + capsys: pytest.CaptureFixture[str], +) -> None: + sink = ConsoleAuditSink(verbose=True) + sink.emit( + AuditEvent( + event_type=EventType.RULE_EVALUATION, + data={"matched": False, "rule_id": "A.1", "rule_name": "rule-one"}, + ) + ) + out = capsys.readouterr().err + assert "PASS" in out + assert "A.1" in out + assert "rule-one" in out + + +# --------------------------------------------------------------------------- +# _emit_hook_summary +# --------------------------------------------------------------------------- + + +def test_emit_hook_summary_basic(capsys: pytest.CaptureFixture[str]) -> None: + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.HOOK_END, + hook="after_model", + data={ + "total_rules": 5, + "matched_rules": 1, + "final_action": "allow", + "enforcement_mode": "audit", + }, + ) + ) + out = capsys.readouterr().err + assert "HOOK: after_model" in out + assert "rules=5" in out + assert "matched=1" in out + assert "action=ALLOW" in out + + +def test_emit_hook_summary_audit_mode_would_deny_marker( + capsys: pytest.CaptureFixture[str], +) -> None: + """In AUDIT mode a DENY action is annotated as 'would deny'. + + Without this, operators reading the console would think a deny + actually fired when the runtime only audited it. 
+ """ + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.HOOK_END, + hook="before_model", + data={ + "total_rules": 1, + "matched_rules": 1, + "final_action": "deny", + "enforcement_mode": "audit", + }, + ) + ) + out = capsys.readouterr().err + assert "AUDIT (would deny)" in out + + +def test_emit_hook_summary_enforce_mode_deny_not_annotated( + capsys: pytest.CaptureFixture[str], +) -> None: + """In ENFORCE mode the 'would deny' annotation is NOT applied.""" + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.HOOK_END, + hook="before_model", + data={ + "total_rules": 1, + "matched_rules": 1, + "final_action": "deny", + "enforcement_mode": "enforce", + }, + ) + ) + out = capsys.readouterr().err + assert "would deny" not in out + assert "action=DENY" in out + + +# --------------------------------------------------------------------------- +# Session start / end +# --------------------------------------------------------------------------- + + +def test_emit_session_start_includes_packs_and_mode( + capsys: pytest.CaptureFixture[str], +) -> None: + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.SESSION_START, + agent_name="my-agent", + data={"packs": ["iso42001", "owasp"], "enforcement_mode": "audit"}, + ) + ) + out = capsys.readouterr().err + assert "Session started" in out + assert "agent=my-agent" in out + assert "iso42001,owasp" in out + assert "mode=audit" in out + + +def test_emit_session_end_counters(capsys: pytest.CaptureFixture[str]) -> None: + sink = ConsoleAuditSink(verbose=False) + sink.emit( + AuditEvent( + event_type=EventType.SESSION_END, + trace_id="trace-abc", + data={ + "total_evaluations": 12, + "rules_matched": 3, + "rules_denied": 1, + }, + ) + ) + out = capsys.readouterr().err + assert "Session ended" in out + assert "evaluations=12" in out + assert "matched=3" in out + assert "denied=1" in out + + +# 
--------------------------------------------------------------------------- +# Generic / fallback +# --------------------------------------------------------------------------- + + +def test_emit_generic_unknown_event_type(capsys: pytest.CaptureFixture[str]) -> None: + """Anything that isn't a known event type falls through to _emit_generic. + + The generic formatter serializes ``data`` as JSON so operators can + still inspect the payload even for events the sink doesn't know about. + """ + sink = ConsoleAuditSink(verbose=True) + sink.emit( + AuditEvent( + event_type="custom_event", + agent_name="x", + data={"foo": "bar", "n": 1}, + ) + ) + out = capsys.readouterr().err + assert "custom_event" in out + assert "x" in out + assert '"foo": "bar"' in out + assert '"n": 1' in out diff --git a/tests/test_audit_register_sink.py b/tests/test_audit_register_sink.py new file mode 100644 index 0000000..ff03710 --- /dev/null +++ b/tests/test_audit_register_sink.py @@ -0,0 +1,103 @@ +"""Tests for ``AuditManager.register_sink`` failure-counter semantics. + +A re-registered same-name sink must NOT inherit the previous instance's +tripped circuit-breaker state. ``unregister_sink`` already clears these +counters, but ``register_sink`` also clears them on a successful add as +defense-in-depth (covers tests / external callers that touch the +internal counter dicts directly). 
+""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from uipath.runtime.governance.audit.base import ( + AuditEvent, + AuditManager, + AuditSink, + EventType, +) + + +class _NoopSink(AuditSink): + """Sink that records emit calls and never raises.""" + + def __init__(self, name: str = "test-sink") -> None: + self._name = name + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return self._name + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _event() -> AuditEvent: + return AuditEvent(event_type=EventType.RULE_EVALUATION, agent_name="a") + + +@pytest.fixture +def manager() -> Any: + """Build a fresh, sync-mode AuditManager for the test.""" + return AuditManager(async_mode=False) + + +def test_register_clears_stale_failure_counter(manager: AuditManager) -> None: + """A new sink with a name that previously tripped starts fresh.""" + # Simulate prior instance having tripped the circuit-breaker without + # going through unregister (e.g. test code or external code that + # mutated the counters directly). + manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD + manager._tripped_sinks.add("test-sink") + + new_sink = _NoopSink(name="test-sink") + manager.register_sink(new_sink) + + # Counter and tripped-set must be cleared. + assert manager._sink_failures.get("test-sink", 0) == 0 + assert "test-sink" not in manager._tripped_sinks + + # And the new sink actually receives events (would be skipped if + # still considered tripped). + manager.emit(_event()) + assert len(new_sink.events) == 1 + + +def test_register_does_not_clear_for_duplicate(manager: AuditManager) -> None: + """Re-registering an already-present sink is a no-op (no counter reset).""" + sink = _NoopSink(name="test-sink") + manager.register_sink(sink) + + # Simulate the existing sink having accumulated some failures. 
+ manager._sink_failures["test-sink"] = 3 + + # A second register call with the same name should NOT clear those + # failures — the duplicate-check fires before the reset. + duplicate = _NoopSink(name="test-sink") + manager.register_sink(duplicate) + + assert manager._sink_failures["test-sink"] == 3 + + +def test_unregister_then_register_starts_fresh(manager: AuditManager) -> None: + """The full lifecycle: register → trip → unregister → register again.""" + sink = _NoopSink(name="test-sink") + manager.register_sink(sink) + manager._sink_failures["test-sink"] = manager._SINK_FAILURE_THRESHOLD + manager._tripped_sinks.add("test-sink") + + manager.unregister_sink("test-sink") + # Unregister already clears. + assert "test-sink" not in manager._tripped_sinks + + new_sink = _NoopSink(name="test-sink") + manager.register_sink(new_sink) + assert manager._sink_failures.get("test-sink", 0) == 0 + assert "test-sink" not in manager._tripped_sinks + + manager.emit(_event()) + assert len(new_sink.events) == 1 diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py new file mode 100644 index 0000000..a46149b --- /dev/null +++ b/tests/test_commitment_concern.py @@ -0,0 +1,205 @@ +"""Tests for the commitment_concern check (A.10.4). + +The check now uses OR semantics: a verb match, an amount match, or a +deadline match is each sufficient when its enabling flag is on. With +both flags false the rule matches verb-only. + +The verb pattern also covers proposal / SOW style commitment markers +("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose") +so formal-business commitments without first-person verbs still fire. + +Amount detection requires a currency marker adjacent to the number to +prevent URL fragments (forum-post IDs, image dimensions, etc.) from +false-positiving. 
+""" + +from __future__ import annotations + +import pytest + +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator + +# --------------------------------------------------------------------------- +# The proposal-style sample that originally slipped through the rule. +# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables", +# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID. +# Triple-quoted so we keep the line breaks the model produced. +# --------------------------------------------------------------------------- +SAMPLE_PROPOSAL = """To address your concerns, I reviewed the official UiPath site you referenced and relevant resources on uipath.com to inform a fast stabilization plan. Notable findings include: a community CI/CD sample for UiPath projects (https://forum.uipath.com/t/announcement-ci-cd-pipeline-sample-implementation-s-for-uipath-projects-alpha/667851). + +Here's how I propose we turn your software around quickly: + +Plan +- Triage (logs + reproduce) +- Quick stabilization + +Deliverables +- Defect triage report + +Timeline: 4 days total +- Day 1: Triage + reproduction + +Cost: $780 (fixed for the above scope) +""" + + +@pytest.mark.parametrize( + "text", + [ + "Cost: $780 (fixed for the above scope)", + "Deliverables: a, b, c", + "Timeline: 4 days total for the whole engagement", + "I propose we turn this around in a week", + "We will refund the difference", + "I'll deliver the report by Friday", + "the warranty covers parts only", + "fixed price of one hundred dollars", + ], +) +def test_verb_match_alone_fires(text: str) -> None: + """Each verb-style commitment marker fires on its own (verb-only mode).""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": False, "require_deadline": False} + ) + is True + ) + + +def test_full_proposal_sample_fires() -> None: + """The originally-missed proposal output now fires.""" + assert ( + 
GovernanceEvaluator._check_commitment_concern( + SAMPLE_PROPOSAL, + {"require_amount": False, "require_deadline": False}, + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "$780", + "We charge USD 1,200 per seat", + "The fee is 500 EUR", + ], +) +def test_amount_alone_fires_when_require_amount_true(text: str) -> None: + """Currency-anchored amount alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "Task is 75% complete.", + "We maintain 99.9% uptime.", + "Battery at 50%.", + "Score: 12%.", + ], +) +def test_bare_percentage_does_not_fire(text: str) -> None: + """Status-only percentages must not trigger commitment_concern. + + Regression for the prior ``\\d{1,3}\\s*%`` branch in the amount + regex, which fired on benign status / progress text. Real + percentage-bearing commitments ("we'll give a 20% discount") + still fire via the verb pattern. 
+ """ + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is False + ) + + +def test_percentage_with_verb_still_fires() -> None: + """A commitment verb co-occurring with a percentage still fires.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "We will refund 100% of the purchase price.", + {"require_amount": True, "require_deadline": False}, + ) + is True + ) + + +def test_amount_alone_does_not_fire_when_require_amount_false() -> None: + """Amount-only text is silent when require_amount=False and no verb.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "The list price is $780.", + {"require_amount": False, "require_deadline": False}, + ) + is False + ) + + +def test_deadline_alone_fires_when_require_deadline_true() -> None: + """Deadline phrase alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "Will be done within 5 days.", + {"require_amount": False, "require_deadline": True}, + ) + is True + ) + + +def test_url_fragment_digits_do_not_false_positive() -> None: + """A long URL with embedded digits is not a 'commitment'. + + Catches the prior price-parser misbehaviour where Price.fromstring() + picked up forum-post IDs (e.g. ``667851``) and conflated them with + unrelated currency symbols elsewhere in the text. + """ + text = ( + "See https://forum.example.com/t/topic/667851 for details — " + "no commitment language here." 
+ ) + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +@pytest.mark.parametrize( + "text", + [ + "", + " ", + "Just chatting about the weather today.", + "The product is durable and well-made.", + ], +) +def test_no_signal_does_not_fire(text: str) -> None: + """Text without any commitment signal stays silent regardless of flags.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +def test_non_dict_params_treated_as_defaults() -> None: + """``params`` of the wrong type degrades to defaults rather than crashing.""" + assert ( + GovernanceEvaluator._check_commitment_concern("we will refund", None) + is True + ) + assert ( + GovernanceEvaluator._check_commitment_concern( + "no verbs here", "garbage" + ) + is False + ) diff --git a/tests/test_delegation_guard.py b/tests/test_delegation_guard.py new file mode 100644 index 0000000..a1ba432 --- /dev/null +++ b/tests/test_delegation_guard.py @@ -0,0 +1,320 @@ +"""Tests for the async-aware delegation depth guard. + +The guard wraps an agent's ``invoke`` and ``ainvoke`` so a single +ContextVar tracks delegation depth across both sync and async call +chains. The async wrapper must itself be a coroutine — wrapping with a +sync function would return an un-awaited coroutine and silently bypass +the depth check. +""" + +from __future__ import annotations + +import asyncio +import os +from types import SimpleNamespace + +import pytest +from uipath.core.governance.exceptions import GovernanceBlockException + +from uipath.runtime.governance.delegation_guard import ( + install_delegation_guard, + uninstall_delegation_guard, +) + +# --------------------------------------------------------------------------- +# Helpers — minimal agent shapes the guard might encounter in the wild. 
# ---------------------------------------------------------------------------


def _make_sync_agent() -> SimpleNamespace:
    """Return a bare agent exposing only a synchronous ``invoke``.

    ``invoke`` echoes its payload back under the ``"sync"`` key and
    ignores any keyword arguments.
    """

    def _echo_sync(payload, **_):
        return {"sync": payload}

    return SimpleNamespace(invoke=_echo_sync)


def _make_async_agent() -> SimpleNamespace:
    """Return a bare agent exposing only an asynchronous ``ainvoke``.

    ``ainvoke`` echoes its payload back under the ``"async"`` key and
    ignores any keyword arguments.
    """

    async def _echo_async(payload, **_):
        return {"async": payload}

    return SimpleNamespace(ainvoke=_echo_async)


def _make_dual_agent() -> SimpleNamespace:
    """Agent with both sync invoke and async ainvoke (LangGraph React shape)."""
    # Start from the async-only shape and graft the sync entry point on,
    # so both echo implementations live in exactly one helper each.
    dual = _make_async_agent()
    dual.invoke = _make_sync_agent().invoke
    return dual


# ---------------------------------------------------------------------------
# Sync path — preserves the original behaviour the guard always had.
# ---------------------------------------------------------------------------


def test_sync_invoke_passes_through_under_limit() -> None:
    """A single guarded sync call returns the agent's own result."""
    guarded = _make_sync_agent()
    install_delegation_guard(guarded, max_depth=3)

    outcome = guarded.invoke({"x": 1})

    assert outcome == {"sync": {"x": 1}}


def test_sync_invoke_raises_when_depth_exceeded() -> None:
    """Recursive sync invokes blow the limit."""
    agent = SimpleNamespace()
    entries: list[int] = []

    def _recurse(_payload, **_):
        entries.append(1)
        # Recurse into ourselves through the guarded attribute.
        return agent.invoke({})

    agent.invoke = _recurse
    install_delegation_guard(agent, max_depth=3)

    with pytest.raises(GovernanceBlockException):
        agent.invoke({})

    # Depth check fires inside the wrapper before the original runs, so
    # we got exactly max_depth=3 successful entries plus one rejection.
    assert len(entries) == 3


# ---------------------------------------------------------------------------
# Async path — the new shape this change unlocks.
# ---------------------------------------------------------------------------


def test_async_wrapper_is_a_coroutine_function() -> None:
    """The wrapped ainvoke must itself be awaitable.

    Regression test for the original bug: a sync wrapper around an async
    method returned an un-awaited coroutine and silently bypassed the
    depth check entirely.
    """
    # ``asyncio.iscoroutinefunction`` is deprecated since Python 3.14;
    # ``inspect.iscoroutinefunction`` is the supported replacement.
    import inspect

    agent = _make_async_agent()
    install_delegation_guard(agent, max_depth=3)
    assert inspect.iscoroutinefunction(agent.ainvoke)


def test_async_invoke_passes_through_under_limit() -> None:
    """A single guarded async call returns the agent's own result."""
    agent = _make_async_agent()
    install_delegation_guard(agent, max_depth=3)
    result = asyncio.run(agent.ainvoke({"x": 1}))
    assert result == {"async": {"x": 1}}


def test_async_invoke_raises_when_depth_exceeded() -> None:
    """A self-recursive coroutine is stopped once max_depth is exceeded."""
    agent = SimpleNamespace()
    calls = {"n": 0}

    async def _ainvoke(_payload, **_):
        calls["n"] += 1
        # Recurse through the guarded attribute, not the raw coroutine.
        return await agent.ainvoke({})

    agent.ainvoke = _ainvoke
    install_delegation_guard(agent, max_depth=3)

    with pytest.raises(GovernanceBlockException):
        asyncio.run(agent.ainvoke({}))
    # The original coroutine body ran once per permitted depth level.
    assert calls["n"] == 3


def test_sync_and_async_share_one_depth_counter() -> None:
    """A coroutine that falls through to sync ``invoke`` increments the same counter."""
    agent = _make_dual_agent()
    calls = {"n": 0}

    def _invoke(_payload, **_):
        calls["n"] += 1
        # Sync self-recursion through the same guarded attribute.
        return agent.invoke({})

    async def _ainvoke(_payload, **_):
        calls["n"] += 1
        # Cross-mode: async entry falls through to the sync path.
        return agent.invoke({})

    agent.invoke = _invoke
    agent.ainvoke = _ainvoke
    install_delegation_guard(agent, max_depth=2)

    with pytest.raises(GovernanceBlockException):
        asyncio.run(agent.ainvoke({}))
    # ainvoke (depth=1) → invoke (depth=2) → invoke (depth=3, blocked).
    # The guard rejects the third call before _invoke runs, so calls=2.
    assert calls["n"] == 2


# ---------------------------------------------------------------------------
# Lifecycle — install / uninstall semantics.
# ---------------------------------------------------------------------------


def test_install_is_idempotent() -> None:
    """Installing the guard twice leaves the first wrapper in place."""
    agent = _make_sync_agent()
    install_delegation_guard(agent, max_depth=5)
    wrapped_once = agent.invoke
    install_delegation_guard(agent, max_depth=5)
    assert agent.invoke is wrapped_once, "second install must not re-wrap"


def test_uninstall_restores_originals_for_both_methods() -> None:
    """Uninstall puts back the exact original ``invoke`` and ``ainvoke`` objects."""
    agent = _make_dual_agent()
    original_invoke = agent.invoke
    original_ainvoke = agent.ainvoke
    install_delegation_guard(agent, max_depth=5)
    assert agent.invoke is not original_invoke
    assert agent.ainvoke is not original_ainvoke

    uninstall_delegation_guard(agent)
    assert agent.invoke is original_invoke
    assert agent.ainvoke is original_ainvoke
    assert not getattr(agent, "_delegation_wrapped", False)


def test_uninstall_safe_on_unguarded_agent() -> None:
    """Uninstalling from a never-guarded agent is a harmless no-op."""
    agent = _make_sync_agent()
    original_invoke = agent.invoke
    # Should not raise; should leave agent unchanged.
    uninstall_delegation_guard(agent)
    # Pin "unchanged" by identity and behaviour, not just callability.
    assert agent.invoke is original_invoke
    assert agent.invoke({"x": 1}) == {"sync": {"x": 1}}


# ---------------------------------------------------------------------------
# Edge cases.
+# --------------------------------------------------------------------------- + + +def test_agent_without_invoke_methods_is_noop() -> None: + """Agents without any invokable method must not crash the install.""" + agent = SimpleNamespace(unrelated="value") + install_delegation_guard(agent, max_depth=5) + assert not getattr(agent, "_delegation_wrapped", False) + + +def test_env_var_max_depth_override(monkeypatch: pytest.MonkeyPatch) -> None: + """``UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH`` overrides the default.""" + monkeypatch.setenv("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH", "1") + agent = SimpleNamespace() + calls = {"n": 0} + + def _invoke(_payload, **_): + calls["n"] += 1 + return agent.invoke({}) + + agent.invoke = _invoke + install_delegation_guard(agent) # picks up env + + with pytest.raises(GovernanceBlockException): + agent.invoke({}) + assert calls["n"] == 1, "max_depth=1 should allow exactly one call" + + +def test_invalid_env_var_falls_back_to_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH", "not-a-number") + agent = _make_sync_agent() + # Should not raise on install — falls back silently to the default. + install_delegation_guard(agent) + assert os.environ.get("UIPATH_GOVERNANCE_MAX_DELEGATION_DEPTH") == "not-a-number" + assert callable(agent.invoke) + + +# --------------------------------------------------------------------------- +# Leak / scaling — pins the shared-ContextVar design. +# --------------------------------------------------------------------------- + + +def test_install_does_not_allocate_per_agent_contextvars() -> None: + """N installs must not grow the module's ContextVar registry by N. + + The old implementation allocated a ``ContextVar`` per agent. Since + ContextVar instances are interned by the interpreter and never GC'd, + that was an unbounded leak. The current design holds a single + module-level ContextVar of ``dict[id(agent), int]``. 
+ """ + from uipath.runtime.governance import delegation_guard as dg + + # Snapshot the single shared ContextVar. + shared_var = dg._DELEGATION_DEPTHS + + for _ in range(100): + agent = _make_sync_agent() + install_delegation_guard(agent, max_depth=3) + uninstall_delegation_guard(agent) + + # The module-level ContextVar is unchanged — same instance, no new + # ContextVars were allocated. + assert dg._DELEGATION_DEPTHS is shared_var + + +def test_two_agents_have_independent_depth_counters() -> None: + """Exhausting one agent's depth limit doesn't leak into another agent. + + Both agents share the single module-level ContextVar but the dict + inside isolates them via ``id(agent)``. + """ + from uipath.runtime.governance import delegation_guard as dg + + agent_a = SimpleNamespace() + calls_a = {"n": 0} + + def _invoke_a(_payload, **_): + calls_a["n"] += 1 + return agent_a.invoke({}) # self-recursion until limit + + agent_a.invoke = _invoke_a + + agent_b = _make_sync_agent() + + install_delegation_guard(agent_a, max_depth=2) + install_delegation_guard(agent_b, max_depth=2) + + # Drive agent_a to its limit. + with pytest.raises(GovernanceBlockException): + agent_a.invoke({}) + assert calls_a["n"] == 2 + + # agent_b is a fresh chain in the same context. Its depth counter + # is keyed by id(agent_b), so agent_a's exhausted state doesn't + # affect it. Without the per-agent keying, agent_b would inherit + # whatever depth was last set in this context. + assert agent_b.invoke({"x": 1}) == {"sync": {"x": 1}} + + # After both calls, the ContextVar should be back to its initial + # state — either unset (LookupError) or holding an empty dict. The + # set/reset pairs each guarded call cleaned up after itself. 
+ try: + depths = dg._DELEGATION_DEPTHS.get() + except LookupError: + depths = {} + assert depths.get(id(agent_a), 0) == 0 + assert depths.get(id(agent_b), 0) == 0 + + +def test_uninstall_clears_agent_depth_entry() -> None: + """After uninstall, the agent's id is no longer in the depths dict. + + Prevents ``id(agent)`` reuse — Python recycles ids after GC — from + mis-attributing a future agent's count to this one. + """ + from uipath.runtime.governance import delegation_guard as dg + + agent = _make_sync_agent() + install_delegation_guard(agent, max_depth=5) + # Enter the guard once so the agent gets a depth entry. + agent.invoke({}) + # invoke completed -> token reset -> entry should be back to 0 or + # absent. We re-enter manually to plant a non-zero entry. + agent_key = id(agent) + dg._DELEGATION_DEPTHS.set({agent_key: 3}) + assert dg._DELEGATION_DEPTHS.get().get(agent_key) == 3 + + uninstall_delegation_guard(agent) + # Uninstall pops the entry from the current context. + assert agent_key not in dg._DELEGATION_DEPTHS.get() diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py new file mode 100644 index 0000000..e3e6b88 --- /dev/null +++ b/tests/test_evaluator.py @@ -0,0 +1,401 @@ +"""Tests for the audit + enforcement behavior of GovernanceEvaluator. + +The evaluator owns three responsibilities that used to be scattered +across wrapper.py and adapter callbacks: + +1. DISABLED enforcement mode short-circuits — no rules evaluated, no + audit events emitted, no exceptions raised. +2. AUDIT mode evaluates rules and emits audit events, but transforms + matched DENY actions into AUDIT so execution continues. +3. ENFORCE mode evaluates, emits audit, and raises + :class:`GovernanceBlockException` when a DENY rule matches. + +Plus a fail-safe contract: a misbehaving audit sink must not stop +evaluation from completing or propagate as an exception. 
+""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import patch + +import pytest +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import Action, LifecycleHook + +from tests._helpers import reset_enforcement_mode +from uipath.runtime.governance.audit import ( + AuditEvent, + AuditSink, + EventType, + get_audit_manager, + reset_audit_manager, +) +from uipath.runtime.governance.config import ( + EnforcementMode, + set_enforcement_mode, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Test helpers +# --------------------------------------------------------------------------- + + +class _CapturingSink(AuditSink): + """Audit sink that records every event for assertions.""" + + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _deny_rule_on_input_contains(needle: str) -> Rule: + """Build a rule that DENIES when agent_input contains ``needle``.""" + return Rule( + rule_id="TEST-01", + name="Test deny on input", + clause="A.1.1", + hook=LifecycleHook.BEFORE_AGENT, + action=Action.DENY, + checks=[ + Check( + conditions=[ + Condition( + operator="contains", + field="agent_input", + value=needle, + ) + ], + action=Action.DENY, + message=f"Input must not contain {needle!r}", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + """Wrap a single rule in a one-pack PolicyIndex.""" + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def _ctx(agent_input: str) -> 
CheckContext: + return CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name="test-agent", + runtime_id="run-1", + trace_id="trace-1", + agent_input=agent_input, + ) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def capturing_audit(): + """Replace the global audit manager with a fresh one wired to a capturing sink. + + Yields the sink so tests can inspect emitted events. Restores the + global manager on teardown. + """ + reset_audit_manager() + manager = get_audit_manager() + # Default sinks (traces / console) are noisy here — drop them. + for existing_name in list(manager.list_sinks()): + manager.unregister_sink(existing_name) + sink = _CapturingSink() + manager.register_sink(sink) + # Force synchronous emission so assertions don't race the worker thread. + manager._async_mode = False + yield sink + reset_audit_manager() + + +@pytest.fixture(autouse=True) +def _reset_enforcement_mode(): + """Each test gets a clean enforcement-mode slate.""" + reset_enforcement_mode() + yield + reset_enforcement_mode() + + +# --------------------------------------------------------------------------- +# DISABLED mode +# --------------------------------------------------------------------------- + + +def test_disabled_mode_short_circuits_with_empty_record(capturing_audit): + """DISABLED returns an empty AuditRecord and emits nothing.""" + set_enforcement_mode(EnforcementMode.DISABLED) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + audit = evaluator.evaluate(_ctx("definitely contains secret")) + + assert audit.evaluations == [] + assert audit.final_action == Action.ALLOW + assert audit.metadata["enforcement_mode"] == "disabled" + assert capturing_audit.events == [] + + +def test_disabled_mode_does_not_raise_on_deny_match(capturing_audit): + """Even when a DENY rule WOULD 
match, DISABLED never raises.""" + set_enforcement_mode(EnforcementMode.DISABLED) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + # Must not raise. + evaluator.evaluate(_ctx("this is blocked")) + + +# --------------------------------------------------------------------------- +# AUDIT mode +# --------------------------------------------------------------------------- + + +def test_audit_mode_transforms_deny_to_audit(capturing_audit): + """AUDIT mode evaluates rules but never returns a DENY final_action.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + audit = evaluator.evaluate(_ctx("contains secret data")) + + assert len(audit.evaluations) == 1 + assert audit.evaluations[0].matched is True + assert audit.evaluations[0].action == Action.DENY # raw rule action preserved + assert audit.final_action == Action.AUDIT # mode-adjusted + assert audit.metadata["audit_mode_would_deny"] is True + + +def test_audit_mode_does_not_raise_on_deny_match(capturing_audit): + """AUDIT mode never raises GovernanceBlockException, even on a DENY hit.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + evaluator.evaluate(_ctx("this is blocked")) # must not raise + + +def test_audit_mode_emits_per_rule_and_summary_events(capturing_audit): + """One rule_evaluation event per rule + one hook_summary per evaluate().""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + evaluator.evaluate(_ctx("contains secret")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 
+ assert rule_events[0].hook == "BEFORE_AGENT" + assert rule_events[0].data["rule_id"] == "TEST-01" + assert rule_events[0].data["matched"] is True + assert rule_events[0].data["action"] == "deny" + + assert len(summary_events) == 1 + assert summary_events[0].data["matched_rules"] == 1 + assert summary_events[0].data["final_action"] == "audit" + assert summary_events[0].data["enforcement_mode"] == "audit" + + +def test_audit_mode_unmatched_rule_logged_as_allow(capturing_audit): + """Unmatched rules still emit a rule_evaluation event with action='allow'.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + evaluator.evaluate(_ctx("benign user query")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + assert len(rule_events) == 1 + assert rule_events[0].data["matched"] is False + assert rule_events[0].data["action"] == "allow" + + +# --------------------------------------------------------------------------- +# ENFORCE mode +# --------------------------------------------------------------------------- + + +def test_enforce_mode_raises_on_deny_match(capturing_audit): + """ENFORCE mode raises GovernanceBlockException when a DENY rule matches.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + with pytest.raises(GovernanceBlockException) as exc_info: + evaluator.evaluate(_ctx("input is blocked")) + + exc = exc_info.value + assert exc.rule_id == "TEST-01" + assert exc.rule_name == "Test deny on input" + assert exc.audit_record is not None + assert exc.audit_record.final_action == Action.DENY + + +def test_enforce_mode_emits_audit_before_raising(capturing_audit): + """The audit trail must be emitted even when the call raises.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + 
_build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + with pytest.raises(GovernanceBlockException): + evaluator.evaluate(_ctx("contains blocked")) + + rule_events = [ + e for e in capturing_audit.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in capturing_audit.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert summary_events[0].data["final_action"] == "deny" + assert summary_events[0].data["enforcement_mode"] == "enforce" + + +def test_enforce_mode_returns_record_when_no_rule_matches(capturing_audit): + """No DENY hit → no raise; the AuditRecord is returned normally.""" + set_enforcement_mode(EnforcementMode.ENFORCE) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("blocked")) + ) + + audit = evaluator.evaluate(_ctx("benign query")) + + assert audit.final_action == Action.ALLOW + assert audit.evaluations[0].matched is False + + +# --------------------------------------------------------------------------- +# Sink-failure isolation +# --------------------------------------------------------------------------- + + +def test_sink_failure_does_not_propagate_or_block_evaluation(capturing_audit): + """A broken sink must not make evaluate() raise or lose its return value. + + The contract: AuditManager wraps each sink's emit() in try/except with + a per-sink failure counter (circuit-breaker), so an exception inside a + sink never propagates back to the evaluator. + """ + + class _BrokenSink(AuditSink): + @property + def name(self) -> str: + return "broken" + + def emit(self, event: AuditEvent) -> None: + raise RuntimeError("sink broke") + + manager = get_audit_manager() + manager.register_sink(_BrokenSink()) + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + # Must complete without raising even with a broken sink registered. 
+ audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + # The non-broken capturing sink still got its events. + assert any( + e.event_type == EventType.RULE_EVALUATION for e in capturing_audit.events + ) + + +def test_unavailable_audit_manager_is_swallowed(): + """If get_audit_manager() itself raises, _emit_audit must swallow it.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_deny_rule_on_input_contains("secret")) + ) + + with patch( + "uipath.runtime.governance.native.evaluator.get_audit_manager", + side_effect=RuntimeError("manager unavailable"), + ): + # Must complete, return record, and not raise. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + assert audit.evaluations[0].matched is True + + +# --------------------------------------------------------------------------- +# Protocol conformance smoke test +# --------------------------------------------------------------------------- + + +def test_governance_evaluator_satisfies_evaluator_protocol(): + """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected. + + Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol — + an explicit assignment to the protocol-typed variable documents the + structural contract. 
+ """ + from uipath.core.adapters import EvaluatorProtocol + + evaluator: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex()) + assert isinstance(evaluator, EvaluatorProtocol) + + +def test_evaluator_protocol_methods_resolvable_on_concrete(): + """Every method the protocol declares must be callable on the concrete impl.""" + from uipath.core.adapters import EvaluatorProtocol + + evaluator: Any = GovernanceEvaluator(PolicyIndex()) + for method_name in ( + "evaluate_before_agent", + "evaluate_after_agent", + "evaluate_before_model", + "evaluate_after_model", + "evaluate_tool_call", + "evaluate_after_tool", + ): + assert callable(getattr(evaluator, method_name)) + # Annotations are not enforced at runtime; the isinstance() check below is + # what verifies structural conformance, since EvaluatorProtocol is @runtime_checkable. + assert isinstance(evaluator, EvaluatorProtocol) diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py new file mode 100644 index 0000000..f4021db --- /dev/null +++ b/tests/test_evaluator_operators.py @@ -0,0 +1,680 @@ +"""Tests for ``GovernanceEvaluator`` operators and field resolution. + +Covers each operator implemented in :meth:`_apply_operator` plus the +``_check_*`` helper functions (vader, encoding, entropy, incident, +commitment) and the ``evaluate_*`` dispatchers. 
+""" + +from __future__ import annotations + +import pytest +from uipath.core.governance.models import Action, LifecycleHook + +from tests._helpers import reset_enforcement_mode +from uipath.runtime.governance.config import ( + EnforcementMode, + set_enforcement_mode, +) +from uipath.runtime.governance.native.evaluator import ( + _INCIDENT_PATTERNS, + GovernanceEvaluator, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _evaluator() -> GovernanceEvaluator: + """Build a GovernanceEvaluator with an empty PolicyIndex (operators only).""" + return GovernanceEvaluator(policy_index=PolicyIndex()) + + +def _ctx(**fields) -> CheckContext: + """Construct a CheckContext with sensible defaults plus overrides.""" + defaults = dict( + hook=LifecycleHook.AFTER_MODEL, + agent_name="agent", + runtime_id="rt-1", + trace_id="tr-1", + ) + defaults.update(fields) + return CheckContext(**defaults) + + +def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule: + return Rule( + rule_id="r1", + name="r1", + clause="", + hook=LifecycleHook.AFTER_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition(operator=operator, field=field, value=value, negate=negate) + ], + ) + ], + ) + + +@pytest.fixture(autouse=True) +def _isolate_mode() -> None: + reset_enforcement_mode() + set_enforcement_mode(EnforcementMode.AUDIT) + yield + reset_enforcement_mode() + + +# --------------------------------------------------------------------------- +# Field resolution — _get_field_value +# --------------------------------------------------------------------------- + + +def test_get_field_value_top_level_attr() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + assert 
ev._get_field_value("model_output", ctx) == "hello" + + +def test_get_field_value_dotted_path_into_dict() -> None: + ev = _evaluator() + ctx = _ctx(session_state={"tool_calls": 7}) + assert ev._get_field_value("session_state.tool_calls", ctx) == 7 + + +def test_get_field_value_missing_segment_returns_none() -> None: + ev = _evaluator() + ctx = _ctx() + assert ev._get_field_value("nonexistent", ctx) is None + assert ev._get_field_value("session_state.absent", ctx) is None + + +# --------------------------------------------------------------------------- +# Existence / guardrail_fallback (special-cased before the None check) +# --------------------------------------------------------------------------- + + +def test_exists_true_when_value_present() -> None: + ev = _evaluator() + ctx = _ctx(model_output="x") + assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True + + +def test_exists_false_when_missing() -> None: + ev = _evaluator() + assert ev._apply_operator("exists", None, None) is False + + +def test_not_exists_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_exists", None, None) is True + assert ev._apply_operator("not_exists", "x", None) is False + + +def test_guardrail_fallback_mapped_and_disabled_fires() -> None: + ev = _evaluator() + result = ev._apply_operator( + "guardrail_fallback", + None, + {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"}, + ) + assert result is True + + +@pytest.mark.parametrize( + "cfg", + [ + {"mapped_to_uipath": False, "policy_enabled": False}, + {"mapped_to_uipath": True, "policy_enabled": True}, + {"mapped_to_uipath": False, "policy_enabled": True}, + ], +) +def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, cfg) is False + + +def test_guardrail_fallback_non_dict_value_silent() -> None: + ev = _evaluator() + assert 
ev._apply_operator("guardrail_fallback", None, "string") is False + + +# --------------------------------------------------------------------------- +# None-field short-circuit (everything except exists / guardrail_fallback) +# --------------------------------------------------------------------------- + + +def test_other_operators_short_circuit_when_field_is_none() -> None: + ev = _evaluator() + for op in ("contains", "regex", "in_list", "gt"): + assert ev._apply_operator(op, None, "anything") is False, op + + +# --------------------------------------------------------------------------- +# Numeric operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("gt", 5, 3, True), + ("gt", 3, 5, False), + ("gt", 3, 3, False), + ("gte", 3, 3, True), + ("gte", 2, 3, False), + ("lt", 1, 3, True), + ("lt", 3, 3, False), + ("lte", 3, 3, True), + ("lte", 4, 3, False), + ], +) +def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_numeric_operators_handle_string_coercion() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "5", "3") is True + + +def test_numeric_operators_return_false_on_uncoercible() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "not-a-number", 3) is False + assert ev._apply_operator("gt", 3, "not-a-number") is False + + +# --------------------------------------------------------------------------- +# String operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("equals", "abc", "abc", True), + ("equals", "abc", "ABC", False), # equals is case-sensitive + ("eq", "x", "x", True), + ("not_equals", "abc", "xyz", True), + ("ne", "x", "x", False), + ("contains", "Hello World", "world", True), # case-insensitive + ("contains", "Hello", "xyz", 
False), + ("not_contains", "Hello", "xyz", True), + ("not_contains", "Hello", "hello", False), + ], +) +def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_regex_matches_pattern() -> None: + ev = _evaluator() + assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True + + +def test_regex_matches_alias() -> None: + """``matches`` is documented as a synonym for ``regex``.""" + ev = _evaluator() + assert ev._apply_operator("matches", "abc-123", r"\d+") is True + + +def test_regex_invalid_pattern_returns_false() -> None: + """Malformed regex is logged and silently returns False.""" + ev = _evaluator() + assert ev._apply_operator("regex", "anything", "(unclosed") is False + + +# --------------------------------------------------------------------------- +# List operators +# --------------------------------------------------------------------------- + + +def test_in_list_membership() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True + assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False + + +def test_in_list_non_list_value_returns_false() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "x", "not a list") is False + + +def test_not_in_list_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True + assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False + + +def test_not_in_list_non_list_value_returns_true() -> None: + """``not_in_list`` against a non-list value safely returns True + (nothing is in a non-list).""" + ev = _evaluator() + assert ev._apply_operator("not_in_list", "x", "not a list") is True + + +# --------------------------------------------------------------------------- +# Unknown operator +# --------------------------------------------------------------------------- + + 
+def test_unknown_operator_returns_false() -> None: + """Unknown operator strings log a debug message and return False.""" + ev = _evaluator() + assert ev._apply_operator("never_heard_of_this", "x", "y") is False + + +# --------------------------------------------------------------------------- +# Negate flag — flips the result +# --------------------------------------------------------------------------- + + +def test_condition_negate_flips_result() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + # contains "hello" → matches; negate inverts to False. + cond = Condition( + operator="contains", field="model_output", value="hello", negate=True, + ) + assert ev._evaluate_condition(cond, ctx) is False + cond2 = Condition( + operator="contains", field="model_output", value="world", negate=True, + ) + assert ev._evaluate_condition(cond2, ctx) is True + + +# --------------------------------------------------------------------------- +# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions +# --------------------------------------------------------------------------- + + +def test_empty_check_conditions_always_match() -> None: + """A check with no conditions trivially matches — surfaces rule shape bugs.""" + ev = _evaluator() + check = Check(conditions=[], logic="all") + matched, _ = ev._evaluate_check(check, _ctx()) + assert matched is True + + +def test_check_logic_all_requires_every_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="a"), + Condition(operator="contains", field="model_output", value="missing"), + ], + logic="all", + ) + matched, _ = ev._evaluate_check(check, _ctx(model_output="a only")) + assert matched is False + + +def test_check_logic_any_requires_one_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="present"), + Condition(operator="contains", 
field="model_output", value="absent"), + ], + logic="any", + ) + matched, detail = ev._evaluate_check(check, _ctx(model_output="present text")) + assert matched is True + # detail is the check's message on match; empty by default in our builder. + assert detail == "" + + +# --------------------------------------------------------------------------- +# VADER sentiment +# --------------------------------------------------------------------------- + + +def test_vader_concern_negative_text_fires() -> None: + """A clearly-negative sentence trips the default threshold of -0.3.""" + assert ( + GovernanceEvaluator._check_vader_concern( + "I absolutely hate this terrible, awful product.", {"threshold": -0.3} + ) + is True + ) + + +def test_vader_concern_positive_text_does_not_fire() -> None: + assert ( + GovernanceEvaluator._check_vader_concern( + "This is wonderful and I love it!", {"threshold": -0.3} + ) + is False + ) + + +def test_vader_concern_empty_text_silent() -> None: + assert GovernanceEvaluator._check_vader_concern("", {}) is False + assert GovernanceEvaluator._check_vader_concern(" ", {}) is False + + +def test_vader_concern_threshold_as_scalar() -> None: + """``params`` may be a bare number; the operator coerces.""" + assert ( + GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True + ) + + +def test_vader_concern_invalid_threshold_falls_back() -> None: + """Non-numeric scalar params fall back to the documented default.""" + # "garbage" -> default -0.3 → should still classify clear negative + assert ( + GovernanceEvaluator._check_vader_concern( + "I hate this awful, terrible thing", "garbage" + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Encoding integrity +# --------------------------------------------------------------------------- + + +def test_encoding_concern_clean_text_silent() -> None: + assert ( + GovernanceEvaluator._check_encoding_concern( + "Just a normal English sentence 
with no corruption.", {} + ) + is False + ) + + +def test_encoding_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_encoding_concern("", {}) is False + + +def test_encoding_concern_replacement_chars_fire() -> None: + """U+FFFD replacement chars are a strong corruption signal.""" + text = "Hello � � world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_mojibake_bigrams_fire() -> None: + """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape.""" + text = "é é hello é" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_hex_escape_literals_fire() -> None: + """Literal ``\\xHH`` sequences mean raw bytes leaked into a string.""" + text = r"Hello \x80 \x81 \x82 world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Entropy (stdlib only — deterministic) +# --------------------------------------------------------------------------- + + +def test_entropy_concern_normal_english_does_not_fire() -> None: + """English prose entropy lands ~3.5–4.5 bits/byte — inside default range.""" + text = "The quick brown fox jumps over the lazy dog." * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is False + ) + + +def test_entropy_concern_low_entropy_fires() -> None: + """Highly repetitive text approaches 0 bits/byte.""" + text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is True + ) + + +def test_entropy_concern_high_entropy_fires() -> None: + """Random-ish bytes approach 8 bits/byte.""" + # Build text with many distinct chars to push entropy high. 
+ text = "".join(chr(c) for c in range(32, 127)) * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0}) + is True + ) + + +def test_entropy_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_entropy_concern("", {}) is False + + +def test_entropy_concern_non_dict_params_uses_defaults() -> None: + """Non-dict params don't crash; defaults apply.""" + # Normal English prose still won't trip the default min=1.5, max=7.5 range. + text = "The quick brown fox jumps over the lazy dog." + assert ( + GovernanceEvaluator._check_entropy_concern(text, "garbage") is False + ) + + +# --------------------------------------------------------------------------- +# Incident taxonomy (regex-based, deterministic) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "text,expected_category", + [ + ("I cannot help with that.", "safety_refusal"), + ("I'm sorry, but I cannot answer.", "safety_refusal"), + ("500 internal server error", "tool_failure"), + ("Connection refused", "tool_failure"), + ("timed out", "tool_failure"), + ("401 unauthorized", "auth_failure"), + ("authentication failed", "auth_failure"), + ("429", "quota_exceeded"), + ("rate limit exceeded", "quota_exceeded"), + ("I made that up", "hallucination"), + ("I don't actually know", "hallucination"), + ], +) +def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None: + """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar.""" + assert expected_category in _INCIDENT_PATTERNS + assert GovernanceEvaluator._check_incident_concern(text, {}) is True + + +def test_incident_concern_unmatched_silent() -> None: + assert ( + GovernanceEvaluator._check_incident_concern( + "All systems operating normally.", {} + ) + is False + ) + + +def test_incident_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_incident_concern("", {}) is False + + +def 
test_incident_concern_category_filter() -> None: + """Limit scanning to a subset of categories via ``categories`` param.""" + # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled, + # the scanner should miss it. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["quota_exceeded"]} + ) + is False + ) + # With auth_failure enabled, it fires. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["auth_failure"]} + ) + is True + ) + + +def test_incident_concern_unknown_category_silently_dropped() -> None: + """Categories the system doesn't know about are silently ignored.""" + # Only the unknown category is requested — falls back to no categories, + # so even matching text doesn't fire. + result = GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["unknown_cat_xyz"]} + ) + assert result is False + + +# --------------------------------------------------------------------------- +# evaluate_* dispatchers — verify they build the right CheckContext +# --------------------------------------------------------------------------- + + +def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]: + """Patch evaluate() to capture the context it receives instead of running rules.""" + captured: dict = {} + ev = _evaluator() + + def _fake_evaluate(ctx): # type: ignore[no-untyped-def] + captured["ctx"] = ctx + from datetime import datetime, timezone + + from uipath.core.governance.models import AuditRecord + + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=ctx.agent_name, + runtime_id=ctx.runtime_id, + trace_id=ctx.trace_id, + hook=ctx.hook, + evaluations=[], + final_action=Action.ALLOW, + ) + + ev.evaluate = _fake_evaluate # type: ignore[assignment] + return ev, captured + + +def test_evaluate_before_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_agent( + 
agent_input="user-text", + agent_name="a", + runtime_id="r", + trace_id="t", + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_AGENT + assert ctx.agent_input == "user-text" + assert ctx.model_name == "gpt-5" + + +def test_evaluate_after_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_agent( + agent_output="reply", agent_name="a", runtime_id="r", trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_AGENT + assert ctx.agent_output == "reply" + + +def test_evaluate_before_model_carries_messages() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_model( + model_input="prompt", + agent_name="a", + runtime_id="r", + trace_id="t", + messages=[{"role": "user", "content": "hi"}], + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_MODEL + assert ctx.model_input == "prompt" + assert ctx.messages == [{"role": "user", "content": "hi"}] + + +def test_evaluate_after_model_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_model( + model_output="resp", agent_name="a", runtime_id="r", trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_MODEL + assert ctx.model_output == "resp" + + +def test_evaluate_tool_call_carries_args() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_tool_call( + tool_name="search", + tool_args={"q": "x"}, + agent_name="a", + runtime_id="r", + trace_id="t", + session_state={"tool_calls": 1}, + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.TOOL_CALL + assert ctx.tool_name == "search" + assert ctx.tool_args == {"q": "x"} + assert ctx.session_state == {"tool_calls": 1} + + +def test_evaluate_after_tool_carries_result() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_tool( + tool_name="search", + tool_result="some-data", + agent_name="a", + 
runtime_id="r", + trace_id="t", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_TOOL + assert ctx.tool_name == "search" + assert ctx.tool_result == "some-data" + + +# --------------------------------------------------------------------------- +# DISABLED mode — evaluate() short-circuits without emitting audit +# --------------------------------------------------------------------------- + + +def test_disabled_mode_returns_empty_audit_record() -> None: + """DISABLED mode short-circuits the rule loop and audit emission.""" + set_enforcement_mode(EnforcementMode.DISABLED) + + rule = _rule_with_condition("contains", "model_output", "anything") + pack = PolicyPack(name="p", version="1", description="", rules=[rule]) + idx = PolicyIndex() + idx.add_pack(pack) + ev = GovernanceEvaluator(policy_index=idx) + + audit = ev.evaluate(_ctx(model_output="contains anything")) + assert audit.final_action == Action.ALLOW + assert audit.evaluations == [] diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py new file mode 100644 index 0000000..79e8971 --- /dev/null +++ b/tests/test_guardrail_compensation.py @@ -0,0 +1,870 @@ +"""Tests for compensating governance calls to /runtime/govern. + +The compensating call is fire-and-forget: the server runs the disabled +guardrail AND writes the audit trace itself, so we don't parse the +response. These tests cover: + +- payload + header composition, +- URL resolution off the shared backend base URL, +- error swallowing (no exception escapes, warning is logged), +- evaluator integration (a fired ``guardrail_fallback`` rule kicks off + the call on a background daemon thread). 
+""" + +from __future__ import annotations + +import json +import threading +import time +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from uipath.core.governance.models import Action, LifecycleHook + +from tests._helpers import reset_enforcement_mode +from uipath.runtime.governance.config import ( + EnforcementMode, + set_enforcement_mode, +) +from uipath.runtime.governance.native import guardrail_compensation +from uipath.runtime.governance.native.backend_client import ( + USER_AGENT, + governance_request_headers, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.guardrail_compensation import ( + _resolve_trace_id, + disabled_guardrails, + request_governance, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _mock_response(status: int = 200) -> MagicMock: + """urlopen()-compatible context manager mock.""" + response = MagicMock() + response.status = status + response.read.return_value = b"" # body is not consumed by fire-and-forget + response.__enter__.return_value = response + response.__exit__.return_value = False + return response + + +def _rules(*validators: str, rule_id: str = "R1", rule_name: str = "n", pack: str = "p"): + """Build the per-rule metadata list the compensation API now takes.""" + return [ + { + "ruleId": rule_id, + "ruleName": rule_name, + "packName": pack, + "validator": v, + } + for v in validators + ] + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _reset_enforcement_mode(): 
+ reset_enforcement_mode() + yield + reset_enforcement_mode() + + +@pytest.fixture +def _govern_env(monkeypatch): + """Provide the env vars that request_governance requires. + + The compensating call mirrors the policy fetch — it skips when + ``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` / + ``UIPATH_ACCESS_TOKEN`` are missing (sending without a bearer + token would generate a guaranteed 401 per call). Tests that need + the network path to actually fire must opt into this fixture. + """ + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token") + yield + + +# --------------------------------------------------------------------------- +# Shared header helper (lives in backend_client; covered here because it's +# the wire shape both the compensation POST and the policy GET share) +# --------------------------------------------------------------------------- + + +def test_governance_request_headers_get_shape(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers() + assert headers == {"Accept": "application/json", "User-Agent": USER_AGENT} + + +def test_governance_request_headers_post_shape(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers(json_body=True) + assert headers == { + "Accept": "application/json", + "Content-Type": "application/json", + "User-Agent": USER_AGENT, + } + + +def test_governance_request_headers_includes_authorization_when_token_set( + monkeypatch, +): + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "abc.def.ghi") + headers = governance_request_headers(json_body=True) + assert headers["Authorization"] == "Bearer abc.def.ghi" + + +def test_governance_request_headers_user_agent_is_browser_shaped(monkeypatch): + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + headers = governance_request_headers() + assert 
headers["User-Agent"].startswith("Mozilla/5.0") + assert "Chrome/" in headers["User-Agent"] + + +# --------------------------------------------------------------------------- +# request_governance — fire-and-forget contract +# --------------------------------------------------------------------------- + + +def test_request_governance_empty_types_short_circuits_without_call(): + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + result = request_governance( + [], {}, "before_model", "t1", "2026-06-06T00:00:00Z", "agent", "rt" + ) + assert result is None + mock_urlopen.assert_not_called() + + +def test_request_governance_posts_expected_payload_and_returns_none( + monkeypatch, _govern_env +): + rules = [ + { + "ruleId": "R-PII", + "ruleName": "PII guardrail", + "packName": "AITL", + "validator": "pii_detection", + }, + { + "ruleId": "R-HARM", + "ruleName": "Harmful content", + "packName": "AITL", + "validator": "harmful_content", + }, + ] + # Job context is resolved from the environment at call time; pin it so + # the assertion is deterministic and exercises the new payload keys. 
+ monkeypatch.setattr( + guardrail_compensation, + "resolve_job_context", + lambda: {"folderKey": "folder-1", "jobKey": "job-1"}, + ) + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + result = request_governance( + rules, + {"content": "hello"}, + "before_model", + "trace-1", + "2026-06-06T00:00:00Z", + "langchain", + "patch-langchain", + ) + + assert result is None # fire-and-forget + + request_arg = mock_urlopen.call_args.args[0] + assert request_arg.get_method() == "POST" + + sent = json.loads(request_arg.data.decode("utf-8")) + assert sent == { + # distinct validators drive the guardrail API call + "type": ["pii_detection", "harmful_content"], + # per-rule metadata drives one trace record per rule + "rules": rules, + "data": {"content": "hello"}, + "hook": "before_model", + "traceId": "trace-1", + "src_timestamp": "2026-06-06T00:00:00Z", + "agentName": "langchain", + "runtimeId": "patch-langchain", + "folderKey": "folder-1", + "jobKey": "job-1", + } + + +def test_request_governance_sends_shared_headers(_govern_env): + """Headers must come from the shared helper — UA + Accept + Content-Type + Auth.""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance( + _rules("x"), {}, "before_model", "t", "ts", "a", "r" + ) + + request_arg = mock_urlopen.call_args.args[0] + # urllib title-cases header keys on the Request object. + assert request_arg.get_header("Accept") == "application/json" + assert request_arg.get_header("Content-type") == "application/json" + assert request_arg.get_header("User-agent") == USER_AGENT + # Bearer is required (see ``test_request_governance_skipped_when_token_missing``). 
+ assert request_arg.get_header("Authorization") == "Bearer test-token" + # Tenant header must travel on the compensating POST (same as the + # policy GET) — the agenticgovernance ingress validates it. + assert request_arg.get_header("X-uipath-internal-tenantid") == "tenant-xyz" + + +def test_request_governance_includes_bearer_token_when_set(monkeypatch, _govern_env): + monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "the-token") + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + + request_arg = mock_urlopen.call_args.args[0] + assert request_arg.get_header("Authorization") == "Bearer the-token" + + +def test_request_governance_skipped_when_token_missing(monkeypatch): + """Missing bearer → skip cleanly instead of sending a guaranteed-401 request. + + Sending without a token would produce a 401 per compensation event + and pollute logs. Mirrors the org-id / tenant-id skip paths above. 
+ """ + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False) + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + assert not mock_urlopen.called, ( + "request_governance must NOT POST when bearer token is missing" + ) + + +def test_request_governance_skipped_when_org_id_missing(monkeypatch): + """Without an org id, we cannot build the URL — skip the call entirely.""" + monkeypatch.delenv("UIPATH_ORGANIZATION_ID", raising=False) + monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz") + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + mock_urlopen.assert_not_called() + + +def test_request_governance_skipped_when_tenant_id_missing(monkeypatch): + """Without a tenant id, the server's tenant header would be invalid.""" + monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev") + monkeypatch.delenv("UIPATH_TENANT_ID", raising=False) + with patch.object( + guardrail_compensation.urllib.request, "urlopen" + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + mock_urlopen.assert_not_called() + + +def test_request_governance_swallows_network_error(_govern_env): + """A network error must not propagate. 
(Log emission is logger-config + dependent and is verified manually — the test-isolation behavior of + pytest's caplog conflicts with the runtime's log interceptor.)""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + side_effect=OSError("connection refused"), + ): + result = request_governance( + _rules("pii_detection"), + {}, + "before_model", + "t", + "ts", + "langchain", + "patch-langchain", + ) + + assert result is None + + +def test_request_governance_swallows_unexpected_exception(_govern_env): + """Even a programmer-error inside urlopen must not propagate.""" + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + side_effect=RuntimeError("boom"), + ): + assert ( + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + is None + ) + + +def test_request_governance_does_not_read_response_body(_govern_env): + """Fire-and-forget: we must not consume the response body.""" + response = _mock_response() + with patch.object( + guardrail_compensation.urllib.request, "urlopen", return_value=response + ): + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + response.read.assert_not_called() + + +def test_request_governance_url_is_org_scoped(monkeypatch, _govern_env): + """URL must include the org segment and the agenticgovernance_ prefix. + + Mirrors the policy fetch URL shape — the agenticgovernance ingress + requires both segments; without them the request lands on a route + that doesn't exist (404 / wrong service). 
+ """ + monkeypatch.delenv("UIPATH_GOVERNANCE_BACKEND_URL", raising=False) + monkeypatch.setenv("UIPATH_URL", "https://cloud.uipath.com/my-org/my-tenant") + with patch.object( + guardrail_compensation.urllib.request, + "urlopen", + return_value=_mock_response(), + ) as mock_urlopen: + request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r") + + # org_id="appsdev" comes from the _govern_env fixture (UIPATH_ORGANIZATION_ID), + # not from UIPATH_URL — same env source as the policy fetch. + assert ( + mock_urlopen.call_args.args[0].full_url + == "https://cloud.uipath.com/appsdev/agenticgovernance_/api/v1/runtime/govern" + ) + + +# --------------------------------------------------------------------------- +# submit_compensation — bounded background pool +# --------------------------------------------------------------------------- + + +def test_submit_compensation_empty_types_short_circuits(): + """submit_compensation with no types is a no-op (no semaphore taken).""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Patch the executor to a MagicMock so we'd notice any spurious submit. 
+ with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation([], {}, "before_model", "t", "ts", "a", "r") + mock_pool.submit.assert_not_called() + + +def test_submit_compensation_routes_through_pool(): + """A non-empty types list submits a single task to the pool.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "trace-1", + "ts", + "agent", + "run", + ) + mock_pool.submit.assert_called_once() + + +def test_submit_compensation_drops_when_pool_saturated(monkeypatch): + """When the in-flight semaphore is exhausted, the call is dropped + logged.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Force the semaphore into "exhausted" state. + drained = threading.BoundedSemaphore(1) + drained.acquire() # value is now 0; next acquire(blocking=False) returns False + monkeypatch.setattr(guardrail_compensation, "_inflight", drained) + + with patch.object(guardrail_compensation, "_pool") as mock_pool: + submit_compensation( + _rules("pii_detection"), + {}, + "before_model", + "trace-1", + "ts", + "agent", + "run", + ) + + mock_pool.submit.assert_not_called() + + +def test_submit_compensation_swallows_pool_shutdown_runtimeerror(monkeypatch): + """If the pool was shut down at process exit, submit must not raise.""" + from uipath.runtime.governance.native.guardrail_compensation import ( + submit_compensation, + ) + + # Fresh semaphore so we don't taint other tests. 
+ monkeypatch.setattr( + guardrail_compensation, "_inflight", threading.BoundedSemaphore(4) + ) + + class _ShutdownPool: + def submit(self, fn, *args, **kwargs): # noqa: ARG002 + raise RuntimeError("cannot schedule new futures after shutdown") + + monkeypatch.setattr(guardrail_compensation, "_pool", _ShutdownPool()) + + # Must not raise. + submit_compensation( + _rules("x"), {}, "before_model", "t", "ts", "a", "r" + ) + + +# --------------------------------------------------------------------------- +# disabled_guardrails +# --------------------------------------------------------------------------- + + +def test_disabled_guardrails_extracts_validators_for_fired_rules(): + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[ + SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") + ] + ) + policy_index = SimpleNamespace( + get_rule=lambda rid: rule if rid == "R1" else None + ) + + assert disabled_guardrails(audit, policy_index) == [ + { + "ruleId": "R1", + "ruleName": "PII guardrail", + "packName": "", + "validator": "pii_detection", + } + ] + + +def test_disabled_guardrails_skips_unmatched_evaluations(): + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: None) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_non_guardrail_conditions(): + cond = SimpleNamespace(operator="regex", value="some-pattern") + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, 
policy_index) == [] + + +# --------------------------------------------------------------------------- +# Evaluator integration: a guardrail_fallback rule kicks off the compensation +# --------------------------------------------------------------------------- + + +def _guardrail_fallback_rule() -> Rule: + """A rule whose only check is a guardrail_fallback condition. + + Mirrors what ``_build_check`` produces for a YAML + ``type: guardrail_fallback`` entry with the guardrail mapped to + UiPath but disabled. + """ + return Rule( + rule_id="UIP-GR-01", + name="PII guardrail (UiPath-mapped, disabled)", + clause="UiPath-Mapped Guardrail", + hook=LifecycleHook.BEFORE_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition( + operator="guardrail_fallback", + field="", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + ], + action=Action.AUDIT, + message="PII guardrail disabled", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def test_evaluator_dispatches_compensation_for_fired_guardrail(): + """A matched guardrail_fallback rule must trigger request_governance.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) + + called = threading.Event() + captured: dict[str, Any] = {} + + def _spy(**kwargs: Any) -> None: + captured.update(kwargs) + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="contact jane@acme.com", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + audit = evaluator.evaluate(ctx) + + assert called.wait(timeout=1.0), ( + "Expected request_governance to be called on a background thread" 
+ ) + + assert audit.final_action == Action.AUDIT + assert audit.rules_matched == 1 + assert captured["rules"] == [ + { + "ruleId": "UIP-GR-01", + "ruleName": "PII guardrail (UiPath-mapped, disabled)", + "packName": "test_pack", + "validator": "pii_detection", + } + ] + assert captured["data"] == {"content": "contact jane@acme.com"} + assert captured["hook"] == "before_model" + assert captured["trace_id"] == "trace-1" + assert captured["agent_name"] == "agent-x" + assert captured["runtime_id"] == "run-1" + assert isinstance(captured["src_timestamp"], str) + assert "T" in captured["src_timestamp"] + + +def test_evaluator_does_not_dispatch_when_guardrail_is_enabled(): + rule = _guardrail_fallback_rule() + rule.checks[0].conditions[0].value["policy_enabled"] = True # type: ignore[index] + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(rule)) + + called = threading.Event() + + def _spy(**kwargs: Any) -> None: + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) + + assert not called.is_set() + assert audit.rules_matched == 0 + + +def test_evaluator_does_not_dispatch_when_not_mapped_to_uipath(): + rule = _guardrail_fallback_rule() + rule.checks[0].conditions[0].value["mapped_to_uipath"] = False # type: ignore[index] + rule.checks[0].conditions[0].value["policy_enabled"] = False # type: ignore[index] + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(rule)) + + called = threading.Event() + + def _spy(**kwargs: Any) -> None: + called.set() + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + 
"uipath.runtime.governance.native.evaluator.submit_compensation", _spy + ): + evaluator.evaluate(ctx) + time.sleep(0.05) + + assert not called.is_set() + + +def test_evaluator_compensation_dispatch_swallows_thread_errors(): + """If request_governance raises, the background thread must absorb it.""" + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator(_build_index_with(_guardrail_fallback_rule())) + + def _raising_spy(**kwargs: Any) -> None: + raise RuntimeError("network down") + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", + _raising_spy, + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) + + assert audit.final_action == Action.AUDIT + assert audit.rules_matched == 1 + + +def test_evaluator_does_not_emit_audit_trace_for_guardrail_fallback_rule(): + """Python must not emit a per-rule audit trace for ``guardrail_fallback``. + + The governance-server emits the trace in response to the + ``/runtime/govern`` POST; emitting one here too would produce a + duplicate. The rule still appears in the AuditRecord (so + ``disabled_guardrails`` can find it) and the compensation thread + still fires — only the per-rule ``rule_evaluation`` event is + suppressed, and the hook summary's counts exclude it. 
+ """ + from uipath.runtime.governance.audit import ( + AuditEvent, + AuditSink, + EventType, + get_audit_manager, + reset_audit_manager, + ) + + class _CapturingSink(AuditSink): + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + reset_audit_manager() + try: + manager = get_audit_manager() + for existing in list(manager.list_sinks()): + manager.unregister_sink(existing) + sink = _CapturingSink() + manager.register_sink(sink) + manager._async_mode = False # synchronous emission for assertions + + set_enforcement_mode(EnforcementMode.AUDIT) + evaluator = GovernanceEvaluator( + _build_index_with(_guardrail_fallback_rule()) + ) + + ctx = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name="agent-x", + runtime_id="run-1", + trace_id="trace-1", + model_input="hi", + ) + + # Stub the network call so it doesn't actually post; we're + # asserting on the Python-emitted trace events, not on whether + # /runtime/govern was reached. + with patch( + "uipath.runtime.governance.native.evaluator.submit_compensation", + lambda **kwargs: None, + ): + audit = evaluator.evaluate(ctx) + time.sleep(0.05) # let the daemon thread land + + # The rule still matched and is in the audit record … + assert audit.rules_matched == 1 + assert any( + ev.matched and ev.rule_id == "UIP-GR-01" for ev in audit.evaluations + ) + + # … but NO rule_evaluation event for it was emitted by Python. + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + assert not any( + e.data.get("rule_id") == "UIP-GR-01" for e in rule_events + ), "guardrail_fallback rule must not emit a Python-side audit trace" + + # The hook summary's counts must also exclude the fallback rule + # (so total_rules / matched_rules match what was actually emitted). 
+ summaries = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(summaries) == 1 + assert summaries[0].data["total_rules"] == 0 + assert summaries[0].data["matched_rules"] == 0 + finally: + reset_audit_manager() + + +# --------------------------------------------------------------------------- +# _resolve_trace_id — must capture the live trace on the caller thread +# (the /govern call later runs on a worker thread with no OTel context). +# --------------------------------------------------------------------------- + + +def test_resolve_trace_id_prefers_env_over_active_span( + monkeypatch: pytest.MonkeyPatch, +): + """UIPATH_TRACE_ID wins over a live span — this is the binding fix. + + The native audit spans are exported under UIPATH_TRACE_ID (the platform + rebinds spans to the agent's run trace), so the server-written + compensation records must land on that same id, not the live OTel + span's id. + """ + from opentelemetry.sdk.trace import TracerProvider + + monkeypatch.setenv("UIPATH_TRACE_ID", "env-trace-0001") + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root"): + assert _resolve_trace_id("fallback-id") == "env-trace-0001" + + +def test_resolve_trace_id_falls_back_to_active_span_when_env_unset( + monkeypatch: pytest.MonkeyPatch, +): + """With UIPATH_TRACE_ID unset, the live span's trace id is used.""" + from opentelemetry.sdk.trace import TracerProvider + + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) + tracer = TracerProvider().get_tracer("test") + with tracer.start_as_current_span("root") as span: + expected = format(span.get_span_context().trace_id, "032x") + result = _resolve_trace_id("fallback-id") + assert result == expected + assert len(result) == 32 # dashless OTel hex, not a dashed uuid + + +def test_resolve_trace_id_uses_fallback_without_context( + monkeypatch: pytest.MonkeyPatch, +): + """With no active span and no UIPATH_TRACE_ID env, fallback wins.""" + # Outside any 
active span and with the env trace id unset, neither + # source can supply an id, so the fallback must be returned. + monkeypatch.delenv("UIPATH_TRACE_ID", raising=False) + assert _resolve_trace_id("fallback-id") == "fallback-id" + + +def test_submit_compensation_captures_live_trace_before_thread_hop(): + """End-to-end thread-boundary proof for the binding fix. + + ``submit_compensation`` runs on the caller (hook) thread, then hands the + ``/govern`` call to a background worker pool. This test asserts BOTH + halves of why the resolve must happen at the entry: + + 1. On the **worker thread**, the OTel context is gone — resolving there + would miss the live span (so the early capture is mandatory). + 2. Despite that, ``request_governance`` (on the worker) receives the + **live span's** trace id, not the stale fallback we passed in — + proving it was captured on the caller thread before the hop. + """ + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + + done = threading.Event() + captured: dict[str, Any] = {} + + def _spy(**kwargs: Any) -> None: + # This runs on the background worker thread. + captured["trace_id"] = kwargs["trace_id"] + # Prove the worker has NO live context: if we resolved *here*, the + # sentinel would survive untouched. 
+ captured["worker_resolves_to"] = _resolve_trace_id("WORKER-MISS") + done.set() + + with patch.object(guardrail_compensation, "request_governance", _spy): + with tracer.start_as_current_span("agent-run") as span: + expected = format(span.get_span_context().trace_id, "032x") + guardrail_compensation.submit_compensation( + rules=_rules("pii_detection"), + data={"content": "contact jane@acme.com"}, + hook="before_model", + trace_id="stale-fallback", # must be overridden by the live trace + src_timestamp="2026-06-06T00:00:00Z", + agent_name="agent", + runtime_id="rt", + ) + assert done.wait(timeout=2.0), "compensation worker never ran" + + # (1) worker thread could not see the span — fell back to the sentinel + assert captured["worker_resolves_to"] == "WORKER-MISS" + # (2) but the value it received is the live span trace, captured pre-hop + assert captured["trace_id"] == expected + assert captured["trace_id"] != "stale-fallback" diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py new file mode 100644 index 0000000..e163932 --- /dev/null +++ b/tests/test_text_extraction.py @@ -0,0 +1,307 @@ +"""Tests for ``_extract_governable_text`` content extraction. + +Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent`` +and ``_check_after_agent``. Pulls clean text out of structured shapes +(dicts, list-of-blocks, pydantic models) instead of letting dict-repr +noise leak into the regex-scanned blob. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +# The wrapper lands in a later slice of the governance stack; skip (don't +# error at collection) when it isn't present yet. 
+_wrapper = pytest.importorskip( + "uipath.runtime.governance.wrapper", + reason="governance wrapper not yet present in this slice", +) +_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP +_extract_governable_text = _wrapper._extract_governable_text + + +def test_plain_string_passes_through() -> None: + assert _extract_governable_text("hello world") == "hello world" + + +def test_none_returns_empty() -> None: + assert _extract_governable_text(None) == "" + + +def test_dict_with_content_key_extracts_content_first() -> None: + """The classic coded-agent output shape — content comes through clean.""" + out = _extract_governable_text( + {"content": "Estimated cost: $780", "_meta": {"id": "abc"}} + ) + assert out.startswith("Estimated cost: $780") + # No dict-syntax noise — the prior str(...) path produced ``{'content': '...'}``. + assert "{'content'" not in out + assert "'_meta'" not in out + + +def test_dict_priority_keys_lead() -> None: + """``content`` / ``text`` / etc. lead before remaining keys.""" + out = _extract_governable_text( + {"trailing_meta": "noise-meta", "content": "primary-text"} + ) + assert out.index("primary-text") < out.index("noise-meta") + + +def test_list_of_text_blocks_concatenates() -> None: + """Anthropic-style content blocks.""" + out = _extract_governable_text( + [ + {"type": "text", "text": "first part"}, + {"type": "image", "source": {"data": "..."}}, + {"type": "text", "text": "second part"}, + ] + ) + assert "first part" in out + assert "second part" in out + + +def test_openai_function_call_shape_extracts_arguments() -> None: + """``arguments`` field on OpenAI-style function-call blocks.""" + out = _extract_governable_text( + [ + { + "type": "function_call", + "name": "end_execution", + "arguments": '{"content":"Cost: $1,200"}', + "id": "fc_abc", + } + ] + ) + assert "Cost: $1,200" in out + + +def test_numeric_scalars_are_skipped() -> None: + """Numbers / booleans aren't governance text — they shouldn't pad the blob.""" + out = 
_extract_governable_text( + {"content": "hello", "count": 42, "ok": True, "rate": 3.14} + ) + assert out == "hello" + + +def test_pydantic_like_model_dump_is_walked() -> None: + """Anything with ``model_dump()`` is walked as its dict form.""" + + class Stub: + def model_dump(self) -> dict: + return {"content": "from pydantic"} + + assert _extract_governable_text(Stub()) == "from pydantic" + + +def test_dataclass_via_dict_method() -> None: + """Objects exposing a ``dict()`` callable also walk via that path.""" + + class Stub: + def dict(self) -> dict: + return {"content": "from dict"} + + assert _extract_governable_text(Stub()) == "from dict" + + +def test_plain_object_attribute_fallback() -> None: + """Public attributes on opaque objects feed the walker.""" + + @dataclass + class Result: + content: str + _private: str = "ignored" + + out = _extract_governable_text(Result(content="visible")) + assert "visible" in out + assert "ignored" not in out + + +def test_cycle_in_structure_does_not_recurse_forever() -> None: + a: dict = {"content": "outer"} + b: dict = {"loop": a} + a["loop"] = b + # Should return without recursing infinitely. 
+ out = _extract_governable_text(a) + assert "outer" in out + + +def test_text_is_capped_at_budget() -> None: + """Long content is truncated so a runaway payload can't dominate scans.""" + big = "x" * (_GOVERNANCE_TEXT_CAP + 1000) + out = _extract_governable_text(big) + assert len(out) == _GOVERNANCE_TEXT_CAP + + +def test_nested_dict_content_extracted() -> None: + """LangGraph-style state with messages nested under a key.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "Cost: $50"}, + ] + } + ) + assert "Cost: $50" in out + + +def test_unknown_block_type_with_no_text_returns_empty() -> None: + """Image-only block with no text payload contributes nothing.""" + out = _extract_governable_text( + [{"type": "image", "source": {"type": "base64", "data": "..."}}] + ) + # Could be empty or contain just the base64 data — but should NOT + # contain Python dict syntax characters that the old path emitted. + assert "{'type'" not in out + + +# --------------------------------------------------------------------------- +# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat). +# --------------------------------------------------------------------------- + + +def test_budget_cap_is_64k() -> None: + """Documents the cap so a future drop won't go unnoticed.""" + assert _GOVERNANCE_TEXT_CAP == 64000 + + +# --------------------------------------------------------------------------- +# Reverse list iteration — latest entry gets the budget first. +# --------------------------------------------------------------------------- + + +def test_lists_are_walked_in_reverse() -> None: + """Latest list entry leads the extracted blob. + + Critical for chat history: the new user message lives at the end of + the messages list and must be visible even when prior turns would + otherwise fill the budget first. 
+ """ + out = _extract_governable_text( + [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}] + ) + assert out.index("latest") < out.index("middle") < out.index("earliest") + + +def test_long_chat_history_keeps_latest_user_message() -> None: + """A long history must not push the latest message out of the budget. + + Regression for the prior 8K-cap + forward-walk combination, which + silently dropped the latest user message once the conversation + grew past ~7,800 chars of prior content. + """ + bulky_prior = "x" * 2000 + messages = [{"role": "user", "content": bulky_prior}] * 40 # ~80K chars + messages.append({"role": "user", "content": "Cost: $1,200 — latest"}) + + out = _extract_governable_text({"messages": messages}) + assert "Cost: $1,200 — latest" in out + + +# --------------------------------------------------------------------------- +# latest_only — BEFORE_AGENT in a conversational agent +# --------------------------------------------------------------------------- + + +def test_latest_only_extracts_just_the_last_list_item() -> None: + """``latest_only=True`` drops every list entry but the last one.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old message"}, + {"role": "assistant", "content": "old response"}, + {"role": "user", "content": "Cost: $1,200"}, + ] + }, + latest_only=True, + ) + assert "Cost: $1,200" in out + assert "old message" not in out + assert "old response" not in out + + +def test_latest_only_resets_inside_chosen_item() -> None: + """Multi-block content inside the latest message is still walked fully. + + ``latest_only`` reduces the OUTER list (chat history) to its last + entry, but multi-block content (text + tool_call + thinking) + inside that latest message must still be extracted in full — + otherwise we'd lose answer text that arrives in a non-final block. 
+ """ + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old"}, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "part A"}, + { + "type": "function_call", + "arguments": '{"answer":"part B"}', + }, + ], + }, + ] + }, + latest_only=True, + ) + assert "part A" in out + assert "part B" in out + assert "old" not in out + + +def test_latest_only_top_level_list() -> None: + """``latest_only`` applies when the input itself is a list.""" + out = _extract_governable_text( + [ + {"content": "history item 1"}, + {"content": "history item 2"}, + {"content": "latest input"}, + ], + latest_only=True, + ) + assert "latest input" in out + assert "history item 1" not in out + assert "history item 2" not in out + + +def test_latest_only_default_false_still_walks_all() -> None: + """Default behavior unchanged — AFTER_AGENT etc. still see everything.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "first"}, + {"role": "user", "content": "second"}, + ] + } + ) + assert "first" in out + assert "second" in out + + +def test_latest_only_empty_list_is_empty() -> None: + """Empty history → empty extraction.""" + assert _extract_governable_text({"messages": []}, latest_only=True) == "" + + +def test_messages_is_a_priority_content_key() -> None: + """``messages`` (plural) leads ahead of non-priority keys. + + Without ``messages`` in the priority list, an input that also + carries siblings like ``thread_id`` / ``metadata`` could siphon + budget before the actual chat history is walked. 
"""Tests for trace-span verbosity / status semantics.

``TracesAuditSink`` emits an OpenTelemetry span for every governance
hook end and every rule evaluation. The contract:

- Matched non-allow rules carry a ``verbosityLevel`` span attribute
  (UiPath Orchestrator log levels: 3=Warning, 4=Error). Platform default
  is 2 (Information); we only emit this attribute when a violation
  warrants Warning or Error. OTel ``StatusCode`` only has OK / ERROR /
  UNSET, so verbosityLevel is the channel that distinguishes
  "audit-mode advisory violation" from "actually blocked the agent".
- ``verbosityLevel = 4`` (Error) and ``StatusCode.ERROR`` fire **only**
  when the runtime actually blocked the agent — enforce mode AND the
  rule's action is ``deny`` or ``escalate``.
- ``verbosityLevel = 3`` (Warning), with the status left at
  ``StatusCode.UNSET``, applies to advisory violations — audit mode
  (any non-allow action), or audit-action rules even in enforce mode.
  The agent didn't fail; surfacing ``StatusCode.ERROR`` would falsely
  paint a successful run as a failure.
- Hook spans never set Status, regardless of enforcement mode or
  final_action. They're summary containers; verbosityLevel belongs on
  the individual rule span that fired.
- ``allow`` actions and unmatched evaluations leave Status at UNSET and
  do not emit a verbosityLevel attribute (platform default applies).
"""

from __future__ import annotations

from collections.abc import Iterator
from unittest.mock import MagicMock

import pytest

from tests._helpers import reset_enforcement_mode
from uipath.runtime.governance.audit.base import AuditEvent, EventType
from uipath.runtime.governance.audit.traces import TracesAuditSink
from uipath.runtime.governance.config import (
    EnforcementMode,
    set_enforcement_mode,
)


@pytest.fixture
def captured_span(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
    """Wire ``TracesAuditSink`` to a mock tracer and return the span mock.

    The mock tracer's ``start_as_current_span`` context manager yields the
    returned span, so tests can inspect ``set_attribute`` / ``set_status``
    calls made while the sink emits an event.
    """
    span = MagicMock(name="span")
    tracer = MagicMock(name="tracer")
    tracer.start_as_current_span.return_value.__enter__.return_value = span
    # A falsy __exit__ result means the context manager never swallows errors.
    tracer.start_as_current_span.return_value.__exit__.return_value = False
    monkeypatch.setattr(TracesAuditSink, "_get_tracer", lambda self: tracer)
    return span


@pytest.fixture(autouse=True)
def _reset_mode() -> Iterator[None]:
    """Reset the enforcement mode before and after every test.

    Each test selects its own enforcement mode explicitly, so
    ``reset_enforcement_mode`` runs on both sides of the ``yield``.
    Because this is a generator fixture, it is annotated as an iterator
    rather than ``None``.
    """
    reset_enforcement_mode()
    yield
    reset_enforcement_mode()


def _hook_event(final_action: str, mode: str = "audit") -> AuditEvent:
    """Build a ``HOOK_END`` event for the ``after_model`` hook.

    Args:
        final_action: Stored as ``final_action``; any value other than
            ``"allow"`` makes the event report one matched rule.
        mode: Stored as ``enforcement_mode`` in the event data.
    """
    return AuditEvent(
        event_type=EventType.HOOK_END,
        agent_name="agent",
        hook="after_model",
        data={
            "total_rules": 1,
            "matched_rules": 1 if final_action != "allow" else 0,
            "final_action": final_action,
            "enforcement_mode": mode,
        },
    )


def _rule_event(matched: bool, action: str) -> AuditEvent:
    """Build a ``RULE_EVALUATION`` event for the ``after_model`` hook.

    Args:
        matched: Stored as ``matched``; also selects the ``status`` value
            (``"MATCHED"`` when true, ``"PASS"`` otherwise).
        action: Stored as the rule's ``action``.
    """
    return AuditEvent(
        event_type=EventType.RULE_EVALUATION,
        agent_name="agent",
        hook="after_model",
        data={
            "rule_id": "A.10.4",
            "rule_name": "commitment-language",
            "pack_name": "iso42001",
            "matched": matched,
            "action": action,
            "status": "MATCHED" if matched else "PASS",
            "detail": "Customer-binding commitment detected.",
        },
    )


def _span_attrs(span: MagicMock) -> dict[str, object]:
    """Return a mapping of attribute name → value for set_attribute calls.

    Each recorded call must carry exactly two positional arguments
    (name, value); anything else raises ``ValueError``. When a name is
    set more than once, the last value wins.
    """
    return dict(call.args for call in span.set_attribute.call_args_list)
# ---------------------------------------------------------------------------
# Hook span — never marked ERROR
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "final_action,mode",
    [
        ("deny", "enforce"),
        ("deny", "audit"),
        ("audit", "audit"),
        ("escalate", "audit"),
        ("allow", "audit"),
    ],
)
def test_hook_span_never_sets_error(
    captured_span: MagicMock, final_action: str, mode: str
) -> None:
    """A hook-end span never gets a Status, whatever the action or mode."""
    traces_sink = TracesAuditSink()
    traces_sink.emit(_hook_event(final_action=final_action, mode=mode))

    status_was_set = captured_span.set_status.called
    assert not status_was_set, (
        f"Hook span should never set_status; called with "
        f"final_action={final_action!r}, mode={mode!r}"
    )


# ---------------------------------------------------------------------------
# Rule span — enforce-mode actually-blocking violations
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("action", ["deny", "escalate"])
def test_enforce_mode_blocking_violation_is_error(
    captured_span: MagicMock, action: str
) -> None:
    """Enforce mode + deny/escalate → verbosityLevel=4 and an ERROR status."""
    set_enforcement_mode(EnforcementMode.ENFORCE)
    traces_sink = TracesAuditSink()
    traces_sink.emit(_rule_event(matched=True, action=action))

    recorded = _span_attrs(captured_span)
    assert recorded.get("verbosityLevel") == 4
    for absent_key in ("severity", "governance.severity"):
        assert absent_key not in recorded

    set_status = captured_span.set_status
    assert set_status.called, (
        f"Status.ERROR must fire for enforce-mode {action} violation"
    )
    # Exactly one positional argument is expected: the Status object.
    (status,) = set_status.call_args.args
    from opentelemetry.trace import Status, StatusCode

    assert isinstance(status, Status)
    assert status.status_code is StatusCode.ERROR
    for fragment in ("commitment-language", action):
        assert fragment in status.description


# ---------------------------------------------------------------------------
# Rule span — advisory violations (audit mode, or audit-action rules)
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("action", ["deny", "audit", "escalate"])
def test_audit_mode_violation_is_warning(
    captured_span: MagicMock, action: str
) -> None:
    """Audit mode never blocks → verbosityLevel=3 and no Status set.

    Setting an ERROR status for an audit-mode violation would falsely
    mark the agent's run as failed even though the runtime intentionally
    let it through.
    """
    set_enforcement_mode(EnforcementMode.AUDIT)
    traces_sink = TracesAuditSink()
    traces_sink.emit(_rule_event(matched=True, action=action))

    recorded = _span_attrs(captured_span)
    assert recorded.get("verbosityLevel") == 3
    for absent_key in ("severity", "governance.severity"):
        assert absent_key not in recorded

    status_was_set = captured_span.set_status.called
    assert not status_was_set, (
        f"Audit-mode {action} violation must NOT set Status.ERROR"
    )


def test_enforce_mode_audit_action_is_warning(captured_span: MagicMock) -> None:
    """Enforce mode + action=audit is still advisory → verbosityLevel=3.

    An ``audit`` action means "log this match but don't block", even
    under an enforce-mode policy, so the span stays at Warning and no
    Status is set.
    """
    set_enforcement_mode(EnforcementMode.ENFORCE)
    traces_sink = TracesAuditSink()
    traces_sink.emit(_rule_event(matched=True, action="audit"))

    recorded = _span_attrs(captured_span)
    assert recorded.get("verbosityLevel") == 3
    assert not captured_span.set_status.called


# ---------------------------------------------------------------------------
# Rule span — no violation, no verbosityLevel attribute (platform default = 2)
# ---------------------------------------------------------------------------


def test_unmatched_rule_no_verbosity_no_error(captured_span: MagicMock) -> None:
    """An unmatched evaluation sets neither verbosityLevel nor a Status."""
    set_enforcement_mode(EnforcementMode.ENFORCE)
    traces_sink = TracesAuditSink()
    traces_sink.emit(_rule_event(matched=False, action="deny"))

    recorded = _span_attrs(captured_span)
    assert "verbosityLevel" not in recorded
    assert not captured_span.set_status.called


def test_matched_allow_action_no_verbosity(captured_span: MagicMock) -> None:
    """A matched rule whose action is 'allow' is not a violation."""
    set_enforcement_mode(EnforcementMode.ENFORCE)
    traces_sink = TracesAuditSink()
    traces_sink.emit(_rule_event(matched=True, action="allow"))

    recorded = _span_attrs(captured_span)
    assert "verbosityLevel" not in recorded
    assert not captured_span.set_status.called