diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py
index 7c5114516..b83eb954a 100644
--- a/src/uipath/_cli/_evals/_progress_reporter.py
+++ b/src/uipath/_cli/_evals/_progress_reporter.py
@@ -30,6 +30,7 @@ from uipath.eval.models import EvalItemResult, ScoreType
 from uipath.eval.models.evaluation_set import EvaluationItem
 from uipath.eval.runtime.events import (
+    AgentExecutionCompletedEvent,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
     EvalSetRunCreatedEvent,
     EvalSetRunUpdatedEvent,
@@ -47,6 +48,7 @@ class EvaluationStatus(IntEnum):
     IN_PROGRESS = 1
     COMPLETED = 2
     FAILED = 3
+    WORKLOAD_EXECUTING = 4
 
 
 class StudioWebProgressItem(BaseModel):
@@ -725,6 +727,50 @@ async def handle_create_eval_run(self, payload: EvalRunCreatedEvent) -> None:
         except Exception as e:
             self._format_error_message(e, "StudioWeb create eval run error")
 
+    async def handle_agent_execution_completed(
+        self, payload: AgentExecutionCompletedEvent
+    ) -> None:
+        """Handle agent execution completed — move eval run from WorkloadExecuting to Running."""
+        try:
+            eval_run_id = self.eval_run_ids.get(payload.execution_id)
+            if not eval_run_id:
+                logger.warning(
+                    f"Cannot move eval run to Running: eval_run_id not found for "
+                    f"execution_id={payload.execution_id}"
+                )
+                return
+
+            is_coded = self.is_coded_eval.get(self.eval_set_execution_id or "", False)
+
+            endpoint_suffix = "coded/" if is_coded else ""
+            spec = RequestSpec(
+                method="PUT",
+                endpoint=Endpoint(
+                    f"{self._get_endpoint_prefix()}execution/agents/{self._project_id}/{endpoint_suffix}evalRun"
+                ),
+                json={
+                    "evalRunId": eval_run_id,
+                    "status": EvaluationStatus.IN_PROGRESS.value,
+                },
+                headers=self._tenant_header(),
+            )
+
+            await self._client.request_async(
+                method=spec.method,
+                url=spec.endpoint,
+                params=spec.params,
+                json=spec.json,
+                headers=spec.headers,
+                scoped="org" if self._is_localhost() else "tenant",
+            )
+
+            logger.info(
+                f"Moved eval_run_id={eval_run_id} from WorkloadExecuting to Running (coded={is_coded})"
+            )
+
+        except Exception as e:
+            self._format_error_message(e, "StudioWeb agent execution completed error")
+
     async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
         try:
             logger.info(
@@ -827,6 +873,10 @@ async def subscribe_to_eval_runtime_events(self, event_bus: EventBus) -> None:
         event_bus.subscribe(
             EvaluationEvents.CREATE_EVAL_RUN, self.handle_create_eval_run
         )
+        event_bus.subscribe(
+            EvaluationEvents.AGENT_EXECUTION_COMPLETED,
+            self.handle_agent_execution_completed,
+        )
         event_bus.subscribe(
             EvaluationEvents.UPDATE_EVAL_RUN, self.handle_update_eval_run
         )
@@ -1234,7 +1284,7 @@ def _create_eval_run_spec(
             "evalSetRunId": eval_set_run_id,
             "evalSnapshot": eval_snapshot,
             # Backend expects integer status
-            "status": EvaluationStatus.IN_PROGRESS.value,
+            "status": EvaluationStatus.WORKLOAD_EXECUTING.value,
         }
 
         # Legacy backend expects payload wrapped in "request" field
diff --git a/src/uipath/eval/runtime/events.py b/src/uipath/eval/runtime/events.py
index 589f82ba7..2ccc5627e 100644
--- a/src/uipath/eval/runtime/events.py
+++ b/src/uipath/eval/runtime/events.py
@@ -19,6 +19,7 @@ class EvaluationEvents(str, Enum):
     CREATE_EVAL_RUN = "create_eval_run"
     UPDATE_EVAL_SET_RUN = "update_eval_set_run"
     UPDATE_EVAL_RUN = "update_eval_run"
+    AGENT_EXECUTION_COMPLETED = "agent_execution_completed"
 
 
 class EvalSetRunCreatedEvent(BaseModel):
@@ -80,9 +81,16 @@ class EvalSetRunUpdatedEvent(BaseModel):
     success: bool = True
 
 
+class AgentExecutionCompletedEvent(BaseModel):
+    """Event emitted when agent execution completes and evaluation/scoring is about to begin."""
+
+    execution_id: str
+
+
 ProgressEvent = Union[
     EvalSetRunCreatedEvent,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
     EvalSetRunUpdatedEvent,
+    AgentExecutionCompletedEvent,
 ]
diff --git a/src/uipath/eval/runtime/runtime.py b/src/uipath/eval/runtime/runtime.py
index 4ed305f1d..680e961d9 100644
--- a/src/uipath/eval/runtime/runtime.py
+++ b/src/uipath/eval/runtime/runtime.py
@@ -85,6 +85,7 @@ from ._utils import apply_input_overrides
 from .context import UiPathEvalContext
 from .events import (
+    AgentExecutionCompletedEvent,
     EvalItemExceptionDetails,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
     EvalSetRunCreatedEvent,
@@ -536,6 +537,14 @@ async def _execute_eval(
             # The evaluation will be completed when resumed
             return evaluation_run_results
 
+        # Agent execution completed — move eval run from WorkloadExecuting to Running
+        await self.event_bus.publish(
+            EvaluationEvents.AGENT_EXECUTION_COMPLETED,
+            AgentExecutionCompletedEvent(
+                execution_id=execution_id,
+            ),
+        )
+
         if self.context.verbose:
             evaluation_run_results.agent_execution_output = (
                 convert_eval_execution_output_to_serializable(