Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion src/uipath/_cli/_evals/_progress_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from uipath.eval.models import EvalItemResult, ScoreType
from uipath.eval.models.evaluation_set import EvaluationItem
from uipath.eval.runtime.events import (
AgentExecutionCompletedEvent,
EvalRunCreatedEvent,
EvalRunUpdatedEvent,
EvalSetRunCreatedEvent,
Expand All @@ -47,6 +48,7 @@ class EvaluationStatus(IntEnum):
IN_PROGRESS = 1
COMPLETED = 2
FAILED = 3
WORKLOAD_EXECUTING = 4


class StudioWebProgressItem(BaseModel):
Expand Down Expand Up @@ -725,6 +727,50 @@ async def handle_create_eval_run(self, payload: EvalRunCreatedEvent) -> None:
except Exception as e:
self._format_error_message(e, "StudioWeb create eval run error")

async def handle_agent_execution_completed(
    self, payload: AgentExecutionCompletedEvent
) -> None:
    """Report to StudioWeb that the agent run behind an eval run has finished.

    Looks up the eval run id stored under ``payload.execution_id`` and issues
    a PUT that sets its status to ``EvaluationStatus.IN_PROGRESS`` (the
    WorkloadExecuting -> Running transition). When no eval run id is known
    for the execution, a warning is logged and no request is sent. Any
    exception raised along the way is handed to ``self._format_error_message``.

    Args:
        payload: Event carrying the ``execution_id`` of the finished agent run.
    """
    try:
        run_id = self.eval_run_ids.get(payload.execution_id)
        if not run_id:
            logger.warning(
                "Cannot move eval run to Running: eval_run_id not found for "
                f"execution_id={payload.execution_id}"
            )
            return

        # The coded/legacy flag is stored per eval-set execution, not per run.
        set_execution_key = self.eval_set_execution_id or ""
        is_coded = self.is_coded_eval.get(set_execution_key, False)

        # Coded evaluations live under an extra "coded/" path segment.
        if is_coded:
            path_segment = "coded/"
        else:
            path_segment = ""

        target = Endpoint(
            f"{self._get_endpoint_prefix()}execution/agents/{self._project_id}/{path_segment}evalRun"
        )
        # NOTE(review): the body is sent unwrapped for both coded and legacy
        # runs; _create_eval_run_spec carries a comment about the legacy
        # backend expecting a "request" wrapper — confirm this PUT needs none.
        body = {
            "evalRunId": run_id,
            "status": EvaluationStatus.IN_PROGRESS.value,
        }
        request_headers = self._tenant_header()
        spec = RequestSpec(
            method="PUT",
            endpoint=target,
            json=body,
            headers=request_headers,
        )

        scope = "org" if self._is_localhost() else "tenant"
        await self._client.request_async(
            method=spec.method,
            url=spec.endpoint,
            params=spec.params,
            json=spec.json,
            headers=spec.headers,
            scoped=scope,
        )

        logger.info(
            f"Moved eval_run_id={run_id} from WorkloadExecuting to Running (coded={is_coded})"
        )
    except Exception as e:
        self._format_error_message(e, "StudioWeb agent execution completed error")

async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
try:
logger.info(
Expand Down Expand Up @@ -827,6 +873,10 @@ async def subscribe_to_eval_runtime_events(self, event_bus: EventBus) -> None:
event_bus.subscribe(
EvaluationEvents.CREATE_EVAL_RUN, self.handle_create_eval_run
)
event_bus.subscribe(
EvaluationEvents.AGENT_EXECUTION_COMPLETED,
self.handle_agent_execution_completed,
)
event_bus.subscribe(
EvaluationEvents.UPDATE_EVAL_RUN, self.handle_update_eval_run
)
Expand Down Expand Up @@ -1234,7 +1284,7 @@ def _create_eval_run_spec(
"evalSetRunId": eval_set_run_id,
"evalSnapshot": eval_snapshot,
# Backend expects integer status
"status": EvaluationStatus.IN_PROGRESS.value,
"status": EvaluationStatus.WORKLOAD_EXECUTING.value,
}

# Legacy backend expects payload wrapped in "request" field
Expand Down
8 changes: 8 additions & 0 deletions src/uipath/eval/runtime/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class EvaluationEvents(str, Enum):
CREATE_EVAL_RUN = "create_eval_run"
UPDATE_EVAL_SET_RUN = "update_eval_set_run"
UPDATE_EVAL_RUN = "update_eval_run"
AGENT_EXECUTION_COMPLETED = "agent_execution_completed"


class EvalSetRunCreatedEvent(BaseModel):
Expand Down Expand Up @@ -80,9 +81,16 @@ class EvalSetRunUpdatedEvent(BaseModel):
success: bool = True


class AgentExecutionCompletedEvent(BaseModel):
    """Event emitted when agent execution completes and evaluation/scoring is about to begin.

    Published under ``EvaluationEvents.AGENT_EXECUTION_COMPLETED``; the
    progress reporter uses it to move the matching eval run from
    WorkloadExecuting to Running.
    """

    # Execution identifier of the finished agent run; the progress reporter
    # uses it as the lookup key for the eval run id.
    execution_id: str


# Union of the event payload models carried by evaluation progress events.
# A newly added payload model should be listed here as well.
ProgressEvent = Union[
    EvalSetRunCreatedEvent,
    EvalRunCreatedEvent,
    EvalRunUpdatedEvent,
    EvalSetRunUpdatedEvent,
    AgentExecutionCompletedEvent,
]
9 changes: 9 additions & 0 deletions src/uipath/eval/runtime/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
from ._utils import apply_input_overrides
from .context import UiPathEvalContext
from .events import (
AgentExecutionCompletedEvent,
EvalItemExceptionDetails,
EvalRunCreatedEvent,
EvalRunUpdatedEvent,
Expand Down Expand Up @@ -536,6 +537,14 @@ async def _execute_eval(
# The evaluation will be completed when resumed
return evaluation_run_results

# Agent execution completed — move eval run from WorkloadExecuting to Running
await self.event_bus.publish(
EvaluationEvents.AGENT_EXECUTION_COMPLETED,
AgentExecutionCompletedEvent(
execution_id=execution_id,
),
)

if self.context.verbose:
evaluation_run_results.agent_execution_output = (
convert_eval_execution_output_to_serializable(
Expand Down