/**
 * Checks whether the given pod belongs to the current execution of the given run state.
 *
 * <p>The pod is considered a match only when the run state carries an execution id and that
 * id is equal to the pod's name. Both mismatch cases are logged at debug level.
 *
 * @param pod      the pod whose metadata name is compared
 * @param runState the run state whose current execution id is compared
 * @return {@code true} if the pod name equals the current execution id, {@code false} otherwise
 */
private boolean isPodRunState(Pod pod, RunState runState) {
  final String podName = pod.getMetadata().getName();
  final Optional<String> currentExecutionId = runState.data().executionId();

  if (currentExecutionId.isPresent()) {
    final String executionId = currentExecutionId.get();
    if (podName.equals(executionId)) {
      return true;
    }
    LOG.debug("Pod event not matching current exec id, current:{} != pod:{}", executionId, podName);
    return false;
  }

  // The state has no execution id at all, so no pod can belong to it.
  LOG.debug("Pod event for state with no current executionId: {}", podName);
  return false;
}
private void examineRunningWFISandAssociatedPods(Map<WorkflowInstance, RunState> activeStates, PodList podList) { final Map<WorkflowInstance, RunState> runningWorkflowInstances = Maps.filterValues(activeStates, runState -> runState.state().equals(RUNNING) && runState.data().executionId().isPresent()); final Set<WorkflowInstance> workflowInstancesForPods = podList.getItems().stream() .map(pod -> pod.getMetadata().getAnnotations()) .filter(Objects::nonNull) .map(annotations -> annotations.get(STYX_WORKFLOW_INSTANCE_ANNOTATION)) .filter(Objects::nonNull) .map(WorkflowInstance::parseKey) .collect(toSet()); // Emit errors for workflow instances that seem to be missing its pod runningWorkflowInstances.forEach((workflowInstance, runState) -> { // Is there a matching pod in the list? Bail. if (workflowInstancesForPods.contains(workflowInstance)) { return; } // The pod list might be stale so explicitly look for a pod using the execution ID. final String executionId = runState.data().executionId().get(); final Pod pod = client.pods().withName(executionId).get(); // We found a pod? Bail. if (pod != null) { return; } // No pod found. Emit an error guarded by the state counter we are basing the error conclusion on. stateManager.receiveIgnoreClosed( Event.runError(workflowInstance, "No pod associated with this instance"), runState.counter()); }); }
private void emitPodEvents(Pod pod, RunState runState) { final List<Event> events = translate(runState.workflowInstance(), runState, pod, stats); for (int i = 0; i < events.size(); ++i) { final Event event = events.get(i); if (event.accept(new PullImageErrorMatcher())) { stats.recordPullImageError(); } if (EventUtil.name(event).equals("started")) { runState.data().executionId().ifPresent(stats::recordRunning); } try { // TODO: spoofing counter values like this can give unexpected results, e.g. if we emit two events here the // first one might be discarded and the second one accepted. stateManager.receive(event, runState.counter() + i); } catch (IsClosedException isClosedException) { LOG.warn("Could not receive kubernetes event", isClosedException); throw new RuntimeException(isClosedException); } } }
/**
 * Builds a short, state-dependent summary string for the given run state.
 *
 * <p>Every known state includes the number of tries; some states add the execution id, the
 * last exit code ("-" when absent) or the retry delay. Unhandled states yield an empty string.
 *
 * <p>NOTE(review): {@code executionId()} and {@code retryDelayMillis()} are handed to
 * {@code %s} as-is, so the text contains whatever their {@code toString()} yields — confirm
 * that this is the intended output.
 *
 * @param state the run state to describe
 * @return the summary text, or {@code ""} for a state without a dedicated format
 */
@VisibleForTesting
static String stateInfo(RunState state) {
  final String info;

  switch (state.state()) {
    case NEW:
    case PREPARE:
    case ERROR:
    case DONE:
      info = format("tries:%d", state.data().tries());
      break;

    case SUBMITTED:
    case RUNNING:
    case FAILED:
      info = format("tries:%d execId:%s",
                    state.data().tries(),
                    state.data().executionId());
      break;

    case TERMINATED:
      info = format("tries:%d execId:%s exitCode:%s",
                    state.data().tries(),
                    state.data().executionId(),
                    state.data().lastExit().map(String::valueOf).orElse("-"));
      break;

    case QUEUED:
      info = format("tries:%d delayMs:%s",
                    state.data().tries(),
                    state.data().retryDelayMillis());
      break;

    default:
      info = "";
      break;
  }

  return info;
}
}
/**
 * Prints the active states as a colored table on standard output.
 *
 * <p>A column header is printed first. The states are then printed grouped as returned by
 * {@code CliUtil.groupStates}: each group starts with an empty line and a heading showing the
 * component id and workflow id, followed by one row per state.
 *
 * @param runStateDataPayload the payload holding the active states to print
 */
@Override
public void printStates(RunStateDataPayload runStateDataPayload) {
  final String header = String.format(" %-20s %-12s %-47s %-7s %s",
      "WORKFLOW INSTANCE",
      "STATE",
      "EXECUTION ID",
      "TRIES",
      "PREVIOUS EXECUTION MESSAGE");
  System.out.println(header);

  CliUtil.groupStates(runStateDataPayload.activeStates()).forEach((workflowId, states) -> {
    // Blank separator line followed by the group heading.
    System.out.println();
    final String heading = String.format("%s %s",
        colored(CYAN, workflowId.componentId()),
        colored(BLUE, workflowId.id()));
    System.out.println(heading);

    states.forEach(runStateData -> {
      final StateData stateData = runStateData.stateData();
      final Ansi ansiState = getAnsiForState(runStateData);

      // Fall back to a placeholder message when the state carries none.
      final Message lastMessage = stateData.message()
          .orElse(Message.create(Message.MessageLevel.UNKNOWN, "No info"));
      final Ansi ansiMessage = colored(messageColor(lastMessage.level()), lastMessage.line());

      final String row = String.format(" %-20s %-20s %-47s %-7d %s",
          runStateData.workflowInstance().parameter(),
          ansiState,
          stateData.executionId().orElse("<no-execution-id>"),
          stateData.tries(),
          ansiMessage);
      System.out.println(row);
    });
  });
}
/**
 * Prints the active states as plain, space-separated lines on standard output.
 *
 * <p>One line is printed per state, in the grouping order returned by
 * {@code CliUtil.groupStates}. Each line holds the component id, workflow id, instance
 * parameter, state, execution id (or a placeholder), tries and the last message line
 * (or "No info").
 *
 * @param runStateDataPayload the payload holding the active states to print
 */
@Override
public void printStates(RunStateDataPayload runStateDataPayload) {
  CliUtil.groupStates(runStateDataPayload.activeStates()).forEach((workflowId, states) -> {
    states.forEach(runStateData -> {
      final StateData stateData = runStateData.stateData();
      final String executionId = stateData.executionId().orElse("<no-execution-id>");

      final String line = String.format(
          "%s %s %s %s %s %d %s",
          workflowId.componentId(),
          workflowId.id(),
          runStateData.workflowInstance().parameter(),
          runStateData.state(),
          executionId,
          stateData.tries(),
          stateData.message().map(Message::line).orElse("No info"));
      System.out.println(line);
    });
  });
}
@Override public RunState submitted(WorkflowInstance workflowInstance, String executionId) { switch (state()) { case SUBMITTING: return state( SUBMITTED, data().builder() .tries(data().tries() + 1) // backwards compatibility .executionId(data().executionId().orElse(executionId)) .build()); default: throw illegalTransition("submitted"); } }
case FAILED: case ERROR: if (state.data().executionId().isPresent()) { final String executionId = state.data().executionId().get(); dockerRunner.cleanup(state.workflowInstance(), executionId);
entity.set(PROPERTY_STATE_TRIGGER_ID, TriggerUtil.triggerId(trigger)); }); state.data().executionId().ifPresent(v -> entity.set(PROPERTY_STATE_EXECUTION_ID, v)); if (state.data().triggerParameters().isPresent()) { entity.set(PROPERTY_STATE_TRIGGER_PARAMETERS, jsonValue(state.data().triggerParameters().get()));
/**
 * Builds the {@code RunSpec} for the given run state.
 *
 * <p>The docker args of the execution description are passed through {@code argsReplace}
 * together with the workflow instance parameter before being put into the spec.
 *
 * @param state the run state to create a run spec for
 * @return the run spec built from the state's execution description, execution id and trigger
 * @throws ResourceNotFoundException if the state has no execution description or no execution id
 */
private RunSpec createRunSpec(RunState state) throws ResourceNotFoundException {
  final Optional<ExecutionDescription> descriptionOpt = state.data().executionDescription();
  if (!descriptionOpt.isPresent()) {
    throw new ResourceNotFoundException(
        "Missing execution description for " + state.workflowInstance());
  }
  final ExecutionDescription description = descriptionOpt.get();

  final Optional<String> executionIdOpt = state.data().executionId();
  if (!executionIdOpt.isPresent()) {
    throw new ResourceNotFoundException(
        "Missing execution id for " + state.workflowInstance());
  }

  final List<String> command =
      argsReplace(description.dockerArgs(), state.workflowInstance().parameter());

  return RunSpec.builder()
      .executionId(executionIdOpt.get())
      .imageName(description.dockerImage())
      .args(command)
      .terminationLogging(description.dockerTerminationLogging())
      .secret(description.secret())
      .serviceAccount(description.serviceAccount())
      .trigger(state.data().trigger())
      .commitSha(descriptionOpt.flatMap(ExecutionDescription::commitSha))
      .env(description.env())
      .build();
}
}