/**
 * Builds a short textual summary of the data that is relevant for the run state's current
 * state.
 *
 * <p>Every summary starts with the try count. Depending on the state it is followed by the
 * execution id, the last exit code ({@code -} when there is none) or the retry delay.
 *
 * <p>NOTE(review): {@code executionId()} and {@code retryDelayMillis()} are handed to
 * {@code %s} exactly as returned. If they are {@code Optional}s, the text contains the
 * Optional's {@code toString()} form rather than the bare value - confirm this is intended.
 *
 * @param state the run state to summarise
 * @return the summary, or an empty string for states without a dedicated summary
 */
@VisibleForTesting
static String stateInfo(RunState state) {
  switch (state.state()) {
    case QUEUED:
      return format("tries:%d delayMs:%s",
          state.data().tries(),
          state.data().retryDelayMillis());

    case TERMINATED: {
      // A missing exit code is rendered as a dash.
      final String exitCode = state.data().lastExit().map(String::valueOf).orElse("-");
      return format("tries:%d execId:%s exitCode:%s",
          state.data().tries(),
          state.data().executionId(),
          exitCode);
    }

    case SUBMITTED:
    case RUNNING:
    case FAILED:
      return format("tries:%d execId:%s",
          state.data().tries(),
          state.data().executionId());

    case NEW:
    case PREPARE:
    case ERROR:
    case DONE:
      return format("tries:%d", state.data().tries());

    default:
      return "";
  }
}
} // unmatched brace carried over from the original line; presumably closes the enclosing class
/**
 * Computes the consecutive-failure count to record after an execution has exited.
 *
 * <p>An exit code equal to {@code SUCCESS_EXIT_CODE} or {@code MISSING_DEPS_EXIT_CODE} resets
 * the count to zero. Any other exit code, as well as an absent exit code, yields the current
 * count plus one.
 *
 * @param data     the state data holding the current count
 * @param exitCode the exit code of the execution, if one is known
 * @return the new consecutive-failure count
 */
int consecutiveFailures(StateData data, Optional<Integer> exitCode) {
  final int incremented = data.consecutiveFailures() + 1;
  if (!exitCode.isPresent()) {
    return incremented;
  }

  final int code = exitCode.get();
  final boolean resetsCount = code == SUCCESS_EXIT_CODE || code == MISSING_DEPS_EXIT_CODE;
  return resetsCount ? 0 : incremented;
}
/**
 * Emits an info event naming the depleted resources, unless the run state's current message
 * already equals that exact announcement.
 *
 * <p>The resource names are sorted before being rendered, so the same set of resources always
 * yields the same message text regardless of input order.
 *
 * @param stateManager      receives the info event
 * @param runState          the run state the message is about; its counter accompanies the event
 * @param depletedResources the resources that are depleted; must not be empty
 * @throws IllegalArgumentException if {@code depletedResources} is empty
 */
public static void emitResourceLimitReachedMessage(StateManager stateManager,
                                                   RunState runState,
                                                   List<String> depletedResources) {
  if (depletedResources.isEmpty()) {
    throw new IllegalArgumentException();
  }

  final List<String> sortedResources = depletedResources.stream()
      .sorted()
      .collect(toList());
  final Message message = Message.info("Resource limit reached for: " + sortedResources);

  // Do not repeat an announcement that is already the state's current message.
  final boolean alreadyAnnounced = runState.data().message().filter(message::equals).isPresent();
  if (alreadyAnnounced) {
    return;
  }

  stateManager.receiveIgnoreClosed(
      Event.info(runState.workflowInstance(), message),
      runState.counter());
}
} // unmatched brace carried over from the original line; presumably closes the enclosing class
.set(PROPERTY_STATE, state.state().toString()) .set(PROPERTY_STATE_TIMESTAMP, state.timestamp()) .set(PROPERTY_STATE_TRIES, state.data().tries()) .set(PROPERTY_STATE_CONSECUTIVE_FAILURES, state.data().consecutiveFailures()) .set(PROPERTY_STATE_RETRY_COST, state.data().retryCost()) .set(PROPERTY_STATE_MESSAGES, jsonValue(state.data().messages())); state.data().retryDelayMillis().ifPresent(v -> entity.set(PROPERTY_STATE_RETRY_DELAY_MILLIS, v)); state.data().lastExit().ifPresent(v -> entity.set(PROPERTY_STATE_LAST_EXIT, v)); state.data().trigger().ifPresent(trigger -> { entity.set(PROPERTY_STATE_TRIGGER_TYPE, TriggerUtil.triggerType(trigger)); entity.set(PROPERTY_STATE_TRIGGER_ID, TriggerUtil.triggerId(trigger)); }); state.data().executionId().ifPresent(v -> entity.set(PROPERTY_STATE_EXECUTION_ID, v)); if (state.data().triggerParameters().isPresent()) { entity.set(PROPERTY_STATE_TRIGGER_PARAMETERS, jsonValue(state.data().triggerParameters().get())); if (state.data().executionDescription().isPresent()) { entity.set(PROPERTY_STATE_EXECUTION_DESCRIPTION, jsonValue(state.data().executionDescription().get())); if (state.data().resourceIds().isPresent()) { entity.set(PROPERTY_STATE_RESOURCE_IDS, jsonValue(state.data().resourceIds().get()));
/**
 * Prints one plain, space-separated line per active state to standard output.
 *
 * <p>States are grouped via {@code CliUtil.groupStates}. Each line holds the component id,
 * workflow id, instance parameter, state, execution id (or {@code <no-execution-id>}), try
 * count and the current message line (or {@code No info}).
 *
 * @param runStateDataPayload the payload whose active states are printed
 */
@Override
public void printStates(RunStateDataPayload runStateDataPayload) {
  final SortedMap<WorkflowId, SortedSet<RunStateDataPayload.RunStateData>> statesByWorkflow =
      CliUtil.groupStates(runStateDataPayload.activeStates());

  statesByWorkflow.forEach((workflowId, states) -> {
    for (RunStateDataPayload.RunStateData runStateData : states) {
      final StateData stateData = runStateData.stateData();
      final String line = String.format(
          "%s %s %s %s %s %d %s",
          workflowId.componentId(),
          workflowId.id(),
          runStateData.workflowInstance().parameter(),
          runStateData.state(),
          stateData.executionId().orElse("<no-execution-id>"),
          stateData.tries(),
          stateData.message().map(Message::line).orElse("No info"));
      System.out.println(line);
    }
  });
}
@Override public RunState submitted(WorkflowInstance workflowInstance, String executionId) { switch (state()) { case SUBMITTING: return state( SUBMITTED, data().builder() .tries(data().tries() + 1) // backwards compatibility .executionId(data().executionId().orElse(executionId)) .build()); default: throw illegalTransition("submitted"); } }
/**
 * Assembles the {@code RunSpec} for a run state from its execution description, execution id
 * and trigger.
 *
 * <p>The docker arguments are passed through {@code argsReplace} together with the workflow
 * instance parameter before they are put into the spec.
 *
 * @param state the run state to build a spec for
 * @return the run spec
 * @throws ResourceNotFoundException if the state data has no execution description or no
 *                                   execution id
 */
private RunSpec createRunSpec(RunState state) throws ResourceNotFoundException {
  final Optional<ExecutionDescription> description = state.data().executionDescription();
  if (!description.isPresent()) {
    throw new ResourceNotFoundException(
        "Missing execution description for " + state.workflowInstance());
  }

  final Optional<String> executionId = state.data().executionId();
  if (!executionId.isPresent()) {
    throw new ResourceNotFoundException(
        "Missing execution id for " + state.workflowInstance());
  }

  final ExecutionDescription executionDescription = description.get();
  final List<String> command =
      argsReplace(executionDescription.dockerArgs(), state.workflowInstance().parameter());

  return RunSpec.builder()
      .executionId(executionId.get())
      .imageName(executionDescription.dockerImage())
      .args(command)
      .terminationLogging(executionDescription.dockerTerminationLogging())
      .secret(executionDescription.secret())
      .serviceAccount(executionDescription.serviceAccount())
      .trigger(state.data().trigger())
      .commitSha(description.flatMap(ExecutionDescription::commitSha))
      .env(executionDescription.env())
      .build();
}
} // unmatched brace carried over from the original line; presumably closes the enclosing class
/**
 * Emits either a stop event or a retry-after event for the state's workflow instance.
 *
 * <p>A stop is emitted when the accumulated retry cost is not below {@code MAX_RETRY_COST},
 * or when {@code shouldFailFast} accepts the last exit code. Otherwise a retry is scheduled:
 * after {@code MISSING_DEPS_RETRY_DELAY_MINUTES} minutes for a missing dependency, else after
 * the delay {@code retryUtil} calculates from the consecutive-failure count.
 *
 * @param state the run state to evaluate
 */
private void checkRetry(RunState state) {
  final WorkflowInstance workflowInstance = state.workflowInstance();

  // The exit code is only consulted while there is retry budget left (short-circuit).
  final boolean stop = !(state.data().retryCost() < MAX_RETRY_COST)
                       || shouldFailFast(state.data().lastExit());
  if (stop) {
    stateManager.receiveIgnoreClosed(Event.stop(workflowInstance));
    return;
  }

  final long delayMillis = isMissingDependency(state.data().lastExit())
      ? Duration.ofMinutes(MISSING_DEPS_RETRY_DELAY_MINUTES).toMillis()
      : retryUtil.calculateDelay(state.data().consecutiveFailures()).toMillis();
  stateManager.receiveIgnoreClosed(Event.retryAfter(workflowInstance, delayMillis));
}
/**
 * Tells whether a pod belongs to the current execution of a run state, i.e. whether the pod's
 * name equals the execution id stored in the state data.
 *
 * <p>Both negative outcomes are logged at debug level.
 *
 * @param pod      the pod to check
 * @param runState the run state to compare against
 * @return {@code true} only if the state has an execution id and it equals the pod name
 */
private boolean isPodRunState(Pod pod, RunState runState) {
  final String podName = pod.getMetadata().getName();
  final Optional<String> currentExecutionId = runState.data().executionId();

  if (currentExecutionId.isPresent()) {
    final String executionId = currentExecutionId.get();
    if (podName.equals(executionId)) {
      return true;
    }
    LOG.debug("Pod event not matching current exec id, current:{} != pod:{}",
        executionId, podName);
    return false;
  }

  LOG.debug("Pod event for state with no current executionId: {}", podName);
  return false;
}
/**
 * Handles the {@code runError} event: moves the state to FAILED, adds {@code FAILURE_COST} to
 * the retry cost, clears the last exit code, counts one more consecutive failure and records
 * the error message.
 *
 * <p>Accepted from PREPARE, QUEUED, SUBMITTING, SUBMITTED and RUNNING.
 *
 * @param workflowInstance the workflow instance the event belongs to (not read here)
 * @param message          the error text to record
 * @return the run state in FAILED
 * @throws RuntimeException whatever {@code illegalTransition} creates, when the current state
 *                          does not accept this event (exact type not visible in this excerpt)
 */
@Override
public RunState runError(WorkflowInstance workflowInstance, String message) {
  switch (state()) {
    case PREPARE:
    case QUEUED:
    case SUBMITTING:
    case SUBMITTED:
    case RUNNING:
      return state(
          FAILED,
          data().builder()
              .retryCost(data().retryCost() + FAILURE_COST)
              .lastExit(empty())
              .consecutiveFailures(data().consecutiveFailures() + 1)
              .messages(Message.error(message))
              .build());

    default:
      throw illegalTransition("runError");
  }
}
@Deprecated @Override public RunState created(WorkflowInstance workflowInstance, String executionId, String dockerImage) { switch (state()) { case PREPARE: case QUEUED: return state( SUBMITTED, // for backwards compatibility data().builder() .executionId(executionId) .executionDescription(ExecutionDescription.forImage(dockerImage)) .tries(data().tries() + 1) .build()); default: throw illegalTransition("created"); } }
/**
 * Returns the active states whose trigger has the given id.
 *
 * <p>States without a trigger never match. They used to make the whole lookup fail with a
 * {@code NoSuchElementException}, because the optional trigger was unwrapped unconditionally.
 *
 * @param triggerId the trigger id to look for
 * @return a new map from workflow instance to run state holding only the matching entries
 * @throws IOException declared by the overridden method; nothing in this implementation
 *                     throws it
 */
@Override
public Map<WorkflowInstance, RunState> readActiveStatesByTriggerId(String triggerId)
    throws IOException {
  return activeStatesMap.entrySet().stream()
      .filter(entry -> entry.getValue().data().trigger()
          .map(trigger -> triggerId.equals(TriggerUtil.triggerId(trigger)))
          .orElse(false))
      .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
/**
 * Logs that a workflow instance is about to execute and sends its dequeue event.
 *
 * <p>The log line mentions the retry number whenever the try count is not zero. The event is
 * sent together with the run state's counter.
 *
 * @param instanceState the instance and run state to dequeue
 * @param resourceIds   the resource ids carried by the dequeue event
 */
private void sendDequeue(InstanceState instanceState, Set<String> resourceIds) {
  final RunState runState = instanceState.runState();
  final WorkflowInstance instance = instanceState.workflowInstance();

  if (runState.data().tries() != 0) {
    LOG.info("Executing {}, retry #{}", instance, runState.data().tries());
  } else {
    LOG.info("Executing {}", instance);
  }

  stateManager.receiveIgnoreClosed(Event.dequeue(instance, resourceIds), runState.counter());
}
/**
 * Handles the {@code terminate} event: moves a RUNNING state to TERMINATED, recording the
 * exit code, the retry cost it adds, the resulting consecutive-failure count and an
 * {@code Exit code: ...} message ({@code -} when no exit code is known).
 *
 * @param workflowInstance the workflow instance the event belongs to (not read here)
 * @param exitCode         the exit code of the execution, if known
 * @return the run state in TERMINATED
 * @throws RuntimeException whatever {@code illegalTransition} creates, when the current state
 *                          is not RUNNING (exact type not visible in this excerpt)
 */
@Override
public RunState terminate(WorkflowInstance workflowInstance, Optional<Integer> exitCode) {
  switch (state()) {
    case RUNNING: {
      final double cost = exitCost(exitCode);
      final String exitText = exitCode.map(String::valueOf).orElse("-");
      final StateData terminatedData = data().builder()
          .retryCost(data().retryCost() + cost)
          .lastExit(exitCode)
          .consecutiveFailures(consecutiveFailures(data(), exitCode))
          .messages(Message.create(messageLevel(exitCode), "Exit code: " + exitText))
          .build();
      return state(TERMINATED, terminatedData);
    }

    default:
      throw illegalTransition("terminate");
  }
}
/**
 * Tells whether a run state is due for execution.
 *
 * <p>Only QUEUED states qualify, and only once the state's timestamp plus its retry delay
 * (zero when absent) does not lie after the current time obtained from {@code time}.
 *
 * @param runState the run state to check
 * @return {@code true} if the state is QUEUED and its deadline has been reached
 */
private boolean shouldExecute(RunState runState) {
  if (runState.state() != State.QUEUED) {
    return false;
  }

  final Instant now = time.get();
  final long retryDelayMillis = runState.data().retryDelayMillis().orElse(0L);
  final Instant deadline = Instant.ofEpochMilli(runState.timestamp()).plusMillis(retryDelayMillis);

  final boolean stillWaiting = deadline.isAfter(now);
  return !stillWaiting;
}
private void updateResourceCounters(StorageTransaction tx, Event event, RunState currentRunState, RunState nextRunState) throws IOException { // increment counters if event is dequeue if (isDequeue(event) && nextRunState.data().resourceIds().isPresent()) { tryUpdatingCounter(currentRunState, tx, nextRunState.data().resourceIds().get()); } // decrement counters if transitioning from a state that consumes resources // to a state that doesn't consume any resources if (isConsumingResources(currentRunState.state()) && !isConsumingResources(nextRunState.state())) { if (nextRunState.data().resourceIds().isPresent()) { for (String resource : nextRunState.data().resourceIds().get()) { tx.updateCounter(shardedCounter, resource, -1); } } else { log.error("Resource ids are missing for {} when transitioning from {} to {}.", nextRunState.workflowInstance(), currentRunState, nextRunState); } } }
case SUBMITTED: try { Preconditions.checkArgument(state.data().executionDescription().isPresent()); final ExecutionDescription executionDescription = state.data().executionDescription().get(); final String type = "deployed"; try { Preconditions.checkArgument(state.data().executionDescription().isPresent()); final ExecutionDescription executionDescription = state.data().executionDescription().get();
/**
 * Prints the active states as a colored table on standard output.
 *
 * <p>A header row comes first. Then, for each workflow group returned by
 * {@code CliUtil.groupStates}, a blank line and the colored component/workflow id are printed,
 * followed by one row per state: instance parameter, colored state, execution id (or
 * {@code <no-execution-id>}), try count and the colored current message (or {@code No info}).
 *
 * @param runStateDataPayload the payload whose active states are printed
 */
@Override
public void printStates(RunStateDataPayload runStateDataPayload) {
  final String header = String.format(" %-20s %-12s %-47s %-7s %s",
      "WORKFLOW INSTANCE",
      "STATE",
      "EXECUTION ID",
      "TRIES",
      "PREVIOUS EXECUTION MESSAGE");
  System.out.println(header);

  CliUtil.groupStates(runStateDataPayload.activeStates()).forEach((workflowId, states) -> {
    System.out.println();
    System.out.println(String.format("%s %s",
        colored(CYAN, workflowId.componentId()),
        colored(BLUE, workflowId.id())));

    states.forEach(runStateData -> {
      final StateData stateData = runStateData.stateData();
      final Ansi coloredState = getAnsiForState(runStateData);
      final Message currentMessage = stateData.message()
          .orElse(Message.create(Message.MessageLevel.UNKNOWN, "No info"));
      final Ansi coloredMessage =
          colored(messageColor(currentMessage.level()), currentMessage.line());

      // NOTE(review): the state column is 20 wide here but 12 in the header; presumably this
      // makes room for the color escape sequences - confirm.
      final String row = String.format(" %-20s %-20s %-47s %-7d %s",
          runStateData.workflowInstance().parameter(),
          coloredState,
          stateData.executionId().orElse("<no-execution-id>"),
          stateData.tries(),
          coloredMessage);
      System.out.println(row);
    });
  });
}
private void examineRunningWFISandAssociatedPods(Map<WorkflowInstance, RunState> activeStates, PodList podList) { final Map<WorkflowInstance, RunState> runningWorkflowInstances = Maps.filterValues(activeStates, runState -> runState.state().equals(RUNNING) && runState.data().executionId().isPresent()); final Set<WorkflowInstance> workflowInstancesForPods = podList.getItems().stream() .map(pod -> pod.getMetadata().getAnnotations()) .filter(Objects::nonNull) .map(annotations -> annotations.get(STYX_WORKFLOW_INSTANCE_ANNOTATION)) .filter(Objects::nonNull) .map(WorkflowInstance::parseKey) .collect(toSet()); // Emit errors for workflow instances that seem to be missing its pod runningWorkflowInstances.forEach((workflowInstance, runState) -> { // Is there a matching pod in the list? Bail. if (workflowInstancesForPods.contains(workflowInstance)) { return; } // The pod list might be stale so explicitly look for a pod using the execution ID. final String executionId = runState.data().executionId().get(); final Pod pod = client.pods().withName(executionId).get(); // We found a pod? Bail. if (pod != null) { return; } // No pod found. Emit an error guarded by the state counter we are basing the error conclusion on. stateManager.receiveIgnoreClosed( Event.runError(workflowInstance, "No pod associated with this instance"), runState.counter()); }); }