/**
 * Emits an info event stating which resources have hit their limit, unless the run state
 * already carries an equal message.
 *
 * <p>The resource names are sorted before being rendered so that the same set of resources
 * always yields the same message text, which is what the equality check relies on.
 *
 * @param stateManager      receiver of the info event
 * @param runState          run state whose current message, workflow instance and counter are used
 * @param depletedResources names of the depleted resources; must not be empty
 * @throws IllegalArgumentException if {@code depletedResources} is empty
 */
public static void emitResourceLimitReachedMessage(StateManager stateManager, RunState runState,
                                                   List<String> depletedResources) {
  if (depletedResources.isEmpty()) {
    throw new IllegalArgumentException("depletedResources must not be empty");
  }

  final List<String> depletedResourcesOrdered = depletedResources.stream()
      .sorted()
      .collect(toList());
  final Message message = Message.info("Resource limit reached for: " + depletedResourcesOrdered);

  // Skip the event when the run state already holds this exact message.
  final boolean alreadyReported = runState.data().message().map(message::equals).orElse(false);
  if (!alreadyReported) {
    // The event is guarded by the counter of the run state the decision was based on.
    stateManager.receiveIgnoreClosed(
        Event.info(runState.workflowInstance(), message),
        runState.counter());
  }
}
} // closes an enclosing scope that was opened outside this view
/**
 * Logs that a stale state was found and issues a timeout event for the workflow instance.
 *
 * @param workflowInstance the workflow instance to time out
 * @param runState         the stale run state; supplies the state, timestamp and counter
 */
private void sendTimeout(WorkflowInstance workflowInstance, RunState runState) {
  // The run state timestamp is treated as epoch milliseconds for logging.
  final Instant staleSince = Instant.ofEpochMilli(runState.timestamp());
  LOG.info("Found stale state {} since {} for workflow {}; Issuing a timeout",
      runState.state(), staleSince, workflowInstance);

  // The timeout is guarded by the counter of the run state that was examined.
  stateManager.receiveIgnoreClosed(
      Event.timeout(workflowInstance),
      runState.counter());
}
} // closes an enclosing scope that was opened outside this view
private void checkStatuses() { LOG.debug("Checking running statuses, {} statuses to check", inFlight.size()); for (String containerId : inFlight.keySet()) { final ContainerInfo containerInfo; try { containerInfo = client.inspectContainer(containerId); } catch (DockerException | InterruptedException e) { LOG.error("Error while reading status from docker", e); continue; } if (containerInfo.state().running() && !started.contains(containerId)) { final WorkflowInstance workflowInstance = inFlight.get(containerId); stateManager.receiveIgnoreClosed(Event.started(workflowInstance)); started.add(containerId); } if (!containerInfo.state().running()) { // Unlike in KubernetesDockerRunner case, where docker_termination_logging is supported, // here we are susceptible to Docker exit code bug, https://github.com/kubernetes/kubernetes/issues/41516. final Optional<Integer> exitCode = Optional.ofNullable(containerInfo.state().exitCode()); final WorkflowInstance workflowInstance = inFlight.remove(containerId); // trigger started event if we didn't see the container in running before if (!started.contains(containerId)) { stateManager.receiveIgnoreClosed(Event.started(workflowInstance)); } else { started.remove(containerId); } stateManager.receiveIgnoreClosed(Event.terminate(workflowInstance, exitCode)); } } }
/**
 * Decides whether a run should be retried or stopped, and emits the matching event.
 *
 * <p>The run is stopped when its retry cost is not below {@code MAX_RETRY_COST} or when the
 * last exit code qualifies for fail-fast. Otherwise a retry is scheduled: after a fixed delay
 * for a missing dependency, or after a delay derived from the number of consecutive failures.
 *
 * @param state the run state to evaluate
 */
private void checkRetry(RunState state) {
  final WorkflowInstance instance = state.workflowInstance();

  // Retry budget used up: stop.
  final boolean withinRetryBudget = state.data().retryCost() < MAX_RETRY_COST;
  if (!withinRetryBudget) {
    stateManager.receiveIgnoreClosed(Event.stop(instance));
    return;
  }

  // Some exit codes are not worth retrying: stop.
  final Optional<Integer> lastExit = state.data().lastExit();
  if (shouldFailFast(lastExit)) {
    stateManager.receiveIgnoreClosed(Event.stop(instance));
    return;
  }

  final long retryDelayMillis = isMissingDependency(lastExit)
      ? Duration.ofMinutes(MISSING_DEPS_RETRY_DELAY_MINUTES).toMillis()
      : retryUtil.calculateDelay(state.data().consecutiveFailures()).toMillis();
  stateManager.receiveIgnoreClosed(Event.retryAfter(instance, retryDelayMillis));
}
@Override public void transitionInto(RunState state) { switch (state.state()) { case TERMINATED: if (state.data().lastExit().map(v -> v.equals(0)).orElse(false)) { stateManager.receiveIgnoreClosed(Event.success(state.workflowInstance())); } else { checkRetry(state); } break; case FAILED: checkRetry(state); break; default: // do nothing } }
private void examineRunningWFISandAssociatedPods(Map<WorkflowInstance, RunState> activeStates, PodList podList) { final Map<WorkflowInstance, RunState> runningWorkflowInstances = Maps.filterValues(activeStates, runState -> runState.state().equals(RUNNING) && runState.data().executionId().isPresent()); final Set<WorkflowInstance> workflowInstancesForPods = podList.getItems().stream() .map(pod -> pod.getMetadata().getAnnotations()) .filter(Objects::nonNull) .map(annotations -> annotations.get(STYX_WORKFLOW_INSTANCE_ANNOTATION)) .filter(Objects::nonNull) .map(WorkflowInstance::parseKey) .collect(toSet()); // Emit errors for workflow instances that seem to be missing its pod runningWorkflowInstances.forEach((workflowInstance, runState) -> { // Is there a matching pod in the list? Bail. if (workflowInstancesForPods.contains(workflowInstance)) { return; } // The pod list might be stale so explicitly look for a pod using the execution ID. final String executionId = runState.data().executionId().get(); final Pod pod = client.pods().withName(executionId).get(); // We found a pod? Bail. if (pod != null) { return; } // No pod found. Emit an error guarded by the state counter we are basing the error conclusion on. stateManager.receiveIgnoreClosed( Event.runError(workflowInstance, "No pod associated with this instance"), runState.counter()); }); }
/**
 * Logs that a workflow instance is about to execute and emits its dequeue event.
 *
 * <p>The log line mentions the retry number when the instance has been tried before.
 *
 * @param instanceState the instance to dequeue; supplies the workflow instance and run state
 * @param resourceIds   resource IDs passed along with the dequeue event
 */
private void sendDequeue(InstanceState instanceState, Set<String> resourceIds) {
  final WorkflowInstance instance = instanceState.workflowInstance();
  final RunState currentState = instanceState.runState();

  if (currentState.data().tries() != 0) {
    LOG.info("Executing {}, retry #{}", instance, currentState.data().tries());
  } else {
    LOG.info("Executing {}", instance);
  }

  // The dequeue is guarded by the counter of the instance's run state.
  stateManager.receiveIgnoreClosed(
      Event.dequeue(instance, resourceIds),
      instanceState.runState().counter());
}
stateManager.receiveIgnoreClosed( Event.runError(instanceState.workflowInstance(), String.format("Referenced resources not found: %s", unknownResources)), stateManager.receiveIgnoreClosed(Event.retryAfter(instanceState.workflowInstance(), blocker.get().delay().toMillis()), instanceState.runState().counter());
stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (MissingRequiredPropertyException e) { LOG.warn("Failed to prepare execution description for " + state.workflowInstance(), e); stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (IOException e) { try {
} catch (ResourceNotFoundException e) { LOG.error("Unable to start docker procedure.", e); stateManager.receiveIgnoreClosed(Event.halt(state.workflowInstance())); return;