/**
 * Looks up the active state of a workflow instance by delegating to the state manager.
 *
 * @param workflowInstance The workflow instance to look up
 * @return whatever {@code stateManager.getActiveState} returns for the instance
 */
@VisibleForTesting
Optional<RunState> getState(WorkflowInstance workflowInstance) {
  final Optional<RunState> activeState = stateManager.getActiveState(workflowInstance);
  return activeState;
}
/**
 * Delivers an event through {@link #receive(Event)} while tolerating a closed receiver: an
 * {@link IsClosedException} is logged at info level and not propagated to the caller.
 *
 * @param event The event to receive
 */
default void receiveIgnoreClosed(Event event) {
  try {
    receive(event);
  } catch (IsClosedException e) {
    // Deliberately best-effort: a closed receiver is reported but does not fail the caller.
    LOG.info("Ignored event, state receiver closed", e);
  }
}
/**
 * Emits an info event stating which resources are depleted, unless the run state already
 * carries an equal message.
 *
 * <p>The resource names are sorted before the message is built, so the same set of resources
 * always yields the same message text regardless of the order they were passed in.
 *
 * @param stateManager      The state manager that receives the info event
 * @param runState          The run state whose current message is compared and whose counter
 *                          guards the emitted event
 * @param depletedResources The names of the depleted resources; must not be empty
 * @throws IllegalArgumentException if {@code depletedResources} is empty
 */
public static void emitResourceLimitReachedMessage(StateManager stateManager, RunState runState,
                                                   List<String> depletedResources) {
  if (depletedResources.isEmpty()) {
    throw new IllegalArgumentException("depletedResources must not be empty");
  }

  final List<String> depletedResourcesOrdered = depletedResources.stream()
      .sorted()
      .collect(toList());
  final Message message = Message.info("Resource limit reached for: " + depletedResourcesOrdered);

  // Skip the event when the state already shows this exact message.
  final boolean alreadyReported = runState.data().message().map(message::equals).orElse(false);
  if (!alreadyReported) {
    stateManager.receiveIgnoreClosed(Event.info(runState.workflowInstance(), message),
        runState.counter());
  }
}
}
state.workflowInstance(), getExecDescription(workflowInstance, state.data()), createExecutionId()); try { stateManager.receive(submitEvent); } catch (IsClosedException isClosedException) { LOG.warn("Could not send 'submit' event", isClosedException); stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (MissingRequiredPropertyException e) { LOG.warn("Failed to prepare execution description for " + state.workflowInstance(), e); stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (IOException e) { try { LOG.error("Failed to retrieve execution description for " + state.workflowInstance(), e); stateManager.receive(Event.runError(state.workflowInstance(), e.getMessage())); } catch (IsClosedException isClosedException) { LOG.warn("Failed to send 'runError' event", isClosedException);
Resource.create(GLOBAL_RESOURCE_ID, concurrency))); final Map<WorkflowInstance, RunState> activeStatesMap = stateManager.getActiveStates(); final List<InstanceState> activeStates = getActiveInstanceStates(activeStatesMap);
/**
 * Handles a trigger for a workflow by asking the state manager to trigger the corresponding
 * workflow instance.
 *
 * <p>Workflows without a docker image are skipped with a warning and an already-completed
 * stage. If the state manager reports that it is closed, the returned stage is completed
 * exceptionally with that exception.
 *
 * @param workflow   The workflow being triggered
 * @param trigger    The trigger that fired
 * @param instant    The instant used to derive the workflow instance parameter
 * @param parameters The parameters passed along to the state manager
 * @return a stage for the triggering operation
 */
@Override
public CompletionStage<Void> event(Workflow workflow, Trigger trigger, Instant instant,
                                   TriggerParameters parameters) {
  final boolean hasDockerImage = workflow.configuration().dockerImage().isPresent();
  if (hasDockerImage) {
    final String instanceParameter = toParameter(workflow.configuration().schedule(), instant);
    final WorkflowInstance instance = WorkflowInstance.create(workflow.id(), instanceParameter);
    try {
      return stateManager.trigger(instance, trigger, parameters);
    } catch (IsClosedException e) {
      LOG.warn("State receiver is closed when processing workflow {} for trigger {} at {}",
          workflow, trigger, instant, e);
      return exceptionallyCompletedFuture(e);
    }
  }

  LOG.warn("{} has no docker image, skipping", workflow.id());
  return CompletableFuture.completedFuture(null);
}
}
backfill.concurrency() - stateManager.getActiveStatesByTriggerId(backfill.id()).size();
} catch (ResourceNotFoundException e) { LOG.error("Unable to start docker procedure.", e); stateManager.receiveIgnoreClosed(Event.halt(state.workflowInstance())); return; stateManager.receive(submitted, state.counter()); } catch (IsClosedException isClosedException) { LOG.warn("Could not emit 'submitted' event", isClosedException); LOG.error(msg, e); stateManager.receive(Event.runError(state.workflowInstance(), e.getMessage()), state.counter() + 1); } catch (IsClosedException isClosedException) {
@VisibleForTesting synchronized void tryPollPods() { // Fetch pods _before_ fetching all active states final PodList list = client.pods().list(); final Map<WorkflowInstance, RunState> activeStates = stateManager.getActiveStates(); examineRunningWFISandAssociatedPods(activeStates, list); for (Pod pod : list.getItems()) { logEvent(Watcher.Action.MODIFIED, pod, list.getMetadata().getResourceVersion(), true); final Optional<WorkflowInstance> workflowInstance = readPodWorkflowInstance(pod); if (!workflowInstance.isPresent()) { continue; } final RunState runState = activeStates.get(workflowInstance.get()); if (runState != null && isPodRunState(pod, runState)) { emitPodEvents(pod, runState); cleanupWithRunState(workflowInstance.get(), pod, runState); } else { // The pod is stale as we fetched the active states _after_ listing all pods. cleanupWithoutRunState(workflowInstance.get(), pod); } } }
/**
 * Logs that a stale state was found and sends a timeout event for the workflow instance,
 * guarded by the counter of the given run state.
 *
 * @param workflowInstance The workflow instance to time out
 * @param runState         The stale run state supplying the state, timestamp and counter
 */
private void sendTimeout(WorkflowInstance workflowInstance, RunState runState) {
  final Instant staleSince = Instant.ofEpochMilli(runState.timestamp());
  LOG.info("Found stale state {} since {} for workflow {}; Issuing a timeout",
      runState.state(), staleSince, workflowInstance);

  final Event timeout = Event.timeout(workflowInstance);
  stateManager.receiveIgnoreClosed(timeout, runState.counter());
}
}
/**
 * Delivers an event through {@link #receive(Event, long)} while tolerating a closed receiver:
 * an {@link IsClosedException} is logged at info level and not propagated to the caller.
 *
 * @param event   The event to receive
 * @param counter The state counter that the event must act upon
 */
default void receiveIgnoreClosed(Event event, long counter) {
  try {
    receive(event, counter);
  } catch (IsClosedException e) {
    // Deliberately best-effort: a closed receiver is reported but does not fail the caller.
    LOG.info("Ignored event, state receiver closed", e);
  }
}
/**
 * Finds the active run state that the given pod belongs to.
 *
 * @param pod              The pod to match against the run state
 * @param workflowInstance The workflow instance read from the pod
 * @return the active run state if one exists and {@code isPodRunState} accepts it for the pod,
 *         otherwise empty
 */
private Optional<RunState> lookupPodRunState(Pod pod, WorkflowInstance workflowInstance) {
  final Optional<RunState> activeState = stateManager.getActiveState(workflowInstance);
  if (activeState.isPresent()) {
    return activeState.filter(candidate -> isPodRunState(pod, candidate));
  }

  LOG.debug("Pod event for unknown or inactive workflow instance {}", workflowInstance);
  return Optional.empty();
}
private void checkStatuses() { LOG.debug("Checking running statuses, {} statuses to check", inFlight.size()); for (String containerId : inFlight.keySet()) { final ContainerInfo containerInfo; try { containerInfo = client.inspectContainer(containerId); } catch (DockerException | InterruptedException e) { LOG.error("Error while reading status from docker", e); continue; } if (containerInfo.state().running() && !started.contains(containerId)) { final WorkflowInstance workflowInstance = inFlight.get(containerId); stateManager.receiveIgnoreClosed(Event.started(workflowInstance)); started.add(containerId); } if (!containerInfo.state().running()) { // Unlike in KubernetesDockerRunner case, where docker_termination_logging is supported, // here we are susceptible to Docker exit code bug, https://github.com/kubernetes/kubernetes/issues/41516. final Optional<Integer> exitCode = Optional.ofNullable(containerInfo.state().exitCode()); final WorkflowInstance workflowInstance = inFlight.remove(containerId); // trigger started event if we didn't see the container in running before if (!started.contains(containerId)) { stateManager.receiveIgnoreClosed(Event.started(workflowInstance)); } else { started.remove(containerId); } stateManager.receiveIgnoreClosed(Event.terminate(workflowInstance, exitCode)); } } }
/**
 * Forwards an event to the state manager.
 *
 * @param event The event to forward
 * @return the stage returned by {@code stateManager.receive}
 * @throws IsClosedException if the state manager throws it
 */
@VisibleForTesting
CompletionStage<Void> receive(Event event) throws IsClosedException {
  final CompletionStage<Void> completion = stateManager.receive(event);
  return completion;
}
/**
 * Decides what happens after a run did not succeed: stop the instance, or schedule a retry
 * after a delay.
 *
 * <p>The instance is stopped when its retry cost is not below {@code MAX_RETRY_COST} or when
 * {@code shouldFailFast} accepts the last exit code. Otherwise a retry is scheduled, using the
 * fixed missing-dependency delay when {@code isMissingDependency} accepts the exit code and
 * the delay calculated by {@code retryUtil} from the consecutive failures in all other cases.
 *
 * @param state The run state to evaluate
 */
private void checkRetry(RunState state) {
  final WorkflowInstance workflowInstance = state.workflowInstance();

  final boolean withinRetryBudget = state.data().retryCost() < MAX_RETRY_COST;
  if (!withinRetryBudget) {
    stateManager.receiveIgnoreClosed(Event.stop(workflowInstance));
    return;
  }

  final Optional<Integer> exitCode = state.data().lastExit();
  if (shouldFailFast(exitCode)) {
    stateManager.receiveIgnoreClosed(Event.stop(workflowInstance));
    return;
  }

  final long delayMillis = isMissingDependency(exitCode)
      ? Duration.ofMinutes(MISSING_DEPS_RETRY_DELAY_MINUTES).toMillis()
      : retryUtil.calculateDelay(state.data().consecutiveFailures()).toMillis();
  stateManager.receiveIgnoreClosed(Event.retryAfter(workflowInstance, delayMillis));
}
/**
 * Sends an event to the state manager, waits for it to be processed and maps the outcome to a
 * status.
 *
 * <ul>
 *   <li>{@code OK} when the event was processed without error.</li>
 *   <li>{@code BAD_REQUEST} when processing failed with an {@link IllegalArgumentException} or
 *       {@link IllegalStateException} as the cause.</li>
 *   <li>{@code INTERNAL_SERVER_ERROR} for any other failure, including a closed state manager
 *       or an interrupted wait.</li>
 * </ul>
 *
 * @param event The event to inject
 * @return the status describing the outcome
 */
private StatusType eventInjectorHelper(Event event) {
  try {
    stateManager.receive(event).toCompletableFuture().get();
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    return INTERNAL_SERVER_ERROR.withReasonPhrase(e.getMessage());
  } catch (IsClosedException e) {
    return INTERNAL_SERVER_ERROR.withReasonPhrase(e.getMessage());
  } catch (ExecutionException e) {
    final Throwable cause = e.getCause();
    if (cause instanceof IllegalArgumentException || cause instanceof IllegalStateException) {
      return BAD_REQUEST.withReasonPhrase(cause.getMessage());
    } else {
      return INTERNAL_SERVER_ERROR.withReasonPhrase(e.getMessage());
    }
  }
  return OK;
}
@Override public void transitionInto(RunState state) { switch (state.state()) { case TERMINATED: if (state.data().lastExit().map(v -> v.equals(0)).orElse(false)) { stateManager.receiveIgnoreClosed(Event.success(state.workflowInstance())); } else { checkRetry(state); } break; case FAILED: checkRetry(state); break; default: // do nothing } }
private void emitPodEvents(Pod pod, RunState runState) { final List<Event> events = translate(runState.workflowInstance(), runState, pod, stats); for (int i = 0; i < events.size(); ++i) { final Event event = events.get(i); if (event.accept(new PullImageErrorMatcher())) { stats.recordPullImageError(); } if (EventUtil.name(event).equals("started")) { runState.data().executionId().ifPresent(stats::recordRunning); } try { // TODO: spoofing counter values like this can give unexpected results, e.g. if we emit two events here the // first one might be discarded and the second one accepted. stateManager.receive(event, runState.counter() + i); } catch (IsClosedException isClosedException) { LOG.warn("Could not receive kubernetes event", isClosedException); throw new RuntimeException(isClosedException); } } }
private void examineRunningWFISandAssociatedPods(Map<WorkflowInstance, RunState> activeStates, PodList podList) { final Map<WorkflowInstance, RunState> runningWorkflowInstances = Maps.filterValues(activeStates, runState -> runState.state().equals(RUNNING) && runState.data().executionId().isPresent()); final Set<WorkflowInstance> workflowInstancesForPods = podList.getItems().stream() .map(pod -> pod.getMetadata().getAnnotations()) .filter(Objects::nonNull) .map(annotations -> annotations.get(STYX_WORKFLOW_INSTANCE_ANNOTATION)) .filter(Objects::nonNull) .map(WorkflowInstance::parseKey) .collect(toSet()); // Emit errors for workflow instances that seem to be missing its pod runningWorkflowInstances.forEach((workflowInstance, runState) -> { // Is there a matching pod in the list? Bail. if (workflowInstancesForPods.contains(workflowInstance)) { return; } // The pod list might be stale so explicitly look for a pod using the execution ID. final String executionId = runState.data().executionId().get(); final Pod pod = client.pods().withName(executionId).get(); // We found a pod? Bail. if (pod != null) { return; } // No pod found. Emit an error guarded by the state counter we are basing the error conclusion on. stateManager.receiveIgnoreClosed( Event.runError(workflowInstance, "No pod associated with this instance"), runState.counter()); }); }
/**
 * Logs that a workflow instance is about to execute (mentioning the retry number when it has
 * been tried before) and sends a dequeue event guarded by the run state's counter.
 *
 * @param instanceState The instance state holding the workflow instance and its run state
 * @param resourceIds   The resource IDs passed along with the dequeue event
 */
private void sendDequeue(InstanceState instanceState, Set<String> resourceIds) {
  final WorkflowInstance workflowInstance = instanceState.workflowInstance();
  final RunState state = instanceState.runState();

  if (state.data().tries() != 0) {
    LOG.info("Executing {}, retry #{}", workflowInstance, state.data().tries());
  } else {
    LOG.info("Executing {}", workflowInstance);
  }

  final Event dequeue = Event.dequeue(workflowInstance, resourceIds);
  stateManager.receiveIgnoreClosed(dequeue, instanceState.runState().counter());
}