private void verifyCounter(Event event, long expectedCounter, RunState currentRunState) { if (expectedCounter == Long.MAX_VALUE) { return; } final long currentCounter = currentRunState.counter(); if (currentCounter > expectedCounter) { final String message = "Stale event encountered. Expected counter is " + expectedCounter + " but current counter is " + currentCounter + ". Discarding event " + event; log.debug(message); throw new StaleEventException(message); } else if (currentCounter < expectedCounter) { // This should never happen final String message = "Unexpected current counter is less than last observed one for " + currentRunState; log.error(message); throw new RuntimeException(message); } }
/**
 * Sends an info event stating which resources are depleted, unless the run state already carries
 * an equal message.
 *
 * <p>The resource names are sorted before being rendered into the message text, so the same set
 * of resources produces the same message regardless of input order.
 *
 * @param stateManager receives the info event via {@code receiveIgnoreClosed}
 * @param runState supplies the workflow instance, the current message and the counter passed
 *     along with the event
 * @param depletedResources names of the depleted resources; must not be empty
 * @throws IllegalArgumentException if {@code depletedResources} is empty
 */
public static void emitResourceLimitReachedMessage(StateManager stateManager, RunState runState,
                                                   List<String> depletedResources) {
  if (depletedResources.isEmpty()) {
    // Give the caller a hint about which precondition was violated.
    throw new IllegalArgumentException("depletedResources must not be empty");
  }

  final List<String> depletedResourcesOrdered = depletedResources.stream()
      .sorted()
      .collect(toList());
  final Message message = Message.info("Resource limit reached for: " + depletedResourcesOrdered);

  // Skip the event when the run state already holds this exact message.
  final boolean alreadyReported = runState.data().message().map(message::equals).orElse(false);
  if (alreadyReported) {
    return;
  }

  stateManager.receiveIgnoreClosed(Event.info(runState.workflowInstance(), message),
      runState.counter());
}
}
/**
 * Logs that the given run state is stale and sends a timeout event for the workflow instance.
 *
 * @param workflowInstance the workflow instance the timeout event is created for
 * @param runState the stale run state; its state and timestamp are logged and its counter is
 *     passed along with the event
 */
private void sendTimeout(WorkflowInstance workflowInstance, RunState runState) {
  final Instant staleSince = Instant.ofEpochMilli(runState.timestamp());
  LOG.info("Found stale state {} since {} for workflow {}; Issuing a timeout",
      runState.state(), staleSince, workflowInstance);

  final Event timeout = Event.timeout(workflowInstance);
  stateManager.receiveIgnoreClosed(timeout, runState.counter());
}
}
/**
 * Builds a new run state that keeps the workflow instance, state and data of this one, takes its
 * timestamp from the given instant and has its counter incremented by one.
 *
 * @param instant the instant whose epoch milliseconds become the new timestamp
 * @return the new run state
 */
private RunState transitionUpdates(Instant instant) {
  final long transitionMillis = instant.toEpochMilli();
  return new AutoValue_RunState(
      workflowInstance(),
      state(),
      transitionMillis,
      data(),
      counter() + 1);
}
/**
 * Builds a new run state with the given state and state data, keeping the workflow instance,
 * timestamp and counter of this one.
 *
 * @param state the state of the new run state
 * @param newStateData the state data of the new run state
 * @return the new run state
 */
private RunState state(State state, StateData newStateData) {
  final long unchangedTimestamp = timestamp();
  return new AutoValue_RunState(
      workflowInstance(),
      state,
      unchangedTimestamp,
      newStateData,
      counter());
}
private void emitPodEvents(Pod pod, RunState runState) { final List<Event> events = translate(runState.workflowInstance(), runState, pod, stats); for (int i = 0; i < events.size(); ++i) { final Event event = events.get(i); if (event.accept(new PullImageErrorMatcher())) { stats.recordPullImageError(); } if (EventUtil.name(event).equals("started")) { runState.data().executionId().ifPresent(stats::recordRunning); } try { // TODO: spoofing counter values like this can give unexpected results, e.g. if we emit two events here the // first one might be discarded and the second one accepted. stateManager.receive(event, runState.counter() + i); } catch (IsClosedException isClosedException) { LOG.warn("Could not receive kubernetes event", isClosedException); throw new RuntimeException(isClosedException); } } }
/**
 * Builds a new run state with the given state, keeping the workflow instance, timestamp, data
 * and counter of this one.
 *
 * @param state the state of the new run state
 * @return the new run state
 */
private RunState state(State state) {
  final long unchangedTimestamp = timestamp();
  return new AutoValue_RunState(
      workflowInstance(),
      state,
      unchangedTimestamp,
      data(),
      counter());
}
private void examineRunningWFISandAssociatedPods(Map<WorkflowInstance, RunState> activeStates, PodList podList) { final Map<WorkflowInstance, RunState> runningWorkflowInstances = Maps.filterValues(activeStates, runState -> runState.state().equals(RUNNING) && runState.data().executionId().isPresent()); final Set<WorkflowInstance> workflowInstancesForPods = podList.getItems().stream() .map(pod -> pod.getMetadata().getAnnotations()) .filter(Objects::nonNull) .map(annotations -> annotations.get(STYX_WORKFLOW_INSTANCE_ANNOTATION)) .filter(Objects::nonNull) .map(WorkflowInstance::parseKey) .collect(toSet()); // Emit errors for workflow instances that seem to be missing its pod runningWorkflowInstances.forEach((workflowInstance, runState) -> { // Is there a matching pod in the list? Bail. if (workflowInstancesForPods.contains(workflowInstance)) { return; } // The pod list might be stale so explicitly look for a pod using the execution ID. final String executionId = runState.data().executionId().get(); final Pod pod = client.pods().withName(executionId).get(); // We found a pod? Bail. if (pod != null) { return; } // No pod found. Emit an error guarded by the state counter we are basing the error conclusion on. stateManager.receiveIgnoreClosed( Event.runError(workflowInstance, "No pod associated with this instance"), runState.counter()); }); }
/**
 * Logs that the workflow instance is being executed and sends a dequeue event for it.
 *
 * <p>The log line includes the retry number when the run state's try count is non-zero.
 *
 * @param instanceState supplies the workflow instance and its run state
 * @param resourceIds the resource IDs passed into the dequeue event
 */
private void sendDequeue(InstanceState instanceState, Set<String> resourceIds) {
  final WorkflowInstance workflowInstance = instanceState.workflowInstance();
  final RunState state = instanceState.runState();

  if (state.data().tries() != 0) {
    LOG.info("Executing {}, retry #{}", workflowInstance, state.data().tries());
  } else {
    LOG.info("Executing {}", workflowInstance);
  }

  final Event dequeue = Event.dequeue(workflowInstance, resourceIds);
  stateManager.receiveIgnoreClosed(dequeue, instanceState.runState().counter());
}
Event.runError(instanceState.workflowInstance(), String.format("Referenced resources not found: %s", unknownResources)), instanceState.runState().counter()); return; stateManager.receiveIgnoreClosed(Event.retryAfter(instanceState.workflowInstance(), blocker.get().delay().toMillis()), instanceState.runState().counter()); LOG.debug("Dequeue rescheduled: {}: {}", instanceState.workflowInstance(), blocker.get()); return;
SequenceEvent.create(event, nextRunState.counter(), nextRunState.timestamp());
.set(PROPERTY_WORKFLOW, wfi.workflowId().id()) .set(PROPERTY_PARAMETER, wfi.parameter()) .set(PROPERTY_COUNTER, state.counter());