/**
 * Builds (but does not throw) the exception describing an event that arrived in a state
 * where it is not handled.
 *
 * @param event name of the event that was received
 * @return an {@link IllegalStateException} whose message names the workflow instance, the
 *         event and the current state
 */
private IllegalStateException illegalTransition(String event) {
  final String description = workflowInstance() + " received " + event + " while in " + state();
  return new IllegalStateException(description);
}
/**
 * Sends an info event saying which resources have reached their limit, unless the run state
 * already carries exactly that message.
 *
 * <p>The resource names are sorted before being rendered, so the same set of resources
 * produces the same message regardless of the order in which it was passed in.
 *
 * @param stateManager      receiver of the info event
 * @param runState          run state whose current message is compared and whose workflow
 *                          instance and counter are used for the event
 * @param depletedResources names of the depleted resources; must not be empty
 * @throws IllegalArgumentException if {@code depletedResources} is empty
 */
public static void emitResourceLimitReachedMessage(StateManager stateManager,
                                                   RunState runState,
                                                   List<String> depletedResources) {
  if (depletedResources.isEmpty()) {
    throw new IllegalArgumentException();
  }

  final List<String> sortedResources = depletedResources.stream().sorted().collect(toList());
  final Message limitReached = Message.info("Resource limit reached for: " + sortedResources);

  // Skip the event when the identical message is already present on the run state.
  final boolean alreadyReported =
      runState.data().message().map(limitReached::equals).orElse(false);
  if (alreadyReported) {
    return;
  }

  stateManager.receiveIgnoreClosed(
      Event.info(runState.workflowInstance(), limitReached), runState.counter());
}
}
/**
 * Logs, at info level, the state that a workflow instance has transitioned into.
 *
 * <p>The log line contains the configured prefix, the workflow instance, the lower-cased
 * state name and the output of {@code stateInfo(state)}.
 *
 * @param state the run state that was just entered
 */
@Override
public void transitionInto(RunState state) {
  // Lower-case with a fixed locale. The no-argument overload uses the JVM default locale,
  // and under e.g. a Turkish locale 'I' becomes a dotless 'ı', which would garble state
  // names such as SUBMITTED or FAILED in the log output.
  final String name = state.state().name().toLowerCase(java.util.Locale.ROOT);
  LOG.info("{}{} transition -> {} {}", prefix, state.workflowInstance(), name, stateInfo(state));
}
final RuntimeException exception = new RuntimeException( "Failed to update resource counter for workflow instance: " + runState.workflowInstance() + ": " + failedResources); failedTries.stream() .map(x -> x._2.getCause())
/**
 * Creates a new run state with the given state and state data, carrying over this
 * instance's workflow instance, timestamp and counter.
 *
 * @param state        the state for the new instance
 * @param newStateData the state data for the new instance
 * @return the newly constructed run state
 */
private RunState state(State state, StateData newStateData) {
  final RunState updated =
      new AutoValue_RunState(workflowInstance(), state, timestamp(), newStateData, counter());
  return updated;
}
/**
 * Creates a new run state that keeps this instance's workflow instance, state and data,
 * but takes its timestamp from the given instant (as epoch milliseconds) and has its
 * counter incremented by one.
 *
 * @param instant the instant to stamp the new run state with
 * @return the newly constructed run state
 */
private RunState transitionUpdates(Instant instant) {
  final long epochMillis = instant.toEpochMilli();
  return new AutoValue_RunState(
      workflowInstance(),
      state(),
      epochMillis,
      data(),
      counter() + 1);
}
@Override public void transitionInto(RunState state) { final WorkflowInstance workflowInstance = state.workflowInstance(); try { final Event submitEvent = Event.submit( state.workflowInstance(), getExecDescription(workflowInstance, state.data()), createExecutionId()); try { stateManager.receive(submitEvent); stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (MissingRequiredPropertyException e) { LOG.warn("Failed to prepare execution description for " + state.workflowInstance(), e); stateManager.receiveIgnoreClosed(Event.halt(workflowInstance)); } catch (IOException e) { try { LOG.error("Failed to retrieve execution description for " + state.workflowInstance(), e); stateManager.receive(Event.runError(state.workflowInstance(), e.getMessage())); } catch (IsClosedException isClosedException) { LOG.warn("Failed to send 'runError' event", isClosedException);
private void emitPodEvents(Pod pod, RunState runState) { final List<Event> events = translate(runState.workflowInstance(), runState, pod, stats); for (int i = 0; i < events.size(); ++i) { final Event event = events.get(i); if (event.accept(new PullImageErrorMatcher())) { stats.recordPullImageError(); } if (EventUtil.name(event).equals("started")) { runState.data().executionId().ifPresent(stats::recordRunning); } try { // TODO: spoofing counter values like this can give unexpected results, e.g. if we emit two events here the // first one might be discarded and the second one accepted. stateManager.receive(event, runState.counter() + i); } catch (IsClosedException isClosedException) { LOG.warn("Could not receive kubernetes event", isClosedException); throw new RuntimeException(isClosedException); } } }
@Override public void transitionInto(RunState state) { switch (state.state()) { case TERMINATED: if (state.data().lastExit().map(v -> v.equals(0)).orElse(false)) { stateManager.receiveIgnoreClosed(Event.success(state.workflowInstance())); } else { checkRetry(state); } break; case FAILED: checkRetry(state); break; default: // do nothing } }
} catch (ResourceNotFoundException e) { LOG.error("Unable to start docker procedure.", e); stateManager.receiveIgnoreClosed(Event.halt(state.workflowInstance())); return; final Event submitted = Event.submitted(state.workflowInstance(), runSpec.executionId()); try { stateManager.receive(submitted, state.counter()); LOG.info("running:{} image:{} args:{} termination_logging:{}", state.workflowInstance(), runSpec.imageName(), runSpec.args(), runSpec.terminationLogging()); dockerRunner.start(state.workflowInstance(), runSpec); } catch (Throwable e) { try { final String msg = "Failed the docker starting procedure for " + state.workflowInstance(); if (isUserError(e)) { LOG.info("{}: {}", msg, e.getMessage()); LOG.error(msg, e); stateManager.receive(Event.runError(state.workflowInstance(), e.getMessage()), state.counter() + 1); } catch (IsClosedException isClosedException) { if (state.data().executionId().isPresent()) { final String executionId = state.data().executionId().get(); dockerRunner.cleanup(state.workflowInstance(), executionId);
/**
 * Creates a new run state with the given state, carrying over this instance's workflow
 * instance, timestamp, data and counter.
 *
 * @param state the state for the new instance
 * @return the newly constructed run state
 */
private RunState state(State state) {
  final RunState updated =
      new AutoValue_RunState(workflowInstance(), state, timestamp(), data(), counter());
  return updated;
}
private void updateResourceCounters(StorageTransaction tx, Event event, RunState currentRunState, RunState nextRunState) throws IOException { // increment counters if event is dequeue if (isDequeue(event) && nextRunState.data().resourceIds().isPresent()) { tryUpdatingCounter(currentRunState, tx, nextRunState.data().resourceIds().get()); } // decrement counters if transitioning from a state that consumes resources // to a state that doesn't consume any resources if (isConsumingResources(currentRunState.state()) && !isConsumingResources(nextRunState.state())) { if (nextRunState.data().resourceIds().isPresent()) { for (String resource : nextRunState.data().resourceIds().get()) { tx.updateCounter(shardedCounter, resource, -1); } } else { log.error("Resource ids are missing for {} when transitioning from {} to {}.", nextRunState.workflowInstance(), currentRunState, nextRunState); } } }
/**
 * Decides whether a run should be retried or stopped, and sends the matching event.
 *
 * <p>A stop event is sent when the retry cost is not below {@code MAX_RETRY_COST}, or when
 * {@code shouldFailFast} accepts the last exit code. Otherwise a retry-after event is sent.
 * Its delay is {@code MISSING_DEPS_RETRY_DELAY_MINUTES} minutes when the exit code indicates
 * a missing dependency, and otherwise the delay computed by {@code retryUtil} from the
 * number of consecutive failures.
 *
 * @param state the run state to evaluate
 */
private void checkRetry(RunState state) {
  final WorkflowInstance workflowInstance = state.workflowInstance();

  final boolean withinRetryBudget = state.data().retryCost() < MAX_RETRY_COST;
  if (!withinRetryBudget) {
    stateManager.receiveIgnoreClosed(Event.stop(workflowInstance));
    return;
  }

  final Optional<Integer> exitCode = state.data().lastExit();
  if (shouldFailFast(exitCode)) {
    stateManager.receiveIgnoreClosed(Event.stop(workflowInstance));
    return;
  }

  final long delayMillis = isMissingDependency(exitCode)
      ? Duration.ofMinutes(MISSING_DEPS_RETRY_DELAY_MINUTES).toMillis()
      : retryUtil.calculateDelay(state.data().consecutiveFailures()).toMillis();
  stateManager.receiveIgnoreClosed(Event.retryAfter(workflowInstance, delayMillis));
}
@Override public void transitionInto(RunState state) { final WorkflowInstance workflowInstance = state.workflowInstance(); switch (state.state()) { case SUBMITTED:
/**
 * Builds the {@code RunSpec} for a run state from its execution description and
 * execution id.
 *
 * <p>The docker arguments of the execution description are passed through
 * {@code argsReplace} together with the workflow instance parameter before being set on
 * the spec.
 *
 * @param state the run state to build a spec for
 * @return the assembled run spec
 * @throws ResourceNotFoundException if the state data has no execution description or no
 *                                   execution id
 */
private RunSpec createRunSpec(RunState state) throws ResourceNotFoundException {
  final ExecutionDescription description = state.data().executionDescription()
      .orElseThrow(() -> new ResourceNotFoundException(
          "Missing execution description for " + state.workflowInstance()));

  final String executionId = state.data().executionId()
      .orElseThrow(() -> new ResourceNotFoundException(
          "Missing execution id for " + state.workflowInstance()));

  final String image = description.dockerImage();
  final List<String> resolvedArgs =
      argsReplace(description.dockerArgs(), state.workflowInstance().parameter());

  return RunSpec.builder()
      .executionId(executionId)
      .imageName(image)
      .args(resolvedArgs)
      .terminationLogging(description.dockerTerminationLogging())
      .secret(description.secret())
      .serviceAccount(description.serviceAccount())
      .trigger(state.data().trigger())
      .commitSha(state.data().executionDescription().flatMap(ExecutionDescription::commitSha))
      .env(description.env())
      .build();
}
}
/**
 * Resolves the run state data for the workflow instance that a backfill produces at the
 * given instant.
 *
 * <p>If the instance is among the active workflow instances, the data is built from its
 * active run state. Otherwise it is looked up through
 * {@code ReplayEvents.getBackfillRunStateData}, falling back to an {@code UNKNOWN} state
 * with zero state data when that lookup yields nothing.
 *
 * @param backfill                the backfill supplying the workflow id, schedule and id
 * @param activeWorkflowInstances currently active run states keyed by workflow instance
 * @param instant                 the instant converted into the instance parameter
 * @return the run state data for the instance
 */
private RunStateData getRunStateData(Backfill backfill,
                                     Map<WorkflowInstance, RunState> activeWorkflowInstances,
                                     Instant instant) {
  final String parameter = toParameter(backfill.schedule(), instant);
  final WorkflowInstance wfi = WorkflowInstance.create(backfill.workflowId(), parameter);

  if (!activeWorkflowInstances.containsKey(wfi)) {
    return ReplayEvents.getBackfillRunStateData(wfi, storage, backfill.id())
        .orElse(RunStateData.create(wfi, UNKNOWN, StateData.zero()));
  }

  final RunState active = activeWorkflowInstances.get(wfi);
  return RunStateData.newBuilder()
      .workflowInstance(active.workflowInstance())
      .state(active.state().name())
      .stateData(active.data())
      .latestTimestamp(active.timestamp())
      .build();
}
}
/**
 * Converts a run state into a {@code RunStateData} by copying its workflow instance, the
 * name of its state, its state data and its timestamp.
 *
 * @param state the run state to convert
 * @return the corresponding run state data
 */
private RunStateData runStateToRunStateData(RunState state) {
  final String stateName = state.state().name();
  return RunStateData.newBuilder()
      .workflowInstance(state.workflowInstance())
      .state(stateName)
      .stateData(state.data())
      .latestTimestamp(state.timestamp())
      .build();
}
.workflowInstance(restoredState.workflowInstance()) .state(restoredState.state().name()) .stateData(restoredState.data())