/**
 * Reports whether a current state is present and that state reports itself as failed.
 *
 * @return {@code false} when no current state is present; otherwise the result of
 *         {@code isFailed()} on the current state
 */
@JsonIgnore
public boolean isFailed() {
  if (!currentState.isPresent()) {
    return false;
  }
  return currentState.get().isFailed();
}
/**
 * Classifies the deploy failure (if any) for a single task based on its recorded history updates.
 *
 * <p>The last update decides the outcome:
 * <ul>
 *   <li>a successful terminal state yields {@code TASK_EXPECTED_RUNNING_FINISHED};</li>
 *   <li>any other done state yields {@code TASK_FAILED_ON_STARTUP};</li>
 *   <li>if the task is not done and its simplified state is still {@code WAITING},
 *       the result is {@code TASK_NEVER_ENTERED_RUNNING}.</li>
 * </ul>
 *
 * @param taskUpdates history updates keyed by task id
 * @param taskId the task to inspect
 * @return the failure for this task, or absent when none of the cases above apply or when no
 *         updates are recorded for the task
 */
private Optional<SingularityDeployFailure> getNonHealthcheckedTaskFailure(Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates, SingularityTaskId taskId) {
  List<SingularityTaskHistoryUpdate> updates = taskUpdates.get(taskId);

  // Map.get may return null and Iterables.getLast throws on a null or empty list; with no updates
  // there is nothing to classify (getCurrentState would report UNKNOWN), so report no failure.
  if (updates == null || updates.isEmpty()) {
    return Optional.absent();
  }

  SingularityTaskHistoryUpdate lastUpdate = Iterables.getLast(updates);

  if (lastUpdate.getTaskState().isSuccess()) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId),
        Optional.of(String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", lastUpdate.getStatusMessage().or("")))));
  } else if (lastUpdate.getTaskState().isDone()) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_ON_STARTUP, Optional.of(taskId), lastUpdate.getStatusMessage()));
  } else if (SingularityTaskHistoryUpdate.getCurrentState(updates) == SimplifiedTaskState.WAITING) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_NEVER_ENTERED_RUNNING, Optional.of(taskId),
        Optional.of(String.format("Task never entered running state, last state was %s (%s)", lastUpdate.getTaskState().getDisplayName(), lastUpdate.getStatusMessage().or("")))));
  }

  return Optional.absent();
}
}
/**
 * Maps an {@link ExtendedTaskState} to the {@code TaskState} constant that carries the same name.
 *
 * @param extendedTaskState the extended state to convert
 * @return the {@code TaskState} looked up by name via {@code TaskState.valueOf}
 */
public static TaskState toTaskState(ExtendedTaskState extendedTaskState) {
  // NOTE(review): get() is called without a presence check; presumably an extended state
  // with no plain counterpart makes this throw - confirm against callers.
  final String stateName = extendedTaskState.toTaskState().get().name();
  return TaskState.valueOf(stateName);
}
}
if (lastUpdate.getTaskState().isDone()) { if (lastUpdate.getTaskState().isSuccess()) { return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId), Optional.of(String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", lastUpdate.getStatusMessage().or("")))));
/**
 * Runs cleanup for a single executor task.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Decide whether the task app directory should be removed (not removed when the executor
 *       data asks to preserve the sandbox after finish).</li>
 *   <li>If the task logrotates its log file, check for an uncompressed logrotated file.</li>
 *   <li>If history is available and the last update is a done state older than 15 minutes,
 *       clean up the task's logs.</li>
 *   <li>If the last update is a failed state more recent than the configured
 *       {@code cleanupAppDirectoryOfFailedTasksAfterMillis}, keep the app directory for now.</li>
 *   <li>Delegate to {@code taskCleanup.cleanup}, flagging whether the task used a Docker container.</li>
 * </ol>
 *
 * @param taskDefinition definition of the task being cleaned
 * @param taskHistory history of the task, if it could be loaded
 * @return the result reported by the underlying task cleanup
 */
private TaskCleanupResult cleanTask(SingularityExecutorTaskDefinition taskDefinition, Optional<SingularityTaskHistory> taskHistory) {
  SingularityExecutorTaskLogManager logManager = new SingularityExecutorTaskLogManager(taskDefinition, templateManager, baseConfiguration, executorConfiguration, LOG, jsonObjectFileHelper, false);

  SingularityExecutorTaskCleanup taskCleanup = new SingularityExecutorTaskCleanup(logManager, executorConfiguration, taskDefinition, LOG, dockerUtils);

  boolean cleanupTaskAppDirectory = !taskDefinition.getExecutorData().getPreserveTaskSandboxAfterFinish().or(Boolean.FALSE);

  if (taskDefinition.shouldLogrotateLogFile()) {
    checkForUncompressedLogrotatedFile(taskDefinition);
  }

  if (taskHistory.isPresent()) {
    final Optional<SingularityTaskHistoryUpdate> lastUpdate = JavaUtils.getLast(taskHistory.get().getTaskUpdates());

    if (lastUpdate.isPresent()) {
      if (lastUpdate.get().getTaskState().isDone() && System.currentTimeMillis() - lastUpdate.get().getTimestamp() > TimeUnit.MINUTES.toMillis(15)) {
        // The message has a {} placeholder, so the task id must be supplied as an argument.
        LOG.info("Task {} is done for > 15 minutes, removing logrotate files", taskDefinition.getTaskId());
        taskCleanup.cleanUpLogs();
      }

      if (lastUpdate.get().getTaskState().isFailed()) {
        final long delta = System.currentTimeMillis() - lastUpdate.get().getTimestamp();

        // Keep the app directory of a recently failed task around until the configured delay has passed.
        if (delta < cleanupConfiguration.getCleanupAppDirectoryOfFailedTasksAfterMillis()) {
          LOG.info("Not cleaning up task app directory of {} because only {} has elapsed since it failed (will cleanup after {})", taskDefinition.getTaskId(),
              JavaUtils.durationFromMillis(delta), JavaUtils.durationFromMillis(cleanupConfiguration.getCleanupAppDirectoryOfFailedTasksAfterMillis()));
          cleanupTaskAppDirectory = false;
        }
      }
    }
  }

  // Without history the container type is unknown, so the task is treated as non-Docker.
  boolean isDocker = (taskHistory.isPresent()
      && taskHistory.get().getTask().getTaskRequest().getDeploy().getContainerInfo().isPresent()
      && taskHistory.get().getTask().getTaskRequest().getDeploy().getContainerInfo().get().getType() == SingularityContainerType.DOCKER);

  return taskCleanup.cleanup(cleanupTaskAppDirectory, isDocker);
}
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) { SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder(); if (!state.isFailed()) { if (bldr.getAverageRuntimeMillis().isPresent()) { long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1); final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo()); if (!state.isSuccess()) { if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) { LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
/**
 * Reports whether a current state is present and that state reports itself as done.
 *
 * @return {@code false} when no current state is present; otherwise the result of
 *         {@code isDone()} on the current state
 */
@JsonIgnore
public boolean isDone() {
  if (!currentState.isPresent()) {
    return false;
  }
  return currentState.get().isDone();
}
/**
 * Reports whether a current state is present and that state reports itself as successful.
 *
 * @return {@code false} when no current state is present; otherwise the result of
 *         {@code isSuccess()} on the current state
 */
@JsonIgnore
public boolean isSuccess() {
  if (!currentState.isPresent()) {
    return false;
  }
  return currentState.get().isSuccess();
}
/**
 * Builds the path of the node for one state update of a task: the task's updates path
 * with the state's name appended as the child node.
 *
 * @param taskId the task whose updates path is the parent
 * @param state the state whose {@code name()} becomes the child node
 * @return the combined path produced by {@code ZKPaths.makePath}
 */
private String getUpdatePath(SingularityTaskId taskId, ExtendedTaskState state) {
  final String updatesRoot = getUpdatesPath(taskId);
  final String stateNode = state.name();
  return ZKPaths.makePath(updatesRoot, stateNode);
}
/**
 * Builds the subject line for a task history email.
 *
 * <p>An overdue-to-finish email type always gets the "overdue" subject. Otherwise the subject
 * depends on whether {@code didTaskRun(history)} reports that the task ran.
 *
 * @param taskId the task the email is about
 * @param state the state whose display name is included in the subject
 * @param type the kind of email being sent
 * @param history the task's history updates, used to decide whether the task ever ran
 * @return the formatted subject line
 */
public String getSubjectForTaskHistory(SingularityTaskId taskId, ExtendedTaskState state, SingularityEmailType type, Collection<SingularityTaskHistoryUpdate> history) {
  final String subject;

  if (type == SingularityEmailType.TASK_SCHEDULED_OVERDUE_TO_FINISH) {
    subject = String.format("Task is overdue to finish (%s)", taskId.toString());
  } else if (didTaskRun(history)) {
    subject = String.format("Task %s (%s)", state.getDisplayName(), taskId.toString());
  } else {
    subject = String.format("Task never started and was %s (%s)", state.getDisplayName(), taskId.toString());
  }

  return subject;
}
/**
 * Finds the timestamp of the first history update in which the task was in
 * {@code TASK_RUNNING}.
 *
 * @param task the task to look up
 * @return the timestamp of the first {@code TASK_RUNNING} update; if the task history or such an
 *         update cannot be found, an error is logged and the current time is returned instead
 */
private long getTaskRunningStartTime(SingularityTaskId task) {
  Optional<SingularityTaskHistory> history = taskManager.getTaskHistory(task);

  if (!history.isPresent()) {
    LOG.error("Could not find task history for {}", task);
    return System.currentTimeMillis();
  }

  // Updates are scanned in iteration order; the first TASK_RUNNING entry wins.
  for (SingularityTaskHistoryUpdate update : history.get().getTaskUpdates()) {
    if (update.getTaskState().equals(ExtendedTaskState.TASK_RUNNING)) {
      return update.getTimestamp();
    }
  }

  LOG.error("Could not find time when task {} reached TASK_RUNNING state", task);
  return System.currentTimeMillis();
}
private void updateDeployStatistics(SingularityDeployStatistics deployStatistics, SingularityTaskId taskId, Optional<SingularityTask> task, long timestamp, ExtendedTaskState state, Optional<PendingType> scheduleResult) { SingularityDeployStatisticsBuilder bldr = deployStatistics.toBuilder(); if (!state.isFailed()) { if (bldr.getAverageRuntimeMillis().isPresent()) { long newAvgRuntimeMillis = (bldr.getAverageRuntimeMillis().get() * bldr.getNumTasks() + (timestamp - taskId.getStartedAt())) / (bldr.getNumTasks() + 1); final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo()); if (!state.isSuccess()) { if (SingularityTaskHistoryUpdate.getUpdate(taskManager.getTaskHistoryUpdates(taskId), ExtendedTaskState.TASK_CLEANING).isPresent()) { LOG.debug("{} failed with {} after cleaning - ignoring it for cooldown", taskId, state);
/**
 * Updates the stored last-active status for a task: while the task is not done the new status
 * holder is saved, and once the task state is done the stored status is deleted.
 *
 * @param taskIdObj the task whose stored status is deleted when done
 * @param newTaskStatusHolder the status holder to save while the task is still active
 * @param taskState the state that decides between saving and deleting
 */
private void saveNewTaskStatusHolder(SingularityTaskId taskIdObj, SingularityTaskStatusHolder newTaskStatusHolder, ExtendedTaskState taskState) {
  if (!taskState.isDone()) {
    taskManager.saveLastActiveTaskStatus(newTaskStatusHolder);
    return;
  }

  taskManager.deleteLastActiveTaskStatus(taskIdObj);
}
if (lastUpdate.getTaskState().isDone()) { if (lastUpdate.getTaskState().isSuccess()) { return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId), Optional.of(String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", lastUpdate.getStatusMessage().or("")))));
if (!state.isSuccess() && taskHistoryUpdateCreateResult == SingularityCreateResult.CREATED && cooldown.shouldEnterCooldown(request, taskId, requestState, deployStatistics, timestamp)) { LOG.info("Request {} is entering cooldown due to task {}", request.getId(), taskId); requestState = RequestState.SYSTEM_COOLDOWN; Optional<Resources> resources = Optional.absent(); if (!state.isSuccess() && shouldRetryImmediately(request, deployStatistics, task)) { LOG.debug("Retrying {} because {}", request.getId(), state); pendingType = PendingType.RETRY; if (state.isSuccess() && requestState == RequestState.SYSTEM_COOLDOWN) {
/**
 * Builds the identifier of a task history update in the form {@code <taskId>-<STATE_NAME>}.
 *
 * @param taskUpdate the update to identify
 * @return the task id and the task state's {@code name()}, joined by a dash
 */
private String getTaskHistoryUpdateId(SingularityTaskHistoryUpdate taskUpdate) {
  final StringBuilder updateId = new StringBuilder();
  updateId.append(taskUpdate.getTaskId());
  updateId.append("-");
  updateId.append(taskUpdate.getTaskState().name());
  return updateId.toString();
}
/**
 * Converts task history updates into their mail representation, preserving iteration order.
 *
 * @param taskHistory the updates to convert
 * @return one {@code SingularityMailTaskHistoryUpdate} per input update
 */
public List<SingularityMailTaskHistoryUpdate> getJadeTaskHistory(Collection<SingularityTaskHistoryUpdate> taskHistory) {
  final List<SingularityMailTaskHistoryUpdate> mailUpdates = Lists.newArrayListWithCapacity(taskHistory.size());

  for (SingularityTaskHistoryUpdate historyUpdate : taskHistory) {
    mailUpdates.add(toMailTaskHistoryUpdate(historyUpdate));
  }

  return mailUpdates;
}

/**
 * Builds the mail entry for one update: humanized timestamp, capitalized state display name,
 * and the status message (empty string when absent).
 */
private SingularityMailTaskHistoryUpdate toMailTaskHistoryUpdate(SingularityTaskHistoryUpdate historyUpdate) {
  return new SingularityMailTaskHistoryUpdate(
      humanizeTimestamp(historyUpdate.getTimestamp()),
      WordUtils.capitalize(historyUpdate.getTaskState().getDisplayName()),
      historyUpdate.getStatusMessage().or(""));
}
/**
 * Looks up when the given task first reached {@code TASK_RUNNING} according to its history.
 *
 * @param task the task to look up
 * @return the timestamp of the first {@code TASK_RUNNING} update; when the history is missing or
 *         contains no such update, an error is logged and the current time is returned
 */
private long getTaskRunningStartTime(SingularityTaskId task) {
  Optional<SingularityTaskHistory> maybeHistory = taskManager.getTaskHistory(task);

  if (!maybeHistory.isPresent()) {
    LOG.error("Could not find task history for {}", task);
    return System.currentTimeMillis();
  }

  // The first matching update in iteration order is the one that counts.
  for (SingularityTaskHistoryUpdate historyUpdate : maybeHistory.get().getTaskUpdates()) {
    if (historyUpdate.getTaskState().equals(ExtendedTaskState.TASK_RUNNING)) {
      return historyUpdate.getTimestamp();
    }
  }

  LOG.error("Could not find time when task {} reached TASK_RUNNING state", task);
  return System.currentTimeMillis();
}
/**
 * Classifies the deploy failure (if any) for a single task based on its recorded history updates.
 *
 * <p>The last update decides the outcome:
 * <ul>
 *   <li>a successful terminal state yields {@code TASK_EXPECTED_RUNNING_FINISHED};</li>
 *   <li>any other done state yields {@code TASK_FAILED_ON_STARTUP};</li>
 *   <li>if the task is not done and its simplified state is still {@code WAITING},
 *       the result is {@code TASK_NEVER_ENTERED_RUNNING}.</li>
 * </ul>
 *
 * @param taskUpdates history updates keyed by task id
 * @param taskId the task to inspect
 * @return the failure for this task, or absent when none of the cases above apply or when no
 *         updates are recorded for the task
 */
private Optional<SingularityDeployFailure> getNonHealthcheckedTaskFailure(Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates, SingularityTaskId taskId) {
  List<SingularityTaskHistoryUpdate> updates = taskUpdates.get(taskId);

  // Map.get may return null and Iterables.getLast throws on a null or empty list; with no updates
  // there is nothing to classify (getCurrentState would report UNKNOWN), so report no failure.
  if (updates == null || updates.isEmpty()) {
    return Optional.absent();
  }

  SingularityTaskHistoryUpdate lastUpdate = Iterables.getLast(updates);

  if (lastUpdate.getTaskState().isSuccess()) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_EXPECTED_RUNNING_FINISHED, Optional.of(taskId),
        Optional.of(String.format("Task was expected to maintain TASK_RUNNING state but finished. (%s)", lastUpdate.getStatusMessage().or("")))));
  } else if (lastUpdate.getTaskState().isDone()) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_ON_STARTUP, Optional.of(taskId), lastUpdate.getStatusMessage()));
  } else if (SingularityTaskHistoryUpdate.getCurrentState(updates) == SimplifiedTaskState.WAITING) {
    return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_NEVER_ENTERED_RUNNING, Optional.of(taskId),
        Optional.of(String.format("Task never entered running state, last state was %s (%s)", lastUpdate.getTaskState().getDisplayName(), lastUpdate.getStatusMessage().or("")))));
  }

  return Optional.absent();
}
}
/**
 * Reduces a sequence of task history updates to a single {@link SimplifiedTaskState}.
 *
 * <p>Rules, applied while iterating in order:
 * <ul>
 *   <li>any update whose state is done immediately yields {@code DONE};</li>
 *   <li>a {@code TASK_RUNNING} update sets the result to {@code RUNNING};</li>
 *   <li>any other update moves the result from {@code UNKNOWN} to {@code WAITING}, but never
 *       overrides {@code RUNNING}.</li>
 * </ul>
 *
 * @param updates the updates to inspect; may be {@code null}
 * @return {@code UNKNOWN} for {@code null} or empty input, otherwise the state derived above
 */
public static SimplifiedTaskState getCurrentState(Iterable<SingularityTaskHistoryUpdate> updates) {
  if (updates == null) {
    return SimplifiedTaskState.UNKNOWN;
  }

  SimplifiedTaskState current = SimplifiedTaskState.UNKNOWN;

  for (SingularityTaskHistoryUpdate historyUpdate : updates) {
    if (historyUpdate.getTaskState().isDone()) {
      return SimplifiedTaskState.DONE;
    }

    if (historyUpdate.getTaskState() == ExtendedTaskState.TASK_RUNNING) {
      current = SimplifiedTaskState.RUNNING;
      continue;
    }

    // Only the first non-running, non-done update moves UNKNOWN to WAITING.
    if (current == SimplifiedTaskState.UNKNOWN) {
      current = SimplifiedTaskState.WAITING;
    }
  }

  return current;
}