/**
 * Builds the subject line used when reporting on a task's history.
 *
 * @param taskId id of the task; its string form is appended in parentheses
 * @param state task state whose display name is embedded in the subject
 * @param type email type; {@code TASK_SCHEDULED_OVERDUE_TO_FINISH} yields a dedicated "overdue" subject
 * @param history task history updates, passed to {@code didTaskRun} to decide whether the task ever ran
 * @return the formatted subject
 */
public String getSubjectForTaskHistory(SingularityTaskId taskId, ExtendedTaskState state, SingularityEmailType type, Collection<SingularityTaskHistoryUpdate> history) {
  if (type == SingularityEmailType.TASK_SCHEDULED_OVERDUE_TO_FINISH) {
    return String.format("Task is overdue to finish (%s)", taskId.toString());
  }

  // Both remaining subjects take the same arguments; only the template differs.
  final String template = didTaskRun(history)
      ? "Task %s (%s)"
      : "Task never started and was %s (%s)";
  return String.format(template, state.getDisplayName(), taskId.toString());
}
/**
 * Runs the asynchronous healthcheck for {@code task}.
 *
 * <p>Any throwable escaping the attempt is logged, reported to the exception notifier together with
 * the task id, and followed by {@code reEnqueueOrAbort(task, inStartup)}.
 */
@Override
public void run() {
  try {
    asyncHealthcheck(task);
  } catch (Throwable failure) {
    LOG.error("Uncaught throwable in async healthcheck", failure);

    final String notification = String.format("Uncaught throwable in async healthcheck (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    reEnqueueOrAbort(task, inStartup);
  }
}
/**
 * Re-enqueues the task check and aborts if that fails.
 *
 * <p>A throwable raised by {@code reEnqueueCheck} is logged, sent to the exception notifier with the
 * task id attached, and then handed to {@code abort.abort} with {@code UNRECOVERABLE_ERROR}.
 *
 * @param task task whose check should be re-enqueued
 * @param healthchecker healthchecker forwarded to {@code reEnqueueCheck}
 */
private void reEnqueueCheckOrAbort(SingularityTask task, SingularityHealthchecker healthchecker) {
  try {
    reEnqueueCheck(task, healthchecker);
  } catch (Throwable failure) {
    LOG.error("Uncaught throwable re-enqueuing task check for task {}, aborting", task, failure);

    final String notification = String.format("Error in task check (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    abort.abort(AbortReason.UNRECOVERABLE_ERROR, Optional.of(failure));
  }
}
/**
 * Resolves the state of a task from its id.
 *
 * <p>The task manager is consulted first. The history manager is queried only when the task manager
 * has no history for the id, so the fallback lookup is not issued (and cannot fail) when it is not
 * needed.
 *
 * @param singularityTaskId id of the task to look up
 * @return the state built from the task history, or absent when no history was found or the history
 *     has no last task update
 */
private Optional<SingularityTaskState> getTaskStateFromId(SingularityTaskId singularityTaskId) {
  Optional<SingularityTaskHistory> maybeTaskHistory = taskManager.getTaskHistory(singularityTaskId);
  if (!maybeTaskHistory.isPresent()) {
    // Optional.or(Optional) evaluates its argument eagerly, so the fallback is fetched explicitly
    // and only when the primary lookup came back empty.
    maybeTaskHistory = historyManager.getTaskHistory(singularityTaskId.toString());
  }

  if (maybeTaskHistory.isPresent() && maybeTaskHistory.get().getLastTaskUpdate().isPresent()) {
    return Optional.of(SingularityTaskState.fromTaskHistory(maybeTaskHistory.get()));
  }
  return Optional.absent();
}
} // closes an enclosing type whose header is outside this excerpt
/**
 * Re-enqueues a healthcheck for the task and aborts if that fails.
 *
 * <p>A throwable raised by {@code enqueueHealthcheck} is logged, sent to the exception notifier with
 * the task id attached, and then handed to {@code abort.abort} with {@code UNRECOVERABLE_ERROR}.
 *
 * @param task task whose healthcheck should be re-enqueued
 * @param inStartup forwarded unchanged to {@code enqueueHealthcheck}
 */
public void reEnqueueOrAbort(SingularityTask task, boolean inStartup) {
  try {
    enqueueHealthcheck(task, true, inStartup, false);
  } catch (Throwable failure) {
    LOG.error("Caught throwable while re-enqueuing health check for {}, aborting", task.getTaskId(), failure);

    final String notification = String.format("Caught throwable while re-enqueuing health check (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    abort.abort(SingularityAbort.AbortReason.UNRECOVERABLE_ERROR, Optional.of(failure));
  }
}
/**
 * Expands an S3 key format for a specific task.
 *
 * <p>Delegates to the request/deploy overload of {@code getS3KeyFormat} first, then substitutes the
 * {@code %host} and {@code %taskId} placeholders with the task's sanitized host and string id.
 *
 * @param s3KeyFormat key format containing placeholders
 * @param taskId task supplying the request id, deploy id, sanitized host and task id values
 * @param loggingTag forwarded to the request/deploy overload
 * @param group forwarded to the request/deploy overload
 * @return the format with the task-level placeholders replaced
 */
public static String getS3KeyFormat(String s3KeyFormat, SingularityTaskId taskId, Optional<String> loggingTag, String group) {
  final String requestLevelFormat = getS3KeyFormat(s3KeyFormat, taskId.getRequestId(), taskId.getDeployId(), loggingTag, group);
  return requestLevelFormat
      .replace("%host", taskId.getSanitizedHost())
      .replace("%taskId", taskId.toString());
}
/**
 * Updates slave and rack state after a task has finished.
 *
 * <p>If the slave (or, afterwards, its rack) is {@code DECOMMISSIONING} and the corresponding
 * {@code hasTaskLeftOn...} check returns false, it is moved to {@code DECOMMISSIONED}, reusing the
 * message and user of its current state. A missing slave or rack is logged as a warning, reported to
 * the exception notifier, and ends processing.
 *
 * @param taskId id of the finished task
 * @param slaveId id of the slave the task was on
 * @param leaderCache forwarded to the {@code hasTaskLeftOnSlave} / {@code hasTaskLeftOnRack} checks
 */
void checkStateAfterFinishedTask(SingularityTaskId taskId, String slaveId, SingularityLeaderCache leaderCache) {
  final Optional<SingularitySlave> maybeSlave = slaveManager.getSlave(slaveId);
  if (!maybeSlave.isPresent()) {
    final String missingSlave = String.format("Couldn't find slave with id %s for task %s", slaveId, taskId);
    LOG.warn(missingSlave);
    exceptionNotifier.notify(missingSlave, ImmutableMap.of("slaveId", slaveId, "taskId", taskId.toString()));
    return;
  }

  final SingularitySlave slave = maybeSlave.get();
  // The state comparison short-circuits, so the remaining-task check only runs while decommissioning.
  if (slave.getCurrentState().getState() == MachineState.DECOMMISSIONING
      && !hasTaskLeftOnSlave(taskId, slaveId, leaderCache)) {
    slaveManager.changeState(slave, MachineState.DECOMMISSIONED, slave.getCurrentState().getMessage(), slave.getCurrentState().getUser());
  }

  final Optional<SingularityRack> maybeRack = rackManager.getObject(slave.getRackId());
  if (!maybeRack.isPresent()) {
    final String missingRack = String.format("Couldn't find rack with id %s for task %s", slave.getRackId(), taskId);
    LOG.warn(missingRack);
    exceptionNotifier.notify(missingRack, ImmutableMap.of("rackId", slave.getRackId(), "taskId", taskId.toString()));
    return;
  }

  final SingularityRack rack = maybeRack.get();
  if (rack.getCurrentState().getState() == MachineState.DECOMMISSIONING
      && !hasTaskLeftOnRack(taskId, leaderCache)) {
    rackManager.changeState(rack, MachineState.DECOMMISSIONED, rack.getCurrentState().getMessage(), rack.getCurrentState().getUser());
  }
}
// NOTE(review): this line looks like two non-adjacent excerpts of a larger method joined together
// (statements follow a return, and the exception variable `e` is not declared here) — confirm against the full file.
// First part: warns and notifies that no next run date was found for the task and schedule expression, then returns absent.
// Second part: notifies that the schedule could not be parsed, attaching taskId, scheduleExpression and scheduleType, then returns absent.
String msg = String.format("No next run date found for %s (%s)", taskId, scheduleExpression); LOG.warn(msg); exceptionNotifier.notify(msg, ImmutableMap.of("taskId", taskId.toString())); return Optional.absent(); exceptionNotifier.notify(String.format("Unable to parse schedule (%s)", e.getMessage()), e, ImmutableMap.of("taskId", taskId.toString(), "scheduleExpression", scheduleExpression, "scheduleType", request.getScheduleTypeSafe().toString())); return Optional.absent();
/**
 * Creates the runnable that performs a check for the given task.
 *
 * <p>When run, it looks up the task's request and skips the check if the request is missing.
 * Otherwise it calls {@code checkTask}: a true result re-enqueues the check, a false result removes
 * the task id from {@code taskIdToCheck}. Any throwable is logged, reported to the exception
 * notifier, and followed by {@code reEnqueueCheckOrAbort}.
 *
 * @param task task to check
 * @param healthchecker healthchecker forwarded to the check and re-enqueue calls
 * @return the runnable performing the check
 */
private Runnable getTaskCheck(final SingularityTask task, final SingularityHealthchecker healthchecker) {
  return () -> {
    try {
      final Optional<SingularityRequestWithState> maybeRequest = requestManager.getRequest(task.getTaskId().getRequestId());

      if (maybeRequest.isPresent()) {
        if (checkTask(task, maybeRequest, healthchecker)) {
          reEnqueueCheck(task, healthchecker);
        } else {
          taskIdToCheck.remove(task.getTaskId().getId());
        }
      } else {
        LOG.info("Ignoring task check for {}, missing request {}", task.getTaskId(), task.getTaskId().getRequestId());
      }
    } catch (Throwable failure) {
      LOG.error("Uncaught throwable in task check for task {}, re-enqueing", task, failure);

      final String notification = String.format("Error in task check (%s)", failure.getMessage());
      exceptionNotifier.notify(notification, failure, ImmutableMap.of("taskId", task.getTaskId().toString()));

      reEnqueueCheckOrAbort(task, healthchecker);
    }
  };
}
/**
 * Kills a task through the scheduler client and saves a killed-task record.
 *
 * <p>When the cleanup type resolved from the request and task cleanup types is
 * {@code USER_REQUESTED_DESTROY} or {@code REQUEST_DELETING}, a destroy framework message is sent
 * first, but only if the task data is available and its deploy defines a custom executor command.
 * Missing task data is reported to the exception notifier and logged as an error.
 *
 * @param taskId id of the task to kill
 * @param requestCleanupType request-level cleanup type, if any
 * @param taskCleanupType task-level cleanup type, if any
 * @param originalTimestamp original timestamp stored in the record; the current time is used when absent
 * @param retries previous retry count; the record stores this value plus one (zero when absent)
 * @param user user passed along in the destroy framework message
 * @throws IllegalStateException if {@code isRunning()} is false
 */
public void killAndRecord(SingularityTaskId taskId, Optional<RequestCleanupType> requestCleanupType, Optional<TaskCleanupType> taskCleanupType, Optional<Long> originalTimestamp, Optional<Integer> retries, Optional<String> user) {
  Preconditions.checkState(isRunning());

  final Optional<TaskCleanupType> effectiveCleanupType = getTaskCleanupType(requestCleanupType, taskCleanupType);
  final boolean destroyRequested = effectiveCleanupType.isPresent()
      && (effectiveCleanupType.get() == TaskCleanupType.USER_REQUESTED_DESTROY
          || effectiveCleanupType.get() == TaskCleanupType.REQUEST_DELETING);

  if (destroyRequested) {
    final Optional<SingularityTask> maybeTask = taskManager.getTask(taskId);

    if (!maybeTask.isPresent()) {
      final String message = String.format("No task data available to build kill task framework message for task %s", taskId);
      exceptionNotifier.notify(message);
      LOG.error(message);
    } else if (maybeTask.get().getTaskRequest().getDeploy().getCustomExecutorCmd().isPresent()) {
      final byte[] messageBytes = transcoder.toBytes(new SingularityTaskDestroyFrameworkMessage(taskId, user));
      mesosSchedulerClient.frameworkMessage(
          MesosProtosUtils.toExecutorId(maybeTask.get().getMesosTask().getExecutor().getExecutorId()),
          MesosProtosUtils.toAgentId(maybeTask.get().getMesosTask().getAgentId()),
          messageBytes
      );
    } else {
      LOG.warn("Not using custom executor, will not send framework message to destroy task");
    }
  }

  // The kill is issued regardless of whether a destroy message was sent.
  mesosSchedulerClient.kill(TaskID.newBuilder().setValue(taskId.toString()).build());

  taskManager.saveKilledRecord(
      new SingularityKilledTaskIdRecord(
          taskId,
          System.currentTimeMillis(),
          originalTimestamp.or(System.currentTimeMillis()),
          requestCleanupType,
          taskCleanupType,
          retries.or(-1) + 1));
}
/**
 * Saves a healthcheck result and decides what happens next for the task.
 *
 * <ul>
 *   <li>Result not failed: the healthcheck is marked finished and a new task check runs immediately.</li>
 *   <li>Result failed and the task is no longer active: nothing further is done.</li>
 *   <li>Result failed with a status code contained in {@code failureStatusCodes}: the healthcheck is
 *       marked finished and a new task check runs immediately.</li>
 *   <li>Any other failure: another healthcheck is enqueued.</li>
 * </ul>
 *
 * <p>A throwable raised while doing any of this is logged, reported to the exception notifier, and
 * followed by {@code healthchecker.reEnqueueOrAbort}.
 *
 * @param statusCode HTTP status code of the healthcheck response, if any
 * @param responseBody response body, if any
 * @param errorMessage error message, if any
 * @param throwable throwable raised by the healthcheck, if any; a {@link ConnectException} marks the
 *     result as "in startup"
 */
public void saveResult(Optional<Integer> statusCode, Optional<String> responseBody, Optional<String> errorMessage, Optional<Throwable> throwable) {
  final boolean inStartup = throwable.isPresent() && throwable.get() instanceof ConnectException;

  try {
    final SingularityTaskHealthcheckResult result = new SingularityTaskHealthcheckResult(
        statusCode,
        Optional.of(System.currentTimeMillis() - startTime),
        startTime,
        responseBody,
        errorMessage,
        task.getTaskId(),
        Optional.of(inStartup));

    LOG.trace("Saving healthcheck result {}", result);
    taskManager.saveHealthcheckResult(result);

    if (!result.isFailed()) {
      healthchecker.markHealthcheckFinished(task.getTaskId().getId());
      newTaskChecker.runNewTaskCheckImmediately(task, healthchecker);
      return;
    }

    if (!taskManager.isActiveTask(task.getTaskId().getId())) {
      LOG.trace("Task {} is not active, not re-enqueueing healthcheck", task.getTaskId());
      return;
    }

    final boolean hasFailureStatusCode = statusCode.isPresent() && failureStatusCodes.contains(statusCode.get());
    if (!hasFailureStatusCode) {
      healthchecker.enqueueHealthcheck(task, true, inStartup, false);
      return;
    }

    LOG.debug("Failed status code present for task {} ({})", task.getTaskId(), statusCode.get());
    healthchecker.markHealthcheckFinished(task.getTaskId().getId());
    newTaskChecker.runNewTaskCheckImmediately(task, healthchecker);
  } catch (Throwable failure) {
    LOG.error("Caught throwable while saving health check result for {}, will re-enqueue", task.getTaskId(), failure);
    exceptionNotifier.notify(String.format("Error saving healthcheck (%s)", failure.getMessage()), failure, ImmutableMap.of("taskId", task.getTaskId().toString()));
    healthchecker.reEnqueueOrAbort(task, inStartup);
  }
}
/**
 * Issues an asynchronous HTTP GET healthcheck for the task.
 *
 * <p>A failure is saved through the handler when no healthcheck URI is available or when preparing
 * the request throws. The request timeout is the deploy healthcheck's response timeout when one is
 * set, otherwise the configured healthcheck timeout.
 *
 * @param task task to healthcheck
 */
private void asyncHealthcheck(final SingularityTask task) {
  final SingularityHealthcheckAsyncHandler handler = new SingularityHealthcheckAsyncHandler(exceptionNotifier, configuration, this, newTaskChecker, taskManager, task);

  final Optional<String> uri = getHealthcheckUri(task);
  if (!uri.isPresent()) {
    saveFailure(handler, "Invalid healthcheck uri or ports not present");
    return;
  }

  final Integer timeoutSeconds;
  if (task.getTaskRequest().getDeploy().getHealthcheck().isPresent()) {
    timeoutSeconds = task.getTaskRequest().getDeploy().getHealthcheck().get().getResponseTimeoutSeconds().or(configuration.getHealthcheckTimeoutSeconds());
  } else {
    timeoutSeconds = configuration.getHealthcheckTimeoutSeconds();
  }

  try {
    final PerRequestConfig requestConfig = new PerRequestConfig();
    requestConfig.setRequestTimeoutInMs((int) TimeUnit.SECONDS.toMillis(timeoutSeconds));

    final RequestBuilder request = new RequestBuilder("GET");
    request.setFollowRedirects(true);
    request.setUrl(uri.get());
    request.setPerRequestConfig(requestConfig);

    LOG.trace("Issuing a healthcheck ({}) for task {} with timeout {}s", uri.get(), task.getTaskId(), timeoutSeconds);

    http.prepareRequest(request.build()).execute(handler);
  } catch (Throwable failure) {
    LOG.debug("Exception while preparing healthcheck ({}) for task ({})", uri, task.getTaskId(), failure);
    exceptionNotifier.notify(String.format("Error preparing healthcheck (%s)", failure.getMessage()), failure, ImmutableMap.of("taskId", task.getTaskId().toString()));
    saveFailure(handler, String.format("Healthcheck failed due to exception: %s", failure.getMessage()));
  }
}
// Continuation of a builder chain whose receiver is outside this excerpt: sets the task id to a
// TaskID builder whose value is the string form of taskId.
.setTaskId(TaskID.newBuilder().setValue(taskId.toString()));
/**
 * Builds the subject line used when reporting on a task's history.
 *
 * @param taskId id of the task; its string form is appended in parentheses
 * @param state task state whose display name is embedded in the subject
 * @param type email type; {@code TASK_SCHEDULED_OVERDUE_TO_FINISH} yields a dedicated "overdue" subject
 * @param history task history updates, passed to {@code didTaskRun} to decide whether the task ever ran
 * @return the formatted subject
 */
public String getSubjectForTaskHistory(SingularityTaskId taskId, ExtendedTaskState state, SingularityEmailType type, Collection<SingularityTaskHistoryUpdate> history) {
  if (type == SingularityEmailType.TASK_SCHEDULED_OVERDUE_TO_FINISH) {
    return String.format("Task is overdue to finish (%s)", taskId.toString());
  }

  // Both remaining subjects take the same arguments; only the template differs.
  final String template = didTaskRun(history)
      ? "Task %s (%s)"
      : "Task never started and was %s (%s)";
  return String.format(template, state.getDisplayName(), taskId.toString());
}
/**
 * Builds a {@link SingularityTask} for the given request and deploy and saves its pending task.
 *
 * <p>The task is backed by an offer from {@code createOffer(125, 1024, 2048, ...)}. When
 * {@code separateHosts} is set, the offer's slave and host names are derived from the instance number.
 * The task id always uses the rack id {@code "rack1"}.
 *
 * @param request request the task belongs to
 * @param deploy deploy the task belongs to
 * @param launchTime launch time used for the pending task and the task id
 * @param instanceNo instance number of the task
 * @param separateHosts whether to create an offer with per-instance slave and host names
 * @param runId run id forwarded to {@code buildPendingTask}
 * @return the constructed task (only the pending task is saved through the task manager)
 */
protected SingularityTask prepTask(SingularityRequest request, SingularityDeploy deploy, long launchTime, int instanceNo, boolean separateHosts, Optional<String> runId) {
  final SingularityPendingTask pendingTask = buildPendingTask(request, deploy, launchTime, instanceNo, runId);
  final SingularityTaskRequest taskRequest = new SingularityTaskRequest(request, deploy, pendingTask);

  final Offer offer = separateHosts
      ? createOffer(125, 1024, 2048, String.format("slave%s", instanceNo), String.format("host%s", instanceNo))
      : createOffer(125, 1024, 2048);

  final SingularityTaskId taskId = new SingularityTaskId(request.getId(), deploy.getId(), launchTime, instanceNo, offer.getHostname(), "rack1");

  final TaskInfo taskInfo = TaskInfo.newBuilder()
      .setAgentId(offer.getAgentId())
      .setExecutor(ExecutorInfo.newBuilder().setExecutorId(ExecutorID.newBuilder().setValue("executorID")))
      .setTaskId(TaskID.newBuilder().setValue(taskId.toString()).build())
      .setName("name")
      .build();

  final SingularityTask task = new SingularityTask(
      taskRequest,
      taskId,
      Collections.singletonList(mesosProtosUtils.offerFromProtos(offer)),
      mesosProtosUtils.taskFromProtos(taskInfo),
      Optional.of("rack1"));

  taskManager.savePendingTask(pendingTask);

  return task;
}
/**
 * Resolves the state of a task from its id.
 *
 * <p>The task manager is consulted first. The history manager is queried only when the task manager
 * has no history for the id, so the fallback lookup is not issued (and cannot fail) when it is not
 * needed.
 *
 * @param singularityTaskId id of the task to look up
 * @return the state built from the task history, or absent when no history was found or the history
 *     has no last task update
 */
private Optional<SingularityTaskState> getTaskStateFromId(SingularityTaskId singularityTaskId) {
  Optional<SingularityTaskHistory> maybeTaskHistory = taskManager.getTaskHistory(singularityTaskId);
  if (!maybeTaskHistory.isPresent()) {
    // Optional.or(Optional) evaluates its argument eagerly, so the fallback is fetched explicitly
    // and only when the primary lookup came back empty.
    maybeTaskHistory = historyManager.getTaskHistory(singularityTaskId.toString());
  }

  if (maybeTaskHistory.isPresent() && maybeTaskHistory.get().getLastTaskUpdate().isPresent()) {
    return Optional.of(SingularityTaskState.fromTaskHistory(maybeTaskHistory.get()));
  }
  return Optional.absent();
}
} // closes an enclosing type whose header is outside this excerpt
/**
 * Runs the asynchronous healthcheck for {@code task}.
 *
 * <p>Any throwable escaping the attempt is logged, reported to the exception notifier together with
 * the task id, and followed by {@code reEnqueueOrAbort(task, inStartup)}.
 */
@Override
public void run() {
  try {
    asyncHealthcheck(task);
  } catch (Throwable failure) {
    LOG.error("Uncaught throwable in async healthcheck", failure);

    final String notification = String.format("Uncaught throwable in async healthcheck (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    reEnqueueOrAbort(task, inStartup);
  }
}
/**
 * Re-enqueues the task check and aborts if that fails.
 *
 * <p>A throwable raised by {@code reEnqueueCheck} is logged, sent to the exception notifier with the
 * task id attached, and then handed to {@code abort.abort} with {@code UNRECOVERABLE_ERROR}.
 *
 * @param task task whose check should be re-enqueued
 * @param healthchecker healthchecker forwarded to {@code reEnqueueCheck}
 */
private void reEnqueueCheckOrAbort(SingularityTask task, SingularityHealthchecker healthchecker) {
  try {
    reEnqueueCheck(task, healthchecker);
  } catch (Throwable failure) {
    LOG.error("Uncaught throwable re-enqueuing task check for task {}, aborting", task, failure);

    final String notification = String.format("Error in task check (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    abort.abort(AbortReason.UNRECOVERABLE_ERROR, Optional.of(failure));
  }
}
/**
 * Re-enqueues a healthcheck for the task and aborts if that fails.
 *
 * <p>A throwable raised by {@code enqueueHealthcheck} is logged, sent to the exception notifier with
 * the task id attached, and then handed to {@code abort.abort} with {@code UNRECOVERABLE_ERROR}.
 *
 * @param task task whose healthcheck should be re-enqueued
 * @param inStartup forwarded unchanged to {@code enqueueHealthcheck}
 */
public void reEnqueueOrAbort(SingularityTask task, boolean inStartup) {
  try {
    enqueueHealthcheck(task, true, inStartup, false);
  } catch (Throwable failure) {
    LOG.error("Caught throwable while re-enqueuing health check for {}, aborting", task.getTaskId(), failure);

    final String notification = String.format("Caught throwable while re-enqueuing health check (%s)", failure.getMessage());
    final ImmutableMap<String, String> metadata = ImmutableMap.of("taskId", task.getTaskId().toString());
    exceptionNotifier.notify(notification, failure, metadata);

    abort.abort(SingularityAbort.AbortReason.UNRECOVERABLE_ERROR, Optional.of(failure));
  }
}
/**
 * Creates the runnable that performs a check for the given task.
 *
 * <p>When run, it looks up the task's request and skips the check if the request is missing.
 * Otherwise it calls {@code checkTask}: a true result re-enqueues the check, a false result removes
 * the task id from {@code taskIdToCheck}. Any throwable is logged, reported to the exception
 * notifier, and followed by {@code reEnqueueCheckOrAbort}.
 *
 * @param task task to check
 * @param healthchecker healthchecker forwarded to the check and re-enqueue calls
 * @return the runnable performing the check
 */
private Runnable getTaskCheck(final SingularityTask task, final SingularityHealthchecker healthchecker) {
  return () -> {
    try {
      final Optional<SingularityRequestWithState> maybeRequest = requestManager.getRequest(task.getTaskId().getRequestId());

      if (maybeRequest.isPresent()) {
        if (checkTask(task, maybeRequest, healthchecker)) {
          reEnqueueCheck(task, healthchecker);
        } else {
          taskIdToCheck.remove(task.getTaskId().getId());
        }
      } else {
        LOG.info("Ignoring task check for {}, missing request {}", task.getTaskId(), task.getTaskId().getRequestId());
      }
    } catch (Throwable failure) {
      LOG.error("Uncaught throwable in task check for task {}, re-enqueing", task, failure);

      final String notification = String.format("Error in task check (%s)", failure.getMessage());
      exceptionNotifier.notify(notification, failure, ImmutableMap.of("taskId", task.getTaskId().toString()));

      reEnqueueCheckOrAbort(task, healthchecker);
    }
  };
}