/**
 * Expands the task placeholders found in {@code string}.
 *
 * <p>The placeholders are substituted in this fixed order:
 * {@code ${TASK_REQUEST_ID}}, {@code ${TASK_DEPLOY_ID}}, {@code ${TASK_STARTED_AT}},
 * {@code ${TASK_INSTANCE_NO}}, {@code ${TASK_HOST}}, {@code ${TASK_RACK_ID}} and
 * finally {@code ${TASK_ID}}.
 *
 * @param string the template text; a null or empty value is returned untouched
 * @param offerHolder supplies the hostname and rack id
 * @param taskId supplies the request id, deploy id, start time, instance number and full id
 * @return the text with every placeholder replaced, or the input itself when it is null or empty
 */
private String fillInTaskIdValues(String string, SingularityOfferHolder offerHolder, SingularityTaskId taskId) {
  if (Strings.isNullOrEmpty(string)) {
    return string;
  }

  // Each step works on the output of the previous one, so the order is significant.
  String filled = string;
  filled = filled.replace("${TASK_REQUEST_ID}", taskId.getRequestId());
  filled = filled.replace("${TASK_DEPLOY_ID}", taskId.getDeployId());
  filled = filled.replace("${TASK_STARTED_AT}", Long.toString(taskId.getStartedAt()));
  filled = filled.replace("${TASK_INSTANCE_NO}", Integer.toString(taskId.getInstanceNo()));
  filled = filled.replace("${TASK_HOST}", offerHolder.getHostname());
  filled = filled.replace("${TASK_RACK_ID}", offerHolder.getRackId());
  filled = filled.replace("${TASK_ID}", taskId.getId());
  return filled;
}
/**
 * Decides whether a request should enter cooldown after one of its tasks failed.
 *
 * <p>The answer is always {@code false} when the request is not in the
 * {@code ACTIVE} state, when it is not an always-running request, or when
 * either cooldown setting ({@code cooldownAfterFailures},
 * {@code cooldownExpiresAfterMinutes}) is below 1. Otherwise the decision is
 * delegated to {@code hasFailedTooManyTimes} for the failing task's instance
 * number and the given failure timestamp.
 *
 * @param request the request the failed task belongs to
 * @param taskId id of the failed task; only its instance number is used here
 * @param requestState current state of the request
 * @param deployStatistics statistics handed through to {@code hasFailedTooManyTimes}
 * @param failureTimestamp timestamp of the failure, handed through to {@code hasFailedTooManyTimes}
 * @return {@code true} when {@code hasFailedTooManyTimes} reports too many failures
 */
public boolean shouldEnterCooldown(SingularityRequest request, SingularityTaskId taskId, RequestState requestState, SingularityDeployStatistics deployStatistics, long failureTimestamp) {
  if (requestState != RequestState.ACTIVE || !request.isAlwaysRunning()) {
    return false;
  }

  // A value below 1 for either setting turns the cooldown check off.
  if (configuration.getCooldownAfterFailures() < 1 || configuration.getCooldownExpiresAfterMinutes() < 1) {
    return false;
  }

  final boolean failedTooManyTimes = hasFailedTooManyTimes(request, deployStatistics, Optional.of(taskId.getInstanceNo()), Optional.of(failureTimestamp));

  if (failedTooManyTimes) {
    // Log the failure threshold and the time window separately; the window is
    // the configured cooldown expiry in minutes, not the failure count again.
    LOG.trace("Request {} has failed at least {} times in {} minutes", request.getId(), configuration.getCooldownAfterFailures(), configuration.getCooldownExpiresAfterMinutes());
  }

  return failedTooManyTimes;
}
if (usedIds.contains(taskId.getInstanceNo()) || taskId.getInstanceNo() > expectedInstances) { remainingActiveTasks.remove(taskId); LOG.info("Cleaning up task {} due to new request {} - scaling down to {} instances", taskId.getId(), request.getId(), request.getInstancesSafe()); taskManager.createTaskCleanup(new SingularityTaskCleanup(pendingRequest.getUser(), TaskCleanupType.SCALING_DOWN, now, taskId, Optional.absent(), Optional.absent(), Optional.absent())); usedIds.add(taskId.getInstanceNo());
deploy.getHealthcheck().get().getMaxRetries().or(configuration.getHealthcheckMaxRetries()) : configuration.getHealthcheckMaxRetries(); if (healthcheckMaxRetries.isPresent() && taskManager.getNumNonstartupHealthchecks(taskId) > healthcheckMaxRetries.get()) { String message = String.format("Instance %s failed %s healthchecks, the max for the deploy.", taskId.getInstanceNo(), healthcheckMaxRetries.get() + 1); if (healthcheckResult.getStatusCode().isPresent()) { message = String.format("%s Last check returned with status code %s", message, healthcheckResult.getStatusCode().get()); if (healthcheckResult.isStartup() && deploy.getHealthcheck().isPresent() && durationSinceRunning > deploy.getHealthcheck().get().getStartupTimeoutSeconds() .or(configuration.getStartupTimeoutSeconds())) { String message = String.format("Instance %s has not responded to healthchecks after running for %s", taskId.getInstanceNo(), JavaUtils.durationFromMillis(durationSinceRunning)); return Optional.of(new SingularityDeployFailure(SingularityDeployFailureReason.TASK_FAILED_HEALTH_CHECKS, Optional.of(taskId), Optional.of(message))); String message = String.format("Instance %s has been running for %s and has yet to pass healthchecks.", taskId.getInstanceNo(), JavaUtils.durationFromMillis(durationSinceRunning)); if (healthcheckResult.getStatusCode().isPresent()) { message = String.format("%s Last check returned with status code %s", message, healthcheckResult.getStatusCode().get());
if (task.getDeploy().getTaskEnv().isPresent() && task.getDeploy().getTaskEnv().get().containsKey(taskId.getInstanceNo()) && !task.getDeploy().getTaskEnv().get().get(taskId.getInstanceNo()).isEmpty()) { for (Entry<String, String> envEntry : task.getDeploy().getTaskEnv().get().get(taskId.getInstanceNo()).entrySet()) { envVars.put(envEntry.getKey(), fillInTaskIdValues(envEntry.getValue(), offerHolder, taskId));
/**
 * Creates a logger for a single task, backed by its own {@code LoggerContext}.
 *
 * <p>The context is named after {@code executorPid} and its root logger is
 * prepared by {@code baseLogging}. When {@code taskId} parses as a
 * {@code SingularityTaskId}, the logger name is
 * {@code requestId.deployId.startedAt.instanceNo.executorId}; otherwise the
 * raw {@code taskId} is used as the name. The returned logger has its
 * previous appenders detached and writes to the root log path (when one is
 * configured) and to {@code taskLogFile}.
 *
 * @param taskId the task id, which may or may not be a valid Singularity task id
 * @param executorId executor id appended to the logger name for Singularity task ids
 * @param executorPid used as the name of the new logger context
 * @param taskLogFile path of the task's own log file
 * @return the configured task logger; its context has been started
 */
public Logger buildTaskLogger(String taskId, String executorId, String executorPid, String taskLogFile) {
  LOG.info("Building a task logger for {} pointing to {}", taskId, taskLogFile);

  LoggerContext context = new LoggerContext();
  context.setName(executorPid);

  baseLogging.prepareRootLogger(context);

  // Fall back to the raw task id when it is not a Singularity task id.
  String loggerId = taskId;

  try {
    SingularityTaskId singularityTaskId = SingularityTaskId.valueOf(taskId);

    loggerId = String.format("%s.%s.%s.%s.%s", singularityTaskId.getRequestId(), singularityTaskId.getDeployId(), singularityTaskId.getStartedAt(),
        singularityTaskId.getInstanceNo(), executorId);
  } catch (InvalidSingularityTaskIdException e) {
    // Expected for foreign task ids. Use the logger's {} placeholder so the id
    // is actually interpolated (a printf-style %s is emitted literally).
    LOG.info("Handling non-SingularityTaskId {}", taskId);
  }

  Logger taskLogger = context.getLogger(loggerId);
  taskLogger.detachAndStopAllAppenders();

  if (baseLogging.getRootLogPath().isPresent()) {
    taskLogger.addAppender(baseLogging.buildFileAppender(context, baseLogging.getRootLogPath().get()));
  }

  taskLogger.addAppender(baseLogging.buildFileAppender(context, taskLogFile));

  context.start();

  return taskLogger;
}
// Add the matching task's instance number to inuseInstanceNumbers.
inuseInstanceNumbers.add(matchingTaskId.getInstanceNo());
List<SingularityTaskId> remainingActiveTasks = new ArrayList<>(); taskManager.getActiveTaskIdsForDeploy(request.getId(), maybeDeployState.get().getActiveDeploy().get().getDeployId()).forEach((taskId) -> { if (taskId.getInstanceNo() > newInstances) { taskManager.createTaskCleanup(new SingularityTaskCleanup( Optional.of(user.getId()),
// Look up the timestamps stored under this task's instance number.
// NOTE(review): the lookup result may be null/absent for an unknown instance — confirm the container type.
final List<Long> sequentialFailureTimestamps = instanceSequentialFailureTimestamps.get(taskId.getInstanceNo());
// Substitute the literal "%i" in the Docker volume name with this task's instance number.
// The name is unwrapped with get(), so this presumably runs only when a name is present — verify against the caller.
dockerVolumeBuilder.setName(dockerVolume.getName().get().replace("%i", Integer.toString(taskId.getInstanceNo())));
/**
 * Scales a five-instance request down to two and checks that exactly three
 * tasks are killed, each with an instance number above 2.
 */
@Test
public void testScaleDownTakesHighestInstances() {
  initRequest();
  initFirstDeploy();

  // Bring up five instances.
  saveAndSchedule(request.toBuilder().setInstances(Optional.of(5)));
  resourceOffers();
  Assert.assertEquals(5, taskManager.getActiveTaskIds().size());

  // Ask for two instances; every other scale option stays absent.
  final SingularityScaleRequest scaleToTwo = new SingularityScaleRequest(
      Optional.of(2),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent());
  requestResource.scale(requestId, scaleToTwo, singularityUser);

  resourceOffers();
  cleaner.drainCleanupQueue();

  Assert.assertEquals(3, taskManager.getKilledTaskIdRecords().size());

  for (SingularityKilledTaskIdRecord killedRecord : taskManager.getKilledTaskIdRecords()) {
    final int killedInstanceNo = killedRecord.getTaskId().getInstanceNo();
    Assert.assertTrue(killedInstanceNo > 2);

    scheduler.drainPendingQueue();
  }
}
@Test public void testScaleDownTakesHighestInstancesWithPendingTask() { initRequest(); initFirstDeploy(); saveAndSchedule(request.toBuilder().setInstances(Optional.of(5))); resourceOffers(); Assert.assertEquals(5, taskManager.getActiveTaskIds().size()); SingularityTaskId instance2 = null; for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) { if (taskId.getInstanceNo() == 2) { instance2 = taskId; } } statusUpdate(taskManager.getTask(instance2).get(), TaskState.TASK_KILLED); killKilledTasks(); scheduler.drainPendingQueue(); requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), singularityUser); scheduler.drainPendingQueue(); cleaner.drainCleanupQueue(); // instances 4 and 5 should get killed Assert.assertEquals(2, taskManager.getKilledTaskIdRecords().size()); killKilledTasks(); resourceOffers(); // instances 1,2,3 should be active Assert.assertEquals(3, taskManager.getActiveTaskIds().size()); for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) { Assert.assertTrue(taskId.getInstanceNo() < 4); } }
/**
 * Asserts that two task ids are equal as objects and that they also agree on
 * each component compared below.
 *
 * @param one the first task id (passed as the expected value to each assertion)
 * @param two the second task id (passed as the actual value to each assertion)
 */
private void assertEquals(SingularityTaskId one, SingularityTaskId two) {
  // Whole-object equality first.
  Assert.assertEquals(one, two);

  // Identity of the request and deploy.
  Assert.assertEquals(one.getDeployId(), two.getDeployId());
  Assert.assertEquals(one.getRequestId(), two.getRequestId());

  // Placement: sanitized host and rack.
  Assert.assertEquals(one.getSanitizedHost(), two.getSanitizedHost());
  Assert.assertEquals(one.getSanitizedRackId(), two.getSanitizedRackId());

  // Start time and instance number.
  Assert.assertEquals(one.getStartedAt(), two.getStartedAt());
  Assert.assertEquals(one.getInstanceNo(), two.getInstanceNo());
}
if (taskId.getInstanceNo() == 2) { secondNewTaskId = taskId;
/**
 * Expands the task placeholders found in {@code string}.
 *
 * <p>The placeholders are substituted in this fixed order:
 * {@code ${TASK_REQUEST_ID}}, {@code ${TASK_DEPLOY_ID}}, {@code ${TASK_STARTED_AT}},
 * {@code ${TASK_INSTANCE_NO}}, {@code ${TASK_HOST}}, {@code ${TASK_RACK_ID}} and
 * finally {@code ${TASK_ID}}.
 *
 * @param string the template text; a null or empty value is returned untouched
 * @param offerHolder supplies the hostname and rack id
 * @param taskId supplies the request id, deploy id, start time, instance number and full id
 * @return the text with every placeholder replaced, or the input itself when it is null or empty
 */
private String fillInTaskIdValues(String string, SingularityOfferHolder offerHolder, SingularityTaskId taskId) {
  if (Strings.isNullOrEmpty(string)) {
    return string;
  }

  // Each step works on the output of the previous one, so the order is significant.
  String filled = string;
  filled = filled.replace("${TASK_REQUEST_ID}", taskId.getRequestId());
  filled = filled.replace("${TASK_DEPLOY_ID}", taskId.getDeployId());
  filled = filled.replace("${TASK_STARTED_AT}", Long.toString(taskId.getStartedAt()));
  filled = filled.replace("${TASK_INSTANCE_NO}", Integer.toString(taskId.getInstanceNo()));
  filled = filled.replace("${TASK_HOST}", offerHolder.getHostname());
  filled = filled.replace("${TASK_RACK_ID}", offerHolder.getRackId());
  filled = filled.replace("${TASK_ID}", taskId.getId());
  return filled;
}
/**
 * Decides whether a request should enter cooldown after one of its tasks failed.
 *
 * <p>The answer is always {@code false} when the request is not in the
 * {@code ACTIVE} state, when it is not an always-running request, or when
 * either cooldown setting ({@code cooldownAfterFailures},
 * {@code cooldownExpiresAfterMinutes}) is below 1. Otherwise the decision is
 * delegated to {@code hasFailedTooManyTimes} for the failing task's instance
 * number and the given failure timestamp.
 *
 * @param request the request the failed task belongs to
 * @param taskId id of the failed task; only its instance number is used here
 * @param requestState current state of the request
 * @param deployStatistics statistics handed through to {@code hasFailedTooManyTimes}
 * @param failureTimestamp timestamp of the failure, handed through to {@code hasFailedTooManyTimes}
 * @return {@code true} when {@code hasFailedTooManyTimes} reports too many failures
 */
public boolean shouldEnterCooldown(SingularityRequest request, SingularityTaskId taskId, RequestState requestState, SingularityDeployStatistics deployStatistics, long failureTimestamp) {
  if (requestState != RequestState.ACTIVE || !request.isAlwaysRunning()) {
    return false;
  }

  // A value below 1 for either setting turns the cooldown check off.
  if (configuration.getCooldownAfterFailures() < 1 || configuration.getCooldownExpiresAfterMinutes() < 1) {
    return false;
  }

  final boolean failedTooManyTimes = hasFailedTooManyTimes(request, deployStatistics, Optional.of(taskId.getInstanceNo()), Optional.of(failureTimestamp));

  if (failedTooManyTimes) {
    // Log the failure threshold and the time window separately; the window is
    // the configured cooldown expiry in minutes, not the failure count again.
    LOG.trace("Request {} has failed at least {} times in {} minutes", request.getId(), configuration.getCooldownAfterFailures(), configuration.getCooldownExpiresAfterMinutes());
  }

  return failedTooManyTimes;
}
// Add the matching task's instance number to inuseInstanceNumbers.
inuseInstanceNumbers.add(matchingTaskId.getInstanceNo());
/**
 * Scales a five-instance request down to two and checks that exactly three
 * tasks are killed, each with an instance number above 2.
 */
@Test
public void testScaleDownTakesHighestInstances() {
  initRequest();
  initFirstDeploy();

  // Bring up five instances.
  saveAndSchedule(request.toBuilder().setInstances(Optional.of(5)));
  resourceOffers();
  Assert.assertEquals(5, taskManager.getActiveTaskIds().size());

  // Ask for two instances; every other scale option stays absent.
  final SingularityScaleRequest scaleToTwo = new SingularityScaleRequest(
      Optional.of(2),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent(),
      Optional.absent());
  requestResource.scale(requestId, scaleToTwo, singularityUser);

  resourceOffers();
  cleaner.drainCleanupQueue();

  Assert.assertEquals(3, taskManager.getKilledTaskIdRecords().size());

  for (SingularityKilledTaskIdRecord killedRecord : taskManager.getKilledTaskIdRecords()) {
    final int killedInstanceNo = killedRecord.getTaskId().getInstanceNo();
    Assert.assertTrue(killedInstanceNo > 2);

    scheduler.drainPendingQueue();
  }
}
@Test public void testScaleDownTakesHighestInstancesWithPendingTask() { initRequest(); initFirstDeploy(); saveAndSchedule(request.toBuilder().setInstances(Optional.of(5))); resourceOffers(); Assert.assertEquals(5, taskManager.getActiveTaskIds().size()); SingularityTaskId instance2 = null; for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) { if (taskId.getInstanceNo() == 2) { instance2 = taskId; } } statusUpdate(taskManager.getTask(instance2).get(), TaskState.TASK_KILLED); killKilledTasks(); scheduler.drainPendingQueue(); requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent(), Optional.absent()), singularityUser); scheduler.drainPendingQueue(); cleaner.drainCleanupQueue(); // instances 4 and 5 should get killed Assert.assertEquals(2, taskManager.getKilledTaskIdRecords().size()); killKilledTasks(); resourceOffers(); // instances 1,2,3 should be active Assert.assertEquals(3, taskManager.getActiveTaskIds().size()); for (SingularityTaskId taskId : taskManager.getActiveTaskIds()) { Assert.assertTrue(taskId.getInstanceNo() < 4); } }
/**
 * Asserts that two task ids are equal as objects and that they also agree on
 * each component compared below.
 *
 * @param one the first task id (passed as the expected value to each assertion)
 * @param two the second task id (passed as the actual value to each assertion)
 */
private void assertEquals(SingularityTaskId one, SingularityTaskId two) {
  // Whole-object equality first.
  Assert.assertEquals(one, two);

  // Identity of the request and deploy.
  Assert.assertEquals(one.getDeployId(), two.getDeployId());
  Assert.assertEquals(one.getRequestId(), two.getRequestId());

  // Placement: sanitized host and rack.
  Assert.assertEquals(one.getSanitizedHost(), two.getSanitizedHost());
  Assert.assertEquals(one.getSanitizedRackId(), two.getSanitizedRackId());

  // Start time and instance number.
  Assert.assertEquals(one.getStartedAt(), two.getStartedAt());
  Assert.assertEquals(one.getInstanceNo(), two.getInstanceNo());
}