return new TaskStatus( initialTaskStatus.getTaskId(), taskInstanceId, ++version, taskState, initialTaskStatus.getSelf(), "fake", ImmutableSet.of(), initialTaskStatus.getFailures(), initialTaskStatus.getQueuedPartitionedDrivers(), initialTaskStatus.getRunningPartitionedDrivers(), initialTaskStatus.isOutputBufferOverutilized(), initialTaskStatus.getPhysicalWrittenDataSize(), initialTaskStatus.getMemoryReservation(), initialTaskStatus.getSystemMemoryReservation(), initialTaskStatus.getFullGcCount(), initialTaskStatus.getFullGcTime());
/**
 * Decides whether one more task should be scheduled.
 *
 * @return {@code 1} when no node has been scheduled yet, or when at least half of the
 *         not-yet-done source tasks report an overutilized output buffer and the writer
 *         tasks have together written at least {@code writerMinSizeBytes} per scheduled
 *         node; {@code 0} otherwise
 */
private int getNewTaskCount()
{
    // Nothing scheduled so far: always ask for the first task.
    if (scheduledNodes.isEmpty()) {
        return 1;
    }

    // Fraction of still-active source tasks whose output buffer is overutilized
    // (0.0 when there are no active source tasks).
    double overutilizedFraction = sourceTasksProvider.get().stream()
            .filter(status -> !status.getState().isDone())
            .mapToDouble(status -> status.isOutputBufferOverutilized() ? 1.0 : 0.0)
            .average()
            .orElse(0.0);

    // Total bytes physically written by all writer tasks.
    long totalWrittenBytes = writerTasksProvider.get().stream()
            .mapToLong(status -> status.getPhysicalWrittenDataSize().toBytes())
            .sum();

    boolean shouldAddTask = (overutilizedFraction >= 0.5)
            && (totalWrittenBytes >= (writerMinSizeBytes * scheduledNodes.size()));
    return shouldAddTask ? 1 : 0;
}
/**
 * Builds a copy of {@code taskStatus} that carries the given state and failures and whose
 * version is {@code MAX_VERSION}. Every other field is copied from {@code taskStatus}.
 *
 * @param taskStatus status to copy the remaining fields from
 * @param state state to put into the returned status
 * @param exceptions failures to put into the returned status
 * @return the newly constructed status
 */
public static TaskStatus failWith(TaskStatus taskStatus, TaskState state, List<ExecutionFailureInfo> exceptions)
{
    TaskStatus previous = taskStatus;
    return new TaskStatus(
            // identity
            previous.getTaskId(),
            previous.getTaskInstanceId(),
            // replaced fields: version and state
            MAX_VERSION,
            state,
            // location
            previous.getSelf(),
            previous.getNodeId(),
            previous.getCompletedDriverGroups(),
            // replaced field: failures
            exceptions,
            // driver counters and buffer flag
            previous.getQueuedPartitionedDrivers(),
            previous.getRunningPartitionedDrivers(),
            previous.isOutputBufferOverutilized(),
            // sizes and GC statistics
            previous.getPhysicalWrittenDataSize(),
            previous.getMemoryReservation(),
            previous.getSystemMemoryReservation(),
            previous.getFullGcCount(),
            previous.getFullGcTime());
}
}
/**
 * Returns the sum of the pending source split count and the queued and running
 * partitioned driver counts from the current task status, or {@code 0} once the
 * task state is done.
 */
@Override
public int getPartitionedSplitCount()
{
    TaskStatus current = getTaskStatus();
    boolean finished = current.getState().isDone();
    return finished
            ? 0
            : getPendingSourceSplitCount() + current.getQueuedPartitionedDrivers() + current.getRunningPartitionedDrivers();
}
void updateTaskStatus(TaskStatus newValue) { // change to new value if old value is not changed and new value has a newer version AtomicBoolean taskMismatch = new AtomicBoolean(); taskStatus.setIf(newValue, oldValue -> { // did the task instance id change if (!isNullOrEmpty(oldValue.getTaskInstanceId()) && !oldValue.getTaskInstanceId().equals(newValue.getTaskInstanceId())) { taskMismatch.set(true); return false; } if (oldValue.getState().isDone()) { // never update if the task has reached a terminal state return false; } if (newValue.getVersion() < oldValue.getVersion()) { // don't update to an older version (same version is ok) return false; } return true; }); if (taskMismatch.get()) { // This will also set the task status to FAILED state directly. // Additionally, this will issue a DELETE for the task to the worker. // While sending the DELETE is not required, it is preferred because a task was created by the previous request. onFail.accept(new PrestoException(REMOTE_TASK_MISMATCH, format("%s (%s)", REMOTE_TASK_MISMATCH_ERROR, HostAddress.fromUri(getTaskStatus().getSelf())))); } }
/**
 * Creates a fetcher seeded with {@code initialTaskStatus}.
 *
 * @param onFail callback stored for reporting failures; must not be null
 * @param initialTaskStatus supplies the task id, the initial value of the status state
 *        machine, and the URI ({@code getSelf()}) handed to the error tracker; must not be null
 * @param refreshMaxWait stored as-is; must not be null
 * @param taskStatusCodec stored as-is; must not be null
 * @param executor handed to the status state machine and stored; must not be null
 * @param httpClient stored as-is; must not be null
 * @param maxErrorDuration passed to the {@code RequestErrorTracker}; not null-checked here
 * @param errorScheduledExecutor passed to the {@code RequestErrorTracker}; not null-checked here
 * @param stats stored as-is; must not be null
 */
public ContinuousTaskStatusFetcher(
        Consumer<Throwable> onFail,
        TaskStatus initialTaskStatus,
        Duration refreshMaxWait,
        JsonCodec<TaskStatus> taskStatusCodec,
        Executor executor,
        HttpClient httpClient,
        Duration maxErrorDuration,
        ScheduledExecutorService errorScheduledExecutor,
        RemoteTaskStats stats)
{
    // Checked first because the task id and the state machine below are derived from it.
    requireNonNull(initialTaskStatus, "initialTaskStatus is null");

    this.taskId = initialTaskStatus.getTaskId();
    this.onFail = requireNonNull(onFail, "onFail is null");
    // NOTE(review): executor is used here before its own null check further down;
    // presumably StateMachine validates it — confirm.
    this.taskStatus = new StateMachine<>("task-" + taskId, executor, initialTaskStatus);

    this.refreshMaxWait = requireNonNull(refreshMaxWait, "refreshMaxWait is null");
    this.taskStatusCodec = requireNonNull(taskStatusCodec, "taskStatusCodec is null");

    this.executor = requireNonNull(executor, "executor is null");
    this.httpClient = requireNonNull(httpClient, "httpClient is null");

    // The label "getting task status" is passed to the tracker as its last argument.
    this.errorTracker = new RequestErrorTracker(taskId, initialTaskStatus.getSelf(), maxErrorDuration, errorScheduledExecutor, "getting task status");
    this.stats = requireNonNull(stats, "stats is null");
}
TaskState taskState = taskStatus.getState(); if (taskState == TaskState.FAILED) { RuntimeException failure = taskStatus.getFailures().stream() .findFirst() .map(this::rewriteTransportFailure) finishedTasks.add(taskStatus.getTaskId());
/**
 * Aborts the task with a FAILED status built from {@code cause}.
 * The failure is logged at debug level only when the task was not already done.
 *
 * @param cause the failure to record on the task
 */
private void failTask(Throwable cause)
{
    TaskStatus observed = getTaskStatus();
    boolean alreadyDone = observed.getState().isDone();
    if (!alreadyDone) {
        log.debug(cause, "Remote task %s failed with %s", observed.getSelf(), cause);
    }

    // The status is read again here rather than reusing the snapshot used for logging.
    abort(failWith(
            getTaskStatus(),
            FAILED,
            ImmutableList.of(toFailure(cause))));
}
/**
 * Reports to the state machine how the user and total (user + system) memory
 * reservations changed since the previously recorded values, then records the
 * new values for the next call.
 *
 * @param taskStatus status whose memory reservations are read
 */
private synchronized void updateMemoryUsage(TaskStatus taskStatus)
{
    long userBytes = taskStatus.getMemoryReservation().toBytes();
    long systemBytes = taskStatus.getSystemMemoryReservation().toBytes();

    long previousTotalBytes = previousUserMemory + previousSystemMemory;
    long userDeltaBytes = userBytes - previousUserMemory;
    long totalDeltaBytes = (userBytes + systemBytes) - previousTotalBytes;

    // Remember the latest readings before notifying the state machine.
    previousUserMemory = userBytes;
    previousSystemMemory = systemBytes;

    stateMachine.updateMemoryUsage(userDeltaBytes, totalDeltaBytes);
}
/**
 * Creates a URI builder from the task's own URI, adding the {@code summarize}
 * parameter when {@code summarizeTaskInfo} is set.
 *
 * @param taskStatus status whose {@code getSelf()} URI is the base
 * @return the configured builder
 */
private HttpUriBuilder getHttpUriBuilder(TaskStatus taskStatus)
{
    HttpUriBuilder builder = uriBuilderFrom(taskStatus.getSelf());
    if (!summarizeTaskInfo) {
        return builder;
    }
    builder.addParameter("summarize");
    return builder;
}
/**
 * Returns the sum of the pending source split count and the queued partitioned
 * driver count from the current task status, or {@code 0} once the task state is done.
 */
@Override
public int getQueuedPartitionedSplitCount()
{
    TaskStatus current = getTaskStatus();
    boolean finished = current.getState().isDone();
    return finished
            ? 0
            : getPendingSourceSplitCount() + current.getQueuedPartitionedDrivers();
}
private void runTest(FailureScenario failureScenario) throws Exception { AtomicLong lastActivityNanos = new AtomicLong(System.nanoTime()); TestingTaskResource testingTaskResource = new TestingTaskResource(lastActivityNanos, failureScenario); HttpRemoteTaskFactory httpRemoteTaskFactory = createHttpRemoteTaskFactory(testingTaskResource); RemoteTask remoteTask = createRemoteTask(httpRemoteTaskFactory); testingTaskResource.setInitialTaskInfo(remoteTask.getTaskInfo()); remoteTask.start(); waitUntilIdle(lastActivityNanos); httpRemoteTaskFactory.stop(); assertTrue(remoteTask.getTaskStatus().getState().isDone(), format("TaskStatus is not in a done state: %s", remoteTask.getTaskStatus())); ErrorCode actualErrorCode = getOnlyElement(remoteTask.getTaskStatus().getFailures()).getErrorCode(); switch (failureScenario) { case TASK_MISMATCH: case TASK_MISMATCH_WHEN_VERSION_IS_HIGH: assertTrue(remoteTask.getTaskInfo().getTaskStatus().getState().isDone(), format("TaskInfo is not in a done state: %s", remoteTask.getTaskInfo())); assertEquals(actualErrorCode, REMOTE_TASK_MISMATCH.toErrorCode()); break; case REJECTED_EXECUTION: // for a rejection to occur, the http client must be shutdown, which means we will not be able to ge the final task info assertEquals(actualErrorCode, REMOTE_TASK_ERROR.toErrorCode()); break; default: throw new UnsupportedOperationException(); } }
synchronized void updateTaskInfo(TaskInfo newValue) { boolean updated = taskInfo.setIf(newValue, oldValue -> { TaskStatus oldTaskStatus = oldValue.getTaskStatus(); TaskStatus newTaskStatus = newValue.getTaskStatus(); if (oldTaskStatus.getState().isDone()) { // never update if the task has reached a terminal state return false; } // don't update to an older version (same version is ok) return newTaskStatus.getVersion() >= oldTaskStatus.getVersion(); }); if (updated && newValue.getTaskStatus().getState().isDone()) { finalTaskInfo.compareAndSet(Optional.empty(), Optional.of(newValue)); stop(); } }
/**
 * Creates the starting status for a task: empty instance id, {@code MIN_VERSION},
 * {@code PLANNED} state, empty collections, zero counters, zero-byte sizes and a
 * zero-millisecond duration.
 *
 * @param taskId id of the task
 * @param location URI stored in the status
 * @param nodeId node id stored in the status
 * @return the newly constructed status
 */
public static TaskStatus initialTaskStatus(TaskId taskId, URI location, String nodeId)
{
    // Three separate zero-byte sizes, one per size argument.
    DataSize firstZeroSize = new DataSize(0, BYTE);
    DataSize secondZeroSize = new DataSize(0, BYTE);
    DataSize thirdZeroSize = new DataSize(0, BYTE);
    Duration zeroDuration = new Duration(0, MILLISECONDS);

    return new TaskStatus(
            taskId,
            "",
            MIN_VERSION,
            PLANNED,
            location,
            nodeId,
            ImmutableSet.of(),
            ImmutableList.of(),
            0,
            0,
            false,
            firstZeroSize,
            secondZeroSize,
            thirdZeroSize,
            0,
            zeroDuration);
}
/**
 * Fails every task that is not done and whose last heartbeat is older than
 * {@code clientTimeout}. Tasks without a recorded heartbeat are left alone.
 * A {@link RuntimeException} raised while handling one task is logged and does
 * not stop the remaining tasks from being inspected.
 */
public void failAbandonedTasks()
{
    DateTime now = DateTime.now();
    DateTime heartbeatCutoff = now.minus(clientTimeout.toMillis());

    for (SqlTask candidate : tasks.asMap().values()) {
        try {
            TaskInfo info = candidate.getTaskInfo();
            TaskStatus status = info.getTaskStatus();
            if (status.getState().isDone()) {
                continue;
            }

            DateTime lastHeartbeat = info.getLastHeartbeat();
            boolean abandoned = lastHeartbeat != null && lastHeartbeat.isBefore(heartbeatCutoff);
            if (!abandoned) {
                continue;
            }

            log.info("Failing abandoned task %s", status.getTaskId());
            String reason = format("Task %s has not been accessed since %s: currentTime %s", status.getTaskId(), lastHeartbeat, now);
            candidate.failed(new PrestoException(ABANDONED_TASK, reason));
        }
        catch (RuntimeException e) {
            log.warn(e, "Error while inspecting age of task %s", candidate.getTaskId());
        }
    }
}
/**
 * Converts an execution failure into a {@code QueryFailureInfo}.
 *
 * @param failureInfo the failure, or {@code null} when there is none
 * @param outputStage stage searched (via {@code QueryMonitor.findFailedTask}) for the failed task
 * @return empty when {@code failureInfo} is {@code null}; otherwise the failure details,
 *         including the failed task's id and host when such a task was found, and the
 *         failure serialized to JSON
 */
private Optional<QueryFailureInfo> createQueryFailureInfo(ExecutionFailureInfo failureInfo, Optional<StageInfo> outputStage)
{
    if (failureInfo == null) {
        return Optional.empty();
    }

    Optional<TaskInfo> culprit = outputStage.flatMap(QueryMonitor::findFailedTask);
    Optional<String> culpritTaskId = culprit.map(info -> info.getTaskStatus().getTaskId().toString());
    Optional<String> culpritHost = culprit.map(info -> info.getTaskStatus().getSelf().getHost());

    QueryFailureInfo result = new QueryFailureInfo(
            failureInfo.getErrorCode(),
            Optional.ofNullable(failureInfo.getType()),
            Optional.ofNullable(failureInfo.getMessage()),
            culpritTaskId,
            culpritHost,
            executionFailureInfoCodec.toJson(failureInfo));
    return Optional.of(result);
}
/**
 * Reports to the state machine how the user and total (user + system) memory
 * reservations changed since the previously recorded values, together with the
 * current total, then records the new values for the next call.
 *
 * @param newStatus status whose memory reservations are read
 */
@Override
public synchronized void stateChanged(TaskStatus newStatus)
{
    long userBytes = newStatus.getMemoryReservation().toBytes();
    long systemBytes = newStatus.getSystemMemoryReservation().toBytes();
    long totalBytes = userBytes + systemBytes;

    long previousTotalBytes = previousUserMemory + previousSystemMemory;
    long userDeltaBytes = userBytes - previousUserMemory;
    long totalDeltaBytes = totalBytes - previousTotalBytes;

    // Remember the latest readings before notifying the state machine.
    previousUserMemory = userBytes;
    previousSystemMemory = systemBytes;

    stateMachine.updateMemoryUsage(userDeltaBytes, totalDeltaBytes, totalBytes);
}
}