/**
 * Updates the Spark output metrics (bytes written) for the current task by summing the
 * on-disk lengths of the given committed files.
 *
 * <p>Stat failures are best-effort: an unreadable file is logged at debug level and
 * skipped, which only makes the reported metric an under-count.
 *
 * @param log         logger used to report files whose status could not be read
 * @param fs          filesystem used to look up file sizes
 * @param commitPaths paths of the files committed by this task
 */
public static void updateSparkBytesWrittenMetrics(Logger log, FileSystem fs, Path[] commitPaths) {
  AtomicLong bytesWritten = new AtomicLong();
  Arrays.stream(commitPaths).parallel().forEach(path -> {
    try {
      bytesWritten.addAndGet(fs.getFileStatus(path).getLen());
    } catch (IOException e) {
      // Best effort: a missing/unreadable file only makes the metric inaccurate.
      log.debug("Unable to collect stats for file: " + path + " output metrics may be inaccurate", e);
    }
  });
  // Fix: TaskContext.get() is null when not running inside a Spark task; guard it,
  // matching the null check in updateSparkRecordsWrittenMetrics.
  TaskContext taskContext = TaskContext.get();
  if (taskContext != null && bytesWritten.get() > 0) {
    taskContext.taskMetrics().outputMetrics().setBytesWritten(bytesWritten.get());
  }
}
}
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Publishes the number of records written by the current Spark task to its output
 * metrics. No-op when not running inside a Spark task (TaskContext.get() is null) or
 * when {@code numRows} is not positive.
 *
 * @param numRows number of records written by this task
 */
public static void updateSparkRecordsWrittenMetrics(long numRows) {
  TaskContext ctx = TaskContext.get();
  if (ctx == null || numRows <= 0) {
    return; // outside a task, or nothing to report
  }
  ctx.taskMetrics().outputMetrics().setRecordsWritten(numRows);
}
/**
 * Smoke-exercises the public TaskContext API: registers one completion and one failure
 * listener, then touches every id/state accessor.
 */
public static void test() {
  TaskContext ctx = TaskContext.get();
  ctx.isCompleted();
  ctx.isInterrupted();
  ctx.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  ctx.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  ctx.attemptNumber();
  ctx.partitionId();
  ctx.stageId();
  ctx.stageAttemptNumber();
  ctx.taskAttemptId();
}
/**
 * Exercises the TaskContext surface from Java: state checks, listener registration, and
 * the task/stage identifier accessors, in that order.
 */
public static void test() {
  TaskContext context = TaskContext.get();
  context.isCompleted();
  context.isInterrupted();
  context.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  context.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  context.attemptNumber();
  context.partitionId();
  context.stageId();
  context.stageAttemptNumber();
  context.taskAttemptId();
}
/**
 * Calls each TaskContext API method once from Java code — a compile/link-level check
 * that the Java-facing API stays callable.
 */
public static void test() {
  TaskContext taskContext = TaskContext.get();
  taskContext.isCompleted();
  taskContext.isInterrupted();
  taskContext.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  taskContext.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  taskContext.attemptNumber();
  taskContext.partitionId();
  taskContext.stageId();
  taskContext.stageAttemptNumber();
  taskContext.taskAttemptId();
}
/**
 * Adds {@code size} bytes to the peak-execution-memory metric of the task currently
 * running on this thread. Assumes it is called from a Spark task thread
 * (TaskContext.get() non-null) — TODO confirm with callers.
 *
 * @param size number of bytes to add
 */
protected void incPeakExecutionMemory(long size) {
  TaskContext ctx = TaskContext.get();
  ctx.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Bumps the current task's peak-execution-memory metric by {@code size} bytes.
 * Presumes a live task context on this thread — verify against call sites.
 *
 * @param size number of bytes to add
 */
public void incPeakExecutionMemory(long size) {
  TaskContext currentTask = TaskContext.get();
  currentTask.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Records {@code size} additional bytes against the peak execution memory of the task
 * bound to the calling thread. Assumes an active Spark task — TODO confirm.
 *
 * @param size number of bytes to add
 */
public void incPeakExecutionMemory(long size) {
  TaskContext task = TaskContext.get();
  task.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Hands the whole partition iterator to the writer together with the current task's
 * context; always returns null (Void).
 */
@Override
public Void apply(Iterator<T> iterator) {
  TaskContext ctx = TaskContext.get();
  writer.write(ctx, iterator);
  return null;
}
}
/**
 * Writes the records of this partition via {@code writer}, passing along the Spark task
 * context of the calling thread. Returns null per the Void contract.
 */
@Override
public Void apply(Iterator<T> iterator) {
  TaskContext taskContext = TaskContext.get();
  writer.write(taskContext, iterator);
  return null;
}
}
/**
 * Builds a bounded in-memory executor sized from the write config's buffer limit.
 *
 * <p>Captures the Spark TaskContext of the constructing thread into
 * {@code sparkThreadTaskContext} — presumably so worker threads can reference the task's
 * context later; confirm against usages. TaskContext.get() is thread-local, so this
 * constructor must run on the Spark task thread for the field to be non-null.
 * NOTE(review): Optional.of(consumer) throws on a null consumer — looks intentional.
 */
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Optional.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
/**
 * Constructs the executor with a buffer limit taken from {@code hoodieConfig} and the
 * given producer/consumer/transform pipeline.
 *
 * <p>Stores the calling thread's Spark TaskContext in {@code sparkThreadTaskContext};
 * TaskContext.get() is thread-local, so this returns null unless constructed on a Spark
 * task thread — presumably downstream code relies on the non-null case; verify.
 * NOTE(review): Optional.of(consumer) rejects null consumers.
 */
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Optional.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
/**
 * Maps a (key, tuple) pair to its tuple value, lazily publishing the Spark partition id
 * under the MR task-id config keys on first use, and incrementing a record counter when
 * counters are enabled.
 *
 * <p>NOTE(review): {@code initialized} is checked without synchronization — presumably
 * this function object is confined to one task thread; confirm.
 *
 * @param v1 (key, tuple) pair; only the tuple is returned
 * @return the tuple component of {@code v1}
 */
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    // Expose Spark's partition id under the MR-style keys so code reading task info
    // (e.g. via PigConstants.TASK_INDEX / MRConfiguration.TASK_ID) sees a stable index.
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  // Idiom fix: was `disableCounter == false`.
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
/**
 * Increases the peak execution memory metric of the thread's current task by
 * {@code size} bytes. Assumes a task context is bound to this thread — TODO confirm.
 *
 * @param size number of bytes to add
 */
protected void incPeakExecutionMemory(long size) {
  TaskContext current = TaskContext.get();
  current.taskMetrics().incPeakExecutionMemory(size);
}
// Record the key/value schemas for later (de)serialization — assumed; confirm usage.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// TaskContext.get() is thread-local: null unless this runs on a Spark task thread.
final TaskContext taskContext = TaskContext.get();
// Keep the provided key/value schemas on this instance.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// Capture the current Spark task context (thread-local; null off a task thread).
final TaskContext taskContext = TaskContext.get();
/**
 * Decides whether the command's stderr should be persisted to HDFS for this attempt:
 * only when stderr persistence is enabled and the current attempt number is below the
 * configured log-files limit.
 *
 * <p>NOTE(review): the {@code limit} and {@code taskId} parameters are unused in this
 * override; the decision relies solely on the Spark attempt number and
 * {@code command.getLogFilesLimit()}.
 */
@Override
protected boolean writeErrorToHDFS(int limit, String taskId) {
  if (!command.getPersistStderr()) {
    return false;
  }
  int attempt = TaskContext.get().attemptNumber();
  return attempt < command.getLogFilesLimit();
}
}
// Store the key and value schemas passed in by the caller.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// Thread-local lookup: non-null only when executing inside a Spark task.
final TaskContext taskContext = TaskContext.get();