/**
 * Updates the Spark output metrics (bytes written) for the current task by summing the
 * on-disk lengths of the given committed files.
 *
 * <p>Stat failures are best-effort: an unreadable file is logged at debug level and
 * skipped, which only makes the reported metric an under-count.
 *
 * @param log         logger used to report files whose status could not be read
 * @param fs          filesystem used to look up file sizes
 * @param commitPaths paths of the files committed by this task
 */
public static void updateSparkBytesWrittenMetrics(Logger log, FileSystem fs, Path[] commitPaths) {
  AtomicLong bytesWritten = new AtomicLong();
  Arrays.stream(commitPaths).parallel().forEach(path -> {
    try {
      bytesWritten.addAndGet(fs.getFileStatus(path).getLen());
    } catch (IOException e) {
      // Best effort: a missing/unreadable file only makes the metric inaccurate.
      log.debug("Unable to collect stats for file: " + path + " output metrics may be inaccurate", e);
    }
  });
  // Fix: TaskContext.get() is null when not running inside a Spark task; guard it,
  // matching the null check in updateSparkRecordsWrittenMetrics.
  TaskContext taskContext = TaskContext.get();
  if (taskContext != null && bytesWritten.get() > 0) {
    taskContext.taskMetrics().outputMetrics().setBytesWritten(bytesWritten.get());
  }
}
}
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
private void setupMRLegacyConfigs() { StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_"); taskAttemptIdBuilder.append(System.currentTimeMillis()) .append("_") .append(stageIdFormat.format(TaskContext.get().stageId())) .append("_"); if (isMap()) { taskAttemptIdBuilder.append("m_"); } else { taskAttemptIdBuilder.append("r_"); } // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum". // When there're multiple attempts for a task, Hive will rely on the partitionId // to figure out if the data are duplicate or not when collecting the final outputs // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles) taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId())) .append("_").append(TaskContext.get().attemptNumber()); String taskAttemptIdStr = taskAttemptIdBuilder.toString(); jobConf.set("mapred.task.id", taskAttemptIdStr); jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr); jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId()); } }
/**
 * Publishes the number of records written by the current Spark task to its output
 * metrics. No-op when not running inside a Spark task (TaskContext.get() is null) or
 * when {@code numRows} is not positive.
 *
 * @param numRows number of records written by this task
 */
public static void updateSparkRecordsWrittenMetrics(long numRows) {
  TaskContext ctx = TaskContext.get();
  if (ctx == null || numRows <= 0) {
    return; // outside a task, or nothing to report
  }
  ctx.taskMetrics().outputMetrics().setRecordsWritten(numRows);
}
/**
 * Smoke-exercises the public TaskContext API: registers one completion and one failure
 * listener, then touches every id/state accessor.
 */
public static void test() {
  TaskContext ctx = TaskContext.get();
  ctx.isCompleted();
  ctx.isInterrupted();
  ctx.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  ctx.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  ctx.attemptNumber();
  ctx.partitionId();
  ctx.stageId();
  ctx.stageAttemptNumber();
  ctx.taskAttemptId();
}
/**
 * Exercises the TaskContext surface from Java: state checks, listener registration, and
 * the task/stage identifier accessors, in that order.
 */
public static void test() {
  TaskContext context = TaskContext.get();
  context.isCompleted();
  context.isInterrupted();
  context.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  context.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  context.attemptNumber();
  context.partitionId();
  context.stageId();
  context.stageAttemptNumber();
  context.taskAttemptId();
}
/**
 * Calls each TaskContext API method once from Java code — a compile/link-level check
 * that the Java-facing API stays callable.
 */
public static void test() {
  TaskContext taskContext = TaskContext.get();
  taskContext.isCompleted();
  taskContext.isInterrupted();
  taskContext.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  taskContext.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  taskContext.attemptNumber();
  taskContext.partitionId();
  taskContext.stageId();
  taskContext.stageAttemptNumber();
  taskContext.taskAttemptId();
}
/**
 * Adds {@code size} bytes to the peak-execution-memory metric of the task currently
 * running on this thread. Assumes it is called from a Spark task thread
 * (TaskContext.get() non-null) — TODO confirm with callers.
 *
 * @param size number of bytes to add
 */
protected void incPeakExecutionMemory(long size) {
  TaskContext ctx = TaskContext.get();
  ctx.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Bumps the current task's peak-execution-memory metric by {@code size} bytes.
 * Presumes a live task context on this thread — verify against call sites.
 *
 * @param size number of bytes to add
 */
public void incPeakExecutionMemory(long size) {
  TaskContext currentTask = TaskContext.get();
  currentTask.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Records {@code size} additional bytes against the peak execution memory of the task
 * bound to the calling thread. Assumes an active Spark task — TODO confirm.
 *
 * @param size number of bytes to add
 */
public void incPeakExecutionMemory(long size) {
  TaskContext task = TaskContext.get();
  task.taskMetrics().incPeakExecutionMemory(size);
}
/**
 * Hands the whole partition iterator to the writer together with the current task's
 * context; always returns null (Void).
 */
@Override
public Void apply(Iterator<T> iterator) {
  TaskContext ctx = TaskContext.get();
  writer.write(ctx, iterator);
  return null;
}
}
/**
 * Writes the records of this partition via {@code writer}, passing along the Spark task
 * context of the calling thread. Returns null per the Void contract.
 */
@Override
public Void apply(Iterator<T> iterator) {
  TaskContext taskContext = TaskContext.get();
  writer.write(taskContext, iterator);
  return null;
}
}
/**
 * Builds a bounded in-memory executor sized from the write config's buffer limit.
 *
 * <p>Captures the Spark TaskContext of the constructing thread into
 * {@code sparkThreadTaskContext} — presumably so worker threads can reference the task's
 * context later; confirm against usages. TaskContext.get() is thread-local, so this
 * constructor must run on the Spark task thread for the field to be non-null.
 * NOTE(review): Optional.of(consumer) throws on a null consumer — looks intentional.
 */
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Optional.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
/**
 * Constructs the executor with a buffer limit taken from {@code hoodieConfig} and the
 * given producer/consumer/transform pipeline.
 *
 * <p>Stores the calling thread's Spark TaskContext in {@code sparkThreadTaskContext};
 * TaskContext.get() is thread-local, so this returns null unless constructed on a Spark
 * task thread — presumably downstream code relies on the non-null case; verify.
 * NOTE(review): Optional.of(consumer) rejects null consumers.
 */
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Optional.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
/**
 * Maps a (key, tuple) pair to its tuple value, lazily publishing the Spark partition id
 * under the MR task-id config keys on first use, and incrementing a record counter when
 * counters are enabled.
 *
 * <p>NOTE(review): {@code initialized} is checked without synchronization — presumably
 * this function object is confined to one task thread; confirm.
 *
 * @param v1 (key, tuple) pair; only the tuple is returned
 * @return the tuple component of {@code v1}
 */
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    // Expose Spark's partition id under the MR-style keys so code reading task info
    // (e.g. via PigConstants.TASK_INDEX / MRConfiguration.TASK_ID) sees a stable index.
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  // Idiom fix: was `disableCounter == false`.
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
/**
 * Increases the peak execution memory metric of the thread's current task by
 * {@code size} bytes. Assumes a task context is bound to this thread — TODO confirm.
 *
 * @param size number of bytes to add
 */
protected void incPeakExecutionMemory(long size) {
  TaskContext current = TaskContext.get();
  current.taskMetrics().incPeakExecutionMemory(size);
}
// Record the key/value schemas for later (de)serialization — assumed; confirm usage.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// TaskContext.get() is thread-local: null unless this runs on a Spark task thread.
final TaskContext taskContext = TaskContext.get();
// Keep the provided key/value schemas on this instance.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// Capture the current Spark task context (thread-local; null off a task thread).
final TaskContext taskContext = TaskContext.get();
/**
 * Decides whether the command's stderr should be persisted to HDFS for this attempt:
 * only when stderr persistence is enabled and the current attempt number is below the
 * configured log-files limit.
 *
 * <p>NOTE(review): the {@code limit} and {@code taskId} parameters are unused in this
 * override; the decision relies solely on the Spark attempt number and
 * {@code command.getLogFilesLimit()}.
 */
@Override
protected boolean writeErrorToHDFS(int limit, String taskId) {
  if (!command.getPersistStderr()) {
    return false;
  }
  int attempt = TaskContext.get().attemptNumber();
  return attempt < command.getLogFilesLimit();
}
}
// Store the key and value schemas passed in by the caller.
this.keySchema = keySchema;
this.valueSchema = valueSchema;
// Thread-local lookup: non-null only when executing inside a Spark task.
final TaskContext taskContext = TaskContext.get();