@Override
public TaskAttemptID createTaskAttemptID() {
  return new TaskAttemptID("", 0, TaskType.MAP, 0, 0);
}
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(
    Configuration conf, final Progressable progressable) {
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId == null) {
    // If the caller is not within a mapper/reducer (e.g. reading from the table via CliDriver),
    // then TaskAttemptID.forName() may return null. Fall back to the default constructor.
    taskAttemptId = new TaskAttemptID();
  }
  return new TaskAttemptContextImpl(conf, taskAttemptId) {
    @Override
    public void progress() {
      progressable.progress();
    }
  };
}
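// Hedged sketch (not part of the shim above): shows the canonical attempt-ID string that
// TaskAttemptID.forName() parses; the concrete numbers are made up for illustration.
// forName(null) returns null, which is why the method above falls back to new TaskAttemptID()
// when MRJobConfig.TASK_ATTEMPT_ID is not present in the configuration.
import org.apache.hadoop.mapreduce.TaskAttemptID;

public class TaskAttemptIdParseSketch {
  public static void main(String[] args) {
    TaskAttemptID parsed = TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
    System.out.println(parsed.getJobID());   // job_200707121733_0003
    System.out.println(parsed.getTaskID());  // task_200707121733_0003_m_000005
    System.out.println(TaskAttemptID.forName(null) == null);  // true
  }
}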
@Override
public void open(HadoopInputSplit split) throws IOException {
  TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
  try {
    this.recordReader = this.hCatInputFormat
        .createRecordReader(split.getHadoopInputSplit(), context);
    this.recordReader.initialize(split.getHadoopInputSplit(), context);
  } catch (InterruptedException e) {
    throw new IOException("Could not create RecordReader.", e);
  } finally {
    this.fetched = false;
  }
}
@Override
public void open(HadoopInputSplit split) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
    try {
      this.recordReader = this.mapreduceInputFormat
          .createRecordReader(split.getHadoopInputSplit(), context);
      this.recordReader.initialize(split.getHadoopInputSplit(), context);
    } catch (InterruptedException e) {
      throw new IOException("Could not create RecordReader.", e);
    } finally {
      this.fetched = false;
    }
  }
}
@Override
public TaskAttemptID newTaskAttemptID(JobID jobId, boolean isMap, int taskId, int id) {
  return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(),
      isMap ? TaskType.MAP : TaskType.REDUCE, taskId, id);
}
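// Hedged sketch (not part of the shim above): demonstrates what newTaskAttemptID() builds,
// using made-up JobID values; the boolean isMap flag is simply re-expressed as a TaskType.
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class NewTaskAttemptIdSketch {
  public static void main(String[] args) {
    JobID jobId = new JobID("200707121733", 3);  // illustrative jtIdentifier and job number
    // Equivalent to newTaskAttemptID(jobId, true, 5, 0) with the shim above.
    TaskAttemptID attempt =
        new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), TaskType.MAP, 5, 0);
    System.out.println(attempt);  // attempt_200707121733_0003_m_000005_0
  }
}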
public ParquetRecordWriterWrapper(
    final OutputFormat<Void, ParquetHiveRecord> realOutputFormat,
    final JobConf jobConf,
    final String name,
    final Progressable progress,
    Properties tableProperties) throws IOException {
  try {
    // create a TaskInputOutputContext
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id"));
    if (taskAttemptID == null) {
      taskAttemptID = new TaskAttemptID();
    }
    taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID);

    LOG.info("initialize serde with table properties.");
    initializeSerProperties(taskContext, tableProperties);

    LOG.info("creating real writer to write at " + name);
    realWriter =
        ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
    LOG.info("real writer: " + realWriter);
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
private void mockTaskAttemptContext(String indexType) {
  TaskAttemptID fakeTaskId =
      new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2), 2);
  when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
  when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}
@Test
public void testRecordReader() throws Exception {
  List<String> paths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);

  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
  RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
      new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
  Assert.assertFalse(recordReader.nextKeyValue());
}
taskAttemptID = new TaskAttemptID();
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
@Test
public void testRecordReader() {
  List<KafkaWritable> serRecords = expectedRecords.stream()
      .map((consumerRecord) -> new KafkaWritable(consumerRecord.partition(),
          consumerRecord.offset(),
          consumerRecord.timestamp(),
          consumerRecord.value(),
          consumerRecord.key()))
      .collect(Collectors.toList());
  KafkaRecordReader recordReader = new KafkaRecordReader();
  TaskAttemptContext context = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
  recordReader.initialize(new KafkaInputSplit(currentTopic, 0, 50L, 100L, null), context);
  for (int i = 50; i < 100; ++i) {
    KafkaWritable record = new KafkaWritable();
    Assert.assertTrue(recordReader.next(null, record));
    Assert.assertEquals(serRecords.get(i), record);
  }
  recordReader.close();
}
for (int i = 0; i < splits.size(); i++) {
  TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
      .createTaskAttemptContext(jonconf, new TaskAttemptID());
  RecordReader<LongWritable, BytesRefArrayWritable> rr =
      inputFormat.createRecordReader(splits.get(i), tac);
  rr.initialize(splits.get(i), tac);
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs,
    final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException {
  SortedSet<byte[]> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
  // what is missing.
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
      new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
    InputSplit is =
        new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
    rr.initialize(is, context);
    while (rr.nextKeyValue()) {
      rr.getCurrentKey();
      BytesWritable bw = rr.getCurrentValue();
      if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
        byte[] key = new byte[rr.getCurrentKey().getLength()];
        System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0,
            rr.getCurrentKey().getLength());
        result.add(key);
      }
    }
  }
  return result;
}
HCatOutputFormat outFormat = new HCatOutputFormat();
TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
    conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
OutputCommitter committer = null;
RecordWriter<WritableComparable<?>, HCatRecord> writer;
public HadoopElementIterator(final HadoopGraph graph) {
  try {
    this.graph = graph;
    final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
    final InputFormat<NullWritable, VertexWritable> inputFormat =
        ConfUtil.getReaderAsInputFormat(configuration);
    if (inputFormat instanceof FileInputFormat) {
      final Storage storage = FileSystemStorage.open(configuration);
      if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
        return; // there is no input location and thus, no data (empty graph)
      if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
        return; // there is no data at the input location (empty graph)
      configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
          Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
    }
    final List<InputSplit> splits = inputFormat.getSplits(
        new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
    for (final InputSplit split : splits) {
      this.readers.add(inputFormat.createRecordReader(split,
          new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
    }
  } catch (final Exception e) {
    throw new IllegalStateException(e.getMessage(), e);
  }
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
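// Hedged usage sketch for the helper above (the resource name and split sizes are
// illustrative, not from the original test): reading the same file with different
// split sizes should yield identical records, since LineRecordReader handles lines
// that straddle split boundaries.
@Test
public void readRecordsIsSplitSizeIndependent() throws IOException {
  URL testFileUrl = getClass().getClassLoader().getResource("test-lines.txt");
  ArrayList<String> coarseSplits = readRecords(testFileUrl, 8 * 1024);
  ArrayList<String> tinySplits = readRecords(testFileUrl, 10);
  Assert.assertEquals(coarseSplits, tinySplits);
}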
@Before
public void before() throws Exception {
  Configuration conf = new Configuration();
  conf.set(CamusJob.KAFKA_CLIENT_NAME, "DummyClientName");
  TaskAttemptContext context = null;
  try {
    // Hadoop 2.x: TaskAttemptContext is an interface; instantiate the Impl class reflectively.
    Class<?> taskAttemptContextImplClass =
        Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
    context = (TaskAttemptContext) taskAttemptContextImplClass
        .getDeclaredConstructor(Configuration.class, TaskAttemptID.class)
        .newInstance(conf, new TaskAttemptID());
  } catch (ClassNotFoundException e) {
    // Hadoop 1.x: TaskAttemptContext is a concrete class, so construct it directly.
    context = (TaskAttemptContext) Class.forName("org.apache.hadoop.mapreduce.TaskAttemptContext")
        .getDeclaredConstructor(Configuration.class, TaskAttemptID.class)
        .newInstance(conf, new TaskAttemptID());
  }

  EtlRequest request = new EtlRequest();
  request.setOffset(0);
  request.setLatestOffset(1);
  request.setURI(new URI("http://localhost:8888"));

  this.kafkaReader = new KafkaReader(new EtlInputFormat(), context, request, 100, 100);
}
@Test
public void testCloneMapContext() throws Exception {
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
  MapContext<IntWritable, IntWritable, IntWritable, IntWritable> mapContext =
      new MapContextImpl<IntWritable, IntWritable, IntWritable, IntWritable>(
          conf, taskAttemptid, null, null, null, null, null);
  Mapper<IntWritable, IntWritable, IntWritable, IntWritable>.Context mapperContext =
      new WrappedMapper<IntWritable, IntWritable, IntWritable, IntWritable>().getMapContext(mapContext);
  ContextFactory.cloneMapContext(mapperContext, conf, null, null);
}