@Override
public TaskAttemptContext newTask(Configuration c, TaskAttemptID t) {
    return new TaskAttemptContextImpl(c, t);
}
@Override
public JobContext createJobContext(Configuration conf, JobID jobId) {
    return new JobContextImpl(conf instanceof JobConf ? new JobConf(conf) : conf, jobId);
}
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    JobContext jobContext;
    TaskAttemptContext taskContext;
    try {
        // Build a synthetic reduce attempt id by zero-padding "1" to six digits:
        // the resulting string is "attempt__0000_r_000001_0".
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(1)
                + "_0");
        jobContext = new JobContextImpl(this.configuration, new JobID());
        taskContext = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
        this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(taskContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    // finalize HDFS output format
    if (this.outputCommitter != null) {
        this.outputCommitter.commitJob(jobContext);
    }
}
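// A minimal sketch, not part of the snippet above: the padded string built in finalizeGlobal()
// resolves to "attempt__0000_r_000001_0", so the same id can likely be constructed directly
// (assumes org.apache.hadoop.mapreduce.TaskAttemptID and TaskType are imported).
TaskAttemptID directAttemptId = new TaskAttemptID("", 0, TaskType.REDUCE, 1, 0);
// directAttemptId.toString() is expected to equal "attempt__0000_r_000001_0",
// i.e. the same value that TaskAttemptID.forName(...) parses above.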
@Test
public void testCloneMapContext() throws Exception {
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
    MapContext<IntWritable, IntWritable, IntWritable, IntWritable> mapContext =
        new MapContextImpl<IntWritable, IntWritable, IntWritable, IntWritable>(
            conf, taskAttemptid, null, null, null, null, null);
    Mapper<IntWritable, IntWritable, IntWritable, IntWritable>.Context mapperContext =
        new WrappedMapper<IntWritable, IntWritable, IntWritable, IntWritable>().getMapContext(mapContext);
    ContextFactory.cloneMapContext(mapperContext, conf, null, null);
}
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(
        Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID taskId) {
    return new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(
        conf instanceof JobConf ? new JobConf(conf) : conf, taskId);
}
try {
    this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
    this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
    this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));
} catch (Exception e) {
    throw new RuntimeException(e);
}
@Override
public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job) {
    return new JobContextImpl(job.getConfiguration(), job.getJobID());
}
/**
 * Returns a TaskAttemptContext instance created from the given parameters.
 * @param job an instance of o.a.h.mapreduce.Job
 * @param taskId an identifier for the task attempt id. Should be parsable by
 *     {@link TaskAttemptID#forName(String)}
 * @return a concrete TaskAttemptContext instance of o.a.h.mapreduce.TaskAttemptContext
 */
@Override
@SuppressWarnings("unchecked")
public <T, J> T createTestTaskAttemptContext(J job, String taskId) {
    Job j = (Job) job;
    return (T) new TaskAttemptContextImpl(j.getConfiguration(), TaskAttemptID.forName(taskId));
}
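// A minimal usage sketch, not taken from the code above: "factory" and "job" are assumed to exist.
// The taskId argument must be in the canonical form accepted by TaskAttemptID.forName(String),
// e.g. "attempt_200707121733_0003_m_000005_0" (job 3 submitted at 200707121733, map task 5, attempt 0).
TaskAttemptContext testContext =
    factory.createTestTaskAttemptContext(job, "attempt_200707121733_0003_m_000005_0");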
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                    Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat.getSplits(
                new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
    // only gather base statistics for FileInputFormats
    if (!(mapreduceInputFormat instanceof FileInputFormat)) {
        return null;
    }

    JobContext jobContext = new JobContextImpl(configuration, null);

    final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ?
            (FileBaseStatistics) cachedStats : null;

    try {
        final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(jobContext);
        return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
        }
    }

    // no statistics available
    return null;
}
@Override
public void open(HadoopInputSplit split) throws IOException {
    TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());

    try {
        this.recordReader = this.hCatInputFormat
                .createRecordReader(split.getHadoopInputSplit(), context);
        this.recordReader.initialize(split.getHadoopInputSplit(), context);
    } catch (InterruptedException e) {
        throw new IOException("Could not create RecordReader.", e);
    } finally {
        this.fetched = false;
    }
}
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext = new JobContextImpl(configuration, new JobID());

    List<InputSplit> splits;
    try {
        splits = this.hCatInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }

    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
@Override
public void open(HadoopInputSplit split) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());

        try {
            this.recordReader = this.mapreduceInputFormat
                    .createRecordReader(split.getHadoopInputSplit(), context);
            this.recordReader.initialize(split.getHadoopInputSplit(), context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordReader.", e);
        } finally {
            this.fetched = false;
        }
    }
}
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext = new JobContextImpl(configuration, new JobID());

    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }

    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
/**
 * Create a new task attempt context.
 * @param conf config
 * @return a new context
 */
private TaskAttemptContext taskAttempt(Configuration conf) {
    return new TaskAttemptContextImpl(conf, taskAttemptID);
}
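// A minimal usage sketch (assumption: the enclosing class provides the taskAttemptID field used above):
// each call produces a fresh context bound to the same attempt id, e.g. for committer setup.
TaskAttemptContext attemptContext = taskAttempt(new Configuration());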
@Before
public void setUp() throws Exception {
    conf = new Configuration();
    jobId = new JobID("test", 1);
    jobContext = new JobContextImpl(conf, jobId);
}
@Test
public void testRecordReader() throws Exception {
    List<String> paths = Lists.newArrayList("/path1", "/path2");
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);
    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

    RecordReader<LongWritable, Text> recordReader =
        inputFormat.createRecordReader(split, new TaskAttemptContextImpl(new Configuration(),
            new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
    Assert.assertFalse(recordReader.nextKeyValue());
}
@Test
public void testRecordReader() throws Exception {
    List<KafkaWritable> serRecords = expectedRecords.stream()
        .map((consumerRecord) -> new KafkaWritable(consumerRecord.partition(),
            consumerRecord.offset(),
            consumerRecord.timestamp(),
            consumerRecord.value(),
            consumerRecord.key()))
        .collect(Collectors.toList());
    KafkaRecordReader recordReader = new KafkaRecordReader();
    TaskAttemptContext context = new TaskAttemptContextImpl(this.conf, new TaskAttemptID());
    recordReader.initialize(new KafkaInputSplit(currentTopic, 0, 50L, 100L, null), context);
    for (int i = 50; i < 100; ++i) {
        KafkaWritable record = new KafkaWritable();
        Assert.assertTrue(recordReader.next(null, record));
        Assert.assertEquals(serRecords.get(i), record);
    }
    recordReader.close();
}
private static SortedSet<byte[]> readFileToSearch(final Configuration conf, final FileSystem fs,
        final LocatedFileStatus keyFileStatus) throws IOException, InterruptedException {
    SortedSet<byte[]> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    // Return entries that are flagged Counts.UNDEFINED in the value. Return the row. This is
    // what is missing.
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    try (SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader rr =
            new SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader()) {
        InputSplit is = new FileSplit(keyFileStatus.getPath(), 0, keyFileStatus.getLen(), new String[] {});
        rr.initialize(is, context);
        while (rr.nextKeyValue()) {
            rr.getCurrentKey();
            BytesWritable bw = rr.getCurrentValue();
            if (Verify.VerifyReducer.whichType(bw.getBytes()) == Verify.Counts.UNDEFINED) {
                byte[] key = new byte[rr.getCurrentKey().getLength()];
                System.arraycopy(rr.getCurrentKey().getBytes(), 0, key, 0, rr.getCurrentKey().getLength());
                result.add(key);
            }
        }
    }
    return result;
}
@Test
public void testInvalidVersionNumber() throws IOException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    try {
        new FileOutputCommitter(outDir, tContext);
        fail("should've thrown an exception!");
    } catch (IOException e) {
        // test passed
    }
}