public ParquetRecordWriterWrapper(
    final OutputFormat<Void, ParquetHiveRecord> realOutputFormat,
    final JobConf jobConf,
    final String name,
    final Progressable progress,
    Properties tableProperties) throws IOException {
  try {
    // create a TaskInputOutputContext
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jobConf.get("mapred.task.id"));
    if (taskAttemptID == null) {
      taskAttemptID = new TaskAttemptID();
    }
    taskContext = ContextUtil.newTaskAttemptContext(jobConf, taskAttemptID);

    LOG.info("initialize serde with table properties.");
    initializeSerProperties(taskContext, tableProperties);

    LOG.info("creating real writer to write at " + name);
    realWriter = ((ParquetOutputFormat) realOutputFormat).getRecordWriter(taskContext, new Path(name));
    LOG.info("real writer: " + realWriter);
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
static synchronized String getOutputName(TaskAttemptContext context) {
  return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
      + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
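The NUMBER_FORMAT field is not shown above; a minimal sketch of how such a zero-padded, non-grouping formatter is commonly declared (an assumption for illustration, mirroring the usual five-digit part-file naming such as part-00003):

// Assumed declaration for NUMBER_FORMAT: pads the task id to five digits
// without grouping separators, yielding output names like "part-00003".
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
static {
  NUMBER_FORMAT.setMinimumIntegerDigits(5);
  NUMBER_FORMAT.setGroupingUsed(false);
}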
try (Closer closer = Closer.create()) {
  this.isSpeculativeEnabled = isSpeculativeExecutionEnabled(
      HadoopUtils.getStateFromConf(context.getConfiguration()).getProperties());
  this.fs = FileSystem.get(context.getConfiguration());
  this.taskStateStore = new FsStateStore<>(this.fs,
      FileOutputFormat.getOutputPath(context).toUri().getPath(), TaskState.class);

  String jobStateFileName = context.getConfiguration().get(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME);
  boolean foundStateFile = false;
  for (Path dcPath : DistributedCache.getLocalCacheFiles(context.getConfiguration())) {
    if (dcPath.getName().equals(jobStateFileName)) {
      SerializationUtils.deserializeStateFromInputStream(
          closer.register(new FileInputStream(dcPath.toUri().getPath())), this.jobState);
      foundStateFile = true;
      break;
    }
  }

  // ...
  configuration.set(entry.getKey(), entry.getValue().unwrapped().toString());
  // ...
  if (Boolean.parseBoolean(configuration.get(ConfigurationKeys.METRICS_ENABLED_KEY,
      ConfigurationKeys.DEFAULT_METRICS_ENABLED))) {
    this.jobMetrics = Optional.of(JobMetrics.get(this.jobState));
    this.jobMetrics.get().startMetricReportingWithFileSuffix(
        HadoopUtils.getStateFromConf(configuration), context.getTaskAttemptID().toString());
  }
}
@Override
public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(Configuration conf,
    final Progressable progressable) {
  TaskAttemptID taskAttemptId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  if (taskAttemptId == null) {
    // If the caller is not within a mapper/reducer (e.g. when reading from the table via CliDriver),
    // then TaskAttemptID.forName() may return null. Fall back to using the default constructor.
    taskAttemptId = new TaskAttemptID();
  }
  return new TaskAttemptContextImpl(conf, taskAttemptId) {
    @Override
    public void progress() {
      progressable.progress();
    }
  };
}
/**
 * @param ctx Context for IO operations.
 */
public HadoopV2Context(HadoopV2TaskContext ctx) {
  super(ctx.jobConf(), ctx.jobContext().getJobID());

  taskAttemptID = ctx.attemptId();

  conf.set(MRJobConfig.ID, taskAttemptID.getJobID().toString());
  conf.set(MRJobConfig.TASK_ID, taskAttemptID.getTaskID().toString());
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptID.toString());

  output = ctx.output();
  input = ctx.input();

  this.ctx = ctx;
}
FileOutputFormat.setOutputPath(job, outDir);

Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);

JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
assertFalse("task temp dir still exists", expectedFile.exists());
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();

  long offset = 0;
  while (offset < testFileSize) {
    FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    while (reader.nextKeyValue()) {
      records.add(reader.getCurrentValue().toString());
    }
    offset += splitSize;
  }
  return records;
}
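A minimal usage sketch for the helper above, assuming a test resource named records.txt on the classpath; the resource name, split size, and expected record count are illustrative placeholders, not values from the original test:

// Hypothetical invocation of readRecords(); "records.txt" and the expected
// count of 4 are placeholder values for illustration only.
@Test
public void readsAllRecordsAcrossSmallSplits() throws IOException {
  URL testFileUrl = getClass().getClassLoader().getResource("records.txt");
  ArrayList<String> records = readRecords(testFileUrl, 16); // 16-byte splits
  assertEquals(4, records.size());
}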
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
    long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
  Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
  Path testFile = new Path(testDir, "test_rcfile");
  fs.delete(testFile, true);

  Configuration cloneConf = new Configuration(conf);
  RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
  cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);

  Configuration jonconf = new Configuration(cloneConf);
  jonconf.set("mapred.input.dir", testDir.toString());
  JobContext context = new Job(jonconf);
  HiveConf.setLongVar(context.getConfiguration(), ...); // remaining arguments elided
  // ...
  for (int i = 0; i < splits.size(); i++) {
    TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(jonconf, new TaskAttemptID());
    RecordReader<LongWritable, BytesRefArrayWritable> rr =
        inputFormat.createRecordReader(splits.get(i), tac);
    rr.initialize(splits.get(i), tac);
    // ...
  }
}
@Override
public void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();

  verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);

  filesGroup = conf.get(CONF_FILES_GROUP);
  filesUser = conf.get(CONF_FILES_USER);
  filesMode = (short) conf.getInt(CONF_FILES_MODE, 0);
  outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
  inputRoot = new Path(conf.get(CONF_INPUT_ROOT));

  inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
  outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);

  // ...
  context.getCounter(c).increment(0);

  if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
    testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
    testing.injectedFailureCount = context.getTaskAttemptID().getId();
  }
}
FileOutputFormat.setOutputPath(job, outDir);

Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
conf.setBoolean(FileOutputCommitter.FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED, taskCleanup);

JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

// ...
    new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
File taskOutputDir = new File(
    Path.getPathWithoutSchemeAndAuthority(committer.getWorkPath()).toString());
    throws FileNotFoundException, IllegalArgumentException, IOException {
  Configuration conf = new Configuration(false);
  conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
  TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);

  Path p = spy(new Path("/user/hadoop/out"));
  Path a = new Path("hdfs://user/hadoop/out");
  Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
  Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
  Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");

  when(context.getTaskAttemptID()).thenReturn(tid0);
  when(context.getConfiguration()).thenReturn(conf);
this.configuration.set("mapreduce.output.basename", "tmp");

TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
    + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
    + Integer.toString(taskNumber + 1)
    + "_0");

this.configuration.set("mapred.task.id", taskAttemptID.toString());
this.configuration.setInt("mapred.task.partition", taskNumber + 1);
// also set the new-API (mapreduce.*) equivalents
this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);

this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));

this.context.getCredentials().addAll(this.credentials);
Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
if (currentUserCreds != null) {
  this.context.getCredentials().addAll(currentUserCreds);
}

// ...
this.configuration.set("mapreduce.task.output.dir",
    ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
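As a side note, the manual space-to-zero padding above can be expressed with a single zero-padded format specifier; this is an equivalent rewrite for readability, not the code used in the snippet:

// Equivalent construction of the attempt id string using "%06d"
// instead of building the padding by hand.
TaskAttemptID taskAttemptID =
    TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));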
@Test
public void testGetOutputCommitter() {
  try {
    TaskAttemptContext context = new TaskAttemptContextImpl(new Configuration(),
        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
    context.getConfiguration().set("mapred.output.dir", "/out");
    Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
  } catch (IOException e) {
    LOG.error("Exception encountered ", e);
    Assert.fail("Unable to get output committer");
  }
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());

  Configuration conf = context.getConfiguration();
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

  String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  cubeConfig = cube.getConfig();
  baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
  baseCuboidRowCountInMappers = Lists.newLinkedList();

  output = conf.get(BatchConstants.CFG_OUTPUT_PATH);
  samplingPercentage = Integer
      .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
  taskId = context.getTaskAttemptID().getTaskID().getId();
  cuboidHLLMap = Maps.newHashMap();
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.deduplicate = conf.getBoolean("dedup", true);
  if (deduplicate)
    this.threshold = conf.getFloat("threshold", 1E-5f);
  this.columnBoundaries = SpatialSite.getReduceSpace(context.getConfiguration());
  Path outputPath = DelaunayTriangulationOutputFormat.getOutputPath(context);
  Path finalPath = new Path(outputPath,
      String.format("m-%05d.final", context.getTaskAttemptID().getTaskID().getId()));
  FileSystem fs = finalPath.getFileSystem(context.getConfiguration());
  writer = new TriangulationRecordWriter(fs, null, finalPath, context);
}
private String generateKey(TaskAttemptContext context) throws IOException {
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE.
    throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
  }
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}
public static Path makeTmpPath(
    final Path basePath,
    final FileSystem fs,
    final DataSegment segmentTemplate,
    final TaskAttemptID taskAttemptID,
    DataSegmentPusher dataSegmentPusher
)
{
  return new Path(
      prependFSIfNullScheme(fs, basePath),
      StringUtils.format(
          "./%s.%d",
          dataSegmentPusher.makeIndexPathName(segmentTemplate, JobHelper.INDEX_ZIP),
          taskAttemptID.getId()
      )
  );
}
private void testSplitRecordsForFile(Configuration conf, long firstSplitLength,
    long testFileSize, Path testFilePath) throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  assertTrue("unexpected test data at " + testFilePath, testFileSize > firstSplitLength);

  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
  }

  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
@Test
public void testRecordReader() throws Exception {
  List<String> paths = Lists.newArrayList("/path1", "/path2");
  GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);
  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();

  RecordReader<LongWritable, Text> recordReader = inputFormat.createRecordReader(split,
      new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1, TaskType.MAP, 1, 1)));

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");

  recordReader.nextKeyValue();
  Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
  Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");

  Assert.assertFalse(recordReader.nextKeyValue());
}