/**
 * Constructor.
 *
 * @param ctx Task attempt context.
 * @param delegate Delegate.
 * @throws IOException If failed.
 */
private TestOutputCommitter(TaskAttemptContext ctx, FileOutputCommitter delegate) throws IOException {
    super(FileOutputFormat.getOutputPath(ctx), ctx);

    this.delegate = delegate;
}
/** {@inheritDoc} */
@Override public void abortTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.abortTask(taskCtx);
}
}
/** {@inheritDoc} */
@Override public void setupTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.setupTask(taskCtx);
}
private void configureInputAndOutputPaths(Job job) throws IOException {
    for (Path inputPath : getInputPaths()) {
        FileInputFormat.addInputPath(job, inputPath);
    }

    // The MR output path must not exist when the MR job starts, so delete it if it exists.
    this.tmpFs.delete(this.dataset.outputTmpPath(), true);
    FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Prevent the creation of a zero-sized default output file.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
static void setWorkOutputPath(TaskAttemptContext context) throws IOException {
    String outputPath = context.getConfiguration().get("mapred.output.dir");
    // We need to resolve the task work path and set it for the mapred implementation manually,
    // since it can't be done automatically because of the mapreduce -> mapred abstraction.
    if (outputPath != null)
        context.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString());
}
}
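// Illustrative sketch (not from the original sources): setWorkOutputPath() is
// typically called from a task-side hook such as a Mapper's setup(), before any
// old-API ("mapred") code reads "mapred.work.output.dir". The mapper class below
// and its type parameters are hypothetical; it assumes it lives in the same class
// that declares setWorkOutputPath().
public static class LegacyAwareMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) throws IOException {
        // Resolve and publish the per-task work directory for old-API consumers.
        setWorkOutputPath(context);
    }
}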
/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 * @throws IOException If the work path or target file system cannot be accessed.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
            getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
                    org.apache.avro.mapred.AvroOutputFormat.EXT));
    return path.getFileSystem(context.getConfiguration()).create(path);
}
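// A hedged usage sketch (assumed, not part of the original source): the returned
// stream is handed to an Avro DataFileWriter, which writes the container-file
// header and data blocks. The method name, schema, and record list are illustrative.
protected void writeAvroRecords(TaskAttemptContext context, Schema schema, List<GenericRecord> records)
        throws IOException {
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
        // create(...) writes the header; closing the writer flushes and closes the stream.
        writer.create(schema, getAvroFileOutputStream(context));
        for (GenericRecord record : records)
            writer.append(record);
    }
}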
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
    }
}
private void outputDimRangeInfo() throws IOException, InterruptedException {
    if (col != null && minValue != null) {
        // Output is written to baseDir/colName/colName.dci-r-00000 (etc.).
        String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;

        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        logger.info("Wrote dimension range info for col: " + col.getName()
                + ", minValue: " + minValue + ", maxValue: " + maxValue);
    }
}
/** {@inheritDoc} */
@Override public void commitTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.commitTask(taskCtx);
}
/** {@inheritDoc} */
@Override public void setupJob(JobContext jobCtx) throws IOException {
    try {
        // Block until the external test code removes the lock file.
        while (setupLockFile.exists())
            Thread.sleep(50);
    }
    catch (InterruptedException ignored) {
        throw new IOException("Interrupted.");
    }

    delegate.setupJob(jobCtx);
}
/** {@inheritDoc} */
@Override public boolean needsTaskCommit(TaskAttemptContext taskCtx) throws IOException {
    return delegate.needsTaskCommit(taskCtx);
}
/** {@inheritDoc} */
@Override public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext ctx) throws IOException {
    return new TestOutputCommitter(ctx, (FileOutputCommitter)super.getOutputCommitter(ctx));
}
}
/**
 * Gets a fully configured Job instance.
 *
 * @param input Input file name.
 * @param output Output directory name.
 * @return Job instance.
 * @throws IOException If failed.
 */
public static Job getJob(String input, String output) throws IOException {
    Job job = Job.getInstance();

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    setTasksClasses(job, true, true, true, false);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setJarByClass(HadoopWordCount2.class);

    return job;
}
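// A minimal driver sketch (assumption, not in the original source): submit the
// configured word-count job and block until completion. The input/output paths
// are placeholders.
public static void main(String[] args) throws Exception {
    Job job = getJob("/tmp/wordcount/input.txt", "/tmp/wordcount/output");

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}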
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has {} reducers.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is " + numberOfReducers
                        + "; decrease 'kylin.engine.mr.uhc-reducer-count'.");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // Make each reducer output to its respective dir.
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Prevent the creation of a zero-sized default output file.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
@Override
public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
    // Lazily create the committer once and reuse it for subsequent calls.
    if (this.committer == null) {
        this.committer = new AvroKeyCompactorOutputCommitter(FileOutputFormat.getOutputPath(context), context);
    }
    return this.committer;
}
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
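// A hedged sketch of how exists(...) and openInputStream(...) pair up: with
// compression enabled, both helpers append the codec's default extension (e.g.
// ".gz" for GzipCodec) before touching the file system, so callers pass the
// uncompressed logical path. The method name and path below are illustrative.
public static void printSideOutput(JobContext job, FileSystem fileSystem) throws IOException {
    Path sideFile = new Path("/tmp/side-output/part-r-00000");
    if (exists(job, fileSystem, sideFile)) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(openInputStream(job, sideFile, fileSystem), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null)
                System.out.println(line);
        }
    }
}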
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) {
        return;
    }

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(key, value, generateFileName(baseOutputPath));
}
protected void runJob(String jobName, Configuration c, List<Scan> scans)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(c, jobName);

    initJob(scans, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key

    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);
    int fullKeySize = buildKey(cuboid, rowKeySplitter.getSplitBuffers());
    outputKey.set(newKeyBuf.array(), 0, fullKeySize);

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(outputKey, value, generateFileName(baseOutputPath));
}