/**
 * Constructor.
 *
 * @param ctx Task attempt context.
 * @param delegate Delegate.
 * @throws IOException If failed.
 */
private TestOutputCommitter(TaskAttemptContext ctx, FileOutputCommitter delegate) throws IOException {
    super(FileOutputFormat.getOutputPath(ctx), ctx);

    this.delegate = delegate;
}
/** {@inheritDoc} */
@Override public void abortTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.abortTask(taskCtx);
}
}
/** {@inheritDoc} */
@Override public void setupTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.setupTask(taskCtx);
}
private void configureInputAndOutputPaths(Job job) throws IOException {
    for (Path inputPath : getInputPaths()) {
        FileInputFormat.addInputPath(job, inputPath);
    }

    // The MR output path must not exist when the MR job starts, so delete it if it exists.
    this.tmpFs.delete(this.dataset.outputTmpPath(), true);
    FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Prevent the creation of a zero-sized default output file.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
static void setWorkOutputPath(TaskAttemptContext context) throws IOException {
    String outputPath = context.getConfiguration().get("mapred.output.dir");
    // We need to resolve the task work path and set it for the mapred implementation manually,
    // since it can't be done automatically because of the mapreduce -> mapred abstraction.
    if (outputPath != null)
        context.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString());
}
}
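// Illustrative sketch (not from the original sources): setWorkOutputPath() is
// typically called from a task-side hook such as a Mapper's setup(), before any
// old-API ("mapred") code reads "mapred.work.output.dir". The mapper class below
// and its type parameters are hypothetical; it assumes it lives in the same class
// that declares setWorkOutputPath().
public static class LegacyAwareMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) throws IOException {
        // Resolve and publish the per-task work directory for old-API consumers.
        setWorkOutputPath(context);
    }
}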
/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 * @throws IOException If the work path or target file system cannot be accessed.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
    Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
            getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
                    org.apache.avro.mapred.AvroOutputFormat.EXT));
    return path.getFileSystem(context.getConfiguration()).create(path);
}
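// A hedged usage sketch (assumed, not part of the original source): the returned
// stream is handed to an Avro DataFileWriter, which writes the container-file
// header and data blocks. The method name, schema, and record list are illustrative.
protected void writeAvroRecords(TaskAttemptContext context, Schema schema, List<GenericRecord> records)
        throws IOException {
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
        // create(...) writes the header; closing the writer flushes and closes the stream.
        writer.create(schema, getAvroFileOutputStream(context));
        for (GenericRecord record : records)
            writer.append(record);
    }
}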
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
    }
}
private void outputDimRangeInfo() throws IOException, InterruptedException {
    if (col != null && minValue != null) {
        // Output is written to baseDir/colName/colName.dci-r-00000 (etc.).
        String dimRangeFileName = col.getIdentity() + "/" + col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;

        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(),
                new Text(maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        logger.info("Wrote dimension range info for col: " + col.getName()
                + ", minValue: " + minValue + ", maxValue: " + maxValue);
    }
}
/** {@inheritDoc} */
@Override public void commitTask(TaskAttemptContext taskCtx) throws IOException {
    delegate.commitTask(taskCtx);
}
/** {@inheritDoc} */
@Override public void setupJob(JobContext jobCtx) throws IOException {
    try {
        // Block until the external test code removes the lock file.
        while (setupLockFile.exists())
            Thread.sleep(50);
    }
    catch (InterruptedException ignored) {
        throw new IOException("Interrupted.");
    }

    delegate.setupJob(jobCtx);
}
/** {@inheritDoc} */
@Override public boolean needsTaskCommit(TaskAttemptContext taskCtx) throws IOException {
    return delegate.needsTaskCommit(taskCtx);
}
/** {@inheritDoc} */
@Override public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext ctx) throws IOException {
    return new TestOutputCommitter(ctx, (FileOutputCommitter)super.getOutputCommitter(ctx));
}
}
/**
 * Gets a fully configured Job instance.
 *
 * @param input Input file name.
 * @param output Output directory name.
 * @return Job instance.
 * @throws IOException If failed.
 */
public static Job getJob(String input, String output) throws IOException {
    Job job = Job.getInstance();

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    setTasksClasses(job, true, true, true, false);

    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setJarByClass(HadoopWordCount2.class);

    return job;
}
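// A minimal driver sketch (assumption, not in the original source): submit the
// configured word-count job and block until completion. The input/output paths
// are placeholders.
public static void main(String[] args) throws Exception {
    Job job = getJob("/tmp/wordcount/input.txt", "/tmp/wordcount/output");

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}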
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has {} reducers.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is " + numberOfReducers
                        + "; decrease 'kylin.engine.mr.uhc-reducer-count'.");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // Make each reducer output to its respective dir.
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // Prevent the creation of a zero-sized default output file.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
@Override
public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
    // Lazily create the committer once and reuse it for subsequent calls.
    if (this.committer == null) {
        this.committer = new AvroKeyCompactorOutputCommitter(FileOutputFormat.getOutputPath(context), context);
    }
    return this.committer;
}
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
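// A hedged sketch of how exists(...) and openInputStream(...) pair up: with
// compression enabled, both helpers append the codec's default extension (e.g.
// ".gz" for GzipCodec) before touching the file system, so callers pass the
// uncompressed logical path. The method name and path below are illustrative.
public static void printSideOutput(JobContext job, FileSystem fileSystem) throws IOException {
    Path sideFile = new Path("/tmp/side-output/part-r-00000");
    if (exists(job, fileSystem, sideFile)) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(openInputStream(job, sideFile, fileSystem), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null)
                System.out.println(line);
        }
    }
}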
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    if (cuboidID != baseCuboid && !recommendCuboids.contains(cuboidID)) {
        return;
    }

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(key, value, generateFileName(baseOutputPath));
}
protected void runJob(String jobName, Configuration c, List<Scan> scans)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(c, jobName);

    initJob(scans, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key

    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidID = rowKeySplitter.split(key.getBytes());
    Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);
    int fullKeySize = buildKey(cuboid, rowKeySplitter.getSplitBuffers());
    outputKey.set(newKeyBuf.array(), 0, fullKeySize);

    String baseOutputPath = PathNameCuboidOld;
    if (cuboidID == baseCuboid) {
        baseOutputPath = PathNameCuboidBase;
    }
    mos.write(outputKey, value, generateFileName(baseOutputPath));
}