public static void makeTempPath( Configuration conf ) throws IOException
  {
  // create job specific temporary directory in output path
  Path outputPath = FileOutputFormat.getOutputPath( asJobConfInstance( conf ) );

  if( outputPath != null )
    {
    Path tmpDir = new Path( outputPath, TEMPORARY_PATH );
    FileSystem fileSys = tmpDir.getFileSystem( conf );

    if( !fileSys.exists( tmpDir ) && !fileSys.mkdirs( tmpDir ) )
      LOG.error( "mkdirs failed to create {}", tmpDir );
    }
  }
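For context, a minimal driver sketch of how this is typically invoked, after the job's output path has been set (the output path value is hypothetical, and TEMPORARY_PATH conventionally resolves to "_temporary"):

// Hypothetical usage sketch, not from this codebase.
JobConf jobConf = new JobConf();
FileOutputFormat.setOutputPath( jobConf, new Path( "/tmp/job-output" ) ); // standard mapred API
Hadoop18TapUtil.makeTempPath( jobConf ); // creates /tmp/job-output/_temporary if absent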
static void setWorkOutputPath( Configuration conf, Path outputDir )
  {
  outputDir = new Path( asJobConfInstance( conf ).getWorkingDirectory(), outputDir );
  conf.set( "mapred.work.output.dir", outputDir.toString() );
  }
private static Path getTaskOutputPath( Configuration conf )
  {
  String taskId = conf.get( "mapred.task.id", conf.get( "mapreduce.task.id" ) );

  Path p = new Path( FileOutputFormat.getOutputPath( asJobConfInstance( conf ) ), TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId );

  try
    {
    FileSystem fs = p.getFileSystem( conf );

    return p.makeQualified( fs );
    }
  catch( IOException ie )
    {
    return p;
    }
  }
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf )
  {
  JobConf jobConf = asJobConfInstance( conf );

  String paths = jobConf.get( "mapred.input.dir", "" );

  if( hasZippedFiles( paths ) )
    throw new IllegalStateException( "cannot read zip files: " + paths );

  conf.setBoolean( "mapred.mapper.new-api", false );
  conf.setClass( "mapred.input.format.class", TextInputFormat.class, InputFormat.class );
  }
protected static void verifyNoDuplicates( Configuration conf )
  {
  Path[] inputPaths = FileInputFormat.getInputPaths( HadoopUtil.asJobConfInstance( conf ) );
  Set<Path> paths = new HashSet<Path>( (int) ( inputPaths.length / .75f ) );

  for( Path inputPath : inputPaths )
    {
    if( !paths.add( inputPath ) )
      throw new TapException( "may not add duplicate paths, found: " + inputPath );
    }
  }
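A sketch of how a duplicate input path trips this check (the path values are assumed for illustration):

// Hypothetical sketch: registering the same input path twice makes verifyNoDuplicates throw.
JobConf jobConf = new JobConf();
FileInputFormat.setInputPaths( jobConf, new Path( "/data/in" ), new Path( "/data/in" ) ); // duplicate on purpose
verifyNoDuplicates( jobConf ); // throws TapException: "may not add duplicate paths, found: /data/in"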
private RecordReader makeReader( int currentSplit ) throws IOException
  {
  LOG.debug( "reading split: {}", currentSplit );

  Reporter reporter = Reporter.NULL;

  if( flowProcess instanceof MapRed )
    reporter = ( (MapRed) flowProcess ).getReporter(); // may return Reporter.NULL

  return inputFormat.getRecordReader( splits[ currentSplit ], asJobConfInstance( conf ), reporter );
  }
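The reader returned above follows the old mapred RecordReader contract. A minimal consumption sketch, assuming a single split and a placeholder process(...) handler (neither is from this codebase):

// Hypothetical sketch of draining a mapred RecordReader; key/value types depend on the InputFormat.
RecordReader reader = makeReader( 0 );
Object key = reader.createKey();
Object value = reader.createValue();

while( reader.next( key, value ) )
  process( key, value ); // process(...) stands in for downstream handling

reader.close();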
public static void writeSuccessMarker( Configuration conf ) throws IOException
  {
  writeSuccessMarker( conf, FileOutputFormat.getOutputPath( asJobConfInstance( conf ) ) );
  }
@SuppressWarnings("unchecked") @Override public void configure(Configuration config) { this.jobConf = HadoopUtil.asJobConfInstance(FlinkConfigConverter.toHadoopConfig(config)); // set the correct class loader // not necessary for Flink versions >= 0.10 but we set this anyway to be on the safe side jobConf.setClassLoader(this.getClass().getClassLoader()); this.mapredInputFormat = jobConf.getInputFormat(); if (this.mapredInputFormat instanceof JobConfigurable) { ((JobConfigurable) this.mapredInputFormat).configure(jobConf); } }
protected void initialize() throws IOException
  {
  tap.sinkConfInit( flowProcess, conf );

  OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat();

  // todo: use OutputCommitter class
  isFileOutputFormat = outputFormat instanceof FileOutputFormat;

  if( isFileOutputFormat )
    {
    Hadoop18TapUtil.setupJob( conf );
    Hadoop18TapUtil.setupTask( conf );

    int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) );

    long localSequence = sequence == -1 ? 0 : sequence;

    if( prefix != null )
      filename = String.format( filenamePattern, prefix, "/", partition, localSequence );
    else
      filename = String.format( filenamePattern, "", "", partition, localSequence );
    }

  LOG.info( "creating path: {}", filename );

  writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() );
  }
/**
 * May only be called once. Should only be called if not in a flow.
 *
 * @param conf
 */
public static void cleanupJob( Configuration conf ) throws IOException
  {
  if( HadoopUtil.isInflow( conf ) )
    return;

  Path outputPath = FileOutputFormat.getOutputPath( asJobConfInstance( conf ) );

  cleanTempPath( conf, outputPath );
  }
private void initialize() throws IOException
  {
  // prevent collisions of configuration properties set client side if not cluster side
  String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) );

  if( property == null )
    {
    // default behavior is to accumulate paths, so remove any set prior
    conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2
    tap.sourceConfInit( flowProcess, conf );
    }

  JobConf jobConf = asJobConfInstance( conf );

  inputFormat = jobConf.getInputFormat();

  if( inputFormat instanceof JobConfigurable )
    ( (JobConfigurable) inputFormat ).configure( jobConf );

  // do not test for existence, let hadoop decide how to handle the given path
  // this delegates globbing to the inputformat on split generation
  splits = inputFormat.getSplits( jobConf, 1 );

  if( splits.length == 0 )
    complete = true;
  }