public static void makeTempPath( Configuration conf ) throws IOException
  {
  // create job specific temporary directory in output path
  Path outputPath = FileOutputFormat.getOutputPath( asJobConfInstance( conf ) );

  if( outputPath != null )
    {
    Path tmpDir = new Path( outputPath, TEMPORARY_PATH );
    FileSystem fileSys = tmpDir.getFileSystem( conf );

    if( !fileSys.exists( tmpDir ) && !fileSys.mkdirs( tmpDir ) )
      LOG.error( "mkdirs failed to create {}", tmpDir );
    }
  }
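For context, a minimal driver sketch of how this is typically invoked, after the job's output path has been set (the output path value is hypothetical, and TEMPORARY_PATH conventionally resolves to "_temporary"):

// Hypothetical usage sketch, not from this codebase.
JobConf jobConf = new JobConf();
FileOutputFormat.setOutputPath( jobConf, new Path( "/tmp/job-output" ) ); // standard mapred API
Hadoop18TapUtil.makeTempPath( jobConf ); // creates /tmp/job-output/_temporary if absent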
static void setWorkOutputPath( Configuration conf, Path outputDir )
  {
  outputDir = new Path( asJobConfInstance( conf ).getWorkingDirectory(), outputDir );
  conf.set( "mapred.work.output.dir", outputDir.toString() );
  }
private static Path getTaskOutputPath( Configuration conf )
  {
  String taskId = conf.get( "mapred.task.id", conf.get( "mapreduce.task.id" ) );

  Path p = new Path( FileOutputFormat.getOutputPath( asJobConfInstance( conf ) ), TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId );

  try
    {
    FileSystem fs = p.getFileSystem( conf );

    return p.makeQualified( fs );
    }
  catch( IOException ie )
    {
    return p;
    }
  }
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf )
  {
  JobConf jobConf = asJobConfInstance( conf );

  String paths = jobConf.get( "mapred.input.dir", "" );

  if( hasZippedFiles( paths ) )
    throw new IllegalStateException( "cannot read zip files: " + paths );

  conf.setBoolean( "mapred.mapper.new-api", false );
  conf.setClass( "mapred.input.format.class", TextInputFormat.class, InputFormat.class );
  }
protected static void verifyNoDuplicates( Configuration conf )
  {
  Path[] inputPaths = FileInputFormat.getInputPaths( HadoopUtil.asJobConfInstance( conf ) );
  Set<Path> paths = new HashSet<Path>( (int) ( inputPaths.length / .75f ) );

  for( Path inputPath : inputPaths )
    {
    if( !paths.add( inputPath ) )
      throw new TapException( "may not add duplicate paths, found: " + inputPath );
    }
  }
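A sketch of how a duplicate input path trips this check (the path values are assumed for illustration):

// Hypothetical sketch: registering the same input path twice makes verifyNoDuplicates throw.
JobConf jobConf = new JobConf();
FileInputFormat.setInputPaths( jobConf, new Path( "/data/in" ), new Path( "/data/in" ) ); // duplicate on purpose
verifyNoDuplicates( jobConf ); // throws TapException: "may not add duplicate paths, found: /data/in"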
private RecordReader makeReader( int currentSplit ) throws IOException
  {
  LOG.debug( "reading split: {}", currentSplit );

  Reporter reporter = Reporter.NULL;

  if( flowProcess instanceof MapRed )
    reporter = ( (MapRed) flowProcess ).getReporter(); // may return Reporter.NULL

  return inputFormat.getRecordReader( splits[ currentSplit ], asJobConfInstance( conf ), reporter );
  }
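The reader returned above follows the old mapred RecordReader contract. A minimal consumption sketch, assuming a single split and a placeholder process(...) handler (neither is from this codebase):

// Hypothetical sketch of draining a mapred RecordReader; key/value types depend on the InputFormat.
RecordReader reader = makeReader( 0 );
Object key = reader.createKey();
Object value = reader.createValue();

while( reader.next( key, value ) )
  process( key, value ); // process(...) stands in for downstream handling

reader.close();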
public static void writeSuccessMarker( Configuration conf ) throws IOException
  {
  writeSuccessMarker( conf, FileOutputFormat.getOutputPath( asJobConfInstance( conf ) ) );
  }
@SuppressWarnings("unchecked") @Override public void configure(Configuration config) { this.jobConf = HadoopUtil.asJobConfInstance(FlinkConfigConverter.toHadoopConfig(config)); // set the correct class loader // not necessary for Flink versions >= 0.10 but we set this anyway to be on the safe side jobConf.setClassLoader(this.getClass().getClassLoader()); this.mapredInputFormat = jobConf.getInputFormat(); if (this.mapredInputFormat instanceof JobConfigurable) { ((JobConfigurable) this.mapredInputFormat).configure(jobConf); } }
protected void initialize() throws IOException
  {
  tap.sinkConfInit( flowProcess, conf );

  OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat();

  // todo: use OutputCommitter class
  isFileOutputFormat = outputFormat instanceof FileOutputFormat;

  if( isFileOutputFormat )
    {
    Hadoop18TapUtil.setupJob( conf );
    Hadoop18TapUtil.setupTask( conf );

    int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) );

    long localSequence = sequence == -1 ? 0 : sequence;

    if( prefix != null )
      filename = String.format( filenamePattern, prefix, "/", partition, localSequence );
    else
      filename = String.format( filenamePattern, "", "", partition, localSequence );
    }

  LOG.info( "creating path: {}", filename );

  writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() );
  }
/**
 * May only be called once. Should only be called if not in a flow.
 *
 * @param conf
 */
public static void cleanupJob( Configuration conf ) throws IOException
  {
  if( HadoopUtil.isInflow( conf ) )
    return;

  Path outputPath = FileOutputFormat.getOutputPath( asJobConfInstance( conf ) );

  cleanTempPath( conf, outputPath );
  }
private void initialize() throws IOException
  {
  // prevent collisions of configuration properties set client side if not cluster side
  String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) );

  if( property == null )
    {
    // default behavior is to accumulate paths, so remove any set prior
    conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2
    tap.sourceConfInit( flowProcess, conf );
    }

  JobConf jobConf = asJobConfInstance( conf );

  inputFormat = jobConf.getInputFormat();

  if( inputFormat instanceof JobConfigurable )
    ( (JobConfigurable) inputFormat ).configure( jobConf );

  // do not test for existence, let hadoop decide how to handle the given path
  // this delegates globbing to the inputformat on split generation
  splits = inputFormat.getSplits( jobConf, 1 );

  if( splits.length == 0 )
    complete = true;
  }