@Override
public void sourceConfInit(FlowProcess<Object> flowProcess, Object conf) {
  initInnerTapIfNotSetFromFlowProcess(flowProcess);
  actualTap.sourceConfInit(flowProcess, conf);
}
@Override
public void sourceConfInit( FlowProcess<? extends Config> flowProcess, Config conf ) {
  original.sourceConfInit( flowProcess, conf );
}

@Override
public void sourceConfInit( FlowProcess<? extends TConfig> flowProcess, TConfig conf ) {
  original.sourceConfInit( processProvider.apply( flowProcess ), configProvider.apply( conf ) );
}
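The two overrides above are delegation patterns: a wrapper tap forwards sourceConfInit to the tap it decorates, optionally adapting the FlowProcess and config on the way through. Below is a minimal sketch of such a decorator, assuming only Cascading's public Tap API; the DelegatingTap class name is illustrative, not from any real library.

import cascading.flow.FlowProcess;
import cascading.tap.Tap;

// illustrative sketch; only Tap and FlowProcess come from Cascading
public abstract class DelegatingTap<Config, Input, Output> extends Tap<Config, Input, Output> {
  protected final Tap<Config, Input, Output> original;

  protected DelegatingTap( Tap<Config, Input, Output> original ) {
    super( original.getScheme() );
    this.original = original;
  }

  @Override
  public void sourceConfInit( FlowProcess<? extends Config> flowProcess, Config conf ) {
    // forward config priming to the wrapped tap; the remaining abstract
    // Tap methods would delegate to 'original' in the same fashion
    original.sourceConfInit( flowProcess, conf );
  }
}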
@Override
public void sourceConfInit( FlowProcess<? extends Config> process, Config conf ) {
  for( Tap tap : getTaps() )
    tap.sourceConfInit( process, conf );
}
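A hedged usage sketch of the fan-out above: Cascading's stock MultiSourceTap applies exactly this loop, calling sourceConfInit on every child tap. The file paths below are hypothetical placeholders.

import java.io.InputStream;
import java.util.Properties;
import cascading.scheme.local.TextLine;
import cascading.tap.MultiSourceTap;
import cascading.tap.local.FileTap;

public class CompositeSourceExample {
  public static MultiSourceTap<FileTap, Properties, InputStream> createSource() {
    FileTap first = new FileTap( new TextLine(), "data/part-1.txt" );  // hypothetical path
    FileTap second = new FileTap( new TextLine(), "data/part-2.txt" ); // hypothetical path

    // sourceConfInit on the composite iterates over both children, as in the loop above
    return new MultiSourceTap<FileTap, Properties, InputStream>( first, second );
  }
}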
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) source;
  JobConf conf = new JobConf();
  FlowProcess<JobConf> flowProcess = new HadoopFlowProcess(conf);

  hadoopTap.sourceConfInit(flowProcess, conf);

  return hadoopTap.openForRead(flowProcess);
}

private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) source;
  Properties properties = new Properties();
  FlowProcess<Properties> flowProcess = new LocalFlowProcess(properties);

  localTap.sourceConfInit(flowProcess, properties);

  return localTap.openForRead(flowProcess);
}
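The two helpers above show the idiom for reading a tap outside of any Flow: prime the config with sourceConfInit, then open the tap against the same FlowProcess. A runnable sketch of the local variant, assuming only stock Cascading local-mode classes; the input path is a placeholder.

import java.io.IOException;
import java.util.Properties;
import cascading.flow.FlowProcess;
import cascading.flow.local.LocalFlowProcess;
import cascading.scheme.local.TextLine;
import cascading.tap.local.FileTap;
import cascading.tuple.TupleEntryIterator;

public class DirectReadExample {
  public static void main(String[] args) throws IOException {
    FileTap tap = new FileTap( new TextLine(), "data/input.txt" ); // hypothetical path
    Properties properties = new Properties();
    FlowProcess<Properties> flowProcess = new LocalFlowProcess( properties );

    // give the tap a chance to prime the config before opening it
    tap.sourceConfInit( flowProcess, properties );

    try( TupleEntryIterator iterator = tap.openForRead( flowProcess ) ) {
      while( iterator.hasNext() )
        System.out.println( iterator.next().getTuple() );
    }
  }
}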
protected void initTaps( FlowProcess<Properties> flowProcess, Properties conf, Set<Tap> taps, boolean isSink ) {
  if( !taps.isEmpty() ) {
    for( Tap tap : taps ) {
      Properties confCopy = flowProcess.copyConfig( conf );

      tapProperties.put( tap, confCopy ); // todo: store the diff, not the copy

      if( isSink )
        tap.sinkConfInit( flowProcess, confCopy );
      else
        tap.sourceConfInit( flowProcess, confCopy );
    }
  }
}
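A small sketch of why initTaps copies the config per tap: whatever a tap mutates during sourceConfInit stays in its own copy rather than leaking into the base config or a sibling tap's. Property names here are illustrative, and the isolation guarantee is an assumption about copyConfig's copy semantics.

import java.util.Properties;
import cascading.flow.FlowProcess;
import cascading.flow.local.LocalFlowProcess;

public class PerTapConfigSketch {
  public static void main(String[] args) {
    Properties base = new Properties();
    base.setProperty( "shared.key", "base" );

    FlowProcess<Properties> flowProcess = new LocalFlowProcess( base );

    // each tap would receive its own copy, mirroring the loop above
    Properties copyForTapA = flowProcess.copyConfig( base );
    copyForTapA.setProperty( "tap.key", "a-only" );

    // the base config is untouched by tap-local mutations (assumed copy semantics)
    System.out.println( base.getProperty( "tap.key" ) ); // expected: null
  }
}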
protected void sourceConfInitComplete( FlowProcess<? extends Configuration> process, Configuration conf ) {
  super.sourceConfInit( process, conf );

  TupleSerialization.setSerializations( conf ); // allows Hfs to be used independent of Flow

  // use CombineFileInputFormat if that is enabled
  handleCombineFileInputFormat( conf );
}
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> process, Configuration conf ) {
  if( username == null )
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl );
  else
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password );

  super.sourceConfInit( process, conf );
}

@Override
public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) {
  // a hack for MultiInputFormat to see that there is a child format
  FileInputFormat.setInputPaths( conf, getPath() );

  if(quorumNames != null) {
    conf.set("hbase.zookeeper.quorum", quorumNames);
  }

  LOG.debug("sourcing from table: {}", tableName);
  TableInputFormat.setTableName(conf, tableName);
  super.sourceConfInit(process, conf);
}
// (reconstructed) guard implied by the exception message: reject taps lacking an identifier
if( tap.getIdentifier() == null )
  throw new IllegalStateException( "tap may not have null identifier: " + tap.toString() );

tap.sourceConfInit( flowProcess, current );
// streamed sources are each initialized against their own per-source job conf
tap.sourceConfInit( flowProcess, streamedJobs[ i ] );

// accumulated sources all share a single job conf
tap.sourceConfInit( flowProcess, accumulatedJob );
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
  Tap tap = this.getSingle(node.getSourceTaps());
  JobConf tapConfig = new JobConf(this.getNodeConfig(node));
  tap.sourceConfInit(flowProcess, tapConfig);
  tapConfig.set( "cascading.step.source", Tap.id( tap ) );

  Fields outFields = tap.getSourceFields();
  registerKryoTypes(outFields);

  JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
  MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

  DataSet<Tuple> src = env
    .createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
    .name(tap.getIdentifier())
    .setParallelism(dop)
    .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

  return src;
}
private void initialize() throws IOException {
  // prevent collisions with configuration properties set client side when running cluster side
  String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) );

  if( property == null ) {
    // default behavior is to accumulate paths, so remove any set prior
    conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2

    tap.sourceConfInit( flowProcess, conf );
  }

  JobConf jobConf = asJobConfInstance( conf );

  inputFormat = jobConf.getInputFormat();

  if( inputFormat instanceof JobConfigurable )
    ( (JobConfigurable) inputFormat ).configure( jobConf );

  // do not test for existence, let hadoop decide how to handle the given path
  // this delegates globbing to the inputformat on split generation
  splits = inputFormat.getSplits( jobConf, 1 );

  if( splits.length == 0 )
    complete = true;
}
@Test
public void testCombinedHfs() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Hfs sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputFileLower );
  Hfs sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputFileUpper );

  // create a CombinedHfs instance on these files
  Tap source = new MultiSourceTap<Hfs, JobConf, RecordReader>( sourceLower, sourceUpper );

  FlowProcess<JobConf> process = getPlatform().getFlowProcess();
  JobConf conf = process.getConfigCopy();

  // set the combine flag
  conf.setBoolean( HfsProps.COMBINE_INPUT_FILES, true );

  conf.set( "cascading.flow.platform", "hadoop" ); // only supported on mr based platforms

  // test the input format and the split
  source.sourceConfInit( process, conf );

  InputFormat inputFormat = conf.getInputFormat();

  assertEquals( Hfs.CombinedInputFormat.class, inputFormat.getClass() );

  InputSplit[] splits = inputFormat.getSplits( conf, 1 );

  assertEquals( 1, splits.length );

  validateLength( source.openForRead( process ), 10 );
}