@Override
public void sourceConfInit(FlowProcess<Object> flowProcess, Object conf) {
  initInnerTapIfNotSetFromFlowProcess(flowProcess);
  actualTap.sourceConfInit(flowProcess, conf);
}
@Override
public void sourceConfInit( FlowProcess<? extends Config> flowProcess, Config conf ) {
  original.sourceConfInit( flowProcess, conf );
}

@Override
public void sourceConfInit( FlowProcess<? extends TConfig> flowProcess, TConfig conf ) {
  original.sourceConfInit( processProvider.apply( flowProcess ), configProvider.apply( conf ) );
}
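The two overrides above are delegation patterns: a wrapper tap forwards sourceConfInit to the tap it decorates, optionally adapting the FlowProcess and config on the way through. Below is a minimal sketch of such a decorator, assuming only Cascading's public Tap API; the DelegatingTap class name is illustrative, not from any real library.

import cascading.flow.FlowProcess;
import cascading.tap.Tap;

// illustrative sketch; only Tap and FlowProcess come from Cascading
public abstract class DelegatingTap<Config, Input, Output> extends Tap<Config, Input, Output> {
  protected final Tap<Config, Input, Output> original;

  protected DelegatingTap( Tap<Config, Input, Output> original ) {
    super( original.getScheme() );
    this.original = original;
  }

  @Override
  public void sourceConfInit( FlowProcess<? extends Config> flowProcess, Config conf ) {
    // forward config priming to the wrapped tap; the remaining abstract
    // Tap methods would delegate to 'original' in the same fashion
    original.sourceConfInit( flowProcess, conf );
  }
}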
@Override
public void sourceConfInit( FlowProcess<? extends Config> process, Config conf ) {
  for( Tap tap : getTaps() )
    tap.sourceConfInit( process, conf );
}
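A hedged usage sketch of the fan-out above: Cascading's stock MultiSourceTap applies exactly this loop, calling sourceConfInit on every child tap. The file paths below are hypothetical placeholders.

import java.io.InputStream;
import java.util.Properties;
import cascading.scheme.local.TextLine;
import cascading.tap.MultiSourceTap;
import cascading.tap.local.FileTap;

public class CompositeSourceExample {
  public static MultiSourceTap<FileTap, Properties, InputStream> createSource() {
    FileTap first = new FileTap( new TextLine(), "data/part-1.txt" );  // hypothetical path
    FileTap second = new FileTap( new TextLine(), "data/part-2.txt" ); // hypothetical path

    // sourceConfInit on the composite iterates over both children, as in the loop above
    return new MultiSourceTap<FileTap, Properties, InputStream>( first, second );
  }
}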
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) source;
  JobConf conf = new JobConf();
  FlowProcess<JobConf> flowProcess = new HadoopFlowProcess(conf);

  hadoopTap.sourceConfInit(flowProcess, conf);

  return hadoopTap.openForRead(flowProcess);
}

private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) source;
  Properties properties = new Properties();
  FlowProcess<Properties> flowProcess = new LocalFlowProcess(properties);

  localTap.sourceConfInit(flowProcess, properties);

  return localTap.openForRead(flowProcess);
}
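The two helpers above show the idiom for reading a tap outside of any Flow: prime the config with sourceConfInit, then open the tap against the same FlowProcess. A runnable sketch of the local variant, assuming only stock Cascading local-mode classes; the input path is a placeholder.

import java.io.IOException;
import java.util.Properties;
import cascading.flow.FlowProcess;
import cascading.flow.local.LocalFlowProcess;
import cascading.scheme.local.TextLine;
import cascading.tap.local.FileTap;
import cascading.tuple.TupleEntryIterator;

public class DirectReadExample {
  public static void main(String[] args) throws IOException {
    FileTap tap = new FileTap( new TextLine(), "data/input.txt" ); // hypothetical path
    Properties properties = new Properties();
    FlowProcess<Properties> flowProcess = new LocalFlowProcess( properties );

    // give the tap a chance to prime the config before opening it
    tap.sourceConfInit( flowProcess, properties );

    try( TupleEntryIterator iterator = tap.openForRead( flowProcess ) ) {
      while( iterator.hasNext() )
        System.out.println( iterator.next().getTuple() );
    }
  }
}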
protected void initTaps( FlowProcess<Properties> flowProcess, Properties conf, Set<Tap> taps, boolean isSink ) {
  if( !taps.isEmpty() ) {
    for( Tap tap : taps ) {
      Properties confCopy = flowProcess.copyConfig( conf );

      tapProperties.put( tap, confCopy ); // todo: store the diff, not the copy

      if( isSink )
        tap.sinkConfInit( flowProcess, confCopy );
      else
        tap.sourceConfInit( flowProcess, confCopy );
    }
  }
}
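A small sketch of why initTaps copies the config per tap: whatever a tap mutates during sourceConfInit stays in its own copy rather than leaking into the base config or a sibling tap's. Property names here are illustrative, and the isolation guarantee is an assumption about copyConfig's copy semantics.

import java.util.Properties;
import cascading.flow.FlowProcess;
import cascading.flow.local.LocalFlowProcess;

public class PerTapConfigSketch {
  public static void main(String[] args) {
    Properties base = new Properties();
    base.setProperty( "shared.key", "base" );

    FlowProcess<Properties> flowProcess = new LocalFlowProcess( base );

    // each tap would receive its own copy, mirroring the loop above
    Properties copyForTapA = flowProcess.copyConfig( base );
    copyForTapA.setProperty( "tap.key", "a-only" );

    // the base config is untouched by tap-local mutations (assumed copy semantics)
    System.out.println( base.getProperty( "tap.key" ) ); // expected: null
  }
}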
protected void sourceConfInitComplete( FlowProcess<? extends Configuration> process, Configuration conf ) {
  super.sourceConfInit( process, conf );

  TupleSerialization.setSerializations( conf ); // allows Hfs to be used independent of Flow

  // use CombineFileInputFormat if that is enabled
  handleCombineFileInputFormat( conf );
}
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> process, Configuration conf ) {
  if( username == null )
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl );
  else
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password );

  super.sourceConfInit( process, conf );
}

@Override
public void sourceConfInit(FlowProcess<JobConf> process, JobConf conf) {
  // a hack for MultiInputFormat to see that there is a child format
  FileInputFormat.setInputPaths( conf, getPath() );

  if(quorumNames != null) {
    conf.set("hbase.zookeeper.quorum", quorumNames);
  }

  LOG.debug("sourcing from table: {}", tableName);
  TableInputFormat.setTableName(conf, tableName);
  super.sourceConfInit(process, conf);
}
// (reconstructed) guard implied by the exception message: reject taps lacking an identifier
if( tap.getIdentifier() == null )
  throw new IllegalStateException( "tap may not have null identifier: " + tap.toString() );

tap.sourceConfInit( flowProcess, current );
// streamed sources are each initialized against their own per-source job conf
tap.sourceConfInit( flowProcess, streamedJobs[ i ] );

// accumulated sources all share a single job conf
tap.sourceConfInit( flowProcess, accumulatedJob );
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
  Tap tap = this.getSingle(node.getSourceTaps());
  JobConf tapConfig = new JobConf(this.getNodeConfig(node));
  tap.sourceConfInit(flowProcess, tapConfig);
  tapConfig.set( "cascading.step.source", Tap.id( tap ) );

  Fields outFields = tap.getSourceFields();
  registerKryoTypes(outFields);

  JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
  MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

  DataSet<Tuple> src = env
    .createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
    .name(tap.getIdentifier())
    .setParallelism(dop)
    .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

  return src;
}
private void initialize() throws IOException {
  // prevent collisions with configuration properties set client side when running cluster side
  String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) );

  if( property == null ) {
    // default behavior is to accumulate paths, so remove any set prior
    conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2

    tap.sourceConfInit( flowProcess, conf );
  }

  JobConf jobConf = asJobConfInstance( conf );

  inputFormat = jobConf.getInputFormat();

  if( inputFormat instanceof JobConfigurable )
    ( (JobConfigurable) inputFormat ).configure( jobConf );

  // do not test for existence, let hadoop decide how to handle the given path
  // this delegates globbing to the inputformat on split generation
  splits = inputFormat.getSplits( jobConf, 1 );

  if( splits.length == 0 )
    complete = true;
}
@Test
public void testCombinedHfs() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Hfs sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputFileLower );
  Hfs sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputFileUpper );

  // create a CombinedHfs instance on these files
  Tap source = new MultiSourceTap<Hfs, JobConf, RecordReader>( sourceLower, sourceUpper );

  FlowProcess<JobConf> process = getPlatform().getFlowProcess();
  JobConf conf = process.getConfigCopy();

  // set the combine flag
  conf.setBoolean( HfsProps.COMBINE_INPUT_FILES, true );

  conf.set( "cascading.flow.platform", "hadoop" ); // only supported on mr based platforms

  // test the input format and the split
  source.sourceConfInit( process, conf );

  InputFormat inputFormat = conf.getInputFormat();

  assertEquals( Hfs.CombinedInputFormat.class, inputFormat.getClass() );

  InputSplit[] splits = inputFormat.getSplits( conf, 1 );

  assertEquals( 1, splits.length );

  validateLength( source.openForRead( process ), 10 );
}