/**
 * Contributes this tap's sink settings to the given configuration.
 * <p>
 * The wrapped tap may only be resolvable once a flow process is available,
 * so it is lazily initialized here before the call is forwarded.
 */
@Override
public void sinkConfInit( FlowProcess<Object> flowProcess, Object conf )
  {
  // resolve the delegate first -- only the flow process may know which tap to use
  initInnerTapIfNotSetFromFlowProcess( flowProcess );

  actualTap.sinkConfInit( flowProcess, conf );
  }
/**
 * Forwards sink configuration to the decorated tap unchanged.
 *
 * @param flowProcess the current flow process
 * @param conf        the configuration being initialized
 */
@Override
public void sinkConfInit( FlowProcess<? extends Config> flowProcess, Config conf )
  {
  // pure pass-through; this wrapper adds no sink-side behavior of its own
  original.sinkConfInit( flowProcess, conf );
  }
/**
 * Forwards sink configuration to the decorated tap, translating both the
 * flow process and the configuration through the configured adapters first.
 *
 * @param flowProcess the current flow process, adapted via {@code processProvider}
 * @param conf        the configuration, adapted via {@code configProvider}
 */
@Override
public void sinkConfInit( FlowProcess<? extends TConfig> flowProcess, TConfig conf )
  {
  // both arguments must be mapped into the delegate's world before handing off
  original.sinkConfInit(
    processProvider.apply( flowProcess ),
    configProvider.apply( conf )
  );
  }
/**
 * Lets every trap tap contribute its sink settings to a shared copy of the job conf.
 * <p>
 * A copy is used so trap-side changes never leak into the primary job configuration.
 *
 * @param flowProcess the current flow process
 * @param conf        the job conf to copy for trap initialization
 * @param traps       trap taps keyed by name; no-op when empty
 */
private void initFromTraps( FlowProcess<JobConf> flowProcess, JobConf conf, Map<String, Tap> traps )
  {
  if( traps.isEmpty() )
    return;

  // one shared copy for all traps; the original conf stays untouched
  JobConf trapConf = HadoopUtil.copyJobConf( conf );

  for( Tap tap : traps.values() )
    tap.sinkConfInit( flowProcess, trapConf );
  }
/**
 * Gives each trap tap a chance to configure itself against a copy of the job conf.
 *
 * @param flowProcess the current flow process
 * @param conf        the source job conf; never mutated here
 * @param traps       named trap taps; nothing happens when the map is empty
 */
private void initFromTraps( FlowProcess<JobConf> flowProcess, JobConf conf, Map<String, Tap> traps )
  {
  if( !traps.isEmpty() )
    {
    // copy once, then let every trap write into the copy
    JobConf sharedTrapConf = HadoopUtil.copyJobConf( conf );

    for( Tap trapTap : traps.values() )
      trapTap.sinkConfInit( flowProcess, sharedTrapConf );
    }
  }
/** Sink-side configuration hook: lazily resolves the wrapped tap from the flow process, then delegates to it. */
@Override public void sinkConfInit(FlowProcess<Object> flowProcess, Object conf) { initInnerTapIfNotSetFromFlowProcess(flowProcess); actualTap.sinkConfInit(flowProcess, conf); }
/**
 * Initializes every trap declared on the given flow node against a fresh
 * {@link JobConf} copy, then records per-tap local-mode settings.
 *
 * @param flowNode    the node whose trap map is consulted
 * @param flowProcess the current flow process
 * @param conf        the node configuration; copied, not mutated, for trap init
 */
protected void initFromTraps( FlowNode flowNode, FlowProcess<? extends Configuration> flowProcess, Configuration conf )
  {
  Map<String, Tap> trapMap = flowNode.getTrapMap();

  if( trapMap.isEmpty() )
    return;

  // traps configure a copy so the node conf stays pristine
  JobConf trapConf = new JobConf( conf );

  for( Tap trap : trapMap.values() )
    {
    trap.sinkConfInit( flowProcess, trapConf );
    setLocalMode( conf, trapConf, trap );
    }
  }
/** Lets each trap on the node configure a copied JobConf, then applies local-mode detection per trap tap; no-op when the node has no traps. */
protected void initFromTraps( FlowNode flowNode, FlowProcess<? extends Configuration> flowProcess, Configuration conf ) { Map<String, Tap> traps = flowNode.getTrapMap(); if( !traps.isEmpty() ) { JobConf trapConf = new JobConf( conf ); for( Tap tap : traps.values() ) { tap.sinkConfInit( flowProcess, trapConf ); setLocalMode( conf, trapConf, tap ); } } }
protected void initTaps( FlowProcess<Properties> flowProcess, Properties conf, Set<Tap> taps, boolean isSink ) { if( !taps.isEmpty() ) { for( Tap tap : taps ) { Properties confCopy = flowProcess.copyConfig( conf ); tapProperties.put( tap, confCopy ); // todo: store the diff, not the copy if( isSink ) tap.sinkConfInit( flowProcess, confCopy ); else tap.sourceConfInit( flowProcess, confCopy ); } } }
/**
 * Prepares the record writer for this sink.
 * <p>
 * Lets the tap contribute its sink settings to the job conf, then obtains a
 * {@link RecordWriter} from the configured output format, keyed by the tap's
 * identifier, and wires this object in as the sink call's output.
 *
 * @throws IOException if the output format cannot create a record writer
 */
private void initialize() throws IOException {
    tap.sinkConfInit(hadoopFlowProcess, conf);

    OutputFormat outputFormat = conf.getOutputFormat();

    // parameterized logging (matches the style used elsewhere in this codebase)
    // avoids building the message string when INFO is disabled
    LOG.info("Output format class is: {}", outputFormat.getClass().getName());

    // the FileSystem argument is unused by the formats targeted here, hence null
    writer = outputFormat.getRecordWriter(null, conf, tap.getIdentifier(), Reporter.NULL);

    sinkCall.setOutput(this);
}
/**
 * Captures, for every child tap, only the configuration delta that the tap's
 * sink initialization produced against the shared parent configuration.
 *
 * @param flowProcess the current flow process, used to copy and diff configs
 * @param conf        the parent configuration each child config is diffed against
 */
private void bridge( FlowProcess flowProcess, Object conf )
  {
  childConfigs = new ArrayList<>();

  for( Tap childTap : getTaps() )
    {
    // give the child its own copy to mutate, then store just the difference
    Object childConf = flowProcess.copyConfig( conf );

    childTap.sinkConfInit( flowProcess, childConf );

    childConfigs.add( flowProcess.diffConfigIntoMap( conf, childConf ) );
    }
  }
protected void initFromSink( FlowProcess<JobConf> flowProcess, JobConf conf ) { // init sink first so tempSink can take precedence if( getSink() != null ) getSink().sinkConfInit( flowProcess, conf ); Class<? extends OutputFormat> outputFormat = conf.getClass( "mapred.output.format.class", null, OutputFormat.class ); boolean isFileOutputFormat = false; if( outputFormat != null ) isFileOutputFormat = FileOutputFormat.class.isAssignableFrom( outputFormat ); Path outputPath = FileOutputFormat.getOutputPath( conf ); // if no output path is set, we need to substitute an alternative if the OutputFormat is file based // PartitionTap won't set the output, but will set an OutputFormat // MultiSinkTap won't set the output or set the OutputFormat // Non file based OutputFormats don't have an output path, but do have an OutputFormat set (JDBCTap..) if( outputPath == null && ( isFileOutputFormat || outputFormat == null ) ) tempSink = new TempHfs( conf, "tmp:/" + new Path( getSink().getIdentifier() ).toUri().getPath(), true ); // tempSink exists because sink is writeDirect if( tempSink != null ) tempSink.sinkConfInit( flowProcess, conf ); }
/**
 * Applies sink-side configuration; when the output format is file based but no output path was set,
 * substitutes a temporary HDFS sink derived from the sink's identifier.
 * NOTE(review): getSink() is null-checked before sinkConfInit but dereferenced unguarded when building
 * the TempHfs path -- a null sink with no output path would NPE here; confirm callers guarantee a sink.
 */
protected void initFromSink( FlowProcess<JobConf> flowProcess, JobConf conf ) { // init sink first so tempSink can take precedence if( getSink() != null ) getSink().sinkConfInit( flowProcess, conf ); Class<? extends OutputFormat> outputFormat = conf.getClass( "mapred.output.format.class", null, OutputFormat.class ); boolean isFileOutputFormat = false; if( outputFormat != null ) isFileOutputFormat = FileOutputFormat.class.isAssignableFrom( outputFormat ); Path outputPath = FileOutputFormat.getOutputPath( conf ); // if no output path is set, we need to substitute an alternative if the OutputFormat is file based // PartitionTap won't set the output, but will set an OutputFormat // MultiSinkTap won't set the output or set the OutputFormat // Non file based OutputFormats don't have an output path, but do have an OutputFormat set (JDBCTap..) if( outputPath == null && ( isFileOutputFormat || outputFormat == null ) ) tempSink = new TempHfs( conf, "tmp:/" + new Path( getSink().getIdentifier() ).toUri().getPath(), true ); // tempSink exists because sink is writeDirect if( tempSink != null ) tempSink.sinkConfInit( flowProcess, conf ); }
@Override public void sinkConfInit( FlowProcess<? extends Configuration> process, Configuration conf ) { Path qualifiedPath = new Path( getFullIdentifier( conf ) ); HadoopUtil.setOutputPath( conf, qualifiedPath ); super.sinkConfInit( process, conf ); makeLocal( conf, qualifiedPath, "forcing job to stand-alone mode, via sink: " ); TupleSerialization.setSerializations( conf ); // allows Hfs to be used independent of Flow }
/** Sets this tap's qualified path as the job output path, delegates to the superclass, forces stand-alone mode for local paths, and registers tuple serializations so Hfs works outside a Flow. */
@Override public void sinkConfInit( FlowProcess<? extends Configuration> process, Configuration conf ) { Path qualifiedPath = new Path( getFullIdentifier( conf ) ); HadoopUtil.setOutputPath( conf, qualifiedPath ); super.sinkConfInit( process, conf ); makeLocal( conf, qualifiedPath, "forcing job to stand-alone mode, via sink: " ); TupleSerialization.setSerializations( conf ); // allows Hfs to be used independent of Flow }
/**
 * Sink-side configuration for the JDBC tap: registers driver and connection
 * settings (with credentials when present) before delegating upward.
 * Does nothing when this tap is not acting as a sink.
 */
@Override
public void sinkConfInit( FlowProcess<? extends Configuration> process, Configuration conf )
  {
  if( !isSink() )
    return;

  // credentials are optional; choose the matching configureDB overload
  if( username != null )
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password );
  else
    DBConfiguration.configureDB( conf, driverClassName, connectionUrl );

  super.sinkConfInit( process, conf );
  }
protected void initialize() throws IOException { tap.sinkConfInit( flowProcess, conf ); OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat(); // todo: use OutputCommitter class isFileOutputFormat = outputFormat instanceof FileOutputFormat; if( isFileOutputFormat ) { Hadoop18TapUtil.setupJob( conf ); Hadoop18TapUtil.setupTask( conf ); int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) ); long localSequence = sequence == -1 ? 0 : sequence; if( prefix != null ) filename = String.format( filenamePattern, prefix, "/", partition, localSequence ); else filename = String.format( filenamePattern, "", "", partition, localSequence ); } LOG.info( "creating path: {}", filename ); writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() ); }
/**
 * Prepares the record writer: lets the tap configure the conf, then -- for file-based output formats --
 * sets up job/task state and builds the filename from the pattern, partition and sequence.
 * NOTE(review): for non-file output formats, filename is whatever value the field already held -- confirm it is set elsewhere.
 */
protected void initialize() throws IOException { tap.sinkConfInit( flowProcess, conf ); OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat(); // todo: use OutputCommitter class isFileOutputFormat = outputFormat instanceof FileOutputFormat; if( isFileOutputFormat ) { Hadoop18TapUtil.setupJob( conf ); Hadoop18TapUtil.setupTask( conf ); int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) ); long localSequence = sequence == -1 ? 0 : sequence; if( prefix != null ) filename = String.format( filenamePattern, prefix, "/", partition, localSequence ); else filename = String.format( filenamePattern, "", "", partition, localSequence ); } LOG.info( "creating path: {}", filename ); writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() ); }
/**
 * Writes this fixture's tuples into a Hadoop-configured tap.
 * <p>
 * Builds a fresh {@link JobConf} and flow process, lets the tap configure
 * itself, then streams every tuple through a write collector.
 *
 * @param tap the tap to write to; must accept a {@code JobConf} configuration
 * @throws IOException if opening or writing to the tap fails
 */
private void writeToHadoopTap(Tap<?, ?, ?> tap) throws IOException {
    @SuppressWarnings("unchecked")
    Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) tap;

    JobConf conf = new JobConf();
    HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);

    hadoopTap.sinkConfInit(flowProcess, conf);

    TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
    try {
        for (TupleEntry tuple : data.asTupleEntryList()) {
            collector.add(tuple);
        }
    } finally {
        // always release the collector, even when a write fails mid-stream
        collector.close();
    }
}
/**
 * Writes this fixture's tuples into a locally-configured tap.
 * <p>
 * Builds a {@link Properties}-backed local flow process (with stub step
 * stats), streams every tuple through a write collector, and commits the
 * resource after a successful write.
 *
 * @param tap the tap to write to; must accept a {@code Properties} configuration
 * @throws IOException if opening, writing to, or committing the tap fails
 */
private void writeToLocalTap(Tap<?, ?, ?> tap) throws IOException {
    @SuppressWarnings("unchecked")
    Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) tap;

    Properties conf = new Properties();
    LocalFlowProcess flowProcess = new LocalFlowProcess(conf);
    flowProcess.setStepStats(new LocalStepStats(new NullFlowStep(), NullClientState.INSTANCE));

    localTap.sinkConfInit(flowProcess, conf);

    TupleEntryCollector collector = localTap.openForWrite(flowProcess);
    try {
        for (TupleEntry tuple : data.asTupleEntryList()) {
            collector.add(tuple);
        }
    } finally {
        // always release the collector, even when a write fails mid-stream
        collector.close();
    }

    // commit only after all tuples were written and the collector closed cleanly
    localTap.commitResource(conf);
}