protected void initFromSink( FlowProcess<JobConf> flowProcess, JobConf conf ) { // init sink first so tempSink can take precedence if( getSink() != null ) getSink().sinkConfInit( flowProcess, conf ); Class<? extends OutputFormat> outputFormat = conf.getClass( "mapred.output.format.class", null, OutputFormat.class ); boolean isFileOutputFormat = false; if( outputFormat != null ) isFileOutputFormat = FileOutputFormat.class.isAssignableFrom( outputFormat ); Path outputPath = FileOutputFormat.getOutputPath( conf ); // if no output path is set, we need to substitute an alternative if the OutputFormat is file based // PartitionTap won't set the output, but will set an OutputFormat // MultiSinkTap won't set the output or set the OutputFormat // Non file based OutputFormats don't have an output path, but do have an OutputFormat set (JDBCTap..) if( outputPath == null && ( isFileOutputFormat || outputFormat == null ) ) tempSink = new TempHfs( conf, "tmp:/" + new Path( getSink().getIdentifier() ).toUri().getPath(), true ); // tempSink exists because sink is writeDirect if( tempSink != null ) tempSink.sinkConfInit( flowProcess, conf ); }
protected void initFromSink( FlowProcess<JobConf> flowProcess, JobConf conf ) { // init sink first so tempSink can take precedence if( getSink() != null ) getSink().sinkConfInit( flowProcess, conf ); Class<? extends OutputFormat> outputFormat = conf.getClass( "mapred.output.format.class", null, OutputFormat.class ); boolean isFileOutputFormat = false; if( outputFormat != null ) isFileOutputFormat = FileOutputFormat.class.isAssignableFrom( outputFormat ); Path outputPath = FileOutputFormat.getOutputPath( conf ); // if no output path is set, we need to substitute an alternative if the OutputFormat is file based // PartitionTap won't set the output, but will set an OutputFormat // MultiSinkTap won't set the output or set the OutputFormat // Non file based OutputFormats don't have an output path, but do have an OutputFormat set (JDBCTap..) if( outputPath == null && ( isFileOutputFormat || outputFormat == null ) ) tempSink = new TempHfs( conf, "tmp:/" + new Path( getSink().getIdentifier() ).toUri().getPath(), true ); // tempSink exists because sink is writeDirect if( tempSink != null ) tempSink.sinkConfInit( flowProcess, conf ); }
/**
 * Connects a single bare Pipe between one source and one sink, which should not fail,
 * and verifies the planned flow has exactly one step with one source tap, no group
 * and a non-null sink.
 *
 * @throws Exception if connecting the flow fails
 */
@Test
public void testIdentity() throws Exception {
  Tap input = new Hfs( new TextLine(), "input/path" );
  Tap output = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe identity = new Pipe( "test" );

  Flow planned = getPlatform().getFlowConnector().connect( input, output, identity );
  List<FlowStep> flowSteps = planned.getFlowSteps();

  assertEquals( "wrong size", 1, flowSteps.size() );

  // the only step is expected to be a Hadoop step; the cast fails the test otherwise
  HadoopFlowStep onlyStep = (HadoopFlowStep) flowSteps.get( 0 );

  assertEquals( "not equal: step.sources.size()", 1, onlyStep.getSourceTaps().size() );
  assertNull( "not null: step.groupBy", onlyStep.getGroup() );
  assertNotNull( "null: step.sink", onlyStep.getSink() );
}
/**
 * Connects a single bare Pipe between one source and one sink, which should not fail,
 * and verifies the planned flow has exactly one step with one source tap, no group
 * and a non-null sink.
 *
 * @throws Exception if connecting the flow fails
 */
@Test
public void testIdentity() throws Exception {
  Tap input = new Hfs( new TextLine(), "input/path" );
  Tap output = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );
  Pipe identity = new Pipe( "test" );

  Flow planned = getPlatform().getFlowConnector().connect( input, output, identity );
  List<FlowStep> flowSteps = planned.getFlowSteps();

  assertEquals( "wrong size", 1, flowSteps.size() );

  // the only step is expected to be a Hadoop step; the cast fails the test otherwise
  HadoopFlowStep onlyStep = (HadoopFlowStep) flowSteps.get( 0 );

  assertEquals( "not equal: step.sources.size()", 1, onlyStep.getSourceTaps().size() );
  assertNull( "not null: step.groupBy", onlyStep.getGroup() );
  assertNotNull( "null: step.sink", onlyStep.getSink() );
}
// Part count reported by the sink's scheme; getSink() and getScheme() are dereferenced without null checks.
int numSinkParts = getSink().getScheme().getNumSinkParts();
// Part count reported by the sink's scheme; getSink() and getScheme() are dereferenced without null checks.
int numSinkParts = getSink().getScheme().getNumSinkParts();