/**
 * Initializes the source side of the given configuration from the child partitions.
 * <p>
 * The child partition identifiers are resolved first and then handed to the parent
 * tap, which is expected to be an {@link Hfs} instance.
 *
 * @param flowProcess the current flow process
 * @param conf        the configuration being initialized
 * @throws TapException wrapping any {@link IOException} raised while resolving or applying the child partitions
 */
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> flowProcess, Configuration conf )
  {
  try
    {
    String[] partitionIdentifiers = getChildPartitionIdentifiers( flowProcess, true );
    Hfs parentTap = (Hfs) getParent();

    parentTap.applySourceConfInitIdentifiers( flowProcess, conf, partitionIdentifiers );
    }
  catch( IOException cause )
    {
    throw new TapException( "unable to retrieve child partitions", cause );
    }
  }
/**
 * Creates a {@link PartitionTap} on top of the given sink.
 *
 * @param sink          the parent tap; it is cast to {@link Hfs}, so any other non-null Tap type fails with a ClassCastException
 * @param partition     the partition handed to the new tap
 * @param openThreshold the open threshold handed to the new tap
 * @return the newly created partition tap
 */
@Override
public Tap getPartitionTap( Tap sink, Partition partition, int openThreshold )
  {
  Hfs parentTap = (Hfs) sink;

  return new PartitionTap( parentTap, partition, openThreshold );
  }
/**
 * Builds the partition aware entry iterator for the given record reader.
 * <p>
 * A scheme iterator is created over the parent tap and the given input, and is then
 * wrapped together with the source fields, the partition and the parent identifier.
 *
 * @param flowProcess      the current flow process
 * @param input            the record reader to read from
 * @param parentIdentifier the identifier of the parent tap
 * @return the wrapping partition entry iterator
 * @throws IOException if the scheme iterator or the wrapping iterator cannot be created
 */
private CombineInputPartitionTupleEntryIterator createPartitionEntryIterator( FlowProcess<? extends Configuration> flowProcess, RecordReader input, String parentIdentifier ) throws IOException
  {
  // the scheme iterator is created before any constructor argument below is evaluated
  TupleEntrySchemeIterator delegate = createTupleEntrySchemeIterator( flowProcess, parent, null, input );

  return new CombineInputPartitionTupleEntryIterator(
    flowProcess,
    getSourceFields(),
    partition,
    parentIdentifier,
    delegate );
  }
}
/**
 * Collects the {@link Hfs} taps reachable from the given value into the given set.
 * <ul>
 * <li>a {@code null} value is ignored</li>
 * <li>an {@link Hfs} is added as is</li>
 * <li>a {@link PartitionTap} contributes its parent, cast to {@link Hfs}</li>
 * <li>a {@link MultiSourceTap} is handed to {@code iterate}</li>
 * </ul>
 *
 * @param taps  the set receiving the collected taps
 * @param value the tap to inspect, may be {@code null}
 * @throws IllegalArgumentException if the value is of any other Tap type
 */
public void accumulate( Set<Hfs> taps, Tap value )
  {
  if( value == null )
    return;

  if( value instanceof Hfs )
    taps.add( (Hfs) value );
  else if( value instanceof PartitionTap )
    taps.add( (Hfs) ( (PartitionTap) value ).getParent() );
  else if( value instanceof MultiSourceTap )
    iterate( taps, (MultiSourceTap) value );
  else
    throw new IllegalArgumentException( "unsupported Tap type: " + value.getClass().getName() );
  }
/**
 * Test helper: builds a partition tap over a dummy text file using the given
 * partition fields and registers a {@link TrueFilter} as source partition
 * filter with the given argument selector.
 *
 * @param partitionFields  the fields used to create the {@link DelimitedPartition}
 * @param argumentSelector the argument selector passed along with the filter
 * @throws Exception whatever the platform or the partition tap raises, left to the caller
 */
private void testFilteredPartitionTapFields( Fields partitionFields, Fields argumentSelector ) throws Exception
  {
  Tap dummyTap = getPlatform().getTextFile( "dummy" );
  Partition delimitedPartition = new DelimitedPartition( partitionFields );

  Tap created = getPlatform().getPartitionTap( dummyTap, delimitedPartition, 1 );
  PartitionTap filteredTap = (PartitionTap) created;

  filteredTap.addSourcePartitionFilter( argumentSelector, new TrueFilter() );
  }
/**
 * Measures how long creating a partition tap for the given size and calling
 * {@code sourceConfInit} on it takes.
 *
 * @param size the value passed to {@code createPartitionTap}
 * @return the elapsed time in milliseconds
 */
public long getDuration( int size )
  {
  // System.nanoTime() is the monotonic clock meant for elapsed time; System.currentTimeMillis()
  // follows the wall clock and can jump (even backwards) when the system time is adjusted
  long start = System.nanoTime();

  createPartitionTap( size ).sourceConfInit( new HadoopFlowProcess( new JobConf() ), new JobConf() );

  long elapsedNanos = System.nanoTime() - start;

  return elapsedNanos / 1000000L; // callers expect milliseconds
  }
/**
 * Creates an iterator over the partition entries read from the given record reader.
 * <p>
 * A single partition entry iterator, bound to the full identifier of the parent tap,
 * is created and installed through {@code reset}.
 *
 * @param flowProcess the current flow process
 * @param input       the record reader to read from, must not be {@code null}
 * @throws IOException if the input is {@code null}, or if resolving the parent identifier
 *                     or creating the entry iterator fails
 */
public CombinePartitionIterator( final FlowProcess<? extends Configuration> flowProcess, RecordReader input ) throws IOException
  {
  super( getSourceFields() );

  if( input == null )
    throw new IOException( "input cannot be null" );

  String parentIdentifier = parent.getFullIdentifier( flowProcess );

  List<Iterator<Tuple>> partitionIterators = new ArrayList<Iterator<Tuple>>();
  partitionIterators.add( createPartitionEntryIterator( flowProcess, input, parentIdentifier ) );

  reset( partitionIterators );
  }
/**
 * Collects the {@link Hfs} taps reachable from the given value into the given set.
 * <ul>
 * <li>a {@code null} value is ignored</li>
 * <li>an {@link Hfs} is added as is</li>
 * <li>a {@link PartitionTap} contributes its parent, cast to {@link Hfs}</li>
 * <li>a {@link MultiSourceTap} is handed to {@code iterate}</li>
 * </ul>
 *
 * @param taps  the set receiving the collected taps
 * @param value the tap to inspect, may be {@code null}
 * @throws IllegalArgumentException if the value is of any other Tap type
 */
public void accumulate( Set<Hfs> taps, Tap value )
  {
  if( value == null )
    return;

  if( value instanceof Hfs )
    taps.add( (Hfs) value );
  else if( value instanceof PartitionTap )
    taps.add( (Hfs) ( (PartitionTap) value ).getParent() );
  else if( value instanceof MultiSourceTap )
    iterate( taps, (MultiSourceTap) value );
  else
    throw new IllegalArgumentException( "unsupported Tap type: " + value.getClass().getName() );
  }
/**
 * Test helper: builds a partition tap over a dummy text file using the given
 * partition fields and registers a {@link TrueFilter} as source partition
 * filter with the given argument selector.
 *
 * @param partitionFields  the fields used to create the {@link DelimitedPartition}
 * @param argumentSelector the argument selector passed along with the filter
 * @throws Exception whatever the platform or the partition tap raises, left to the caller
 */
private void testFilteredPartitionTapFields( Fields partitionFields, Fields argumentSelector ) throws Exception
  {
  Tap dummyTap = getPlatform().getTextFile( "dummy" );
  Partition delimitedPartition = new DelimitedPartition( partitionFields );

  Tap created = getPlatform().getPartitionTap( dummyTap, delimitedPartition, 1 );
  PartitionTap filteredTap = (PartitionTap) created;

  filteredTap.addSourcePartitionFilter( argumentSelector, new TrueFilter() );
  }
/**
 * Measures how long creating a partition tap for the given size and calling
 * {@code sourceConfInit} on it takes.
 *
 * @param size the value passed to {@code createPartitionTap}
 * @return the elapsed time in milliseconds
 */
public long getDuration( int size )
  {
  // System.nanoTime() is the monotonic clock meant for elapsed time; System.currentTimeMillis()
  // follows the wall clock and can jump (even backwards) when the system time is adjusted
  long start = System.nanoTime();

  createPartitionTap( size ).sourceConfInit( new HadoopFlowProcess( new JobConf() ), new JobConf() );

  long elapsedNanos = System.nanoTime() - start;

  return elapsedNanos / 1000000L; // callers expect milliseconds
  }
/**
 * Creates an iterator over the partition entries read from the given record reader.
 * <p>
 * A single partition entry iterator, bound to the full identifier of the parent tap,
 * is created and installed through {@code reset}.
 *
 * @param flowProcess the current flow process
 * @param input       the record reader to read from, must not be {@code null}
 * @throws IOException if the input is {@code null}, or if resolving the parent identifier
 *                     or creating the entry iterator fails
 */
public CombinePartitionIterator( final FlowProcess<? extends Configuration> flowProcess, RecordReader input ) throws IOException
  {
  super( getSourceFields() );

  if( input == null )
    throw new IOException( "input cannot be null" );

  String parentIdentifier = parent.getFullIdentifier( flowProcess );

  List<Iterator<Tuple>> partitionIterators = new ArrayList<Iterator<Tuple>>();
  partitionIterators.add( createPartitionEntryIterator( flowProcess, input, parentIdentifier ) );

  reset( partitionIterators );
  }
/**
 * Initializes the source side of the given configuration from the child partitions.
 * <p>
 * The child partition identifiers are resolved first and then handed to the parent
 * tap, which is expected to be an {@link Hfs} instance.
 *
 * @param flowProcess the current flow process
 * @param conf        the configuration being initialized
 * @throws TapException wrapping any {@link IOException} raised while resolving or applying the child partitions
 */
@Override
public void sourceConfInit( FlowProcess<? extends Configuration> flowProcess, Configuration conf )
  {
  try
    {
    String[] partitionIdentifiers = getChildPartitionIdentifiers( flowProcess, true );
    Hfs parentTap = (Hfs) getParent();

    parentTap.applySourceConfInitIdentifiers( flowProcess, conf, partitionIdentifiers );
    }
  catch( IOException cause )
    {
    throw new TapException( "unable to retrieve child partitions", cause );
    }
  }
/**
 * Builds the partition aware entry iterator for the given record reader.
 * <p>
 * A scheme iterator is created over the parent tap and the given input, and is then
 * wrapped together with the source fields, the partition and the parent identifier.
 *
 * @param flowProcess      the current flow process
 * @param input            the record reader to read from
 * @param parentIdentifier the identifier of the parent tap
 * @return the wrapping partition entry iterator
 * @throws IOException if the scheme iterator or the wrapping iterator cannot be created
 */
private CombineInputPartitionTupleEntryIterator createPartitionEntryIterator( FlowProcess<? extends Configuration> flowProcess, RecordReader input, String parentIdentifier ) throws IOException
  {
  // the scheme iterator is created before any constructor argument below is evaluated
  TupleEntrySchemeIterator delegate = createTupleEntrySchemeIterator( flowProcess, parent, null, input );

  return new CombineInputPartitionTupleEntryIterator(
    flowProcess,
    getSourceFields(),
    partition,
    parentIdentifier,
    delegate );
  }
}
/**
 * Creates a {@link PartitionTap} on top of the given sink.
 *
 * @param sink          the parent tap; it is cast to {@link Hfs}, so any other non-null Tap type fails with a ClassCastException
 * @param partition     the partition handed to the new tap
 * @param openThreshold the open threshold handed to the new tap
 * @return the newly created partition tap
 */
@Override
public Tap getPartitionTap( Tap sink, Partition partition, int openThreshold )
  {
  Hfs parentTap = (Hfs) sink;

  return new PartitionTap( parentTap, partition, openThreshold );
  }
/**
 * Writes the lower case input into partitions keyed on "number", then reads the
 * partitions back through a source partition filter built from the values "2" and "4".
 * <p>
 * Exactly three tuples are expected in the final sink: those for the numbers 1, 3 and 5.
 */
@Test
public void testFilteredPartitionTap_Typical() throws Exception
  {
  getPlatform().copyFromLocal( inputFileLower );

  // first flow: split the input into one partition per "number" value
  Tap input = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );
  Tap partitionParent = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition numberPartition = new DelimitedPartition( new Fields( "number" ) );

  PartitionTap partitionSink = (PartitionTap) getPlatform().getPartitionTap( partitionParent, numberPartition, 1 );

  Flow writeFlow = getPlatform().getFlowConnector().connect( input, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // second flow: read the partitions back through a fresh tap carrying the filter
  PartitionTap filteredSource = (PartitionTap) getPlatform().getPartitionTap( partitionParent, numberPartition, 1 );
  filteredSource.addSourcePartitionFilter( new Fields( "number" ), new PartitionFilter( Arrays.asList( "2", "4" ) ) );

  Tap output = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );

  Flow readFlow = getPlatform().getFlowConnector().connect( filteredSource, output, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  assertEquals( 3, results.size() );

  for( Tuple expected : Arrays.asList( new Tuple( "1", "a" ), new Tuple( "3", "c" ), new Tuple( "5", "e" ) ) )
    assertTrue( results.contains( expected ) );
  }
/**
 * Round trips the lhs input through a {@link PartitionTap} on an {@link Hfs} parent,
 * wrapped in a {@link LocalHfsAdaptor}, using the local flow connector for both the
 * write and the read flow.
 * <p>
 * Both flows are validated against a length of 13.
 */
@Test
public void testPartitionedWriteReadHDFS() throws Exception
  {
  copyFromLocal( inputFileLhs );

  Tap input = new FileTap( new cascading.scheme.local.TextDelimited( new Fields( "num", "char" ), " " ), inputFileLhs );

  // the partitioned intermediate lives on Hfs and is adapted for use in local mode
  Hfs partitionParent = new Hfs( new TextDelimited( new Fields( "num", "char" ), " " ), getOutputPath( "/intermediate" ), SinkMode.REPLACE );
  PartitionTap partitioned = new PartitionTap( partitionParent, new DelimitedPartition( new Fields( "num" ), "/" ) );
  Tap adapted = new LocalHfsAdaptor( partitioned );

  Tap output = new FileTap( new cascading.scheme.local.TextDelimited( new Fields( "num", "char" ), " " ), getOutputPath( "/final" ), SinkMode.REPLACE );

  Pipe assembly = new Pipe( "test" );

  // write into the partitions
  Flow writeFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( input, adapted, assembly );
  writeFlow.complete();
  validateLength( writeFlow, 13 );

  // read the partitions back
  Flow readFlow = new LocalFlowConnector( getPlatform().getProperties() ).connect( adapted, output, assembly );
  readFlow.complete();
  validateLength( readFlow, 13 );
  }
}
/**
 * Writes the lower case input into partitions keyed on "number", then reads the
 * partitions back through a source partition filter built from the values "2" and "4".
 * <p>
 * Exactly three tuples are expected in the final sink: those for the numbers 1, 3 and 5.
 */
@Test
public void testFilteredPartitionTap_Typical() throws Exception
  {
  getPlatform().copyFromLocal( inputFileLower );

  // first flow: split the input into one partition per "number" value
  Tap input = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );
  Tap partitionParent = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition numberPartition = new DelimitedPartition( new Fields( "number" ) );

  PartitionTap partitionSink = (PartitionTap) getPlatform().getPartitionTap( partitionParent, numberPartition, 1 );

  Flow writeFlow = getPlatform().getFlowConnector().connect( input, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // second flow: read the partitions back through a fresh tap carrying the filter
  PartitionTap filteredSource = (PartitionTap) getPlatform().getPartitionTap( partitionParent, numberPartition, 1 );
  filteredSource.addSourcePartitionFilter( new Fields( "number" ), new PartitionFilter( Arrays.asList( "2", "4" ) ) );

  Tap output = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );

  Flow readFlow = getPlatform().getFlowConnector().connect( filteredSource, output, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  assertEquals( 3, results.size() );

  for( Tuple expected : Arrays.asList( new Tuple( "1", "a" ), new Tuple( "3", "c" ), new Tuple( "5", "e" ) ) )
    assertTrue( results.contains( expected ) );
  }