private void verifyCoGrouper() { if( isJoin() && joiner instanceof BufferJoin ) throw new IllegalArgumentException( "invalid joiner, may not use BufferJoiner in a HashJoin" ); if( joiner == null ) { joiner = new InnerJoin(); return; } if( joiner.numJoins() == -1 ) return; int joins = Math.max( numSelfJoins, keyFieldsMap.size() - 1 ); // joining two streams is one join if( joins != joiner.numJoins() ) throw new IllegalArgumentException( "invalid joiner, only accepts " + joiner.numJoins() + " joins, there are: " + joins ); }
/** * Constructor Splice creates a new Splice instance. * * @param spliceName of type String * @param pipes of type Pipe[] * @param groupFields of type Fields * @param sortFields of type Fields * @param reverseOrder of type boolean */ protected Splice( String spliceName, Pipe[] pipes, Fields groupFields, Fields sortFields, boolean reverseOrder ) { if( pipes == null ) throw new IllegalArgumentException( "pipes array may not be null" ); if( groupFields == null ) throw new IllegalArgumentException( "groupFields may not be null" ); setKind(); this.spliceName = spliceName; for( Pipe pipe : pipes ) { addPipe( pipe ); this.keyFieldsMap.put( pipe.getName(), groupFields ); if( sortFields != null ) this.sortFieldsMap.put( pipe.getName(), sortFields ); } this.reverseOrder = reverseOrder; this.joiner = new InnerJoin(); }
/**
 * Creates the {@code Joiner} matching the value returned by {@code getJoinType()}.
 *
 * @return a new {@code InnerJoin}, {@code LeftJoin}, {@code RightJoin} or {@code OuterJoin} for
 *         {@code INNER}, {@code LEFT}, {@code RIGHT} and {@code FULL} respectively
 * @throws IllegalStateException if the join type is none of the handled constants
 */
private Joiner getJoiner() {
  final Joiner selected;

  switch( getJoinType() ) {
    case INNER:
      selected = new InnerJoin();
      break;

    case LEFT:
      selected = new LeftJoin();
      break;

    case RIGHT:
      selected = new RightJoin();
      break;

    case FULL:
      selected = new OuterJoin();
      break;

    default:
      throw new IllegalStateException( "unknown join type" );
  }

  return selected;
}
} // closes an enclosing scope that opens outside this excerpt
Pipe lhs, rhs; Fields lhsFields, rhsFields; Joiner joiner = new InnerJoin();
results.add( new Tuple( null, "h1", null, "H1" ) ); handleJoins( "joininner", new InnerJoin(), results );
results.add( new Tuple( null, "h1", null, "H1" ) ); handleJoins( "joininner", new InnerJoin(), results );
join = new GroupBy( Pipe.pipes( pipeLower, pipeUpper ), numLHS ); else if( !isMerge && isGroup ) join = new CoGroup( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() ); else if( isMerge && !isGroup ) join = new Merge( pipeLower, pipeUpper ); else join = new HashJoin( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() );
join = new GroupBy( Pipe.pipes( pipeLower, pipeUpper ), numLHS ); else if( !isMerge && isGroup ) join = new CoGroup( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() ); else if( isMerge && !isGroup ) join = new Merge( pipeLower, pipeUpper ); else join = new HashJoin( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() );
/**
 * Runs a flow that joins the "lhs" and "rhs" inputs with a {@code CoGroup} on {@code numLHS} and
 * {@code numRHS} using an {@code InnerJoin}, then checks the sink contents.
 * <p>
 * Each input line is split on a single space into a num/char field pair before the join. The result is
 * checked with {@code validateLength( flow, 37, null )} and must contain the tab separated lines
 * {@code 1 a 1 A} and {@code 1 a 1 B}.
 *
 * @throws Exception if copying the inputs or running the flow fails
 */
@Test
public void testCross() throws Exception {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  // typed map instead of the raw Map/HashMap, so the source taps are checked at compile time
  Map<String, Tap> sources = new HashMap<String, Tap>();

  sources.put( "lhs", getPlatform().getTextFile( inputFileLhs ) );
  sources.put( "rhs", getPlatform().getTextFile( inputFileRhs ) );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );

  // split every line on a single space into its num and char parts
  Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
  Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

  Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

  Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

  flow.complete();

  validateLength( flow, 37, null );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "1\ta\t1\tB" ) ) );
}
/**
 * Runs a flow that joins the "lhs" and "rhs" inputs with a {@code HashJoin} on {@code numLHS} and
 * {@code numRHS} using an {@code InnerJoin}, then checks the sink contents.
 * <p>
 * Each input line is split on a single space into a num/char field pair before the join. The result is
 * checked with {@code validateLength( flow, 37, null )} and must contain the tab separated lines
 * {@code 1 a 1 A} and {@code 1 a 1 B}.
 *
 * @throws Exception if copying the inputs or running the flow fails
 */
@Test
public void testCross() throws Exception {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  // typed map instead of the raw Map/HashMap, so the source taps are checked at compile time
  Map<String, Tap> sources = new HashMap<String, Tap>();

  sources.put( "lhs", getPlatform().getTextFile( inputFileLhs ) );
  sources.put( "rhs", getPlatform().getTextFile( inputFileRhs ) );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );

  // split every line on a single space into its num and char parts
  Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
  Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

  Pipe cross = new HashJoin( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

  Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

  flow.complete();

  validateLength( flow, 37, null );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "1\ta\t1\tB" ) ) );
}
/**
 * Joins the "lhs" and "rhs" inputs through a {@code CoGroup} keyed on {@code numLHS} / {@code numRHS} with
 * an {@code InnerJoin} and verifies what reaches the sink.
 * <p>
 * Both inputs are split on a single space into a num and a char field first. After the flow completes the
 * test calls {@code validateLength( flow, 37, null )} and asserts that the tab separated lines
 * {@code 1 a 1 A} and {@code 1 a 1 B} are present.
 *
 * @throws Exception if copying the inputs or running the flow fails
 */
@Test
public void testCross() throws Exception {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  // parameterized instead of raw Map/HashMap: avoids the unchecked call when connecting the flow
  Map<String, Tap> sources = new HashMap<String, Tap>();

  sources.put( "lhs", getPlatform().getTextFile( inputFileLhs ) );
  sources.put( "rhs", getPlatform().getTextFile( inputFileRhs ) );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );

  // each side parses "line" into two fields using a single space as delimiter
  Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
  Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

  Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

  Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

  flow.complete();

  validateLength( flow, 37, null );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "1\ta\t1\tB" ) ) );
}
@Test public void testCoGroup() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroup" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new InnerJoin( Fields.size( 4 ) ) ); Map<Object, Object> properties = getProperties(); // make sure hasher is getting called, but does nothing special FlowProps.setDefaultTupleElementComparator( properties, getPlatform().getStringComparator( false ).getClass().getCanonicalName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testCoGroup() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroup" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new InnerJoin( Fields.size( 4 ) ) ); Map<Object, Object> properties = getProperties(); // make sure hasher is getting called, but does nothing special FlowProps.setDefaultTupleElementComparator( properties, getPlatform().getStringComparator( false ).getClass().getCanonicalName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
/**
 * Joins the "lhs" and "rhs" inputs through a {@code HashJoin} keyed on {@code numLHS} / {@code numRHS} with
 * an {@code InnerJoin} and verifies what reaches the sink.
 * <p>
 * Both inputs are split on a single space into a num and a char field first. After the flow completes the
 * test calls {@code validateLength( flow, 37, null )} and asserts that the tab separated lines
 * {@code 1 a 1 A} and {@code 1 a 1 B} are present.
 *
 * @throws Exception if copying the inputs or running the flow fails
 */
@Test
public void testCross() throws Exception {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  // parameterized instead of raw Map/HashMap: avoids the unchecked call when connecting the flow
  Map<String, Tap> sources = new HashMap<String, Tap>();

  sources.put( "lhs", getPlatform().getTextFile( inputFileLhs ) );
  sources.put( "rhs", getPlatform().getTextFile( inputFileRhs ) );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );

  // each side parses "line" into two fields using a single space as delimiter
  Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
  Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

  Pipe cross = new HashJoin( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

  Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

  flow.complete();

  validateLength( flow, 37, null );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "1\ta\t1\tB" ) ) );
}
Pipe merge = new HashJoin( "join", Pipe.pipes( left, right ), fields, Fields.size( 4 ), new InnerJoin() );