/**
 * Visits the child node first, then places a {@link GroupBy} on top of the pipe the child
 * branch ended with.
 *
 * @param stack passed to the child visit, and used here to name the grouping and attach debugging
 * @return a new {@link Branch} built from the resulting pipe and the child's branch
 */
public Branch visitChild( Stack stack ) {
  CascadingRelNode child = (CascadingRelNode) getChild();
  Branch childBranch = child.visitChild( stack );

  Fields groupFields = createFields();
  String groupName = stack.getNameFor( GroupBy.class, childBranch.current );

  // Kept as a Pipe-typed local so addDebug receives the same static type as before.
  Pipe grouped = new GroupBy( groupName, childBranch.current, groupFields );
  Pipe debugged = stack.addDebug( this, grouped );

  return new Branch( debugged, childBranch );
}
public GroupByInGate(FlowProcess flowProcess, GroupBy splice, IORole ioRole) { super(flowProcess, splice, ioRole); this.isBufferJoin = splice.getJoiner() instanceof BufferJoin; }
// Looks up the key selectors and the sorting selectors that the GroupBy holds under the
// incoming scope's name, registers the sort-key fields against the input (result stored in
// sortKeys), and picks Order.DESCENDING when the GroupBy reports a reversed sort, otherwise
// Order.ASCENDING.
// NOTE(review): this is an excerpt of a larger method; groupKeyFields and sortOrder are not
// consumed by the statements visible here -- presumably they are used further down; confirm.
Fields groupKeyFields = groupBy.getKeySelectors().get(inScope.getName()); Fields sortKeyFields = groupBy.getSortingSelectors().get(inScope.getName()); sortKeys = registerKeyFields(input, sortKeyFields); Order sortOrder = groupBy.isSortReversed() ? Order.DESCENDING : Order.ASCENDING;
/**
 * Builds the single replacement tail: the first existing tail grouped by "ip", followed by a
 * {@link Count} over each group.
 *
 * @param context supplies the current tails; only the first one is read
 * @return a one-element list holding the counting pipe
 */
@Override
public List<Pipe> resolveTails( Context context ) {
  Pipe firstTail = context.getTails().get( 0 );

  Pipe byIp = new GroupBy( firstTail, new Fields( "ip" ) );
  Pipe counted = new Every( byIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( counted );
}
/**
 * Builds the single replacement tail: the first existing tail grouped by "ip", followed by a
 * {@link Count} over each group.
 *
 * @param context supplies the current tails; only the first one is read
 * @return a one-element list holding the counting pipe
 */
@Override
public List<Pipe> resolveTails( Context context ) {
  Pipe firstTail = context.getTails().get( 0 );

  Pipe byIp = new GroupBy( firstTail, new Fields( "ip" ) );
  Pipe counted = new Every( byIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( counted );
}
/**
 * Plans a flow in which one head pipe is split across two {@link GroupBy} branches that are
 * then merged by a third {@link GroupBy} -- an alternative to having two pipes that share the
 * same name. Asserts that the resulting flow has three steps.
 *
 * @throws IOException propagated from any call in the test body that raises it
 */
@Test
public void testSameSourceForBranch() throws IOException {
  Map sources = new HashMap();
  sources.put( "a", new Hfs( new TextLine( new Fields( "first", "second" ) ), "input/path/a" ) );

  // One head, two grouping branches hanging off it.
  Pipe head = new Pipe( "a" );
  Pipe firstBranch = new GroupBy( "a1", head, Fields.FIRST );
  Pipe secondBranch = new GroupBy( "a2", head, Fields.FIRST );

  // Both branches feed a single merging GroupBy named "tail".
  Pipe merged = new GroupBy( "tail", Pipe.pipes( firstBranch, secondBranch ), new Fields( "first", "second" ) );

  Map sinks = new HashMap();
  sinks.put( merged.getName(), new Hfs( new TextLine(), "output/path" ) );

  Flow flow = getPlatform().getFlowConnector().connect( sources, sinks, merged );

  assertEquals( "not equal: steps.size()", 3, flow.getFlowSteps().size() );
}
/**
 * Plans a flow in which a single {@code TestAssembly} is split into two {@link GroupBy}
 * branches ("left" and "right"), each bound to its own sink. When the platform reports
 * MapReduce, asserts that the planned flow has two steps.
 */
@Test
public void testPipeAssemblySplit() {
  Pipe pipe = new TestAssembly( "test" );
  Pipe pipe1 = new GroupBy( "left", pipe, new Fields( "ip" ) );
  Pipe pipe2 = new GroupBy( "right", pipe, new Fields( "ip" ) );

  Tap source = getPlatform().getTextFile( "foo" );
  Tap sink1 = getPlatform().getTextFile( "foo/split1", SinkMode.REPLACE );
  Tap sink2 = getPlatform().getTextFile( "foo/split2", SinkMode.REPLACE );

  // Parameterized instead of raw maps so a wrong key or value type fails at compile time.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "test", source );

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put( "left", sink1 );
  sinks.put( "right", sink2 );

  List<FlowStep> steps = getPlatform().getFlowConnector().connect( sources, sinks, pipe1, pipe2 ).getFlowSteps();

  // The step count is only checked for MapReduce platforms.
  if( getPlatform().isMapReduce() ) {
    assertEquals( "not equal: steps.size()", 2, steps.size() );
  }
}
/**
 * Plans a flow in which a single {@code TestAssembly} is split into two {@link GroupBy}
 * branches ("left" and "right"), each bound to its own sink. When the platform reports
 * MapReduce, asserts that the planned flow has two steps.
 */
@Test
public void testPipeAssemblySplit() {
  Pipe pipe = new TestAssembly( "test" );
  Pipe pipe1 = new GroupBy( "left", pipe, new Fields( "ip" ) );
  Pipe pipe2 = new GroupBy( "right", pipe, new Fields( "ip" ) );

  Tap source = getPlatform().getTextFile( "foo" );
  Tap sink1 = getPlatform().getTextFile( "foo/split1", SinkMode.REPLACE );
  Tap sink2 = getPlatform().getTextFile( "foo/split2", SinkMode.REPLACE );

  // Parameterized instead of raw maps so a wrong key or value type fails at compile time.
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "test", source );

  Map<String, Tap> sinks = new HashMap<String, Tap>();
  sinks.put( "left", sink1 );
  sinks.put( "right", sink2 );

  List<FlowStep> steps = getPlatform().getFlowConnector().connect( sources, sinks, pipe1, pipe2 ).getFlowSteps();

  // The step count is only checked for MapReduce platforms.
  if( getPlatform().isMapReduce() ) {
    assertEquals( "not equal: steps.size()", 2, steps.size() );
  }
}
/**
 * Appends a {@link GroupBy} over all fields to the given pipe, followed by an {@link Every}
 * running a {@code TestAggregator}.
 *
 * @param pipe         the pipe to extend
 * @param argFields    argument selector handed to the Every
 * @param declFields   fields the TestAggregator is constructed with
 * @param fieldValue   single value wrapped in the Tuple handed to the TestAggregator
 * @param selectFields output selector handed to the Every
 * @return the Every pipe that ends the assembly
 */
public Pipe createAssembly( Pipe pipe, Fields argFields, Fields declFields, String fieldValue, Fields selectFields ) {
  Pipe grouped = new GroupBy( pipe, Fields.ALL );
  TestAggregator aggregator = new TestAggregator( declFields, new Tuple( fieldValue ) );

  return new Every( grouped, argFields, aggregator, selectFields );
}
}
/**
 * Appends a {@link GroupBy} over all fields to the given pipe, followed by an {@link Every}
 * running a {@code TestAggregator}.
 *
 * @param pipe         the pipe to extend
 * @param argFields    argument selector handed to the Every
 * @param declFields   fields the TestAggregator is constructed with
 * @param fieldValue   single value wrapped in the Tuple handed to the TestAggregator
 * @param selectFields output selector handed to the Every
 * @return the Every pipe that ends the assembly
 */
public Pipe createAssembly( Pipe pipe, Fields argFields, Fields declFields, String fieldValue, Fields selectFields ) {
  Pipe grouped = new GroupBy( pipe, Fields.ALL );
  TestAggregator aggregator = new TestAggregator( declFields, new Tuple( fieldValue ) );

  return new Every( grouped, argFields, aggregator, selectFields );
}
}
public FirstAssembly( Pipe previous ) { Pipe pipe = new Pipe( "first", previous ); pipe = new Each( pipe, new Identity() ); pipe = new GroupBy( pipe, Fields.ALL ); pipe = new Every( pipe, new First(), Fields.RESULTS ); setTails( pipe ); } }
public FirstAssembly( Pipe previous ) { Pipe pipe = new Pipe( "first", previous ); pipe = new Each( pipe, new Identity() ); pipe = new GroupBy( pipe, Fields.ALL ); pipe = new Every( pipe, new First(), Fields.RESULTS ); setTails( pipe ); } }
/**
 * Checks that a {@code TestAssembly} followed by a {@link GroupBy} on "ip" can be connected
 * into a flow without an internal error, and that the flow has exactly one step.
 */
@Test
public void testPipeAssembly() {
  Pipe assembly = new TestAssembly( "test" );
  Pipe grouped = new GroupBy( assembly, new Fields( "ip" ) );

  Tap source = getPlatform().getTextFile( "foo" );
  Tap sink = getPlatform().getTextFile( "foo/split1", SinkMode.REPLACE );

  List<FlowStep> steps = getPlatform().getFlowConnector().connect( source, sink, grouped ).getFlowSteps();

  assertEquals( "not equal: steps.size()", 1, steps.size() );
}
/**
 * Checks that a {@code TestAssembly} followed by a {@link GroupBy} on "ip" can be connected
 * into a flow without an internal error, and that the flow has exactly one step.
 */
@Test
public void testPipeAssembly() {
  Pipe assembly = new TestAssembly( "test" );
  Pipe grouped = new GroupBy( assembly, new Fields( "ip" ) );

  Tap source = getPlatform().getTextFile( "foo" );
  Tap sink = getPlatform().getTextFile( "foo/split1", SinkMode.REPLACE );

  List<FlowStep> steps = getPlatform().getFlowConnector().connect( source, sink, grouped ).getFlowSteps();

  assertEquals( "not equal: steps.size()", 1, steps.size() );
}
/**
 * Builds a complete tail from scratch: a head pipe named after the flow's first source, a
 * regex parse of "line" into "ip", a {@link GroupBy} on "ip", and a {@link Count} per group.
 *
 * @param context gives access to the flow whose first source name is used for the head pipe
 * @return a one-element list holding the counting pipe
 */
@Override
public List<Pipe> resolveTails( Context context ) {
  String sourceName = (String) context.getFlow().getSourceNames().get( 0 );

  Pipe head = new Pipe( sourceName );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe byIp = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( byIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( counted );
}
};
/**
 * Builds a complete tail from scratch: a head pipe named after the flow's first source, a
 * regex parse of "line" into "ip", a {@link GroupBy} on "ip", and a {@link Count} per group.
 *
 * @param context gives access to the flow whose first source name is used for the head pipe
 * @return a one-element list holding the counting pipe
 */
@Override
public List<Pipe> resolveTails( Context context ) {
  String sourceName = (String) context.getFlow().getSourceNames().get( 0 );

  Pipe head = new Pipe( sourceName );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe byIp = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( byIp, new Count(), new Fields( "ip", "count" ) );

  return Arrays.asList( counted );
}
};
/**
 * Hands {@code verify} a pipe that groups on "third" -- a field declared by the sink
 * ("third", "fourth") but not by the source ("first", "second").
 * NOTE(review): what {@code verify} asserts is not visible here; presumably it expects field
 * resolution to fail -- confirm against its definition.
 *
 * @throws Exception propagated from {@code verify} or the tap setup
 */
@Test
public void testGroupByResolver() throws Exception {
  Tap source = getPlatform().getTabDelimitedFile( new Fields( "first", "second" ), "input/path", SinkMode.KEEP );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "third", "fourth" ), "output/path", SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Pipe grouped = new GroupBy( head, new Fields( "third" ) );

  verify( source, sink, grouped );
}
/**
 * Checks the trace reported by a {@link GroupBy} built in this method against the expected
 * {@code cascading.TraceTest.testPipeGroupBy(TraceTest.java} prefix.
 */
@Test
public void testPipeGroupBy() {
  // The pipes are constructed directly in this method because the expected trace names it.
  Pipe head = new Pipe( "foo" );
  Pipe identity = new Each( head, new Fields( "a" ), new Identity() );
  Pipe grouped = new GroupBy( identity, new Fields( "b" ) );

  assertEqualsTrace( "cascading.TraceTest.testPipeGroupBy(TraceTest.java", grouped.getTrace() );
}
/**
 * Hands {@code verify} a pipe that groups on "third" -- a field declared by the sink
 * ("third", "fourth") but not by the source ("first", "second").
 * NOTE(review): what {@code verify} asserts is not visible here; presumably it expects field
 * resolution to fail -- confirm against its definition.
 *
 * @throws Exception propagated from {@code verify} or the tap setup
 */
@Test
public void testGroupByResolver() throws Exception {
  Tap source = getPlatform().getTabDelimitedFile( new Fields( "first", "second" ), "input/path", SinkMode.KEEP );
  Tap sink = getPlatform().getTabDelimitedFile( new Fields( "third", "fourth" ), "output/path", SinkMode.REPLACE );

  Pipe head = new Pipe( "test" );
  Pipe grouped = new GroupBy( head, new Fields( "third" ) );

  verify( source, sink, grouped );
}
/**
 * Assembles the pipe chain that turns key bytes into bloom filter parts: compute
 * ("split", "index", "hash_num") per key, filter partial duplicates, group by "split", and
 * run {@code CreateBloomFilterFromIndices} over each group. The three bloom properties are
 * set on the final pipe's step config, and that pipe becomes the assembly's tail.
 *
 * @param keys                pipe carrying the keys; also passed to the superclass constructor
 * @param bloomFilterID       value stored under {@code BloomProps.TARGET_BLOOM_FILTER_ID}
 * @param approxCountPartsDir value stored under {@code BloomProps.BLOOM_KEYS_COUNTS_DIR}
 * @param bloomPartsDir       value stored under {@code BloomProps.BLOOM_FILTER_PARTS_DIR}
 * @param keyBytesField       name of the field selected as the argument to {@code GetIndices}
 * @param hashFactory         factory handed to {@code GetIndices}
 * @throws IOException declared by the constructor; no visible statement here throws it directly
 */
public CreateBloomFilter(Pipe keys,
                         String bloomFilterID,
                         String approxCountPartsDir,
                         String bloomPartsDir,
                         String keyBytesField,
                         HashFunctionFactory hashFactory) throws IOException {
  super(keys);

  Pipe indexed = new Each(keys, new Fields(keyBytesField), new GetIndices(hashFactory), new Fields("split", "index", "hash_num"));
  Pipe deduplicated = new Each(indexed, new Fields("split", "index", "hash_num"), new Unique.FilterPartialDuplicates());
  Pipe bySplit = new GroupBy(deduplicated, new Fields("split"));
  Pipe filterParts = new Every(bySplit, new Fields("index", "hash_num"), new CreateBloomFilterFromIndices(), Fields.ALL);

  // Step-level configuration attached to the final pipe of the chain.
  ConfigDef stepConfig = filterParts.getStepConfigDef();
  stepConfig.setProperty(BloomProps.BLOOM_FILTER_PARTS_DIR, bloomPartsDir);
  stepConfig.setProperty(BloomProps.BLOOM_KEYS_COUNTS_DIR, approxCountPartsDir);
  stepConfig.setProperty(BloomProps.TARGET_BLOOM_FILTER_ID, bloomFilterID);

  setTails(filterParts);
}