// Co-group source1 (on "field1") with source2 (on "field3") into a single pipe.
Pipe joined = new CoGroup(source1, new Fields("field1"), source2, new Fields("field3"));
private DataSet<?> prepareCoGroupInput(List<DataSet<Tuple>> inputs, FlowNode node, int dop) { CoGroup coGroup = (CoGroup)getSingle(node.getSinkElements()); Joiner joiner = coGroup.getJoiner(); int numJoinInputs = coGroup.isSelfJoin() ? coGroup.getNumSelfJoins() + 1 : inputs.size(); Fields[] inputFields = new Fields[numJoinInputs]; Fields[] keyFields = new Fields[numJoinInputs]; String[][] flinkKeys = new String[numJoinInputs][]; List<DataSet<Tuple>> joinInputs = computeSpliceInputsFieldsKeys(coGroup, node, inputs, inputFields, keyFields, flinkKeys); if(joiner.getClass().equals(InnerJoin.class)) { if(!keyFields[0].isNone()) { return prepareFullOuterCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } else { // Cartesian product return prepareInnerCrossInput(joinInputs, node, inputFields, dop); } } else if(joiner.getClass().equals(BufferJoin.class)) { return prepareBufferCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } else { return prepareFullOuterCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } }
// Look up the key selector registered under the first incoming scope's name,
// and fetch the joiner configured on the CoGroup.
Fields keyFields = coGroup.getKeySelectors().get(inScopes.get(0).getName()); Joiner joiner = coGroup.getJoiner();
public CoGroupBufferInGate(FlowProcess flowProcess, CoGroup splice, IORole ioRole) { super(flowProcess, splice, ioRole); this.isBufferJoin = splice.getJoiner() instanceof BufferJoin; }
public static void main(String[] args) throws IOException { if (args.length != 1) { System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.SimpleFlowExample <output dir>"); return; } String outputDir = args[0]; Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir); Pipe source1 = new Pipe("source1"); Pipe source2 = new Pipe("source2"); Pipe joined = new CoGroup(source1, new Fields("field1"), source2, new Fields("field3")); Map<String, Tap> sources = new HashMap<String, Tap>(); sources.put("source1", ExampleFixtures.SOURCE_TAP_1); sources.put("source2", ExampleFixtures.SOURCE_TAP_2); CascadingUtil.get().getFlowConnector().connect("Example flow", sources, sink, joined).complete(); // Take a look at the output tuples TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess()); System.out.println("Output tuples from flow:"); while (output.hasNext()) { System.out.println(output.next().getTuple()); } } }
/**
 * Wires up the assembly: co-groups the given pipes with a {@link BufferJoin},
 * applies a {@link MultiBufferOperation} to the grouping, retains only the
 * combined output fields, and registers the result as the tail.
 *
 * @param pipes       the pipes to co-group
 * @param groupFields the grouping fields, passed to the CoGroup alongside {@code pipes}
 * @param groupRename the fields handed to the MultiBufferOperation; the
 *                    operation's result fields are appended to these to form
 *                    the output fields
 * @param operation   the multi-buffer operation to run
 */
protected void init(Pipe[] pipes, Fields[] groupFields, Fields groupRename, MultiBuffer operation) {
  final Fields resultFields = groupRename.append(operation.getResultFields());

  final Pipe coGrouped = new CoGroup(pipes, groupFields, null, null, new BufferJoin());
  final Pipe buffered = new Every(coGrouped, new MultiBufferOperation(groupRename, operation), resultFields);
  final Pipe retained = new Retain(buffered, resultFields);

  setTails(retained);
}
}
/**
 * Plans a flow in which one pipe is co-grouped with itself (a single
 * self-join repeat on "offset") and expects exactly one flow step.
 */
@Test
public void testDupeSourceRepeat() {
  Tap input = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe head = new Pipe( "pipe" );
  Pipe selfJoined = new CoGroup( "cogroup", head, new Fields( "offset" ), 1, Fields.size( 4 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "pipe", input );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  Flow planned = getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, selfJoined );
  List<FlowStep> plannedSteps = planned.getFlowSteps();

  assertEquals( "not equal: steps.size()", 1, plannedSteps.size() );
}
/**
 * Plans a flow in which one pipe is co-grouped with itself (a single
 * self-join repeat on "offset") and expects exactly one flow step.
 */
@Test
public void testDupeSourceRepeat() {
  Tap input = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe head = new Pipe( "pipe" );
  Pipe selfJoined = new CoGroup( "cogroup", head, new Fields( "offset" ), 1, Fields.size( 4 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "pipe", input );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  Flow planned = getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, selfJoined );
  List<FlowStep> plannedSteps = planned.getFlowSteps();

  assertEquals( "not equal: steps.size()", 1, plannedSteps.size() );
}
/**
 * Plans a two-way CoGroup on "offset" where both branches ("left" and
 * "right") are bound to the same source tap. There are no assertions: the
 * test passes as long as planning does not throw.
 */
@Test
public void testDupeSource2() {
  Tap sharedInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe lhs = new Pipe( "left" );
  Pipe rhs = new Pipe( "right" );
  Pipe joined = new CoGroup( "cogroup", lhs, new Fields( "offset" ), rhs, new Fields( "offset" ), Fields.size( 4 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "left", sharedInput );
  sourceTaps.put( "right", sharedInput );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  // the planned flow itself is not inspected
  getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, joined );
}
/**
 * Plans a two-way CoGroup on "offset" where both branches ("left" and
 * "right") are bound to the same source tap. There are no assertions: the
 * test passes as long as planning does not throw.
 */
@Test
public void testDupeSource2() {
  Tap sharedInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe lhs = new Pipe( "left" );
  Pipe rhs = new Pipe( "right" );
  Pipe joined = new CoGroup( "cogroup", lhs, new Fields( "offset" ), rhs, new Fields( "offset" ), Fields.size( 4 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "left", sharedInput );
  sourceTaps.put( "right", sharedInput );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  // the planned flow itself is not inspected
  getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, joined );
}
/**
 * Builds a graph with one CoGroup feeding another: "source20" is co-grouped
 * with "source101", and that result is co-grouped with "source102". Both
 * "source101" and "source102" are bound to the same NonTap instance. The
 * outer CoGroup is followed by an Identity Each and a pipe named "sink".
 */
public CoGroupAroundCoGroupGraph() {
  NonTap tapTen = new NonTap( "source10", new Fields( "offset", "line" ) );
  NonTap tapTwenty = new NonTap( "source20", new Fields( "offset", "line" ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "source20", tapTwenty );
  sourceTaps.put( "source101", tapTen );
  sourceTaps.put( "source102", tapTen );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "sink", new NonTap( "sink", new Fields( "offset", "line" ) ) );

  Pipe head20 = new Pipe( "source20" );
  Pipe head101 = new Pipe( "source101" );
  Pipe head102 = new Pipe( "source102" );

  Pipe inner = new CoGroup( head20, new Fields( "num" ), head101, new Fields( "num" ), new Fields( "num1", "num2" ) );
  Pipe outer = new CoGroup( inner, new Fields( "num1" ), head102, new Fields( "num" ), new Fields( "num1", "num2", "num3" ) );

  Pipe passThrough = new Each( outer, new Identity() );
  Pipe tail = new Pipe( "sink", passThrough );

  initialize( sourceTaps, sinkTaps, tail );
}
}
/**
 * Wraps a two-way CoGroup in three further pipes and checks that the first
 * head reported by the outermost pipe is one of the two original head pipes.
 */
@Test
public void testGetFirstJoin() {
  Pipe first = new Pipe( "first" );
  Pipe second = new Pipe( "second" );

  Pipe tail = new CoGroup( first, second );

  // stack three plain pipes on top of the CoGroup
  for( int depth = 0; depth < 3; depth++ ) {
    tail = new Pipe( tail );
  }

  assertTrue( tail.getHeads()[ 0 ] == first || tail.getHeads()[ 0 ] == second );
}
// Co-group the previous head pipe (on "key" + (i - 1)) with the pipe built so far (on "key" + i),
// then apply a Sum over "key" + (i - 1); the counter is advanced by the two pipes just created.
pipe = new CoGroup( pipes[ i - 1 ], new Fields( "key" + ( i - 1 ) ), pipe, new Fields( "key" + i ) ); pipe = new Every( pipe, new Fields( "key" + ( i - 1 ) ), new Sum() ); count += 2; // 2 pipes
// Co-group the previous head pipe (on "key" + (i - 1)) with the pipe built so far (on "key" + i),
// then apply a Sum over "key" + (i - 1); the counter is advanced by the two pipes just created.
pipe = new CoGroup( pipes[ i - 1 ], new Fields( "key" + ( i - 1 ) ), pipe, new Fields( "key" + i ) ); pipe = new Every( pipe, new Fields( "key" + ( i - 1 ) ), new Sum() ); count += 2; // 2 pipes
/**
 * Plans a three-way CoGroup on "offset" where the "left" and "right"
 * branches share one source tap and "middle" uses another. There are no
 * assertions: the test passes as long as planning does not throw.
 */
@Test
public void testDupeSource3() {
  Tap fooInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap barInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe lhs = new Pipe( "left" );
  Pipe mid = new Pipe( "middle" );
  Pipe rhs = new Pipe( "right" );

  Pipe[] heads = Pipe.pipes( lhs, mid, rhs );
  Fields[] joinKeys = Fields.fields( new Fields( "offset" ), new Fields( "offset" ), new Fields( "offset" ) );

  Pipe joined = new CoGroup( "cogroup", heads, joinKeys, Fields.size( 6 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "left", fooInput );
  sourceTaps.put( "middle", barInput );
  sourceTaps.put( "right", fooInput );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  // the planned flow itself is not inspected
  getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, joined );
}
/**
 * Plans a three-way CoGroup on "offset" where the "left" and "right"
 * branches share one source tap and "middle" uses another. There are no
 * assertions: the test passes as long as planning does not throw.
 */
@Test
public void testDupeSource3() {
  Tap fooInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
  Tap barInput = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar/merge" );
  Tap output = new Hfs( new TextLine(), "foo" );

  Pipe lhs = new Pipe( "left" );
  Pipe mid = new Pipe( "middle" );
  Pipe rhs = new Pipe( "right" );

  Pipe[] heads = Pipe.pipes( lhs, mid, rhs );
  Fields[] joinKeys = Fields.fields( new Fields( "offset" ), new Fields( "offset" ), new Fields( "offset" ) );

  Pipe joined = new CoGroup( "cogroup", heads, joinKeys, Fields.size( 6 ) );

  Map sourceTaps = new HashMap();
  sourceTaps.put( "left", fooInput );
  sourceTaps.put( "middle", barInput );
  sourceTaps.put( "right", fooInput );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "cogroup", output );

  // the planned flow itself is not inspected
  getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, joined );
}
/**
 * Builds a graph in which a single head pipe ("lower") is branched into
 * "lhs" and "rhs", each branch splits "line" into "num" and "char", and the
 * two branches are co-grouped on "num" into a pipe named "sink".
 */
public SelfCoGroupGraph() {
  Map sourceTaps = new HashMap();
  NonTap lowerTap = new NonTap( "lower", new Fields( "offset", "line" ) );
  sourceTaps.put( "lower", lowerTap );

  Map sinkTaps = new HashMap();
  sinkTaps.put( "sink", new NonTap( "sink", new Fields( "offset", "line" ) ) );

  // one splitter instance is shared by both branches
  Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

  Pipe head = new Pipe( "lower" );

  Pipe lhsBranch = new Pipe( "lhs", head );
  Pipe lhsSplit = new Each( lhsBranch, new Fields( "line" ), splitter );

  Pipe rhsBranch = new Pipe( "rhs", head );
  Pipe rhsSplit = new Each( rhsBranch, new Fields( "line" ), splitter );

  Pipe joined = new CoGroup( "sink", lhsSplit, new Fields( "num" ), rhsSplit, new Fields( "num" ), Fields.size( 4 ) );

  initialize( sourceTaps, sinkTaps, joined );
}
}
@Test public void testCoGroupRelativeUnknown() throws Exception { copyFromLocal( inputFileLower ); Tap sourceLower = getPlatform().getDelimitedFile( Fields.UNKNOWN, " ", inputFileLower ); Tap sourceUpper = getPlatform().getDelimitedFile( Fields.UNKNOWN, " ", inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); // using null pos so all fields are written Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( "complexcogrouprelativeunknown" ), SinkMode.REPLACE ); Pipe pipeLower = new Pipe( "lower" ); Pipe pipeUpper = new Pipe( "upper" ); Pipe splice = new CoGroup( pipeLower, new Fields( -2 ), pipeUpper, new Fields( -2 ) ); Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( results.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testCoGroupAroundCoGroupOptimized() throws Exception { Tap source10 = new Hfs( new TextLine( new Fields( "num" ) ), "foo" ); Tap source20 = new Hfs( new TextLine( new Fields( "num" ) ), "bar" ); Map sources = new HashMap(); sources.put( "source20", source20 ); sources.put( "source101", source10 ); sources.put( "source102", source10 ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "baz", SinkMode.REPLACE ); Pipe pipeNum20 = new Pipe( "source20" ); Pipe pipeNum101 = new Pipe( "source101" ); Pipe pipeNum102 = new Pipe( "source102" ); Pipe splice1 = new CoGroup( pipeNum20, new Fields( "num" ), pipeNum101, new Fields( "num" ), new Fields( "num1", "num2" ) ); Pipe splice2 = new CoGroup( splice1, new Fields( "num1" ), pipeNum102, new Fields( "num" ), new Fields( "num1", "num2", "num3" ) ); Properties properties = new Properties(); FlowConnectorProps.setIntermediateSchemeClass( properties, TextLine.class ); FlowConnector flowConnector = getPlatform().getFlowConnector( properties ); Flow flow = flowConnector.connect( sources, sink, splice2 ); assertEquals( "not equal: steps.size()", 2, flow.getFlowSteps().size() ); }