/**
 * Sets up the join closure and result iterator for this source-side gate.
 * <p>
 * Only the {@code source} role is supported; any other role fails fast.
 * For a self join the single incoming key/values field arrays are replicated
 * once per self-join branch before the closure is built.
 */
@Override
public void prepare()
  {
  if( role != IORole.source )
    {
    throw new UnsupportedOperationException("Non-source group by not supported in GroupByInGate");
    }

  if( role != IORole.sink )
    {
    Fields[] closureKeys;
    Fields[] closureValues;

    if( splice.isSelfJoin() )
      {
      // replicate the single incoming fields once per join branch
      int branches = splice.getNumSelfJoins() + 1;
      closureKeys = new Fields[ branches ];
      closureValues = new Fields[ branches ];

      for( int branch = 0; branch < branches; branch++ )
        {
        closureKeys[ branch ] = super.keyFields[ 0 ];
        closureValues[ branch ] = super.valuesFields[ 0 ];
        }
      }
    else
      {
      closureKeys = super.keyFields;
      closureValues = super.valuesFields;
      }

    closure = new JoinClosure( flowProcess, closureKeys, closureValues );
    }

  // Fields.NONE is declared by a BufferJoin; hand the closure to the grouping
  // so the Buffer can drive the join itself
  if( grouping != null && splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() )
    grouping.joinerClosure = closure;

  this.resultIterator = new JoinResultIterator( closure, this.splice.getJoiner() );
  }
@Override public String getName() { if( spliceName != null ) return spliceName; StringBuffer buffer = new StringBuffer(); for( Pipe pipe : pipes ) { if( buffer.length() != 0 ) { if( isGroupBy() || isMerge() ) buffer.append( "+" ); else if( isCoGroup() || isJoin() ) buffer.append( "*" ); // more semantically correct } buffer.append( pipe.getName() ); } spliceName = buffer.toString(); return spliceName; }
/**
 * Returns the declared field names of the single {@code Splice} immediately
 * upstream of the given pipe.
 *
 * @return the field names, or {@code null} when the pipe has more than one
 *         previous pipe, the previous element is not a Splice, or the Splice
 *         declares no fields
 */
private static List<String> getIncomingFieldNames( Pipe pipe )
  {
  Pipe[] previousPipes = pipe.getPrevious();

  if( previousPipes.length != 1 )
    return null;

  Pipe previous = previousPipes[ 0 ];

  if( !( previous instanceof Splice ) )
    return null;

  Splice upstreamSplice = (Splice) previous;

  if( upstreamSplice.getDeclaredFields() == null )
    return null;

  return fieldNames( upstreamSplice.getDeclaredFields() );
  }
/**
 * Builds the co-group buffer closure for this source-side gate.
 * <p>
 * Only the {@code source} role is supported; any other role fails fast. When
 * the splice declares {@code Fields.NONE} (a BufferJoin) the closure is handed
 * to the grouping so the Buffer can drive the join itself.
 */
@Override
public void prepare()
  {
  if( role != IORole.source )
    {
    throw new UnsupportedOperationException("Non-source group by not supported in CoGroupBufferInGate");
    }

  if( role != IORole.sink )
    closure = new CoGroupBufferClosure( flowProcess, this.getSplice().getNumSelfJoins(), keyFields, valuesFields );

  Fields joinDeclared = splice.getJoinDeclaredFields();

  if( grouping != null && joinDeclared != null && joinDeclared.isNone() )
    grouping.joinerClosure = closure;
  }
/**
 * Prepares the in-memory join: resolves per-input key and value fields from
 * the incoming/outgoing scopes, builds the {@code JoinClosure}, and creates
 * the iterator that chains the joined tuple entries.
 * <p>
 * For a self join only the first incoming scope exists; its key/value fields
 * are replicated once per self-join branch.
 */
@Override
public void prepare()
  {
  boolean selfJoin = this.splice.isSelfJoin();
  int numJoinInputs = selfJoin ? this.splice.getNumSelfJoins() + 1 : this.splice.getPrevious().length;

  Fields[] keyFields = new Fields[ numJoinInputs ];
  Fields[] valueFields = new Fields[ numJoinInputs ];

  Scope outgoingScope = outgoingScopes.get( 0 );

  if( !selfJoin )
    {
    // each incoming scope supplies the fields for its own ordinal position
    for( int input = 0; input < numJoinInputs; input++ )
      {
      Scope incomingScope = incomingScopes.get( input );
      int ordinal = incomingScope.getOrdinal();

      keyFields[ ordinal ] = outgoingScope.getKeySelectors().get( incomingScope.getName() );
      valueFields[ ordinal ] = incomingScope.getIncomingSpliceFields();
      }
    }
  else
    {
    // single incoming scope: replicate its fields across all join branches
    Scope incomingScope = incomingScopes.get( 0 );

    keyFields[ 0 ] = outgoingScope.getKeySelectors().get( incomingScope.getName() );
    valueFields[ 0 ] = incomingScope.getIncomingSpliceFields();

    for( int branch = 1; branch < numJoinInputs; branch++ )
      {
      keyFields[ branch ] = keyFields[ 0 ];
      valueFields[ branch ] = valueFields[ 0 ];
      }
    }

  this.closure = new JoinClosure( this.flowProcess, keyFields, valueFields );
  this.joiner = this.splice.getJoiner();
  this.entryIterator = new TupleEntryChainIterator( outgoingScope.getOutValuesFields() );
  }
Fields declaredFields = getJoinDeclaredFields(); if( !isCoGroup() ) throw new IllegalArgumentException( "Fields.NONE may only be declared as the join fields when using a CoGroup" ); if( incomingScopes.size() != pipes.size() && isSelfJoin() ) throw new OperatorException( this, "self joins without intermediate operators are not permitted, see 'numSelfJoins' constructor or identity function" ); boolean foundUnknown = false; List<Fields> appendableFields = getOrderedResolvedFields( incomingScopes ); if( isSelfJoin() ) throw new OperatorException( this, "declared grouped fields not same size as grouped values, declared: " + declaredFields.printVerbose() + " != size: " + size * ( numSelfJoins + 1 ) ); else if( isGroupBy() || isMerge() ) List<Fields> appendableFields = getOrderedResolvedFields( incomingScopes ); Fields appendedFields = new Fields();
private List<DataSet<Tuple>> computeSpliceInputsFieldsKeys(Splice splice, FlowNode node, List<DataSet<Tuple>> inputs, Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys) { int numJoinInputs = splice.isSelfJoin() ? splice.getNumSelfJoins() + 1 : inputs.size(); List<Scope> inScopes = getInputScopes(node, splice); List<DataSet<Tuple>> inputs2; if(!splice.isSelfJoin()) { keyFields[i] = splice.getKeySelectors().get(inScope.getName()); flinkKeys[i] = registerKeyFields(inputs.get(i), keyFields[i]); keyFields[0] = splice.getKeySelectors().get(inScope.getName()); flinkKeys[0] = registerKeyFields(inputs.get(0), keyFields[0]);
/**
 * Computes a stable hash for a flow element from its own class name plus,
 * where available, the class of its operation (Operator), its scheme (Tap),
 * or its joiner and self-join count (Splice). The two halves are combined
 * via {@code pair()}.
 */
private static int hash( FlowElement flowElement )
  {
  int lhs = flowElement.getClass().getName().hashCode();
  int rhs = 0;

  if( flowElement instanceof Operator && ( (Operator) flowElement ).getOperation() != null )
    {
    rhs = ( (Operator) flowElement ).getOperation().getClass().getName().hashCode();
    }
  else if( flowElement instanceof Tap && ( (Tap) flowElement ).getScheme() != null )
    {
    rhs = ( (Tap) flowElement ).getScheme().getClass().getName().hashCode();
    }
  else if( flowElement instanceof Splice )
    {
    Splice splice = (Splice) flowElement;

    rhs = splice.getJoiner().getClass().getName().hashCode() + 31 * splice.getNumSelfJoins();
    }

  return pair( lhs, rhs );
  }
/**
 * Creates the Hadoop-specific co-group closure used to iterate the grouped
 * streams, sized for any configured self joins.
 */
@Override
protected HadoopCoGroupClosure createClosure()
  {
  int numSelfJoins = splice.getNumSelfJoins();

  return new HadoopCoGroupClosure( flowProcess, numSelfJoins, keyFields, valuesFields );
  }
groupComparators = new Comparator[ getNumDeclaredIncomingBranches() ]; if( splice.isSorted() ) valueComparators = new Comparator[ getNumDeclaredIncomingBranches() ]; int size = splice.isGroupBy() ? 1 : getNumDeclaredIncomingBranches(); int pos = splice.isGroupBy() ? 0 : splice.getPipePos().get( incomingScope.getName() ); Fields groupFields = splice.getKeySelectors().get( incomingScope.getName() ); groupComparators[ pos ] = new SparseTupleComparator( Fields.asDeclaration( groupFields ), defaultComparator ); groupComparators[ pos ] = splice.isSortReversed() ? NullSafeReverseComparator.reverseOrder( groupComparators[ pos ] ) : groupComparators[ pos ]; Fields sortFields = splice.getSortingSelectors().get( incomingScope.getName() ); valueComparators[ pos ] = new SparseTupleComparator( valuesFields[ pos ], sortFields, defaultComparator ); if( splice.isSortReversed() ) valueComparators[ pos ] = NullSafeReverseComparator.reverseOrder( valueComparators[ pos ] );
/**
 * Constructs a group gate for the given splice and role, recording up front
 * whether the splice performs a BufferJoin so later stages can branch on it.
 */
public HadoopGroupGate( FlowProcess flowProcess, Splice splice, IORole role )
  {
  super( flowProcess, splice, role );

  isBufferJoin = splice.getJoiner() instanceof BufferJoin;
  }
/**
 * Builds the group-by closure for this source-side gate.
 * <p>
 * Only the {@code source} role is supported; any other role fails fast. When
 * the splice declares {@code Fields.NONE} (a BufferJoin) the closure is handed
 * to the grouping so the Buffer can drive the join itself.
 */
@Override
public void prepare()
  {
  if( role != IORole.source )
    {
    throw new UnsupportedOperationException("Non-source group by not supported in GroupByInGate");
    }

  if( role != IORole.sink )
    closure = new GroupByClosure( flowProcess, keyFields, valuesFields );

  Fields joinDeclared = splice.getJoinDeclaredFields();

  if( grouping != null && joinDeclared != null && joinDeclared.isNone() )
    grouping.joinerClosure = closure;
  }
// Planner-rule predicate: matches only when the base expression applies AND the
// splice is not a self join — self joins are excluded from this rule.
// NOTE(review): the unconditional cast to Splice assumes super.applies() only
// matches Splice elements — confirm against the superclass expression.
@Override public boolean applies( PlannerContext plannerContext, ElementGraph elementGraph, FlowElement flowElement ) { return super.applies( plannerContext, elementGraph, flowElement ) && !( (Splice) flowElement ).isSelfJoin(); } },
if( splice.isSorted() ) currentKey = ( (TuplePair) currentKey ).getLhs();
String type = ( (Splice) flowElement ).isMerge() ? "merged" : "grouped"; LOG.info( "adding {} edge between: {} and {}", type, Util.join( sourceVerticesIDs, "," ), targetVertex.getName() ); dag.addEdge( GroupInputEdge.create( vertexGroup, targetVertex, edgeProperty, inputDescriptor ) );
throw new IllegalArgumentException( "pipes array may not be null" ); setKind(); this.spliceName = spliceName; if( isMerge() ) throw new IllegalArgumentException( "may not merge a pipe with itself without intermediate operations after the split" ); throw new IllegalArgumentException( "all groupFields must be identical" ); addPipe( pipes[ 0 ] ); this.numSelfJoins = pipes.length - 1; this.keyFieldsMap.put( pipes[ 0 ].getName(), groupFields[ 0 ] ); for( int i = 0; i < pipes.length; i++ ) addPipe( pipes[ i ] ); addGroupFields( pipes[ i ], Fields.FIRST ); continue; addGroupFields( pipes[ i ], groupFields[ i ] ); this.joiner = joiner; verifyCoGrouper();
private void setOrdinal( FlowElement previous, Pipe current, Scope scope ) { if( current instanceof Splice ) { Splice splice = (Splice) current; Integer ordinal; if( previous instanceof Tap ) // revert to pipe name ordinal = splice.getPipePos().get( scope.getName() ); else // GroupBy allows for duplicate pipe names, this guarantees correct ordinality ordinal = FlowElements.findOrdinal( splice, (Pipe) previous ); scope.setOrdinal( ordinal ); Set<Scope> scopes = new HashSet<>( incomingEdgesOf( current ) ); scopes.remove( scope ); for( Scope other : scopes ) { if( other.getOrdinal() == scope.getOrdinal() ) throw new IllegalStateException( "duplicate ordinals" ); } if( splice.isJoin() && ordinal != 0 ) scope.setNonBlocking( false ); } }
/**
 * Resolves this splice's grouping selectors against the incoming scopes,
 * verifying all resolved grouping fields are the same size and of compatible
 * types.
 *
 * @return the resolved grouping fields keyed by incoming pipe name
 * @throws OperatorException when a selector cannot be resolved or the resolved
 *                           grouping fields differ in size
 */
Map<String, Fields> resolveGroupingSelectors( Set<Scope> incomingScopes )
  {
  try
    {
    Map<String, Fields> keySelectors = getKeySelectors();
    Map<String, Fields> resolvedFields = resolveSelectorsAgainstIncoming( incomingScopes, keySelectors, "grouping" );

    if( !verifySameSize( resolvedFields ) )
      throw new OperatorException( this, "all grouping fields must be same size: " + toString() );

    verifySameTypes( keySelectors, resolvedFields );

    return resolvedFields;
    }
  catch( FieldsResolverException exception )
    {
    // preserve the selector/source field context from the resolver failure
    throw new OperatorException( this, OperatorException.Kind.grouping, exception.getSourceFields(), exception.getSelectorFields(), exception );
    }
  catch( RuntimeException exception )
    {
    throw new OperatorException( this, "could not resolve grouping selector in: " + this, exception );
    }
  }