/**
 * Determines the communication pattern to use for an edge, based on its destination vertex.
 *
 * <p>The result is {@code Shuffle} when {@code dst} is an {@link OperatorVertex} whose transform
 * is a {@code ReduceByKeyTransform} or a {@code GroupByKeyTransform}; otherwise it is
 * {@code OneToOne}. The source vertex is not inspected.
 *
 * @param src source vertex.
 * @param dst destination vertex.
 * @return the communication pattern.
 */
public static CommunicationPatternProperty.Value getEdgeCommunicationPattern(final IRVertex src,
                                                                              final IRVertex dst) {
  // Anything that is not an operator can never be a key-grouping stage.
  if (!(dst instanceof OperatorVertex)) {
    return CommunicationPatternProperty.Value.OneToOne;
  }

  final OperatorVertex dstOperator = (OperatorVertex) dst;
  final boolean groupsByKey = dstOperator.getTransform() instanceof ReduceByKeyTransform
    || dstOperator.getTransform() instanceof GroupByKeyTransform;

  return groupsByKey
    ? CommunicationPatternProperty.Value.Shuffle
    : CommunicationPatternProperty.Value.OneToOne;
}
/**
 * Forwards the watermark to the {@code onWatermark} callback of the transform held by
 * {@code operatorVertex}.
 *
 * @param watermark the watermark to pass on.
 */
@Override public void emitWatermark(final Watermark watermark) { operatorVertex.getTransform().onWatermark(watermark); }
/**
 * Forwards the watermark to the {@code onWatermark} callback of the transform held by
 * {@code operatorVertex}.
 *
 * @param watermark the watermark to pass on.
 */
@Override public void emitWatermark(final Watermark watermark) { operatorVertex.getTransform().onWatermark(watermark); }
/**
 * Hands the output element to the {@code onData} callback of the transform held by
 * {@code nextOperatorVertex}.
 *
 * @param output the element to pass on.
 */
@Override public void emit(final O output) { nextOperatorVertex.getTransform().onData(output); }
/**
 * Hands the output element to the {@code onData} callback of the transform held by
 * {@code nextOperatorVertex}.
 *
 * @param output the element to pass on.
 */
@Override public void emit(final O output) { nextOperatorVertex.getTransform().onData(output); }
/**
 * Hands the output element to the {@code onData} callback of the given vertex's transform.
 *
 * @param vertex the operator vertex whose transform receives the element.
 * @param output the element to pass on.
 */
private void emit(final OperatorVertex vertex, final O output) { vertex.getTransform().onData(output); }
/**
 * Hands the output element to the {@code onData} callback of the given vertex's transform.
 *
 * @param vertex the operator vertex whose transform receives the element.
 * @param output the element to pass on.
 */
private void emit(final OperatorVertex vertex, final O output) { vertex.getTransform().onData(output); }
/**
 * Chooses the communication pattern for the edge going from {@code src} to {@code dst}.
 *
 * <p>The rules are evaluated in this order, and the first match wins:
 * <ol>
 *   <li>source is a {@code DoFnTransform} wrapping Beam's {@code ConstructUnionTableFn}: Shuffle</li>
 *   <li>source is a {@code FlattenTransform}: OneToOne</li>
 *   <li>destination is a {@code GroupByKeyAndWindowDoFnTransform} or {@code GroupByKeyTransform}: Shuffle</li>
 *   <li>destination is a {@code CreateViewTransform}: BroadCast</li>
 *   <li>anything else: OneToOne</li>
 * </ol>
 *
 * @param src source IR vertex.
 * @param dst destination IR vertex.
 * @return the communication pattern property value.
 */
private CommunicationPatternProperty.Value getCommPattern(final IRVertex src, final IRVertex dst) {
  // The class is looked up by name, so it is only resolved reflectively at call time.
  final Class<?> constructUnionTableFn;
  try {
    constructUnionTableFn = Class.forName("org.apache.beam.sdk.transforms.join.CoGroupByKey$ConstructUnionTableFn");
  } catch (final ClassNotFoundException e) {
    throw new RuntimeException(e);
  }

  // Only operator vertices carry a transform; other vertices are treated as having none.
  final Transform srcTransform;
  if (src instanceof OperatorVertex) {
    srcTransform = ((OperatorVertex) src).getTransform();
  } else {
    srcTransform = null;
  }

  final Transform dstTransform;
  if (dst instanceof OperatorVertex) {
    dstTransform = ((OperatorVertex) dst).getTransform();
  } else {
    dstTransform = null;
  }

  // Rule 1: the union-table DoFn on the source side forces a shuffle.
  if (srcTransform instanceof DoFnTransform) {
    final DoFn srcDoFn = ((DoFnTransform) srcTransform).getDoFn();
    if (srcDoFn != null && srcDoFn.getClass().equals(constructUnionTableFn)) {
      return CommunicationPatternProperty.Value.Shuffle;
    }
  }

  // Rule 2: a flatten source takes precedence over any destination-based rule below.
  if (srcTransform instanceof FlattenTransform) {
    return CommunicationPatternProperty.Value.OneToOne;
  }

  // Rule 3: key-grouping destinations need a shuffle.
  if (dstTransform instanceof GroupByKeyAndWindowDoFnTransform
    || dstTransform instanceof GroupByKeyTransform) {
    return CommunicationPatternProperty.Value.Shuffle;
  }

  // Rules 4 and 5: view creation broadcasts, everything else is one-to-one.
  return dstTransform instanceof CreateViewTransform
    ? CommunicationPatternProperty.Value.BroadCast
    : CommunicationPatternProperty.Value.OneToOne;
}
/** * Helper method to check that all execution properties are correct and makes sense. */ private void executionPropertyCheck() { // DataSizeMetricCollection is not compatible with Push (All data have to be stored before the data collection) vertices.forEach(v -> incomingEdges.get(v).stream().filter(e -> e instanceof IREdge).map(e -> (IREdge) e) .filter(e -> e.getPropertyValue(MetricCollectionProperty.class).isPresent()) .filter(e -> !(e.getDst() instanceof OperatorVertex && ((OperatorVertex) e.getDst()).getTransform() instanceof AggregateMetricTransform)) .filter(e -> DataFlowProperty.Value.Push.equals(e.getPropertyValue(DataFlowProperty.class).get())) .forEach(e -> { throw new CompileTimeOptimizationException("DAG execution property check: " + "DataSizeMetricCollection edge is not compatible with push" + e.getId()); })); }
/**
 * Tells whether any direct parent of {@code v} is an {@link OperatorVertex} whose transform
 * is a {@code MetricCollectTransform}.
 *
 * @param dag that contains the {@code v}.
 * @param v to inspect.
 * @return whether or not the vertex has parent with MetricCollectTransform.
 */
private boolean hasParentWithMetricCollectTransform(final DAG<IRVertex, IREdge> dag,
                                                    final IRVertex v) {
  for (final IRVertex parent : dag.getParents(v.getId())) {
    // Inspect the parent itself: casting v here would test the wrong vertex and could
    // throw ClassCastException whenever v is not an OperatorVertex.
    if (parent instanceof OperatorVertex
      && ((OperatorVertex) parent).getTransform() instanceof MetricCollectTransform) {
      return true;
    }
  }
  return false;
}
if (irVertex instanceof OperatorVertex) { final OperatorVertex operatorVertex = (OperatorVertex) irVertex; operatorVerticesToBeMerged.putIfAbsent(operatorVertex.getTransform(), new ArrayList<>()); operatorVerticesToBeMerged.get(operatorVertex.getTransform()).add(operatorVertex);
if (irVertex instanceof OperatorVertex) { final OperatorVertex operatorVertex = (OperatorVertex) irVertex; operatorVerticesToBeMerged.putIfAbsent(operatorVertex.getTransform(), new ArrayList<>()); operatorVerticesToBeMerged.get(operatorVertex.getTransform()).add(operatorVertex);
@Override public DAG<IRVertex, IREdge> apply(final DAG<IRVertex, IREdge> dag) { dag.getVertices().forEach(v -> { if (v instanceof OperatorVertex && ((OperatorVertex) v).getTransform() instanceof AggregateMetricTransform) { final List<IREdge> outEdges = dag.getOutgoingEdgesOf(v); outEdges.forEach(edge -> { // double checking. if (MetricCollectionProperty.Value.DataSkewRuntimePass .equals(edge.getPropertyValue(MetricCollectionProperty.class).get())) { edge.setPropertyPermanently(PartitionerProperty.of(PartitionerProperty.Value.DataSkewHashPartitioner)); } }); } }); return dag; } }
/**
 * Prepares the transform of the harness's vertex, if that vertex is an {@link OperatorVertex}.
 * The transform receives the harness's context and output collector. Other vertex kinds are
 * left untouched.
 *
 * @param vertexHarness harness wrapping the vertex to prepare.
 */
private void prepareTransform(final VertexHarness vertexHarness) {
  final IRVertex irVertex = vertexHarness.getIRVertex();
  if (!(irVertex instanceof OperatorVertex)) {
    // Nothing to prepare: only operator vertices own a transform.
    return;
  }

  final Transform transform = ((OperatorVertex) irVertex).getTransform();
  transform.prepare(vertexHarness.getContext(), vertexHarness.getOutputCollector());
}
/**
 * Prepares the transform of the harness's vertex, if that vertex is an {@link OperatorVertex}.
 * The transform receives the harness's context and output collector. Other vertex kinds are
 * left untouched.
 *
 * @param vertexHarness harness wrapping the vertex to prepare.
 */
private void prepareTransform(final VertexHarness vertexHarness) {
  final IRVertex irVertex = vertexHarness.getIRVertex();
  if (!(irVertex instanceof OperatorVertex)) {
    // Nothing to prepare: only operator vertices own a transform.
    return;
  }

  final Transform transform = ((OperatorVertex) irVertex).getTransform();
  transform.prepare(vertexHarness.getContext(), vertexHarness.getOutputCollector());
}
@Override public DAG<IRVertex, IREdge> apply(final DAG<IRVertex, IREdge> dag) { dag.topologicalDo(v -> { // we only care about metric collection vertices. if (v instanceof OperatorVertex && ((OperatorVertex) v).getTransform() instanceof MetricCollectTransform) { dag.getOutgoingEdgesOf(v).forEach(edge -> { // double checking. if (edge.getPropertyValue(CommunicationPatternProperty.class).get() .equals(CommunicationPatternProperty.Value.Shuffle)) { edge.setPropertyPermanently(MetricCollectionProperty.of( MetricCollectionProperty.Value.DataSkewRuntimePass)); } }); } }); return dag; } }
/**
 * Annotates the DAG in two sweeps over its vertices.
 *
 * <p>First, every {@code MetricCollectTransform} vertex receives the {@code DataSkewRuntimePass}
 * dynamic optimization property. Second, every vertex for which
 * {@code hasParentWithMetricCollectTransform} holds and that does not yet carry a
 * {@code ResourceSkewedDataProperty} is given that property (set to {@code true}), as are
 * all of its descendants.
 *
 * @param dag the DAG to annotate; its vertices are modified in place.
 * @return the same {@code dag} instance.
 */
@Override
public DAG<IRVertex, IREdge> apply(final DAG<IRVertex, IREdge> dag) {
  // Sweep 1: flag the metric collecting vertices themselves.
  for (final IRVertex vertex : dag.getVertices()) {
    if (vertex instanceof OperatorVertex
      && ((OperatorVertex) vertex).getTransform() instanceof MetricCollectTransform) {
      vertex.setProperty(DynamicOptimizationProperty
        .of(DynamicOptimizationProperty.Value.DataSkewRuntimePass));
    }
  }

  // Sweep 2: propagate the skewed-data flag downstream. Vertices already flagged by an
  // earlier iteration are skipped by the containsKey check.
  for (final IRVertex childV : dag.getVertices()) {
    final boolean needsFlag = hasParentWithMetricCollectTransform(dag, childV)
      && !childV.getExecutionProperties().containsKey(ResourceSkewedDataProperty.class);
    if (needsFlag) {
      childV.getExecutionProperties().put(ResourceSkewedDataProperty.of(true));
      dag.getDescendants(childV.getId()).forEach(descendentV ->
        descendentV.getExecutionProperties().put(ResourceSkewedDataProperty.of(true)));
    }
  }
  return dag;
}
}
&& ((OperatorVertex) irVertex).getTransform() instanceof AggregateMetricTransform) { outputCollector = new DynOptDataOutputCollector( irVertex, persistentConnectionToMasterMap, this);
/**
 * Closes the transform of the harness's vertex (when the vertex is an {@link OperatorVertex})
 * and then, if the harness's context holds serialized data, sends that data to the runtime
 * master in an {@code ExecutorDataCollected} control message.
 *
 * @param vertexHarness harness wrapping the vertex to close.
 */
private void closeTransform(final VertexHarness vertexHarness) {
  final IRVertex irVertex = vertexHarness.getIRVertex();
  if (irVertex instanceof OperatorVertex) {
    ((OperatorVertex) irVertex).getTransform().close();
  }

  // The data is read only after close(), so anything the transform stored while closing is included.
  vertexHarness.getContext().getSerializedData().ifPresent(data -> {
    persistentConnectionToMasterMap
      .getMessageSender(MessageEnvironment.RUNTIME_MASTER_MESSAGE_LISTENER_ID)
      .send(ControlMessage.Message.newBuilder()
        .setId(RuntimeIdManager.generateMessageId())
        .setListenerId(MessageEnvironment.RUNTIME_MASTER_MESSAGE_LISTENER_ID)
        .setType(ControlMessage.MessageType.ExecutorDataCollected)
        .setDataCollected(ControlMessage.DataCollectMessage.newBuilder().setData(data).build())
        .build());
  });
}
/**
 * Closes the transform of the harness's vertex (when the vertex is an {@link OperatorVertex})
 * and then, if the harness's context holds serialized data, sends that data to the runtime
 * master in an {@code ExecutorDataCollected} control message.
 *
 * @param vertexHarness harness wrapping the vertex to close.
 */
private void closeTransform(final VertexHarness vertexHarness) {
  final IRVertex irVertex = vertexHarness.getIRVertex();
  if (irVertex instanceof OperatorVertex) {
    ((OperatorVertex) irVertex).getTransform().close();
  }

  // The data is read only after close(), so anything the transform stored while closing is included.
  vertexHarness.getContext().getSerializedData().ifPresent(data -> {
    persistentConnectionToMasterMap
      .getMessageSender(MessageEnvironment.RUNTIME_MASTER_MESSAGE_LISTENER_ID)
      .send(ControlMessage.Message.newBuilder()
        .setId(RuntimeIdManager.generateMessageId())
        .setListenerId(MessageEnvironment.RUNTIME_MASTER_MESSAGE_LISTENER_ID)
        .setType(ControlMessage.MessageType.ExecutorDataCollected)
        .setDataCollected(ControlMessage.DataCollectMessage.newBuilder().setData(data).build())
        .build());
  });
}