/**
 * {@inheritDoc}
 *
 * @return a new {@link OperatorVertex} created from this vertex via the copy constructor.
 */
@Override public OperatorVertex getClone() { return new OperatorVertex(this); }
/**
 * {@inheritDoc}
 *
 * @return a new {@link OperatorVertex} created from this vertex via the copy constructor.
 */
@Override public OperatorVertex getClone() { return new OperatorVertex(this); }
/** * @return the generated vertex. */ private OperatorVertex generateMetricAggregationVertex() { // Define a custom data aggregator for skew handling. // Here, the aggregator gathers key frequency data used in shuffle data repartitioning. final BiFunction<Object, Map<Object, Long>, Map<Object, Long>> dynOptDataAggregator = (BiFunction<Object, Map<Object, Long>, Map<Object, Long>> & Serializable) (element, aggregatedDynOptData) -> { final Object key = ((Pair<Object, Long>) element).left(); final Long count = ((Pair<Object, Long>) element).right(); final Map<Object, Long> aggregatedDynOptDataMap = (Map<Object, Long>) aggregatedDynOptData; if (aggregatedDynOptDataMap.containsKey(key)) { aggregatedDynOptDataMap.compute(key, (existingKey, accumulatedCount) -> accumulatedCount + count); } else { aggregatedDynOptDataMap.put(key, count); } return aggregatedDynOptData; }; final AggregateMetricTransform abt = new AggregateMetricTransform<Pair<Object, Long>, Map<Object, Long>>(new HashMap<>(), dynOptDataAggregator); return new OperatorVertex(abt); }
/** * @return the generated vertex. */ private OperatorVertex generateMetricAggregationVertex() { // Define a custom data aggregator for skew handling. // Here, the aggregator gathers key frequency data used in shuffle data repartitioning. final BiFunction<Object, Map<Object, Long>, Map<Object, Long>> dynOptDataAggregator = (BiFunction<Object, Map<Object, Long>, Map<Object, Long>> & Serializable) (element, aggregatedDynOptData) -> { final Object key = ((Pair<Object, Long>) element).left(); final Long count = ((Pair<Object, Long>) element).right(); final Map<Object, Long> aggregatedDynOptDataMap = (Map<Object, Long>) aggregatedDynOptData; if (aggregatedDynOptDataMap.containsKey(key)) { aggregatedDynOptDataMap.compute(key, (existingKey, accumulatedCount) -> accumulatedCount + count); } else { aggregatedDynOptDataMap.put(key, count); } return aggregatedDynOptData; }; final AggregateMetricTransform abt = new AggregateMetricTransform<Pair<Object, Long>, Map<Object, Long>>(new HashMap<>(), dynOptDataAggregator); return new OperatorVertex(abt); }
/**
 * Translates a {@link Flatten.PCollections} transform into a single flatten vertex
 * that merges all of the node's inputs into its outputs.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Flatten.PCollections.class)
private static void flattenTranslator(final PipelineTranslationContext ctx,
                                      final TransformHierarchy.Node beamNode,
                                      final Flatten.PCollections<?> transform) {
  final IRVertex flattenVertex = new OperatorVertex(new FlattenTransform());
  ctx.addVertex(flattenVertex);
  // Every input of the node feeds the flatten vertex.
  beamNode.getInputs().values().forEach(in -> ctx.addEdgeTo(flattenVertex, in));
  // Every output of the node is the vertex's main output.
  beamNode.getOutputs().values().forEach(out -> ctx.registerMainOutputFrom(beamNode, flattenVertex, out));
}
// Wrap the transform `mct` in an operator vertex and hand it back.
// NOTE(review): `mct` is declared in the enclosing method, which is not visible in this chunk.
return new OperatorVertex(mct);
/**
 * Translates a {@link GroupByKey} transform into a single GBK vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(GroupByKey.class)
private static void groupByKeyTranslator(final PipelineTranslationContext ctx,
                                         final TransformHierarchy.Node beamNode,
                                         final GroupByKey<?, ?> transform) {
  final IRVertex gbkVertex = new OperatorVertex(createGBKTransform(ctx, beamNode));
  ctx.addVertex(gbkVertex);
  // Connect all inputs to the GBK vertex, then register all outputs as its main output.
  beamNode.getInputs().values().forEach(in -> ctx.addEdgeTo(gbkVertex, in));
  beamNode.getOutputs().values().forEach(out -> ctx.registerMainOutputFrom(beamNode, gbkVertex, out));
}
/**
 * Translates a {@link ParDo.SingleOutput} into a DoFn vertex with side-input edges
 * and a single main output.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(ParDo.SingleOutput.class)
private static void parDoSingleOutputTranslator(final PipelineTranslationContext ctx,
                                                final TransformHierarchy.Node beamNode,
                                                final ParDo.SingleOutput<?, ?> transform) {
  final Map<Integer, PCollectionView<?>> sideInputMap = getSideInputMap(transform.getSideInputs());
  final AbstractDoFnTransform doFnTransform = createDoFnTransform(ctx, beamNode, sideInputMap);
  final IRVertex doFnVertex = new OperatorVertex(doFnTransform);
  ctx.addVertex(doFnVertex);
  // Side inputs are wired separately via addSideInputEdges, so exclude them here.
  beamNode.getInputs().values().stream()
    .filter(in -> !transform.getAdditionalInputs().values().contains(in))
    .forEach(in -> ctx.addEdgeTo(doFnVertex, in));
  ctx.addSideInputEdges(doFnVertex, sideInputMap);
  beamNode.getOutputs().values().forEach(out -> ctx.registerMainOutputFrom(beamNode, doFnVertex, out));
}
/**
 * Translates a {@link ParDo.MultiOutput} into a DoFn vertex with side-input edges,
 * one main output, and zero or more tagged additional outputs.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(ParDo.MultiOutput.class)
private static void parDoMultiOutputTranslator(final PipelineTranslationContext ctx,
                                               final TransformHierarchy.Node beamNode,
                                               final ParDo.MultiOutput<?, ?> transform) {
  final Map<Integer, PCollectionView<?>> sideInputMap = getSideInputMap(transform.getSideInputs());
  final AbstractDoFnTransform doFnTransform = createDoFnTransform(ctx, beamNode, sideInputMap);
  final IRVertex doFnVertex = new OperatorVertex(doFnTransform);
  ctx.addVertex(doFnVertex);
  // Side inputs are wired separately via addSideInputEdges, so exclude them here.
  beamNode.getInputs().values().stream()
    .filter(in -> !transform.getAdditionalInputs().values().contains(in))
    .forEach(in -> ctx.addEdgeTo(doFnVertex, in));
  ctx.addSideInputEdges(doFnVertex, sideInputMap);
  // Register the output matching the main tag first, then all other tagged outputs
  // as additional outputs (two passes preserve the original registration order).
  beamNode.getOutputs().entrySet().stream()
    .filter(tagged -> tagged.getKey().equals(transform.getMainOutputTag()))
    .forEach(tagged -> ctx.registerMainOutputFrom(beamNode, doFnVertex, tagged.getValue()));
  beamNode.getOutputs().entrySet().stream()
    .filter(tagged -> !tagged.getKey().equals(transform.getMainOutputTag()))
    .forEach(tagged -> ctx.registerAdditionalOutputFrom(beamNode, doFnVertex, tagged.getValue(),
      tagged.getKey()));
}
/**
 * Translates {@link Window} and {@link Window.Assign} transforms into a windowing vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator({Window.class, Window.Assign.class})
private static void windowTranslator(final PipelineTranslationContext ctx,
                                     final TransformHierarchy.Node beamNode,
                                     final PTransform<?, ?> transform) {
  // Both accepted transform kinds expose a WindowFn; anything else is a translator bug.
  final WindowFn windowFn;
  if (transform instanceof Window) {
    windowFn = ((Window) transform).getWindowFn();
  } else if (transform instanceof Window.Assign) {
    windowFn = ((Window.Assign) transform).getWindowFn();
  } else {
    throw new UnsupportedOperationException(String.format("%s is not supported", transform));
  }
  final IRVertex windowVertex =
    new OperatorVertex(new WindowFnTransform(windowFn, DisplayData.from(beamNode.getTransform())));
  ctx.addVertex(windowVertex);
  beamNode.getInputs().values().forEach(in -> ctx.addEdgeTo(windowVertex, in));
  beamNode.getOutputs().values().forEach(out -> ctx.registerMainOutputFrom(beamNode, windowVertex, out));
}
/**
 * Translates a {@link View.CreatePCollectionView} into a view-creating vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(View.CreatePCollectionView.class)
private static void createPCollectionViewTranslator(final PipelineTranslationContext ctx,
                                                    final TransformHierarchy.Node beamNode,
                                                    final View.CreatePCollectionView<?, ?> transform) {
  final IRVertex viewVertex = new OperatorVertex(new CreateViewTransform(transform.getView().getViewFn()));
  ctx.addVertex(viewVertex);
  beamNode.getInputs().values().forEach(in -> ctx.addEdgeTo(viewVertex, in));
  // The created view itself is registered as a main output, followed by the node's outputs.
  // NOTE(review): if getView() also appears among beamNode.getOutputs(), this registers it
  // twice — confirm registerMainOutputFrom tolerates duplicates.
  ctx.registerMainOutputFrom(beamNode, viewVertex, transform.getView());
  beamNode.getOutputs().values().forEach(out -> ctx.registerMainOutputFrom(beamNode, viewVertex, out));
}
// Dummy map/reduce vertices built from empty transforms — presumably test-fixture setup;
// the enclosing method is not visible in this chunk, so confirm against the surrounding code.
final IRVertex map = new OperatorVertex(new EmptyComponents.EmptyTransform("MapVertex")); final IRVertex reduce = new OperatorVertex(new EmptyComponents.EmptyTransform("ReduceVertex"));
// Dummy map/reduce vertices built from empty transforms — presumably test-fixture setup;
// the enclosing method is not visible in this chunk, so confirm against the surrounding code.
final IRVertex map = new OperatorVertex(new EmptyComponents.EmptyTransform("MapVertex")); final IRVertex reduce = new OperatorVertex(new EmptyComponents.EmptyTransform("ReduceVertex"));
/**
 * Builds dummy IR DAG for testing.
 * @return the dummy IR DAG.
 */
public static DAG<IRVertex, IREdge> buildEmptyDAG() {
  final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>();
  // A single source feeding a small diamond-ish chain of empty operator vertices.
  final IRVertex s = new EmptyComponents.EmptySourceVertex<>("s");
  final IRVertex t1 = new OperatorVertex(new EmptyComponents.EmptyTransform("t1"));
  final IRVertex t2 = new OperatorVertex(new EmptyComponents.EmptyTransform("t2"));
  final IRVertex t3 = new OperatorVertex(new EmptyComponents.EmptyTransform("t3"));
  final IRVertex t4 = new OperatorVertex(new EmptyComponents.EmptyTransform("t4"));
  final IRVertex t5 = new OperatorVertex(new EmptyComponents.EmptyTransform("t5"));
  for (final IRVertex vertex : new IRVertex[]{s, t1, t2, t3, t4, t5}) {
    builder.addVertex(vertex);
  }
  // s -1:1-> t1 -shuffle-> t2 -1:1-> t3 -shuffle-> t4, and t2 -1:1-> t5.
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, s, t1));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.Shuffle, t1, t2));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, t2, t3));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.Shuffle, t3, t4));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, t2, t5));
  return builder.build();
}
/**
 * Builds dummy IR DAG for testing.
 * @return the dummy IR DAG.
 */
public static DAG<IRVertex, IREdge> buildEmptyDAG() {
  final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>();
  // A single source feeding a small diamond-ish chain of empty operator vertices.
  final IRVertex s = new EmptyComponents.EmptySourceVertex<>("s");
  final IRVertex t1 = new OperatorVertex(new EmptyComponents.EmptyTransform("t1"));
  final IRVertex t2 = new OperatorVertex(new EmptyComponents.EmptyTransform("t2"));
  final IRVertex t3 = new OperatorVertex(new EmptyComponents.EmptyTransform("t3"));
  final IRVertex t4 = new OperatorVertex(new EmptyComponents.EmptyTransform("t4"));
  final IRVertex t5 = new OperatorVertex(new EmptyComponents.EmptyTransform("t5"));
  for (final IRVertex vertex : new IRVertex[]{s, t1, t2, t3, t4, t5}) {
    builder.addVertex(vertex);
  }
  // s -1:1-> t1 -shuffle-> t2 -1:1-> t3 -shuffle-> t4, and t2 -1:1-> t5.
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, s, t1));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.Shuffle, t1, t2));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, t2, t3));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.Shuffle, t3, t4));
  builder.connectVertices(new IREdge(CommunicationPatternProperty.Value.OneToOne, t2, t5));
  return builder.build();
}
// Five operator vertices sharing the same transform `t`, with parallelism 3/2/3/2/2 and
// COMPUTE resource priority — looks like test-fixture DAG setup. NOTE(review): the enclosing
// method (and the declaration of `t` and `dagBuilder`) is not visible in this chunk.
final IRVertex v1 = new OperatorVertex(t); v1.setProperty(ParallelismProperty.of(3)); v1.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE)); dagBuilder.addVertex(v1); final IRVertex v2 = new OperatorVertex(t); v2.setProperty(ParallelismProperty.of(2)); v2.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE)); dagBuilder.addVertex(v2); final IRVertex v3 = new OperatorVertex(t); v3.setProperty(ParallelismProperty.of(3)); v3.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE)); dagBuilder.addVertex(v3); final IRVertex v4 = new OperatorVertex(t); v4.setProperty(ParallelismProperty.of(2)); v4.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE)); dagBuilder.addVertex(v4); final IRVertex v5 = new OperatorVertex(t); v5.setProperty(ParallelismProperty.of(2)); v5.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE));
// Adds a vertex wrapping a SideInputTransform for the side input at `index`, then derives
// the coder for the view. NOTE(review): `view` and the enclosing method are outside this chunk.
final IRVertex sideInputTransformVertex = new OperatorVertex(new SideInputTransform(index)); addVertex(sideInputTransformVertex); final Coder viewCoder = getCoderForView(view, this);
// Fixture vertices v1/v2/v3 with parallelism 3/2/2 on a shared transform `t`.
// NOTE(review): this snippet appears truncated — both `if (sameContainerType) {` guards are
// unclosed in this view, and the ResourcePriority branch bodies are missing.
final IRVertex v1 = new OperatorVertex(t); v1.setProperty(ParallelismProperty.of(3)); v1.setProperty(ResourcePriorityProperty.of(ResourcePriorityProperty.COMPUTE)); dagBuilder.addVertex(v1); final IRVertex v2 = new OperatorVertex(t); v2.setProperty(ParallelismProperty.of(2)); if (sameContainerType) { final IRVertex v3 = new OperatorVertex(t); v3.setProperty(ParallelismProperty.of(2)); if (sameContainerType) {
// A source vertex plus five empty operator vertices used to assemble a dummy DAG;
// NOTE(review): the builder wiring (addVertex/connectVertices) continues outside this view.
DAGBuilder<IRVertex, IREdge> dagBuilder = new DAGBuilder<>(); final IRVertex s = new EmptyComponents.EmptySourceVertex<>("s"); final IRVertex t1 = new OperatorVertex(new EmptyComponents.EmptyTransform("t1")); final IRVertex t2 = new OperatorVertex(new EmptyComponents.EmptyTransform("t2")); final IRVertex t3 = new OperatorVertex(new EmptyComponents.EmptyTransform("t3")); final IRVertex t4 = new OperatorVertex(new EmptyComponents.EmptyTransform("t4")); final IRVertex t5 = new OperatorVertex(new EmptyComponents.EmptyTransform("t5"));
/** * Collect data by running the DAG. * * @param dag the DAG to execute. * @param loopVertexStack loop vertex stack. * @param lastVertex last vertex added to the dag. * @param serializer serializer for the edges. * @param <T> type of the return data. * @return the data collected. */ public static <T> List<T> collect(final DAG<IRVertex, IREdge> dag, final Stack<LoopVertex> loopVertexStack, final IRVertex lastVertex, final Serializer serializer) { final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>(dag); final IRVertex collectVertex = new OperatorVertex(new CollectTransform<>()); builder.addVertex(collectVertex, loopVertexStack); final IREdge newEdge = new IREdge(getEdgeCommunicationPattern(lastVertex, collectVertex), lastVertex, collectVertex); newEdge.setProperty(EncoderProperty.of(new SparkEncoderFactory(serializer))); newEdge.setProperty(DecoderProperty.of(new SparkDecoderFactory(serializer))); newEdge.setProperty(SPARK_KEY_EXTRACTOR_PROP); builder.connectVertices(newEdge); // launch DAG JobLauncher.launchDAG(builder.build(), SparkBroadcastVariables.getAll(), ""); return (List<T>) JobLauncher.getCollectedData(); }