/**
 * Removes every input value of the visited primitive transform from {@code unconsumed}.
 *
 * @param node the primitive transform node being visited
 */
@Override
public void visitPrimitiveTransform(Node node) {
  // Whatever this node reads has a consumer, so it no longer belongs in the pending collection.
  unconsumed.removeAll(node.getInputs().values());
}
/**
 * Adds {@code value} to {@code keyedValues} when either the producer's transform class is listed
 * in {@code PRODUCES_KEYED_OUTPUTS}, or the producer's transform is key-preserving and every one
 * of the producer's inputs is already present in {@code keyedValues}.
 *
 * @param value the value being visited
 * @param producer the node that produced {@code value}
 */
@Override
public void visitValue(PValue value, TransformHierarchy.Node producer) {
  // Vacuously true when the producer has no inputs.
  boolean allInputsKeyed = keyedValues.containsAll(producer.getInputs().values());

  if (PRODUCES_KEYED_OUTPUTS.contains(producer.getTransform().getClass())) {
    keyedValues.add(value);
    return;
  }
  if (isKeyPreserving(producer.getTransform()) && allInputsKeyed) {
    keyedValues.add(value);
  }
}
@Override public void doVisitTransform(TransformHierarchy.Node node) { // we populate cache candidates by updating the map with inputs of each node. // The goal is to detect the PCollections accessed more than one time, and so enable cache // on the underlying RDDs or DStreams. for (PValue value : node.getInputs().values()) { if (value instanceof PCollection) { long count = 1L; if (ctxt.getCacheCandidates().get(value) != null) { count = ctxt.getCacheCandidates().get(value) + 1; } ctxt.getCacheCandidates().put((PCollection) value, count); } } } }
/**
 * Records a primitive transform while the graph is being visited.
 *
 * <p>The applied transform is given a freshly generated step name. A node without inputs is
 * added to {@code rootTransforms}; otherwise the applied transform is registered as a
 * per-element consumer of each non-additional input and as a consumer of every input. Side
 * inputs of a {@code ParDo.MultiOutput} are added to {@code consumedViews}, and a
 * {@code WriteView} is registered in {@code viewWriters} under the view it writes.
 *
 * @param node the primitive transform node being visited
 */
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  AppliedPTransform<?, ?, ?> application = getAppliedTransform(node);
  stepNames.put(application, genStepName());

  if (!node.getInputs().isEmpty()) {
    Collection<PValue> mainInputs =
        TransformInputs.nonAdditionalInputs(node.toAppliedPTransform(getPipeline()));
    boolean nothingDropped = mainInputs.containsAll(node.getInputs().values());
    if (!nothingDropped) {
      LOG.debug(
          "Inputs reduced to {} from {} by removing additional inputs",
          mainInputs,
          node.getInputs().values());
    }
    // Only main inputs feed per-element processing; every input counts as consumed.
    mainInputs.forEach(mainInput -> perElementConsumers.put(mainInput, application));
    node.getInputs().values().forEach(anyInput -> allConsumers.put(anyInput, application));
  } else {
    rootTransforms.add(application);
  }

  if (node.getTransform() instanceof ParDo.MultiOutput) {
    consumedViews.addAll(((ParDo.MultiOutput<?, ?>) node.getTransform()).getSideInputs());
    return;
  }
  if (node.getTransform() instanceof ViewOverrideFactory.WriteView) {
    viewWriters.put(
        ((WriteView) node.getTransform()).getView(), node.toAppliedPTransform(getPipeline()));
  }
}
/**
 * Translates a {@code Window} or {@code Window.Assign} transform into an {@code OperatorVertex}
 * wrapping a {@code WindowFnTransform}, then wires the node's inputs and outputs to that vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 * @throws UnsupportedOperationException if {@code transform} is neither a {@code Window} nor a
 *     {@code Window.Assign}
 */
@PrimitiveTransformTranslator({Window.class, Window.Assign.class})
private static void windowTranslator(final PipelineTranslationContext ctx,
                                     final TransformHierarchy.Node beamNode,
                                     final PTransform<?, ?> transform) {
  // Reject anything other than the two supported windowing transforms up front.
  if (!(transform instanceof Window) && !(transform instanceof Window.Assign)) {
    throw new UnsupportedOperationException(String.format("%s is not supported", transform));
  }
  final WindowFn windowFn = (transform instanceof Window)
      ? ((Window) transform).getWindowFn()
      : ((Window.Assign) transform).getWindowFn();

  final IRVertex windowVertex = new OperatorVertex(
      new WindowFnTransform(windowFn, DisplayData.from(beamNode.getTransform())));
  ctx.addVertex(windowVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(windowVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, windowVertex, out);
  });
}
/** * Determine if this Node belongs to a Bounded branch of the pipeline, or Unbounded, and * translate with the proper translator. */ protected <TransformT extends PTransform<? super PInput, POutput>> TransformEvaluator<TransformT> translate( TransformHierarchy.Node node, TransformT transform) { // --- determine if node is bounded/unbounded. // usually, the input determines if the PCollection to apply the next transformation to // is BOUNDED or UNBOUNDED, meaning RDD/DStream. Map<TupleTag<?>, PValue> pValues; if (node.getInputs().isEmpty()) { // in case of a PBegin, it's the output. pValues = node.getOutputs(); } else { pValues = node.getInputs(); } PCollection.IsBounded isNodeBounded = isBoundedCollection(pValues.values()); // translate accordingly. LOG.debug("Translating {} as {}", transform, isNodeBounded); return isNodeBounded.equals(PCollection.IsBounded.BOUNDED) ? translator.translateBounded(transform) : translator.translateUnbounded(transform); }
/**
 * Translates a {@code ParDo.SingleOutput} into an {@code OperatorVertex}. Inputs that are not
 * among the transform's additional inputs get a regular edge, side inputs are connected through
 * {@code ctx.addSideInputEdges}, and every output is registered as a main output of the vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(ParDo.SingleOutput.class)
private static void parDoSingleOutputTranslator(final PipelineTranslationContext ctx,
                                                final TransformHierarchy.Node beamNode,
                                                final ParDo.SingleOutput<?, ?> transform) {
  final Map<Integer, PCollectionView<?>> sideInputs = getSideInputMap(transform.getSideInputs());
  final AbstractDoFnTransform doFn = createDoFnTransform(ctx, beamNode, sideInputs);
  final IRVertex parDoVertex = new OperatorVertex(doFn);
  ctx.addVertex(parDoVertex);

  beamNode.getInputs().values().forEach(in -> {
    // Additional inputs are skipped here; side inputs are wired separately below.
    if (!transform.getAdditionalInputs().values().contains(in)) {
      ctx.addEdgeTo(parDoVertex, in);
    }
  });
  ctx.addSideInputEdges(parDoVertex, sideInputs);

  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, parDoVertex, out);
  });
}
/**
 * Collects the full names of view transforms whose key coder is not deterministic.
 *
 * <p>For {@code View.AsMap} and {@code View.AsMultimap} nodes the key coder of the single input
 * is verified; a failed verification records the node's full name. A node whose transform is
 * contained in {@code ptransformViewsWithNonDeterministicKeyCoders} is recorded as well.
 *
 * @param node the composite transform node being entered
 * @return always {@code CompositeBehavior.ENTER_TRANSFORM}
 */
@Override
public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
  boolean isMapLikeView =
      node.getTransform() instanceof View.AsMap
          || node.getTransform() instanceof View.AsMultimap;
  if (isMapLikeView) {
    PCollection<KV<?, ?>> onlyInput =
        (PCollection<KV<?, ?>>) Iterables.getOnlyElement(node.getInputs().values());
    KvCoder<?, ?> kvCoder = (KvCoder) onlyInput.getCoder();
    try {
      kvCoder.getKeyCoder().verifyDeterministic();
    } catch (NonDeterministicException e) {
      // A non-deterministic key coder is not an error here; the name is only noted.
      ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
    }
  }

  if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
    ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
  }

  return CompositeBehavior.ENTER_TRANSFORM;
}
@Override public void visitPrimitiveTransform(Node node) { if (node.getTransform() instanceof WriteView) { assertThat( "There should only be one WriteView primitive in the graph", writeViewVisited.getAndSet(true), is(false)); PCollectionView<?> replacementView = ((WriteView) node.getTransform()).getView(); // replacementView.getPCollection() is null, but that is not a requirement // so not asserted one way or the other assertThat( replacementView.getTagInternal(), equalTo((TupleTag) view.getTagInternal())); assertThat(replacementView.getViewFn(), equalTo(view.getViewFn())); assertThat(replacementView.getWindowMappingFn(), equalTo(view.getWindowMappingFn())); assertThat(node.getInputs().entrySet(), hasSize(1)); } } });
/**
 * Checks visiting order on entering a composite: every input value must already have been
 * visited, the node itself must not have been visited before, and a non-root node must be
 * entered after its enclosing node. The node is then recorded as visited.
 *
 * @param node the composite transform node being entered
 * @return always {@code CompositeBehavior.ENTER_TRANSFORM}
 */
@Override
public CompositeBehavior enterCompositeTransform(Node node) {
  node.getInputs().values().forEach(in -> assertThat(visitedValues, hasItem(in)));

  assertThat(
      "Nodes should not be visited more than once", visitedNodes, not(hasItem(node)));

  boolean hasEnclosingNode = !node.isRootNode();
  if (hasEnclosingNode) {
    assertThat(
        "Nodes should always be visited after their enclosing nodes",
        visitedNodes,
        hasItem(node.getEnclosingNode()));
  }

  visitedNodes.add(node);
  return CompositeBehavior.ENTER_TRANSFORM;
}
/**
 * Translates a {@code GroupByKey} into an {@code OperatorVertex} built from
 * {@code createGBKTransform}, adds an edge for every input and registers every output as a
 * main output of the vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(GroupByKey.class)
private static void groupByKeyTranslator(final PipelineTranslationContext ctx,
                                         final TransformHierarchy.Node beamNode,
                                         final GroupByKey<?, ?> transform) {
  final IRVertex gbkVertex = new OperatorVertex(createGBKTransform(ctx, beamNode));
  ctx.addVertex(gbkVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(gbkVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, gbkVertex, out);
  });
}
/**
 * Translates a {@code Read.Bounded} into a {@code BeamBoundedSourceVertex} created from the
 * transform's source and display data, then wires the node's inputs and outputs to it.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Read.Bounded.class)
private static void boundedReadTranslator(final PipelineTranslationContext ctx,
                                          final TransformHierarchy.Node beamNode,
                                          final Read.Bounded<?> transform) {
  final IRVertex sourceVertex =
      new BeamBoundedSourceVertex<>(transform.getSource(), DisplayData.from(transform));
  ctx.addVertex(sourceVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(sourceVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, sourceVertex, out);
  });
}
/**
 * Translates a {@code View.CreatePCollectionView} into an {@code OperatorVertex} wrapping a
 * {@code CreateViewTransform} built from the view's view function. The view itself is
 * registered as a main output of the vertex before the node's regular outputs.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(View.CreatePCollectionView.class)
private static void createPCollectionViewTranslator(final PipelineTranslationContext ctx,
                                                    final TransformHierarchy.Node beamNode,
                                                    final View.CreatePCollectionView<?, ?> transform) {
  final IRVertex viewVertex =
      new OperatorVertex(new CreateViewTransform(transform.getView().getViewFn()));
  ctx.addVertex(viewVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(viewVertex, in);
  });
  // The view is registered first, then each declared output of the node.
  ctx.registerMainOutputFrom(beamNode, viewVertex, transform.getView());
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, viewVertex, out);
  });
}
/**
 * Translates a {@code Flatten.PCollections} into an {@code OperatorVertex} wrapping a
 * {@code FlattenTransform}, adds an edge for every input and registers every output as a main
 * output of the vertex.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Flatten.PCollections.class)
private static void flattenTranslator(final PipelineTranslationContext ctx,
                                      final TransformHierarchy.Node beamNode,
                                      final Flatten.PCollections<?> transform) {
  final IRVertex flattenVertex = new OperatorVertex(new FlattenTransform());
  ctx.addVertex(flattenVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(flattenVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, flattenVertex, out);
  });
}
/**
 * Translates a {@code Read.Unbounded} into a {@code BeamUnboundedSourceVertex} created from the
 * transform's source and display data, then wires the node's inputs and outputs to it.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(Read.Unbounded.class)
private static void unboundedReadTranslator(final PipelineTranslationContext ctx,
                                            final TransformHierarchy.Node beamNode,
                                            final Read.Unbounded<?> transform) {
  final IRVertex sourceVertex =
      new BeamUnboundedSourceVertex<>(transform.getSource(), DisplayData.from(transform));
  ctx.addVertex(sourceVertex);
  beamNode.getInputs().values().forEach(in -> {
    ctx.addEdgeTo(sourceVertex, in);
  });
  beamNode.getOutputs().values().forEach(out -> {
    ctx.registerMainOutputFrom(beamNode, sourceVertex, out);
  });
}
/** * Finish specifying all of the input {@link PValue PValues} of the current {@link Node}. Ensures * that all of the inputs to the current node have been fully specified, and have been produced by * a node in this graph. */ public void finishSpecifyingInput() { // Inputs must be completely specified before they are consumed by a transform. for (PValue inputValue : current.getInputs().values()) { Node producerNode = getProducer(inputValue); PInput input = producerInput.remove(inputValue); inputValue.finishSpecifying(input, producerNode.getTransform()); } }
/**
 * Adds every input value of the visited primitive transform to {@code consumedOutputs}.
 *
 * @param node the primitive transform node being visited
 */
@Override
public void visitPrimitiveTransform(Node node) {
  // Each value read by this node counts as a consumed output of whatever produced it.
  consumedOutputs.addAll(node.getInputs().values());
}
/**
 * Checks visiting order for a primitive: its enclosing node must have been visited and not yet
 * exited, the node itself must not have been visited before, and every input value must
 * already have been visited. The node is then recorded as visited.
 *
 * @param node the primitive transform node being visited
 */
@Override
public void visitPrimitiveTransform(Node node) {
  assertThat(visitedNodes, hasItem(node.getEnclosingNode()));
  assertThat(exitedNodes, not(hasItem(node.getEnclosingNode())));
  assertThat(
      "Nodes should not be visited more than once", visitedNodes, not(hasItem(node)));

  node.getInputs().values().forEach(in -> assertThat(visitedValues, hasItem(in)));

  visitedNodes.add(node);
}
@Override public void visitPrimitiveTransform(Node node) { // The output should only be consumed by a single consumer if (node.getInputs().values().contains(output)) { assertThat(node.getTransform(), Matchers.is(consumer)); } } });
/**
 * Asserts that a visited {@code Flatten.PCollections} node does not have inputs equal to the
 * expansion of {@code inputList}.
 *
 * @param node the primitive transform node being visited
 */
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  if (!(node.getTransform() instanceof Flatten.PCollections)) {
    return;
  }
  assertThat(node.getInputs(), not(equalTo(inputList.expand())));
}
}); // closes the enclosing anonymous class and call, which open outside this excerpt