/**
 * Translates a windowing primitive into an {@link OperatorVertex} that wraps a
 * {@link WindowFnTransform}.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator({Window.class, Window.Assign.class})
private static void windowTranslator(final PipelineTranslationContext ctx,
                                     final TransformHierarchy.Node beamNode,
                                     final PTransform<?, ?> transform) {
  // Window and Window.Assign expose the WindowFn through unrelated accessors,
  // so each concrete type must be handled separately.
  final WindowFn fn;
  if (transform instanceof Window) {
    fn = ((Window) transform).getWindowFn();
  } else if (transform instanceof Window.Assign) {
    fn = ((Window.Assign) transform).getWindowFn();
  } else {
    throw new UnsupportedOperationException(String.format("%s is not supported", transform));
  }

  final IRVertex windowVertex =
      new OperatorVertex(new WindowFnTransform(fn, DisplayData.from(beamNode.getTransform())));
  ctx.addVertex(windowVertex);
  // Wire every input edge, then register each output as a main output of the new vertex.
  beamNode.getInputs().values().forEach(in -> ctx.addEdgeTo(windowVertex, in));
  beamNode.getOutputs().values()
      .forEach(out -> ctx.registerMainOutputFrom(beamNode, windowVertex, out));
}
@Override public CompositeBehavior enterCompositeTransform(Node node) { if (!node.isRootNode() && freedNodes.contains(node.getEnclosingNode())) { // This node will be freed because its parent will be freed. freedNodes.add(node); return CompositeBehavior.ENTER_TRANSFORM; } if (!node.isRootNode() && override.getMatcher().matches(node.toAppliedPTransform(getPipeline()))) { matches.add(node); // This node will be freed. When we visit any of its children, they will also be freed freedNodes.add(node); } return CompositeBehavior.ENTER_TRANSFORM; }
"Replacing a node when the graph has an unexpanded input. This is an SDK bug."); Node replacement = new Node(existing.getEnclosingNode(), transform, existing.getFullName(), input); for (PValue output : existing.getOutputs().values()) { Node producer = producers.get(output); boolean producedInExisting = false; producedInExisting = true; } else { producer = producer.getEnclosingNode(); } while (!producedInExisting && !producer.isRootNode()); if (producedInExisting) { producers.remove(output); "Removed producer for value {} as it is part of a replaced composite {}", output, existing.getFullName()); } else { LOG.debug("Value {} not produced in existing node {}", output, existing.getFullName()); existing.getEnclosingNode().replaceChild(existing, replacement); unexpandedInputs.remove(existing); unexpandedInputs.put(replacement, input);
Set<Node> visitedNodes, Set<Node> skippedComposites) { if (getEnclosingNode() != null && !visitedNodes.contains(getEnclosingNode())) { getEnclosingNode().visit(visitor, visitedValues, visitedNodes, skippedComposites); LOG.debug("Not revisiting previously visited node {}", this); return; } else if (childNodeOf(skippedComposites)) { finishSpecifying(); if (!isRootNode()) { valueProducer.visit(visitor, visitedValues, visitedNodes, skippedComposites); if (isCompositeNode()) { LOG.debug("Visiting composite node {}", this); PipelineVisitor.CompositeBehavior recurse = visitor.enterCompositeTransform(this); child.visit(visitor, visitedValues, visitedNodes, skippedComposites); if (!isRootNode()) { checkNotNull(outputs, "Outputs for non-root node %s are null", getFullName());
replacementParNode.getOutputs().keySet(), Matchers.contains(replacementLongs.getKey())); assertThat(replacementParNode.getOutputs().values(), Matchers.contains(output)); assertThat( compositeNode.getOutputs().keySet(), equalTo(replacementOutput.get(longs).expand().keySet())); assertThat(compositeNode.getOutputs().values(), Matchers.contains(output)); hierarchy.popNode();
hierarchy.finishSpecifyingInput(); assertThat(hierarchy.getCurrent(), equalTo(compositeNode)); assertThat(compositeNode.getInputs().entrySet(), Matchers.empty()); assertThat(compositeNode.getTransform(), equalTo(create)); assertThat(compositeNode.getOutputs().entrySet(), Matchers.emptyIterable()); assertThat(compositeNode.getEnclosingNode().isRootNode(), is(true)); hierarchy.setOutput(created); hierarchy.popNode(); assertThat(primitiveNode.getOutputs().values(), containsInAnyOrder(created)); assertThat(primitiveNode.getInputs().entrySet(), Matchers.emptyIterable()); assertThat(primitiveNode.getTransform(), equalTo(read)); assertThat(primitiveNode.getEnclosingNode(), equalTo(compositeNode)); assertThat(compositeNode.getOutputs().values(), containsInAnyOrder(created));
beamNode.getInputs().values().forEach(input -> ctx.addEdgeTo(partialCombine, input)); TransformInputs.nonAdditionalInputs(beamNode.toAppliedPTransform(ctx.getPipeline()))); final KvCoder inputCoder = (KvCoder) input.getCoder(); final Coder accumulatorCoder; beamNode.getOutputs().values().forEach(output -> ctx.registerMainOutputFrom(beamNode, finalCombine, output));
final KvCoder<?, ?> inputKVCoder = (KvCoder) src.getOutputs().values().stream() .filter(v -> v instanceof PCollection) .map(v -> (PCollection) v)
hierarchy.replaceOutputs(replacementOutputs); assertThat(replacement.getInputs(), equalTo(original.getInputs())); assertThat(replacement.getEnclosingNode(), equalTo(original.getEnclosingNode())); assertThat(replacement.getEnclosingNode(), equalTo(enclosing)); assertThat(replacement.getTransform(), equalTo(replacementTransform)); assertThat(replacement.getOutputs().keySet(), Matchers.contains(taggedReplacement.getTag())); assertThat(replacement.getOutputs().values(), Matchers.contains(originalOutput)); hierarchy.popNode();
/**
 * Translates a {@link ParDo.MultiOutput} into an {@link OperatorVertex}, wiring main inputs,
 * side inputs, the main output, and any additional (tagged) outputs.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(ParDo.MultiOutput.class)
private static void parDoMultiOutputTranslator(final PipelineTranslationContext ctx,
                                               final TransformHierarchy.Node beamNode,
                                               final ParDo.MultiOutput<?, ?> transform) {
  final Map<Integer, PCollectionView<?>> sideInputMap = getSideInputMap(transform.getSideInputs());
  final AbstractDoFnTransform doFnTransform = createDoFnTransform(ctx, beamNode, sideInputMap);
  final IRVertex vertex = new OperatorVertex(doFnTransform);
  ctx.addVertex(vertex);
  // Additional inputs are wired as side-input edges below, so skip them here.
  beamNode.getInputs().values().stream()
    .filter(input -> !transform.getAdditionalInputs().values().contains(input))
    .forEach(input -> ctx.addEdgeTo(vertex, input));
  ctx.addSideInputEdges(vertex, sideInputMap);
  // Register the single main output first, then every remaining tagged output,
  // matching the original two-pass registration order.
  beamNode.getOutputs().entrySet().stream()
    .filter(pValueWithTupleTag -> pValueWithTupleTag.getKey().equals(transform.getMainOutputTag()))
    .forEach(pValueWithTupleTag ->
      ctx.registerMainOutputFrom(beamNode, vertex, pValueWithTupleTag.getValue()));
  beamNode.getOutputs().entrySet().stream()
    .filter(pValueWithTupleTag -> !pValueWithTupleTag.getKey().equals(transform.getMainOutputTag()))
    .forEach(pValueWithTupleTag ->
      ctx.registerAdditionalOutputFrom(
        beamNode, vertex, pValueWithTupleTag.getValue(), pValueWithTupleTag.getKey()));
}
/** * Determine if this Node belongs to a Bounded branch of the pipeline, or Unbounded, and * translate with the proper translator. */ protected <TransformT extends PTransform<? super PInput, POutput>> TransformEvaluator<TransformT> translate( TransformHierarchy.Node node, TransformT transform) { // --- determine if node is bounded/unbounded. // usually, the input determines if the PCollection to apply the next transformation to // is BOUNDED or UNBOUNDED, meaning RDD/DStream. Map<TupleTag<?>, PValue> pValues; if (node.getInputs().isEmpty()) { // in case of a PBegin, it's the output. pValues = node.getOutputs(); } else { pValues = node.getInputs(); } PCollection.IsBounded isNodeBounded = isBoundedCollection(pValues.values()); // translate accordingly. LOG.debug("Translating {} as {}", transform, isNodeBounded); return isNodeBounded.equals(PCollection.IsBounded.BOUNDED) ? translator.translateBounded(transform) : translator.translateUnbounded(transform); }
private < InputT extends PInput, OutputT extends POutput, TransformT extends PTransform<? super InputT, OutputT>> void applyReplacement( Node original, PTransformOverrideFactory<InputT, OutputT, TransformT> replacementFactory) { PTransformReplacement<InputT, OutputT> replacement = replacementFactory.getReplacementTransform( (AppliedPTransform<InputT, OutputT, TransformT>) original.toAppliedPTransform(this)); if (replacement.getTransform() == original.getTransform()) { return; } InputT originalInput = replacement.getInput(); LOG.debug("Replacing {} with {}", original, replacement); transforms.replaceNode(original, originalInput, replacement.getTransform()); try { OutputT newOutput = replacement.getTransform().expand(originalInput); Map<PValue, ReplacementOutput> originalToReplacement = replacementFactory.mapOutputs(original.getOutputs(), newOutput); // Ensure the internal TransformHierarchy data structures are consistent. transforms.setOutput(newOutput); transforms.replaceOutputs(originalToReplacement); } finally { transforms.popNode(); } }
/**
 * Translates a {@link ParDo.SingleOutput} into an {@link OperatorVertex}, wiring main inputs,
 * side inputs, and the single main output.
 *
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator(ParDo.SingleOutput.class)
private static void parDoSingleOutputTranslator(final PipelineTranslationContext ctx,
                                                final TransformHierarchy.Node beamNode,
                                                final ParDo.SingleOutput<?, ?> transform) {
  final Map<Integer, PCollectionView<?>> sideInputMap = getSideInputMap(transform.getSideInputs());
  final AbstractDoFnTransform doFnTransform = createDoFnTransform(ctx, beamNode, sideInputMap);
  final IRVertex vertex = new OperatorVertex(doFnTransform);
  ctx.addVertex(vertex);
  // Additional inputs are wired as side-input edges below, so skip them here.
  beamNode.getInputs().values().stream()
    .filter(input -> !transform.getAdditionalInputs().values().contains(input))
    .forEach(input -> ctx.addEdgeTo(vertex, input));
  ctx.addSideInputEdges(vertex, sideInputMap);
  beamNode.getOutputs().values()
    .forEach(output -> ctx.registerMainOutputFrom(beamNode, vertex, output));
}
@Override
public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
  final PTransform<?, ?> transform = node.getTransform();
  // Map-style views (AsMap / AsMultimap) require a deterministic key coder;
  // record any offending transform by its full name.
  if (transform instanceof View.AsMap || transform instanceof View.AsMultimap) {
    final PCollection<KV<?, ?>> input =
        (PCollection<KV<?, ?>>) Iterables.getOnlyElement(node.getInputs().values());
    final KvCoder<?, ?> coder = (KvCoder) input.getCoder();
    try {
      coder.getKeyCoder().verifyDeterministic();
    } catch (NonDeterministicException e) {
      ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
    }
  }
  // Transforms that were flagged elsewhere are recorded by name here as well.
  if (ptransformViewsWithNonDeterministicKeyCoders.contains(transform)) {
    ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
  }
  return CompositeBehavior.ENTER_TRANSFORM;
}
@Override public void leaveCompositeTransform(Node node) { if (node.isRootNode()) { for (AppliedPTransform<?, ?, ?> pipelineRoot : children.get(node)) { rootIds.add(components.getExistingPTransformId(pipelineRoot)); } } else { // TODO: Include DisplayData in the proto children.put(node.getEnclosingNode(), node.toAppliedPTransform(pipeline)); try { components.registerPTransform( node.toAppliedPTransform(pipeline), children.get(node)); } catch (IOException e) { throw new RuntimeException(e); } } }
@Override public void doVisitTransform(TransformHierarchy.Node node) { // we populate cache candidates by updating the map with inputs of each node. // The goal is to detect the PCollections accessed more than one time, and so enable cache // on the underlying RDDs or DStreams. for (PValue value : node.getInputs().values()) { if (value instanceof PCollection) { long count = 1L; if (ctxt.getCacheCandidates().get(value) != null) { count = ctxt.getCacheCandidates().get(value) + 1; } ctxt.getCacheCandidates().put((PCollection) value, count); } } } }
@Override
public void visitPrimitiveTransform(Node node) {
  // Checks that exactly one WriteView primitive exists in the graph and that its view
  // matches the original `view` by tag, ViewFn, and window-mapping function.
  if (node.getTransform() instanceof WriteView) {
    // getAndSet(true) both records the visit and flags a duplicate in one atomic step.
    assertThat(
        "There should only be one WriteView primitive in the graph",
        writeViewVisited.getAndSet(true),
        is(false));
    PCollectionView<?> replacementView = ((WriteView) node.getTransform()).getView();
    // replacementView.getPCollection() is null, but that is not a requirement
    // so not asserted one way or the other
    assertThat(
        replacementView.getTagInternal(), equalTo((TupleTag) view.getTagInternal()));
    assertThat(replacementView.getViewFn(), equalTo(view.getViewFn()));
    assertThat(replacementView.getWindowMappingFn(), equalTo(view.getWindowMappingFn()));
    assertThat(node.getInputs().entrySet(), hasSize(1));
  }
}
});
@Override
public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
  if (node.isRootNode()) {
    return CompositeBehavior.ENTER_TRANSFORM;
  }
  final PTransform<?, ?> transform = node.getTransform();
  final boolean isSample = sample.getClass().equals(transform.getClass());
  // Both tracked composites share the same structural assertions; only the
  // recorded enum value differs.
  if (isSample || transform instanceof WriteFiles) {
    assertTrue(visited.add(isSample ? TransformsSeen.SAMPLE : TransformsSeen.WRITE));
    assertNotNull(node.getEnclosingNode());
    assertTrue(node.isCompositeNode());
  }
  // No Read.Bounded may appear anywhere in the traversed graph.
  assertThat(transform, not(instanceOf(Read.Bounded.class)));
  return CompositeBehavior.ENTER_TRANSFORM;
}
@Override
public void leaveCompositeTransform(TransformHierarchy.Node node) {
  // Reject any use after the traversal has completed.
  checkState(
      !finalized,
      "Attempted to use a %s that has already been finalized on a pipeline (visiting node %s)",
      KeyedPValueTrackingVisitor.class.getSimpleName(),
      node);
  if (node.isRootNode()) {
    // Leaving the root node means the visit is over; lock this visitor.
    finalized = true;
  } else if (PRODUCES_KEYED_OUTPUTS.contains(node.getTransform().getClass())) {
    // Every output of a keyed-output-producing transform is tracked as keyed.
    keyedValues.addAll(node.getOutputs().values());
  }
}
@Override
public CompositeBehavior enterCompositeTransform(Node node) {
  // Every value this node consumes must already have been visited.
  node.getInputs().values().forEach(input -> assertThat(visitedValues, hasItem(input)));
  assertThat(
      "Nodes should not be visited more than once", visitedNodes, not(hasItem(node)));
  // Non-root nodes must be visited strictly after their enclosing composite.
  if (!node.isRootNode()) {
    assertThat(
        "Nodes should always be visited after their enclosing nodes",
        visitedNodes,
        hasItem(node.getEnclosingNode()));
  }
  visitedNodes.add(node);
  return CompositeBehavior.ENTER_TRANSFORM;
}