/**
 * Creates an instance, optionally seeded from an existing set of components.
 *
 * <p>When {@code components} is non-null, the IDs of its transforms, PCollections, windowing
 * strategies, coders and environments are added to {@code reservedIds}, and the components
 * themselves are merged into {@code componentsBuilder}. A null argument leaves both untouched.
 *
 * @param components the components to seed from, or null for none
 */
private SdkComponents(RunnerApi.Components components) {
  if (components != null) {
    // Reserve every ID already in use by the supplied components, one kind at a time.
    reservedIds.addAll(components.getTransformsMap().keySet());
    reservedIds.addAll(components.getPcollectionsMap().keySet());
    reservedIds.addAll(components.getWindowingStrategiesMap().keySet());
    reservedIds.addAll(components.getCodersMap().keySet());
    reservedIds.addAll(components.getEnvironmentsMap().keySet());

    componentsBuilder.mergeFrom(components);
  }
}
/** Produces a {@link RunnerApi.Components} which contains only primitive transforms. */ @VisibleForTesting static Collection<String> getPrimitiveTransformIds(RunnerApi.Components components) { Collection<String> ids = new LinkedHashSet<>(); for (Map.Entry<String, PTransform> transformEntry : components.getTransformsMap().entrySet()) { PTransform transform = transformEntry.getValue(); boolean isPrimitive = isPrimitiveTransform(transform); if (isPrimitive) { // Sometimes "primitive" transforms have sub-transforms (and even deeper-nested descendents), due to runners // either rewriting them in terms of runner-specific transforms, or SDKs constructing them in terms of other // underlying transforms (see https://issues.apache.org/jira/browse/BEAM-5441). // We consider any "leaf" descendents of these "primitive" transforms to be the true "primitives" that we // preserve here; in the common case, this is just the "primitive" itself, which has no descendents). Deque<String> transforms = new ArrayDeque<>(); transforms.push(transformEntry.getKey()); while (!transforms.isEmpty()) { String id = transforms.pop(); PTransform next = components.getTransformsMap().get(id); List<String> subtransforms = next.getSubtransformsList(); if (subtransforms.isEmpty()) { ids.add(id); } else { transforms.addAll(subtransforms); } } } } return ids; }
pipeline .getComponents() .getTransformsMap() .forEach( (transformId, transform) -> { pipeline .getComponents() .getTransformsMap() .forEach( (transformId, transform) -> {
hasItems(introducedIds.toArray(new String[0]))); assertThat( result.getDeduplicatedComponents().getTransformsMap().entrySet(), hasItems( result
hasItems(stageOutputs.toArray(new String[0]))); assertThat( result.getDeduplicatedComponents().getTransformsMap(), hasEntry(introduced.getId(), introduced.getTransform()));
hasItems(introducedOutputs.toArray(new String[0]))); assertThat( result.getDeduplicatedComponents().getTransformsMap(), hasEntry(introduced.getId(), introduced.getTransform()));
private static void validateComponents(String context, Components components) { for (String transformId : components.getTransformsMap().keySet()) { PTransform transform = components.getTransformsOrThrow(transformId); String previousId = uniqueNamesById.put(transform.getUniqueName(), transformId);
fusedPipelineProto.getComponents().getTransformsMap().keySet(), hasItems(fusedPipelineProto.getRootTransformIdsList().toArray(new String[0]))); assertThat( fusedPipelineProto.getComponents().getTransformsMap(), allOf(hasKey(startsWith("map")), hasKey(startsWith("key")), hasKey(startsWith("values")))); assertThat(
assertThat(updatedThird.getSpec().getPayload(), equalTo(newPayload)); assertThat(updated.getComponents().getTransformsMap(), hasKey("second_sub")); assertThat(updated.getComponents().getTransformsMap(), hasKey("third_sub")); assertThat( updated.getComponents().getTransformsOrThrow("second_sub").getUniqueName(),
/**
 * Replaces every transform in {@code originalPipeline} whose spec URN equals {@code urn} with
 * the replacement produced by the given {@link TransformReplacement}.
 *
 * <p>For each match, the replacement's outputs map must equal that of the transform being
 * replaced (enforced with {@code checkArgument}). The matched transform is then passed to
 * {@code removeSubtransforms}, the replacement's components are merged into the result, and the
 * replacement transform is stored under the original transform's ID.
 *
 * @param urn the URN identifying the transforms to replace
 * @param originalPipeline the pipeline to read from; it is not modified
 * @param compositeBuilder supplies the replacement for each matching transform
 * @return a copy of {@code originalPipeline} carrying the updated components
 */
public static Pipeline updateTransform(
    String urn, Pipeline originalPipeline, TransformReplacement compositeBuilder) {
  Components sourceComponents = originalPipeline.getComponents();
  Components.Builder resultComponents = sourceComponents.toBuilder();

  for (Map.Entry<String, PTransform> entry : sourceComponents.getTransformsMap().entrySet()) {
    String transformId = entry.getKey();
    PTransform existing = entry.getValue();
    if (existing.getSpec() == null || !urn.equals(existing.getSpec().getUrn())) {
      continue;
    }

    MessageWithComponents replacement =
        compositeBuilder.getReplacement(transformId, sourceComponents);
    PTransform replacementTransform = replacement.getPtransform();
    checkArgument(
        replacementTransform.getOutputsMap().equals(existing.getOutputsMap()),
        "A %s must produce all of the outputs of the original %s",
        TransformReplacement.class.getSimpleName(),
        PTransform.class.getSimpleName());

    // Clear out the replaced transform's subtransforms before merging in the replacement.
    removeSubtransforms(existing, resultComponents);
    resultComponents.mergeFrom(replacement.getComponents());
    resultComponents.putTransforms(transformId, replacementTransform);
  }

  return originalPipeline.toBuilder().setComponents(resultComponents).build();
}
/**
 * Builds a map from transform ID to the {@link PTransform} form of each {@link ExecutableStage}
 * returned by {@link #getFusedStages()}.
 *
 * <p>Each ID is derived from the unique name of the stage's input PCollection and the URN of
 * the stage's environment, then passed through {@code SyntheticComponents.uniqueId} together
 * with a predicate covering the IDs already placed in the result and the transform IDs in
 * {@link #getComponents()}.
 *
 * @return the generated IDs mapped to their stage transforms
 */
private Map<String, PTransform> getEnvironmentExecutedTransforms() {
  Map<String, PTransform> stageTransforms = new HashMap<>();
  for (ExecutableStage fusedStage : getFusedStages()) {
    String inputName = fusedStage.getInputPCollection().getPCollection().getUniqueName();
    String environmentUrn = fusedStage.getEnvironment().getUrn();
    String baseName = String.format("%s/%s", inputName, environmentUrn);

    // IDs that are already taken: those assigned to earlier stages plus the existing transforms.
    Set<String> takenIds =
        Sets.union(stageTransforms.keySet(), getComponents().getTransformsMap().keySet());
    String stageId = SyntheticComponents.uniqueId(baseName, takenIds::contains);

    stageTransforms.put(stageId, fusedStage.toPTransform(stageId));
  }
  return stageTransforms;
}
}
@Test public void retainOnlyPrimitivesComposites() { Pipeline p = Pipeline.create(); p.apply( new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() { @Override public PCollection<Long> expand(PBegin input) { return input .apply(GenerateSequence.from(2L)) .apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L)))) .apply(MapElements.into(TypeDescriptors.longs()).via(l -> l + 1)); } }); Components originalComponents = PipelineTranslation.toProto(p).getComponents(); Collection<String> primitiveComponents = QueryablePipeline.getPrimitiveTransformIds(originalComponents); // Read, Window.Assign, ParDo. This will need to be updated if the expansions change. assertThat(primitiveComponents, hasSize(3)); for (String transformId : primitiveComponents) { assertThat(originalComponents.getTransformsMap(), hasKey(transformId)); } }
/**
 * Returns a copy of {@code pipeline} in which every transform whose spec URN is contained in
 * {@code knownUrns} has had {@code removeDescendants} applied to it.
 *
 * @param pipeline the pipeline to read from; it is not modified
 * @param knownUrns the URNs of the transforms whose descendants are removed
 * @return the pipeline built after all matching transforms have been processed
 */
private RunnerApi.Pipeline makeKnownUrnsPrimitives(
    RunnerApi.Pipeline pipeline, Set<String> knownUrns) {
  RunnerApi.Pipeline.Builder trimmedPipeline = pipeline.toBuilder();
  RunnerApi.Components components = pipeline.getComponents();
  // Iterate over the original pipeline's IDs while mutating only the builder copy.
  for (String ptransformId : components.getTransformsMap().keySet()) {
    String urn = components.getTransformsOrThrow(ptransformId).getSpec().getUrn();
    if (knownUrns.contains(urn)) {
      // Pass the ID as a logger argument so it fills the {} placeholder; concatenating it
      // left the placeholder in the output and built the string even with debug disabled.
      LOG.debug("Removing descendants of known PTransform {}", ptransformId);
      removeDescendants(trimmedPipeline, ptransformId);
    }
  }
  return trimmedPipeline.build();
}
/**
 * Builds a pipeline from a read followed by a multi-output ParDo and checks that {@code
 * QueryablePipeline.getPrimitiveTransformIds} returns exactly the key set of the translated
 * components' transforms map.
 */
@Test
public void retainOnlyPrimitivesWithOnlyPrimitivesUnchanged() {
  Pipeline pipeline = Pipeline.create();
  pipeline
      .apply("Read", Read.from(CountingSource.unbounded()))
      .apply(
          "multi-do",
          ParDo.of(new TestFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  Components translated = PipelineTranslation.toProto(pipeline).getComponents();
  Collection<String> expectedIds = translated.getTransformsMap().keySet();
  Collection<String> primitiveIds = QueryablePipeline.getPrimitiveTransformIds(translated);

  assertThat(primitiveIds, equalTo(expectedIds));
}