/** * Returns a {@link Map} from the ID of a {@link PCollectionNode PCollection} to a {@link * PCollectionNode} that contains part of that {@link PCollectionNode PCollection}. */ private static Map<String, PCollectionNode> createPartialPCollections( Collection<PCollectionNode> duplicates, Predicate<String> existingPCollectionIds) { Map<String, PCollectionNode> unzippedOutputs = new LinkedHashMap<>(); Predicate<String> existingOrNewIds = existingPCollectionIds.or( id -> unzippedOutputs.values().stream().map(PCollectionNode::getId).anyMatch(id::equals)); for (PCollectionNode duplicateOutput : duplicates) { String id = SyntheticComponents.uniqueId(duplicateOutput.getId(), existingOrNewIds); PCollection partial = duplicateOutput.getPCollection().toBuilder().setUniqueName(id).build(); // Check to make sure there is only one duplicated output with the same id - which ensures we // only introduce one 'partial output' per producer of that output. PCollectionNode alreadyDeduplicated = unzippedOutputs.put(duplicateOutput.getId(), PipelineNode.pCollection(id, partial)); checkArgument(alreadyDeduplicated == null, "a duplicate should only appear once per stage"); } return unzippedOutputs; }
input.toBuilder().setUniqueName(kwiCollectionId).setCoderId(kwiCoderId).build(); String gbkoId = uniqueId(String.format("%s/GBKO", gbkId), components::containsTransforms); PTransform gbko =
input.toBuilder().setUniqueName(kwiCollectionId).setCoderId(kwiCoderId).build(); String rawGbkId = uniqueId(String.format("%s/RawGBK", spkId), components::containsTransforms); input.toBuilder().setUniqueName(feedSDFCollectionId).setCoderId(feedSDFCoderId).build(); String feedSDFId = uniqueId(String.format("%s/FeedSDF", spkId), components::containsTransforms);