/**
 * Applies {@code FlattenWithoutDuplicateInputs} to a list in which {@code first} appears three
 * times, then checks that every primitive {@link Flatten.PCollections} node found in the pipeline
 * has inputs that differ from the expansion of that original list.
 */
@Test
public void duplicatesInsertsMultipliers() {
  // Input with one collection repeated; this is what the replacement has to cope with.
  final PCollectionList<String> withDuplicates =
      PCollectionList.of(first).and(second).and(first).and(first);
  PTransform<PCollectionList<String>, PCollection<String>> dedupingFlatten =
      new DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs<>();

  withDuplicates.apply(dedupingFlatten);

  pipeline.traverseTopologically(
      new Defaults() {
        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
          // Only primitive flattens are of interest here.
          if (!(node.getTransform() instanceof Flatten.PCollections)) {
            return;
          }
          assertThat(node.getInputs(), not(equalTo(withDuplicates.expand())));
        }
      });
}
@Override public PCollection<KV<URI, String>> expand(PBegin input) { Pipeline pipeline = input.getPipeline(); // Create one TextIO.Read transform for each document // and add its output to a PCollectionList PCollectionList<KV<URI, String>> urisToLines = PCollectionList.empty(pipeline); // TextIO.Read supports: // - file: URIs and paths locally // - gs: URIs on the service for (final URI uri : uris) { String uriString; if (uri.getScheme().equals("file")) { uriString = new File(uri).getPath(); } else { uriString = uri.toString(); } PCollection<KV<URI, String>> oneUriToLines = pipeline .apply("TextIO.Read(" + uriString + ")", TextIO.Read.from(uriString)) .apply("WithKeys(" + uriString + ")", WithKeys.<URI, String>of(uri)); urisToLines = urisToLines.and(oneUriToLines); } return urisToLines.apply(Flatten.<KV<URI, String>>pCollections()); } }
/**
 * Builds a {@link PCollectionList} from the values of the given input map.
 *
 * <p>The resulting list holds the same values, in the same order, as iterating {@code
 * inputs.values()} produces them.
 *
 * @param inputs tagged inputs whose values become the list elements
 * @param p pipeline the (initially empty) list is created for
 * @return a list containing every value of {@code inputs}
 */
private PCollectionList<T> getInput(Map<TupleTag<?>, PValue> inputs, Pipeline p) {
  PCollectionList<T> collected = PCollectionList.empty(p);
  for (PValue value : inputs.values()) {
    // Unchecked cast: assumes every value really is a PCollection<T> — verify against callers.
    collected = collected.and((PCollection<T>) value);
  }
  return collected;
}
/**
 * Creates a {@link PCollectionList} whose only element is the given {@link PCollection}.
 *
 * <p>The list is created for the pipeline of {@code pc}. To obtain a longer {@link
 * PCollectionList}, call {@link #and} on the returned value.
 *
 * @param pc the single collection to wrap
 * @return a one-element list containing {@code pc}
 */
public static <T> PCollectionList<T> of(PCollection<T> pc) {
  PCollectionList<T> emptyList = new PCollectionList<T>(pc.getPipeline());
  return emptyList.and(pc);
}
/**
 * Turns the input relational nodes into a single {@link PCollectionList}.
 *
 * <p>With no input nodes an empty list for {@code pipeline} is returned; otherwise each node is
 * converted with {@code BeamSqlRelUtils.toPCollection} and the results are wrapped in order.
 *
 * @param inputRels the input nodes; each is cast to {@code BeamRelNode}
 * @param pipeline pipeline passed to the conversion and used for the empty list
 * @param cache passed through unchanged to {@code BeamSqlRelUtils.toPCollection}
 * @return the list of converted inputs
 */
private static PCollectionList<Row> buildPCollectionList(
    List<RelNode> inputRels, Pipeline pipeline, Map<Integer, PCollection<Row>> cache) {
  // Nothing to convert: hand back an empty list instead of calling of(...) with no elements.
  if (inputRels.isEmpty()) {
    return PCollectionList.empty(pipeline);
  }

  List<PCollection<Row>> converted =
      inputRels
          .stream()
          .map(rel -> BeamSqlRelUtils.toPCollection(pipeline, (BeamRelNode) rel, cache))
          .collect(Collectors.toList());
  return PCollectionList.of(converted);
}
@Override public PCollection<FeatureRowExtended> expand(PCollectionTuple tuple) { List<PCollection<FeatureRowExtended>> outputList = Lists.newArrayList(); for (TupleTag<FeatureRowExtended> tag : transforms.keySet()) { Write write = transforms.get(tag); Preconditions.checkNotNull(write, String.format("Null transform for tag=%s", tag.getId())); PCollection<FeatureRowExtended> input = tuple.get(tag); input.apply(String.format("Write to %s", tag.getId()), write); outputList.add(input); } // FeatureRows with no matching write transform end up in `input.get(mainTag)` and considered // discardible, we return them in the main output so they are considered written, but don't // actually write them to any store. outputList.add(tuple.get(mainTag)); return PCollectionList.of(outputList).apply("Flatten main", Flatten.pCollections()); } }
/**
 * Checks that a list holding the same {@link PCollection} three times still expands to three
 * values, each being that collection.
 */
@Test
public void testExpandWithDuplicates() {
  Pipeline testPipeline = TestPipeline.create();
  PCollection<Long> source = testPipeline.apply("CreateOne", Create.of(1L, 2L, 3L));

  // The same collection is added three times on purpose.
  PCollectionList<Long> tripled = PCollectionList.of(source).and(source).and(source);

  assertThat(tripled.expand().values(), containsInAnyOrder(source, source, source));
}
/**
 * Flattens the input list after collapsing repeated inputs.
 *
 * <p>Each distinct {@link PCollection} of the input is counted. A collection that occurs once is
 * added to the flatten input as is; one that occurs several times is first passed through a
 * {@code ParDo} of {@code DuplicateFn} constructed with the occurrence count (presumably emitting
 * each element that many times — confirm against {@code DuplicateFn}) and the result is added
 * once.
 *
 * @param input the list to flatten, possibly containing the same collection more than once
 * @return the flattened output
 */
@Override
public PCollection<T> expand(PCollectionList<T> input) {
  // Tally the number of occurrences of each distinct input collection.
  Map<PCollection<T>, Integer> occurrences = new HashMap<>();
  for (PCollection<T> candidate : input.getAll()) {
    occurrences.merge(candidate, 1, Integer::sum);
  }

  PCollectionList<T> distinctInputs = PCollectionList.empty(input.getPipeline());
  for (Map.Entry<PCollection<T>, Integer> entry : occurrences.entrySet()) {
    PCollection<T> source = entry.getKey();
    Integer count = entry.getValue();

    if (count.equals(1)) {
      // Seen exactly once: no multiplication step needed.
      distinctInputs = distinctInputs.and(source);
      continue;
    }

    String duplicationName = String.format("Multiply %s", source.getName());
    PCollection<T> multiplied = source.apply(duplicationName, ParDo.of(new DuplicateFn<>(count)));
    distinctInputs = distinctInputs.and(multiplied);
  }

  return distinctInputs.apply(Flatten.pCollections());
}
}
ImmutableList.of(boundedCount, maxReadTimeCount, unboundedCount); PCollectionList<Long> pcList = PCollectionList.of(counts); assertThat(pcList.getAll(), contains(boundedCount, maxReadTimeCount, unboundedCount)); PCollectionList<Long> withOneCreate = pcList.and(createTwo); assertThat( withOneCreate.getAll(), contains(boundedCount, maxReadTimeCount, unboundedCount, createTwo)); PCollectionList.<Long>empty(p) .and(unboundedCount) .and(createOne) .and(ImmutableList.of(boundedCount, maxReadTimeCount)); assertThat( fromEmpty.getAll(), contains(unboundedCount, createOne, boundedCount, maxReadTimeCount)); Map<TupleTag<?>, PValue> expansion = fromEmpty.expand(); assertThat(expansion, equalTo(fromEmpty.expand()));
@Test public void testEquals() { Pipeline p = TestPipeline.create(); PCollection<String> first = p.apply("Meta", Create.of("foo", "bar")); PCollection<String> second = p.apply("Pythonic", Create.of("spam, ham")); PCollection<String> third = p.apply("Syntactic", Create.of("eggs", "baz")); EqualsTester tester = new EqualsTester(); // tester.addEqualityGroup(PCollectionList.empty(p), PCollectionList.empty(p)); // tester.addEqualityGroup(PCollectionList.of(first).and(second)); // Constructors should all produce equivalent tester.addEqualityGroup( PCollectionList.of(first).and(second).and(third), PCollectionList.of(first).and(second).and(third), // PCollectionList.<String>empty(p).and(first).and(second).and(third), // PCollectionList.of(ImmutableList.of(first, second, third)), // PCollectionList.of(first).and(ImmutableList.of(second, third)), PCollectionList.of(ImmutableList.of(first, second)).and(third)); // Order is considered tester.addEqualityGroup(PCollectionList.of(first).and(third).and(second)); tester.addEqualityGroup(PCollectionList.empty(TestPipeline.create())); tester.testEquals(); } }
@Test @Category(NeedsRunner.class) public void testDroppedPartition() { // Compute the set of integers either 1 or 2 mod 3, the hard way. PCollectionList<Integer> outputs = pipeline .apply(Create.of(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) .apply(Partition.of(3, new ModFn())); List<PCollection<Integer>> outputsList = new ArrayList<>(outputs.getAll()); outputsList.remove(0); outputs = PCollectionList.of(outputsList); assertTrue(outputs.size() == 2); PCollection<Integer> output = outputs.apply(Flatten.pCollections()); PAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11); pipeline.run(); }
/**
 * Flattens a list of collections built from six line groups (some of them {@code NO_LINES}) and
 * asserts that the output contains, in any order, what {@code flattenLists} returns for the same
 * groups.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollections() {
  List<List<String>> lineGroups =
      Arrays.asList(LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES);

  Flatten.PCollections<String> flatten = Flatten.pCollections();
  PCollection<String> flattened = makePCollectionListOfStrings(p, lineGroups).apply(flatten);

  PAssert.that(flattened).containsInAnyOrder(flattenLists(lineGroups));
  p.run();
}
/**
 * Verifies that {@code PCollectionList.of} called with an empty list throws an {@link
 * IllegalArgumentException} whose string form contains the expected explanation.
 */
@Test
public void testEmptyListFailure() {
  String expectedFragment =
      "must either have a non-empty list of PCollections, "
          + "or must first call empty(Pipeline)";
  try {
    PCollectionList.of(Collections.<PCollection<String>>emptyList());
    // Reaching this line means no exception was thrown.
    fail("should have failed");
  } catch (IllegalArgumentException expected) {
    assertThat(expected.toString(), containsString(expectedFragment));
  }
}
WindowingStrategy<?, ?> windowingStrategy; IsBounded isBounded = IsBounded.BOUNDED; if (!inputs.getAll().isEmpty()) { windowingStrategy = inputs.get(0).getWindowingStrategy(); for (PCollection<?> input : inputs.getAll()) { WindowingStrategy<?, ?> other = input.getWindowingStrategy(); if (!windowingStrategy.getWindowFn().isCompatible(other.getWindowFn())) { inputs.getPipeline(), windowingStrategy, isBounded, inputs.getAll().isEmpty() ? null : inputs.get(0).getCoder());
/**
 * Returns the first collection of the input list as the output.
 *
 * @param input the list whose element at index 0 is returned
 * @return {@code input.get(0)}
 */
@Override
public PCollection<Long> expand(PCollectionList<Long> input) {
  PCollection<Long> head = input.get(0);
  return head;
}
});
/**
 * Produces the replacement for a flatten that has no inputs.
 *
 * <p>The applied transform must have an empty input map. The replacement pairs an empty {@link
 * PCollectionList} for the transform's pipeline with a new {@code CreateEmptyFromList}.
 *
 * @param transform the applied transform being replaced
 * @return the replacement input and transform
 * @throws IllegalArgumentException if {@code transform} has any inputs
 */
@Override
public PTransformReplacement<PCollectionList<T>, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PCollectionList<T>, PCollection<T>, PCollections<T>> transform) {
  checkArgument(
      transform.getInputs().isEmpty(),
      "Unexpected nonempty input %s for %s",
      transform.getInputs(),
      getClass().getSimpleName());

  PCollectionList<T> emptyInput = PCollectionList.empty(transform.getPipeline());
  CreateEmptyFromList<T> replacement = new CreateEmptyFromList<T>();
  return PTransformReplacement.of(emptyInput, replacement);
}
/**
 * Builds the reader for {@code sqlTable} at the beginning of the pipeline.
 *
 * <p>The input list must be empty; only its pipeline is used.
 *
 * @param input expected to contain no collections
 * @return the result of {@code sqlTable.buildIOReader} applied to the pipeline's begin
 * @throws IllegalArgumentException if {@code input} is not empty
 */
@Override
public PCollection<Row> expand(PCollectionList<Row> input) {
  boolean hasNoInputs = input.size() == 0;
  checkArgument(
      hasNoInputs,
      "Should not have received input for %s: %s",
      BeamIOSourceRel.class.getSimpleName(),
      input);

  return sqlTable.buildIOReader(input.getPipeline().begin());
}
}