/**
 * {@inheritDoc}.
 *
 * <p>The input {@link PCollectionList} that is constructed will have the same values in the same
 * order as the iteration order of the values of {@code inputs}.
 */
private PCollectionList<T> getInput(Map<TupleTag<?>, PValue> inputs, Pipeline p) {
  PCollectionList<T> pCollections = PCollectionList.empty(p);
  for (PValue input : inputs.values()) {
    // Safe by construction: every input value of this transform is a PCollection<T>.
    @SuppressWarnings("unchecked")
    PCollection<T> pcollection = (PCollection<T>) input;
    pCollections = pCollections.and(pcollection);
  }
  return pCollections;
}
/** Transforms the inputs into a PInput. */
private static PCollectionList<Row> buildPCollectionList(
    List<RelNode> inputRels, Pipeline pipeline, Map<Integer, PCollection<Row>> cache) {
  // No input rels: hand back an empty list bound to the same pipeline.
  if (inputRels.isEmpty()) {
    return PCollectionList.empty(pipeline);
  }
  // Convert every input rel to its PCollection (consulting the cache) and bundle the results.
  return PCollectionList.of(
      inputRels.stream()
          .map(rel -> BeamSqlRelUtils.toPCollection(pipeline, (BeamRelNode) rel, cache))
          .collect(Collectors.toList()));
}
@Override
public PTransformReplacement<PCollectionList<T>, PCollection<T>> getReplacementTransform(
    AppliedPTransform<PCollectionList<T>, PCollection<T>, PCollections<T>> transform) {
  // A Flatten being replaced by this override must have no inputs at all.
  boolean hasNoInputs = transform.getInputs().isEmpty();
  checkArgument(
      hasNoInputs,
      "Unexpected nonempty input %s for %s",
      transform.getInputs(),
      getClass().getSimpleName());
  // Replace with an explicit empty PCollectionList feeding the empty-create composite.
  return PTransformReplacement.of(
      PCollectionList.empty(transform.getPipeline()), new CreateEmptyFromList<T>());
}
@Override
public PCollection<T> expand(PCollectionList<T> input) {
  // Count how many times each distinct PCollection appears in the input list.
  Map<PCollection<T>, Integer> instances = new HashMap<>();
  for (PCollection<T> pCollection : input.getAll()) {
    // merge() replaces the original double-lookup (get, null check, put).
    instances.merge(pCollection, 1, Integer::sum);
  }
  PCollectionList<T> output = PCollectionList.empty(input.getPipeline());
  for (Map.Entry<PCollection<T>, Integer> instanceEntry : instances.entrySet()) {
    if (instanceEntry.getValue().equals(1)) {
      // Unique inputs pass through untouched.
      output = output.and(instanceEntry.getKey());
    } else {
      // Duplicated inputs are multiplied via a ParDo emitting each element N times,
      // so the final Flatten never consumes the same PCollection more than once.
      String duplicationName = String.format("Multiply %s", instanceEntry.getKey().getName());
      PCollection<T> duplicated =
          instanceEntry
              .getKey()
              .apply(duplicationName, ParDo.of(new DuplicateFn<>(instanceEntry.getValue())));
      output = output.and(duplicated);
    }
  }
  return output.apply(Flatten.pCollections());
}
}
@Override public PCollection<KV<URI, String>> expand(PBegin input) { Pipeline pipeline = input.getPipeline(); // Create one TextIO.Read transform for each document // and add its output to a PCollectionList PCollectionList<KV<URI, String>> urisToLines = PCollectionList.empty(pipeline); // TextIO.Read supports: // - file: URIs and paths locally // - gs: URIs on the service for (final URI uri : uris) { String uriString; if (uri.getScheme().equals("file")) { uriString = new File(uri).getPath(); } else { uriString = uri.toString(); } PCollection<KV<URI, String>> oneUriToLines = pipeline .apply("TextIO.Read(" + uriString + ")", TextIO.Read.from(uriString)) .apply("WithKeys(" + uriString + ")", WithKeys.<URI, String>of(uri)); urisToLines = urisToLines.and(oneUriToLines); } return urisToLines.apply(Flatten.<KV<URI, String>>pCollections()); } }
@Override public PCollection<KV<URI, String>> expand(PBegin input) { Pipeline pipeline = input.getPipeline(); // Create one TextIO.Read transform for each document // and add its output to a PCollectionList PCollectionList<KV<URI, String>> urisToLines = PCollectionList.empty(pipeline); // TextIO.Read supports: // - file: URIs and paths locally // - gs: URIs on the service for (final URI uri : uris) { String uriString; if ("file".equals(uri.getScheme())) { uriString = new File(uri).getPath(); } else { uriString = uri.toString(); } PCollection<KV<URI, String>> oneUriToLines = pipeline .apply("TextIO.Read(" + uriString + ")", TextIO.read().from(uriString)) .apply("WithKeys(" + uriString + ")", WithKeys.of(uri)) .setCoder(KvCoder.of(StringDelegateCoder.of(URI.class), StringUtf8Coder.of())); urisToLines = urisToLines.and(oneUriToLines); } return urisToLines.apply(Flatten.pCollections()); } }
@Override
public PCollection<T> expand(PBegin input) {
  // Flattening an empty list yields an empty PCollection on the same pipeline.
  return PCollectionList.<T>empty(input.getPipeline()).apply(Flatten.pCollections());
}
}
@Override public PCollectionList<T> expand(PCollection<T> in) { final TupleTagList outputTags = partitionDoFn.getOutputTags(); PCollectionTuple outputs = in.apply(ParDo.of(partitionDoFn).withOutputTags(new TupleTag<Void>() {}, outputTags)); PCollectionList<T> pcs = PCollectionList.empty(in.getPipeline()); Coder<T> coder = in.getCoder(); for (TupleTag<?> outputTag : outputTags.getAll()) { // All the tuple tags are actually TupleTag<T> // And all the collections are actually PCollection<T> @SuppressWarnings("unchecked") TupleTag<T> typedOutputTag = (TupleTag<T>) outputTag; pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder)); } return pcs; }
@Test
@Category(NeedsRunner.class)
public void testOverride() {
  PCollectionList<Long> empty = PCollectionList.empty(pipeline);
  PCollection<Long> emptyFlattened =
      empty.apply(
          factory
              .getReplacementTransform(
                  AppliedPTransform.of(
                      // The application under test has no inputs; name it accordingly
                      // (the previous "nonEmptyInput" label was a misleading copy-paste).
                      "emptyInput",
                      Collections.emptyMap(),
                      Collections.emptyMap(),
                      Flatten.pCollections(),
                      pipeline))
              .getTransform());

  PAssert.that(emptyFlattened).empty();
  pipeline.run();
}
}
@Test @Category(NeedsRunner.class) public void testFlattenNoListsNoCoder() { // not ValidatesRunner because it should fail at pipeline construction time anyhow. thrown.expect(IllegalStateException.class); thrown.expectMessage("Unable to return a default Coder"); PCollectionList.<ClassWithoutCoder>empty(p).apply(Flatten.pCollections()); p.run(); }
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testUnboundedSourceSplits() throws Exception {
  long numElements = 1000;
  int numSplits = 10;

  UnboundedSource<Long, ?> initial = CountingSource.unbounded();
  List<? extends UnboundedSource<Long, ?>> splits = initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());

  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);

  // Read each split as a bounded sub-pipeline and flatten them back together.
  PCollectionList<Long> pcollections = PCollectionList.empty(p);
  int splitIndex = 0;
  for (UnboundedSource<Long, ?> split : splits) {
    pcollections =
        pcollections.and(
            p.apply("split" + splitIndex, Read.from(split).withMaxNumRecords(elementsPerSplit)));
    splitIndex++;
  }
  PCollection<Long> input = pcollections.apply(Flatten.pCollections());

  addCountingAsserts(input, numElements);
  p.run();
}
@Test
public void getRootTransformsContainsEmptyFlatten() {
  PCollections<String> flatten = Flatten.pCollections();
  PCollectionList<String> noInputs = PCollectionList.empty(p);
  PCollection<String> flattened = noInputs.apply(flatten);
  flattened.setCoder(StringUtf8Coder.of());
  p.traverseTopologically(visitor);

  DirectGraph graph = visitor.getGraph();
  // An input-free Flatten is a root of the graph — and the only one in this pipeline.
  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(new Object[] {graph.getProducer(flattened)}));
  AppliedPTransform<?, ?, ?> onlyRoot = Iterables.getOnlyElement(graph.getRootTransforms());
  assertThat((Object) onlyRoot.getTransform(), equalTo(flatten));
  assertThat(onlyRoot.getInputs().entrySet(), emptyIterable());
  assertThat(onlyRoot.getOutputs(), equalTo(flattened.expand()));
}
@Test @Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class}) public void testBoundedSourceSplits() throws Exception { long numElements = 1000; long numSplits = 10; long splitSizeBytes = numElements * 8 / numSplits; // 8 bytes per long element. BoundedSource<Long> initial = CountingSource.upTo(numElements); List<? extends BoundedSource<Long>> splits = initial.split(splitSizeBytes, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); // Assemble all the splits into one flattened PCollection, also verify their sizes. PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { BoundedSource<Long> split = splits.get(i); pcollections = pcollections.and(p.apply("split" + i, Read.from(split))); assertEquals( "Expected even splitting", splitSizeBytes, split.getEstimatedSizeBytes(p.getOptions())); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); p.run(); }
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollectionsEmpty() {
  // Flattening zero collections must still produce an (empty) PCollection;
  // no coder can be inferred from members, so it is set explicitly.
  PCollectionList<String> nothing = PCollectionList.empty(p);
  PCollection<String> output =
      nothing.apply(Flatten.pCollections()).setCoder(StringUtf8Coder.of());
  PAssert.that(output).empty();
  p.run();
}
@Test
public void testFlattenInMemoryEvaluatorWithEmptyPCollectionList() throws Exception {
  PCollectionList<Integer> list = PCollectionList.empty(p);

  PCollection<Integer> flattened = list.apply(Flatten.pCollections());
  flattened.setCoder(VarIntCoder.of());

  EvaluationContext evaluationContext = mock(EvaluationContext.class);
  when(evaluationContext.createBundle(flattened))
      .thenReturn(bundleFactory.createBundle(flattened));

  FlattenEvaluatorFactory factory = new FlattenEvaluatorFactory(evaluationContext);
  AppliedPTransform<?, ?, ?> flattenedProducer = DirectGraphs.getProducer(flattened);
  TransformEvaluator<Integer> emptyEvaluator =
      factory.forApplication(
          flattenedProducer,
          bundleFactory.createRootBundle().commit(BoundedWindow.TIMESTAMP_MAX_VALUE));

  // Finishing the bundle of an empty flatten yields exactly one, empty, output bundle.
  TransformResult<Integer> result = emptyEvaluator.finishBundle();
  CommittedBundle<?> outputBundle =
      Iterables.getOnlyElement(result.getOutputBundles()).commit(Instant.now());
  assertThat(outputBundle.getElements(), emptyIterable());
  assertThat(
      result.getTransform(), Matchers.<AppliedPTransform<?, ?, ?>>equalTo(flattenedProducer));
}
}
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollectionsEmptyThenParDo() {
  // An empty flatten followed by a pass-through ParDo still produces no elements.
  PCollectionList<String> nothing = PCollectionList.empty(p);
  PCollection<String> output =
      nothing
          .apply(Flatten.pCollections())
          .setCoder(StringUtf8Coder.of())
          .apply(ParDo.of(new IdentityFn<>()));
  PAssert.that(output).empty();
  p.run();
}
@Test @Category(NeedsRunner.class) public void testUnboundedSourceRateSplits() throws Exception { int elementsPerPeriod = 10; Duration period = Duration.millis(5); long numElements = 1000; int numSplits = 10; UnboundedCountingSource initial = CountingSource.createUnboundedFrom(0).withRate(elementsPerPeriod, period); List<? extends UnboundedSource<Long, ?>> splits = initial.split(numSplits, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); long elementsPerSplit = numElements / numSplits; assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { pcollections = pcollections.and( p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); Instant startTime = Instant.now(); p.run(); Instant endTime = Instant.now(); // 500 ms if the readers are all initialized in parallel; 5000 ms if they are evaluated serially long expectedMinimumMillis = (numElements * period.getMillis()) / elementsPerPeriod; assertThat(expectedMinimumMillis, lessThan(endTime.getMillis() - startTime.getMillis())); }
@Test public void testUnboundedSourceSplits() throws Exception { int numElements = 1000; int numSplits = 10; // Coders must be specified explicitly here due to the way the transform // is used in the test. UnboundedSource<KafkaRecord<Integer, Long>, ?> initial = mkKafkaReadTransform(numElements, null) .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of()) .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of()) .makeSource(); List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits = initial.split(numSplits, p.getOptions()); assertEquals("Expected exact splitting", numSplits, splits.size()); long elementsPerSplit = numElements / numSplits; assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); PCollectionList<Long> pcollections = PCollectionList.empty(p); for (int i = 0; i < splits.size(); ++i) { pcollections = pcollections.and( p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit)) .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>())) .apply("collection " + i, Values.create())); } PCollection<Long> input = pcollections.apply(Flatten.pCollections()); addCountingAsserts(input, numElements); p.run(); }
@Test
@Category(ValidatesRunner.class)
public void testEmptyFlattenAsSideInput() {
  // Materialize an empty flatten as an iterable side input.
  final PCollectionView<Iterable<String>> view =
      PCollectionList.<String>empty(p)
          .apply(Flatten.pCollections())
          .setCoder(StringUtf8Coder.of())
          .apply(View.asIterable());

  // A single void element drives one DoFn invocation that copies the side input through.
  PCollection<String> output =
      p.apply(Create.of((Void) null).withCoder(VoidCoder.of()))
          .apply(
              ParDo.of(
                      new DoFn<Void, String>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          for (String element : c.sideInput(view)) {
                            c.output(element);
                          }
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output).empty();
  p.run();
}
@Test
public void testEquals() {
  Pipeline p = TestPipeline.create();
  PCollection<String> first = p.apply("Meta", Create.of("foo", "bar"));
  PCollection<String> second = p.apply("Pythonic", Create.of("spam, ham"));
  PCollection<String> third = p.apply("Syntactic", Create.of("eggs", "baz"));

  EqualsTester tester = new EqualsTester();
  // NOTE(review): the groups below look deliberately disabled rather than forgotten —
  // TODO confirm why they were commented out before removing or re-enabling them.
  //    tester.addEqualityGroup(PCollectionList.empty(p), PCollectionList.empty(p));
  //    tester.addEqualityGroup(PCollectionList.of(first).and(second));

  // Constructors should all produce equivalent
  tester.addEqualityGroup(
      PCollectionList.of(first).and(second).and(third),
      PCollectionList.of(first).and(second).and(third),
      //        PCollectionList.<String>empty(p).and(first).and(second).and(third),
      //        PCollectionList.of(ImmutableList.of(first, second, third)),
      //        PCollectionList.of(first).and(ImmutableList.of(second, third)),
      PCollectionList.of(ImmutableList.of(first, second)).and(third));

  // Order is considered
  tester.addEqualityGroup(PCollectionList.of(first).and(third).and(second));
  // Lists on different pipelines are never equal, even when both are empty.
  tester.addEqualityGroup(PCollectionList.empty(TestPipeline.create()));

  tester.testEquals();
}
}