@Override
public PCollection<KV<TableDestination, TableRow>> expand(
    PCollection<KV<DestinationT, TableRow>> input) {
  List<PCollectionView<?>> sideInputs = Lists.newArrayList();
  sideInputs.addAll(dynamicDestinations.getSideInputs());
  return input.apply(ParDo.of(new CreateTablesFn()).withSideInputs(sideInputs));
}
@Test
public void getMainInputSingleOutputSideInputs() {
  AppliedPTransform<PCollection<Long>, ?, ?> application =
      AppliedPTransform.of(
          "application",
          ImmutableMap.<TupleTag<?>, PValue>builder()
              .put(new TupleTag<Long>(), mainInput)
              .put(sideInput.getTagInternal(), sideInput.getPCollection())
              .build(),
          Collections.singletonMap(new TupleTag<Long>(), output),
          ParDo.of(new TestDoFn()).withSideInputs(sideInput),
          pipeline);
  PCollection<Long> input = PTransformReplacements.getSingletonMainInput(application);
  assertThat(input, equalTo(mainInput));
}
@Override
public PCollection<KV<Integer, float[]>> expand(
    final PCollection<KV<Integer, float[]>> itemMatrix) {
  // Make the item-matrix view.
  final PCollectionView<Map<Integer, float[]>> itemMatrixView =
      itemMatrix
          .apply(GroupByKey.create())
          .apply(ParDo.of(new UngroupSingleVectorList()))
          .apply(View.asMap());
  // Get the new user matrix.
  final PCollectionView<Map<Integer, float[]>> userMatrixView =
      parsedUserData
          .apply(
              ParDo.of(new CalculateNextMatrix(numFeatures, lambda, itemMatrixView))
                  .withSideInputs(itemMatrixView))
          .apply(GroupByKey.create())
          .apply(ParDo.of(new UngroupSingleVectorList()))
          .apply(View.asMap());
  // Return the new item matrix.
  return parsedItemData.apply(
      ParDo.of(new CalculateNextMatrix(numFeatures, lambda, userMatrixView))
          .withSideInputs(userMatrixView));
}
@Test
public void getMainInputNoMainInputsThrows() {
  ImmutableMap<TupleTag<?>, PValue> inputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .put(sideInput.getTagInternal(), sideInput.getPCollection())
          .build();
  AppliedPTransform<PCollection<Long>, ?, ?> application =
      AppliedPTransform.of(
          "application",
          inputs,
          Collections.singletonMap(new TupleTag<Long>(), output),
          ParDo.of(new TestDoFn()).withSideInputs(sideInput),
          pipeline);
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("No main input");
  PTransformReplacements.getSingletonMainInput(application);
}
@Override
public PCollection<Mutation> expand(PCollection<KV<String, String>> input) {
  return input.apply(
      ParDo.of(
              new DoFn<KV<String, String>, Mutation>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  KV<String, String> kv = c.element();
                  Ddl ddl = c.sideInput(ddlView);
                  String tableName = kv.getKey();
                  Table table = ddl.table(tableName);
                  SerializableFunction<GenericRecord, Mutation> parseFn =
                      new AvroRecordConverter(table);
                  AvroSource<Mutation> source =
                      AvroSource.from(kv.getValue())
                          .withParseFn(parseFn, SerializableCoder.of(Mutation.class));
                  try {
                    BoundedSource.BoundedReader<Mutation> reader =
                        source.createReader(c.getPipelineOptions());
                    for (boolean more = reader.start(); more; more = reader.advance()) {
                      c.output(reader.getCurrent());
                    }
                  } catch (IOException e) {
                    throw new RuntimeException(e);
                  }
                }
              })
          .withSideInputs(ddlView));
}
@Test
public void getMainInputExtraMainInputsThrows() {
  PCollection<Long> notInParDo = pipeline.apply("otherPCollection", Create.of(1L, 2L, 3L));
  ImmutableMap<TupleTag<?>, PValue> inputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .putAll(mainInput.expand())
          // Not represented as an input to the ParDo
          .put(new TupleTag<Long>(), notInParDo)
          .put(sideInput.getTagInternal(), sideInput.getPCollection())
          .build();
  AppliedPTransform<PCollection<Long>, ?, ?> application =
      AppliedPTransform.of(
          "application",
          inputs,
          Collections.singletonMap(new TupleTag<Long>(), output),
          ParDo.of(new TestDoFn()).withSideInputs(sideInput),
          pipeline);
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("multiple inputs");
  thrown.expectMessage("not additional inputs");
  thrown.expectMessage(mainInput.toString());
  thrown.expectMessage(notInParDo.toString());
  PTransformReplacements.getSingletonMainInput(application);
}
PCollection<Double> meanTemps = rows.apply(ParDo.of(new ExtractTempFn()));
PCollectionView<Double> globalMeanTemp =
    meanTemps.apply(Mean.globally()).apply(View.asSingleton());
PCollection<TableRow> monthFilteredRows =
    rows.apply(ParDo.of(new FilterSingleMonthDataFn(monthFilter)));
monthFilteredRows.apply(
    "ParseAndFilter",
    ParDo.of(
            new DoFn<TableRow, TableRow>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                // ... (body elided in the original search result; it reads the
                // global mean via c.sideInput(globalMeanTemp) and filters rows)
              }
            })
        .withSideInputs(globalMeanTemp));
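A hedged, self-contained sketch of the elided filter step above: it keeps rows whose temperature is below the global mean. The field name "mean_temp" and the comparison direction are assumptions; only the side-input plumbing is taken from the snippet.

// Sketch only: the field name and comparison are assumptions, not from the snippet.
PCollection<TableRow> belowMean =
    monthFilteredRows.apply(
        "FilterBelowGlobalMean",
        ParDo.of(
                new DoFn<TableRow, TableRow>() {
                  @ProcessElement
                  public void process(ProcessContext c) {
                    double mean = c.sideInput(globalMeanTemp);
                    double temp = Double.parseDouble((String) c.element().get("mean_temp"));
                    if (temp < mean) {
                      c.output(c.element());
                    }
                  }
                })
            .withSideInputs(globalMeanTemp));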
@Override
public PCollection<KV<Integer, float[]>> expand(
    final PCollection<KV<Integer, float[]>> itemMatrix) {
  // Parse the training data for users.
  final PCollection<KV<Integer, KV<int[], float[]>>> parsedUserData =
      rawData
          .apply(ParDo.of(new AlternatingLeastSquare.ParseLine(true)))
          .apply(Combine.perKey(new AlternatingLeastSquare.TrainingDataCombiner()));
  // Make the item-matrix view.
  final PCollectionView<Map<Integer, float[]>> itemMatrixView = itemMatrix.apply(View.asMap());
  // Get the new user matrix.
  final PCollectionView<Map<Integer, float[]>> userMatrixView =
      parsedUserData
          .apply(
              ParDo.of(
                      new AlternatingLeastSquare.CalculateNextMatrix(
                          numFeatures, lambda, itemMatrixView))
                  .withSideInputs(itemMatrixView))
          .apply(View.asMap());
  // Return the new item matrix.
  return parsedItemData.apply(
      ParDo.of(
              new AlternatingLeastSquare.CalculateNextMatrix(numFeatures, lambda, userMatrixView))
          .withSideInputs(userMatrixView));
}
@Override
public PCollection<KV<Integer, KV<KeyT, ValueT>>> expand(PCollection<KV<KeyT, ValueT>> input) {
  return input
      .apply(
          "AssignTask",
          ParDo.of(new AssignTaskFn<KeyT, ValueT>(configView)).withSideInputs(configView))
      .setTypeDescriptor(
          TypeDescriptors.kvs(TypeDescriptors.integers(), input.getTypeDescriptor()))
      .apply("GroupByTaskId", GroupByKey.create())
      .apply("FlattenGroupedTasks", ParDo.of(new FlattenGroupedTasks<>()));
}
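The body of AssignTaskFn is not visible in this result; judging by the types, it tags each record with an integer task id derived from the side-input config so that GroupByKey can bucket records per task. A hedged sketch of what such a DoFn could look like; the Integer-valued view and the hashing scheme are assumptions, not the real implementation:

// Hedged sketch; the real AssignTaskFn's logic is not shown in the snippet.
static class AssignTaskFn<K, V> extends DoFn<KV<K, V>, KV<Integer, KV<K, V>>> {
  private final PCollectionView<Integer> numTasksView; // assumption: config carries a task count

  AssignTaskFn(PCollectionView<Integer> numTasksView) {
    this.numTasksView = numTasksView;
  }

  @ProcessElement
  public void process(ProcessContext c) {
    int numTasks = c.sideInput(numTasksView);
    // Bucket each record by key hash so the later GroupByKey groups per task.
    int taskId = Math.floorMod(c.element().getKey().hashCode(), numTasks);
    c.output(KV.of(taskId, c.element()));
  }
}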
@Override
public PCollection<KV<String, String>> expand(PCollection<KV<String, String>> input) {
  // Reparallelize mimics the fusion-breaking behavior used in JdbcIO.
  PCollectionView<Iterable<KV<String, String>>> empty =
      input
          .apply("Consume", Filter.by(SerializableFunctions.constant(false)))
          .apply(View.asIterable());
  PCollection<KV<String, String>> materialized =
      input.apply(
          "Identity",
          ParDo.of(
                  new DoFn<KV<String, String>, KV<String, String>>() {
                    @ProcessElement
                    public void processElement(ProcessContext context) {
                      context.output(context.element());
                    }
                  })
              .withSideInputs(empty));
  return materialized.apply(Reshuffle.viaRandomKey());
}
private PCollection<Result<DestinationT>> writeShardedRecords(
    PCollection<KV<ShardedKey<DestinationT>, TableRow>> shardedRecords,
    PCollectionView<String> tempFilePrefix) {
  return shardedRecords
      .apply("GroupByDestination", GroupByKey.create())
      .apply(
          "WriteGroupedRecords",
          ParDo.of(new WriteGroupedRecordsToFiles<DestinationT>(tempFilePrefix, maxFileSize))
              .withSideInputs(tempFilePrefix))
      .setCoder(WriteBundlesToFiles.ResultCoder.of(destinationCoder));
}
@Override
public PCollection<T> expand(PCollection<T> input) {
  // See https://issues.apache.org/jira/browse/BEAM-2803
  // We use a combined approach to "break fusion" here:
  // (see https://cloud.google.com/dataflow/service/dataflow-service-desc#preventing-fusion)
  // 1) force the data to be materialized by passing it as a side input to an identity fn,
  // then 2) reshuffle it with a random key. Initial materialization provides some parallelism
  // and ensures that data to be shuffled can be generated in parallel, while reshuffling
  // provides perfect parallelism.
  // In most cases where a "fusion break" is needed, a simple reshuffle would be sufficient.
  // The current approach is necessary only to support the particular case of JdbcIO where
  // a single query may produce many gigabytes of query results.
  PCollectionView<Iterable<T>> empty =
      input
          .apply("Consume", Filter.by(SerializableFunctions.constant(false)))
          .apply(View.asIterable());
  PCollection<T> materialized =
      input.apply(
          "Identity",
          ParDo.of(
                  new DoFn<T, T>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      c.output(c.element());
                    }
                  })
              .withSideInputs(empty));
  return materialized.apply(Reshuffle.viaRandomKey());
}
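A minimal usage sketch of this fusion-breaking transform. The enclosing class name is not visible in the search result, so Reparallelize is an assumed name for the PTransform whose expand() appears above, and a no-arg constructor is assumed:

// Hedged usage sketch; Reparallelize is an assumed name for the transform above.
PCollection<String> queryResults =
    p.apply("Source", Create.of("row-1", "row-2", "row-3")); // stand-in for a JDBC read
PCollection<String> redistributed =
    queryResults.apply("BreakFusion", new Reparallelize<String>());
// Downstream steps now run with shuffle-level parallelism instead of being
// fused onto the single bundle produced by the source.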
@Override
public PCollection<KV<DestinationT, TableRow>> expand(PCollection<T> input) {
  return input.apply(
      ParDo.of(
              new DoFn<T, KV<DestinationT, TableRow>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  // ... (body elided in the original search result; it maps each
                  // element to its destination and TableRow)
                }
              })
          .withSideInputs(dynamicDestinations.getSideInputs()));
}
@Override
public PCollection<Ddl> expand(PBegin p) {
  return p.apply("Create empty", Create.of((Void) null))
      .apply(
          "Read Information Schema",
          ParDo.of(new ReadInformationSchemaFn(spannerConfig, tx)).withSideInputs(tx));
}
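The Create.of((Void) null) call above is the usual run-exactly-once idiom: it yields a single-element PCollection so the downstream DoFn fires once per pipeline. A minimal sketch of the same idiom with an illustrative DoFn body:

// Hedged sketch of the run-once idiom; the DoFn body here is illustrative.
PCollection<String> once =
    p.apply("Impulse", Create.of((Void) null))
        .apply(
            "RunOnce",
            ParDo.of(
                new DoFn<Void, String>() {
                  @ProcessElement
                  public void process(ProcessContext c) {
                    // Executes exactly once because the input has one element.
                    c.output("initialized");
                  }
                }));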
@Override
public PCollection<OutputT> expand(PCollection<KV<K, InputT>> input) {
  DoFn<KV<K, InputT>, OutputT> fn = originalParDo.getFn();
  verifyFnIsStateful(fn);
  DataflowRunner.verifyStateSupported(fn);
  DataflowRunner.verifyStateSupportForWindowingStrategy(input.getWindowingStrategy());

  if (isFnApi) {
    return input
        .apply(GroupByKey.create())
        .apply(ParDo.of(new ExpandGbkFn<>()))
        .apply(originalParDo);
  }

  PTransform<
          PCollection<? extends KV<K, Iterable<KV<Instant, WindowedValue<KV<K, InputT>>>>>>,
          PCollection<OutputT>>
      statefulParDo =
          ParDo.of(new BatchStatefulDoFn<>(fn)).withSideInputs(originalParDo.getSideInputs());

  return input.apply(new GbkBeforeStatefulParDo<>()).apply(statefulParDo);
}
@Override
public PCollection<KV<K, V>> expand(PCollection<K> input) {
  return input
      .apply(
          ParDo.of(
                  new DoFn<K, KV<K, V>>() {
                    @ProcessElement
                    public void process(ProcessContext c) {
                      c.output(KV.of(c.element(), c.sideInput(view)));
                    }
                  })
              .withSideInputs(view))
      .setCoder(KvCoder.of(input.getCoder(), coder));
}
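A minimal usage sketch of the pattern above: pair every element with a singleton side-input value. PairWithView is an assumed name for the enclosing transform, and its constructor arguments mirror the view and coder fields the expand() references:

// Hedged usage sketch; PairWithView is an assumed name for the transform above.
PCollection<String> config = p.apply("Config", Create.of("prod"));
PCollectionView<String> configView = config.apply(View.asSingleton());

PCollection<String> keys = p.apply("Keys", Create.of("a", "b", "c"));
PCollection<KV<String, String>> keyed =
    keys.apply(
        "PairWithConfig",
        new PairWithView<String, String>(configView, StringUtf8Coder.of()));
// Each output element is KV.of(key, "prod").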
private PCollectionView<String> createTempFilePrefixView(
    Pipeline p, final PCollectionView<String> jobIdView) {
  return p.apply(Create.of(""))
      .apply(
          "GetTempFilePrefix",
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void getTempFilePrefix(ProcessContext c) {
                      String tempLocationRoot;
                      if (customGcsTempLocation != null) {
                        tempLocationRoot = customGcsTempLocation.get();
                      } else {
                        tempLocationRoot = c.getPipelineOptions().getTempLocation();
                      }
                      String tempLocation =
                          resolveTempLocation(
                              tempLocationRoot, "BigQueryWriteTemp", c.sideInput(jobIdView));
                      LOG.info(
                          "Writing BigQuery temporary files to {} before loading them.",
                          tempLocation);
                      c.output(tempLocation);
                    }
                  })
              .withSideInputs(jobIdView))
      .apply("TempFilePrefixView", View.asSingleton());
}
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  checkArgument(fn != null, ".via() is required");
  return input.apply(
      "FlatMap",
      ParDo.of(
              new DoFn<InputT, OutputT>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  // ... (body elided in the original search result; it applies fn
                  // to the element and emits each result)
                }
              })
          .withSideInputs(fn.getRequirements().getSideInputs()));
}
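The ".via() is required" check suggests this is the expand() behind Beam's FlatMapElements builder. Assuming that reading, a standard usage example, where lines is taken to be a PCollection<String>:

// Split lines into words; the lambda is the `fn` wrapped by the DoFn above.
PCollection<String> words =
    lines.apply(
        "SplitWords",
        FlatMapElements.into(TypeDescriptors.strings())
            .via((String line) -> Arrays.asList(line.split("\\s+"))));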
userScores.apply("UserSum", Sum.integersPerKey()); sumScores.apply(Values.create()).apply(Mean.<Integer>globally().asSingletonView()); ParDo .of( new DoFn<KV<String, Integer>, KV<String, Integer>>() { private final Counter numSpammerUsers = .withSideInputs(globalMeanScore)); return filtered;