@Override
public PCollection<Row> expand(PCollection<? extends String> jsonStrings) {
  return jsonStrings
      .apply(
          ParDo.of(
              new DoFn<String, Row>() {
                @ProcessElement
                public void processElement(ProcessContext context) {
                  context.output(jsonToRow(objectMapper(), context.element()));
                }
              }))
      .setRowSchema(schema);
}
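// A self-contained sketch of the same JSON-to-Row pattern using Beam's built-in
// JsonToRow transform. The field names and sample payloads are illustrative
// assumptions, not taken from the snippet above.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.JsonToRow;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

public class JsonToRowExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema schema = Schema.builder().addStringField("name").addInt32Field("age").build();

    PCollection<Row> rows =
        pipeline
            .apply(Create.of("{\"name\":\"alice\",\"age\":1}", "{\"name\":\"bob\",\"age\":2}"))
            // JsonToRow.withSchema attaches the schema itself, so no explicit
            // setRowSchema call is needed on the output.
            .apply(JsonToRow.withSchema(schema));

    pipeline.run().waitUntilFinish();
  }
}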
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  return begin
      .apply("MockedBoundedTable_Reader_" + COUNTER.incrementAndGet(), Create.of(rows))
      .setRowSchema(getSchema());
}
@Override
public PCollection<Row> expand(PCollection<KV<byte[], byte[]>> input) {
  return input
      .apply(
          "decodeRecord",
          ParDo.of(
              new DoFn<KV<byte[], byte[]>, Row>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  String rowInString = new String(c.element().getValue(), UTF_8);
                  for (Row row : csvLines2BeamRows(format, rowInString, schema)) {
                    c.output(row);
                  }
                }
              }))
      .setRowSchema(schema);
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  checkArgument(
      pinput.size() == 0,
      "Should not have received input for %s: %s",
      BeamValuesRel.class.getSimpleName(),
      pinput);

  if (tuples.isEmpty()) {
    throw new IllegalStateException("Values with empty tuples!");
  }

  Schema schema = CalciteUtils.toSchema(getRowType());
  List<Row> rows = tuples.stream().map(tuple -> tupleToRow(schema, tuple)).collect(toList());

  return pinput.getPipeline().begin().apply(Create.of(rows)).setRowSchema(schema);
}
@Override public PCollection<Row> expand(PCollection<String> input) { return input .apply( "csvToRow", FlatMapElements.into(TypeDescriptors.rows()) .via(s -> csvLines2BeamRows(csvFormat, s, schema))) .setRowSchema(schema); } }
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  // TODO: make this more generic.
  return begin
      .apply(BigQueryIO.read(BigQueryUtils.toBeamRow(schema)).from(tableSpec))
      .setRowSchema(getSchema());
}
@Override
public PCollection<Row> expand(PCollectionList<Row> pinput) {
  // The set of rows where we run the correlated unnest for each row.
  PCollection<Row> outer = pinput.get(0);

  Schema joinedSchema = CalciteUtils.toSchema(rowType);

  return outer
      .apply(ParDo.of(new UnnestFn(joinedSchema, unnestIndex)))
      .setRowSchema(joinedSchema);
}
@Override
public PCollection<Row> expand(PCollection<T> input) {
  Schema inputSchema = input.getSchema();
  Schema outputSchema = getUnnestedSchema(inputSchema, getFieldNameFunction());

  return input
      .apply(
          ParDo.of(
              new DoFn<T, Row>() {
                @ProcessElement
                public void processElement(@Element Row row, OutputReceiver<Row> o) {
                  o.output(unnestRow(row, outputSchema));
                }
              }))
      .setRowSchema(outputSchema);
}
private PCollection<Row> queryFromOrderTables(String sql) {
  return tuple(
          "ORDER_DETAILS1",
          ORDER_DETAILS1.buildIOReader(pipeline.begin()).setRowSchema(SOURCE_ROW_TYPE),
          "ORDER_DETAILS2",
          ORDER_DETAILS2.buildIOReader(pipeline.begin()).setRowSchema(SOURCE_ROW_TYPE))
      .apply("join", SqlTransform.query(sql))
      .setRowSchema(RESULT_ROW_TYPE);
}
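// A runnable sketch of the same SqlTransform join pattern over two in-memory
// tables registered via a PCollectionTuple; the tag ids become the SQL table
// names. Schemas, values, and the query are illustrative assumptions.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;

public class SqlJoinExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema orderSchema = Schema.builder().addInt32Field("id").addStringField("item").build();

    PCollection<Row> orders1 =
        pipeline
            .apply("Orders1", Create.of(Row.withSchema(orderSchema).addValues(1, "apple").build()))
            .setRowSchema(orderSchema);
    PCollection<Row> orders2 =
        pipeline
            .apply("Orders2", Create.of(Row.withSchema(orderSchema).addValues(1, "banana").build()))
            .setRowSchema(orderSchema);

    PCollection<Row> joined =
        PCollectionTuple.of(new TupleTag<>("ORDERS1"), orders1)
            .and(new TupleTag<>("ORDERS2"), orders2)
            .apply(
                "join",
                SqlTransform.query(
                    "SELECT o1.id, o2.item FROM ORDERS1 o1 JOIN ORDERS2 o2 ON o1.id = o2.id"));

    pipeline.run().waitUntilFinish();
  }
}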
@Override public PCollection<Row> expand(PCollectionList<Row> pinput) { checkArgument( pinput.size() == 1, "Wrong number of inputs for %s: %s", BeamCalcRel.class.getSimpleName(), pinput); PCollection<Row> upstream = pinput.get(0); BeamSqlExpressionExecutor executor = new BeamSqlFnExecutor(BeamCalcRel.this.getProgram()); Schema schema = CalciteUtils.toSchema(rowType); PCollection<Row> projectStream = upstream .apply(ParDo.of(new CalcFn(executor, CalciteUtils.toSchema(rowType)))) .setRowSchema(schema); projectStream.setRowSchema(CalciteUtils.toSchema(getRowType())); return projectStream; } }
@Override
public PCollection<Row> expand(PCollection<InputT> input) {
  SchemaAggregateFn.Inner<InputT> fn =
      schemaAggregateFn.withSchema(input.getSchema(), input.getToRowFunction());
  return input.apply(Combine.globally(fn)).setRowSchema(fn.getOutputSchema());
}
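// A sketch of the same schema-aware global aggregation pattern using Beam's
// Group transform, which builds on a schema aggregate fn much like the
// expansion above. The schema and field names are illustrative assumptions.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.transforms.Group;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

public class SchemaAggregateExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema schema = Schema.builder().addStringField("user").addInt32Field("count").build();

    PCollection<Row> input =
        pipeline
            .apply(
                Create.of(
                    Row.withSchema(schema).addValues("a", 1).build(),
                    Row.withSchema(schema).addValues("b", 2).build()))
            .setRowSchema(schema);

    // Sums the "count" field across all rows; the output rows carry a schema
    // derived from the aggregation, so no explicit setRowSchema is needed.
    PCollection<Row> totals =
        input.apply(Group.<Row>globally().aggregateField("count", Sum.ofIntegers(), "total"));

    pipeline.run().waitUntilFinish();
  }
}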
@Override public PCollection<Row> expand(PCollectionList<Row> pinput) { checkArgument( pinput.size() == 1, "Wrong number of inputs for %s: %s", BeamUncollectRel.class.getSimpleName(), pinput); PCollection<Row> upstream = pinput.get(0); // Each row of the input contains a single array of things to be emitted; Calcite knows // what the row looks like Schema outputSchema = CalciteUtils.toSchema(getRowType()); PCollection<Row> uncollected = upstream.apply(ParDo.of(new UncollectDoFn(outputSchema))).setRowSchema(outputSchema); return uncollected; } }
@Override public PCollection<Row> expand(PCollection<String> input) { return input .apply( "linesToRows", MapElements.into(TypeDescriptors.rows()) .via(s -> Row.withSchema(SCHEMA).addValue(s).build())) .setRowSchema(SCHEMA); } }
@Override
public PCollection<Row> expand(PCollection<T> input) {
  Schema inputSchema = input.getSchema();
  verifyCompatibility(inputSchema);

  return input
      .apply(
          ParDo.of(
              new DoFn<T, Row>() {
                // TODO: This should be the same as resolved so that Beam knows which fields
                // are being accessed. Currently Beam only supports wildcard descriptors.
                // Once BEAM-4457 is fixed, fix this.
                @FieldAccess("filterFields")
                final FieldAccessDescriptor fieldAccessDescriptor =
                    FieldAccessDescriptor.withAllFields();

                @ProcessElement
                public void process(
                    @FieldAccess("filterFields") Row input, OutputReceiver<Row> r) {
                  Row output = castRow(input, inputSchema, outputSchema());
                  r.output(output);
                }
              }))
      .setRowSchema(outputSchema());
}
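// A usage sketch of the schema-cast pattern above via Beam's Cast transform,
// widening an int32 field to int64. The schemas and field name are
// illustrative assumptions.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.transforms.Cast;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

public class CastRowExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema narrow = Schema.builder().addInt32Field("count").build();
    Schema wide = Schema.builder().addInt64Field("count").build();

    PCollection<Row> input =
        pipeline
            .apply(Create.of(Row.withSchema(narrow).addValues(1).build()))
            .setRowSchema(narrow);

    // Cast.widening only permits lossless conversions such as int32 -> int64.
    PCollection<Row> widened = input.apply(Cast.widening(wide));

    pipeline.run().waitUntilFinish();
  }
}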
protected PCollection<Row> getFloorCeilingTestPCollection() {
  try {
    return TestBoundedTable.of(ROW_TYPE_THREE)
        .addRows(parseTimestampWithUTCTimeZone("1986-02-15 11:35:26"), 1.4)
        .buildIOReader(pipeline.begin())
        .setRowSchema(ROW_TYPE_THREE);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  TestStream.Builder<Row> values =
      TestStream.create(
          schema, SerializableFunctions.identity(), SerializableFunctions.identity());

  for (Pair<Duration, List<Row>> pair : timestampedRows) {
    values = values.advanceWatermarkTo(new Instant(0).plus(pair.getKey()));
    for (int i = 0; i < pair.getValue().size(); i++) {
      values =
          values.addElements(
              TimestampedValue.of(
                  pair.getValue().get(i),
                  new Instant(pair.getValue().get(i).getDateTime(timestampField))));
    }
  }

  return begin
      .apply(
          "MockedUnboundedTable_" + COUNTER.incrementAndGet(),
          values.advanceWatermarkToInfinity())
      .setRowSchema(getSchema());
}
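// A trimmed-down sketch of the TestStream pattern above: elements are added
// with explicit timestamps and the watermark is advanced between batches.
// The schema, values, and times are illustrative assumptions; TestStream
// requires a runner that supports it, such as the DirectRunner.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.RowCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.testing.TestStream;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TimestampedValue;
import org.joda.time.Duration;
import org.joda.time.Instant;

public class TestStreamRowExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema schema = Schema.builder().addStringField("user").addInt32Field("count").build();

    TestStream<Row> stream =
        TestStream.create(RowCoder.of(schema))
            .advanceWatermarkTo(new Instant(0))
            .addElements(
                TimestampedValue.of(
                    Row.withSchema(schema).addValues("a", 1).build(), new Instant(1000)))
            .advanceWatermarkTo(new Instant(0).plus(Duration.standardMinutes(1)))
            .addElements(
                TimestampedValue.of(
                    Row.withSchema(schema).addValues("b", 2).build(), new Instant(61000)))
            .advanceWatermarkToInfinity();

    // As in the snippet above, the Row schema is attached after applying the stream.
    PCollection<Row> rows = pipeline.apply(stream).setRowSchema(schema);

    pipeline.run().waitUntilFinish();
  }
}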
private PCollection<Row> sideInputJoinHelper(
    JoinRelType joinType,
    PCollection<KV<Row, Row>> leftRows,
    PCollection<KV<Row, Row>> rightRows,
    Row rightNullRow,
    boolean swapped) {
  final PCollectionView<Map<Row, Iterable<Row>>> rowsView = rightRows.apply(View.asMultimap());

  Schema schema = CalciteUtils.toSchema(getRowType());
  return leftRows
      .apply(
          ParDo.of(
                  new BeamJoinTransforms.SideInputJoinDoFn(
                      joinType, rightNullRow, rowsView, swapped, schema))
              .withSideInputs(rowsView))
      .setRowSchema(schema);
}
protected PCollection<Row> getTestPCollection() {
  try {
    return TestBoundedTable.of(ROW_TYPE)
        .addRows(
            parseTimestampWithUTCTimeZone("1986-02-15 11:35:26"),
            (byte) 1,
            (short) 1,
            1,
            1L,
            1.0f,
            1.0,
            BigDecimal.ONE,
            (byte) 127,
            (short) 32767,
            2147483647,
            9223372036854775807L)
        .buildIOReader(pipeline.begin())
        .setRowSchema(ROW_TYPE);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  PCollectionTuple rowsWithDlq =
      begin
          .apply("readFromPubsub", readMessagesWithAttributes())
          .apply("parseMessageToRow", createParserParDo());
  rowsWithDlq.get(MAIN_TAG).setRowSchema(getSchema());

  if (useDlq()) {
    rowsWithDlq.get(DLQ_TAG).apply(writeMessagesToDlq());
  }

  return rowsWithDlq.get(MAIN_TAG);
}
@Test
@Category(NeedsRunner.class)
public void testMismatchingKeys() {
  PCollection<Row> pc1 =
      pipeline
          .apply(
              "Create1",
              Create.of(Row.withSchema(CG_SCHEMA_1).addValues("user1", 1, "us").build()))
          .setRowSchema(CG_SCHEMA_1);
  PCollection<Row> pc2 =
      pipeline
          .apply(
              "Create2",
              Create.of(Row.withSchema(CG_SCHEMA_1).addValues("user1", 9, "us").build()))
          .setRowSchema(CG_SCHEMA_1);

  TupleTag<Row> pc1Tag = new TupleTag<>("pc1");
  TupleTag<Row> pc2Tag = new TupleTag<>("pc2");

  // Joining a String field ("user") against an Int32 field ("count") yields
  // incompatible key schemas, which CoGroup rejects at expansion time.
  thrown.expect(IllegalStateException.class);
  PCollection<KV<Row, Row>> joined =
      PCollectionTuple.of(pc1Tag, pc1)
          .and(pc2Tag, pc2)
          .apply("CoGroup", CoGroup.byFieldNames(pc1Tag, "user").byFieldNames(pc2Tag, "count"));
  pipeline.run();
}
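// For contrast with the mismatched-key test above: a sketch where both inputs
// are joined on the same "user" field, using the CoGroup API vintage shown in
// the test. Schemas and values are illustrative assumptions.
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.transforms.CoGroup;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.sdk.values.TupleTag;

public class CoGroupMatchingKeysExample {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    Schema schema =
        Schema.builder()
            .addStringField("user")
            .addInt32Field("count")
            .addStringField("country")
            .build();

    PCollection<Row> pc1 =
        pipeline
            .apply("Create1", Create.of(Row.withSchema(schema).addValues("user1", 1, "us").build()))
            .setRowSchema(schema);
    PCollection<Row> pc2 =
        pipeline
            .apply("Create2", Create.of(Row.withSchema(schema).addValues("user1", 9, "us").build()))
            .setRowSchema(schema);

    TupleTag<Row> pc1Tag = new TupleTag<>("pc1");
    TupleTag<Row> pc2Tag = new TupleTag<>("pc2");

    // Both sides key on "user", so the key schemas are compatible and the
    // expansion succeeds, unlike the String-vs-Int32 mismatch above.
    PCollection<KV<Row, Row>> joined =
        PCollectionTuple.of(pc1Tag, pc1)
            .and(pc2Tag, pc2)
            .apply("CoGroup", CoGroup.byFieldNames(pc1Tag, "user").byFieldNames(pc2Tag, "user"));

    pipeline.run().waitUntilFinish();
  }
}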