/**
 * Builds a bounded {@link PCollection} containing the given rows, registered with
 * {@code SOURCE_SCHEMA} using identity to/from-Row conversions (elements already are Rows).
 */
private PCollection<Row> createPCollection(Pipeline pipeline, Row... rows) {
  List<Row> rowList = Arrays.asList(rows);
  return pipeline.apply(
      Create.of(rowList)
          .withSchema(
              SOURCE_SCHEMA,
              SerializableFunctions.identity(),
              SerializableFunctions.identity()));
}
/** Builds the shared two-row bounded input (int key plus string-array column). */
private PCollection<Row> pCollectionOf2Elements() {
  Row first =
      Row.withSchema(INPUT_SCHEMA)
          .addValues(1)
          .addArray(Arrays.asList("111", "222"))
          .build();
  Row second =
      Row.withSchema(INPUT_SCHEMA)
          .addValues(2)
          .addArray(Arrays.asList("33", "44", "55"))
          .build();
  return pipeline.apply(
      "boundedInput1",
      Create.of(first, second)
          .withSchema(
              INPUT_SCHEMA,
              SerializableFunctions.identity(),
              SerializableFunctions.identity()));
}
}
/** Verifies that {@code Create.withSchema} attaches a {@link SchemaCoder} to the output. */
@Test
public void testCreateExplicitSchema() {
  PCollection<String> output =
      p.apply(
          Create.of("a", "b", "c", "d")
              .withSchema(
                  STRING_SCHEMA,
                  // to-Row: wrap each string in a one-field Row
                  s -> Row.withSchema(STRING_SCHEMA).addValue(s).build(),
                  // from-Row: unwrap the single "field" column
                  r -> r.getString("field")));
  assertThat(output.getCoder(), instanceOf(SchemaCoder.class));
}
1, Row.withSchema(nestedSchema).addValues(312, "CC", 313).build()) .build()) .withSchema( inputType, SerializableFunctions.identity(), SerializableFunctions.identity()));
1, Row.withSchema(nestedSchema).addValues(312, "CC", 313).build()) .build()) .withSchema( inputType, SerializableFunctions.identity(), SerializableFunctions.identity()));
pipeline.apply( Create.of(Row.withSchema(INPUT_ROW_SCHEMA).addValues(1).addValue("20181018").build()) .withSchema( INPUT_ROW_SCHEMA, SerializableFunctions.identity(),
// Exercises @FieldAccess on a DoFn parameter: the DoFn receives each schema'd element as a
// Row restricted to the declared descriptor (all fields here) and formats it as "s:i".
@Test
@Category({ValidatesRunner.class, UsesSchema.class})
public void testFieldAccessSchemaPipeline() {
  List<MyPojo> pojoList =
      Lists.newArrayList(new MyPojo("a", 1), new MyPojo("b", 2), new MyPojo("c", 3));
  Schema schema =
      Schema.builder().addStringField("string_field").addInt32Field("integer_field").build();
  PCollection<String> output =
      pipeline
          .apply(
              Create.of(pojoList)
                  .withSchema(
                      schema,
                      // to-Row conversion for MyPojo
                      o -> Row.withSchema(schema).addValues(o.stringField, o.integerField).build(),
                      // from-Row conversion back to MyPojo
                      r -> new MyPojo(r.getString("string_field"), r.getInt32("integer_field"))))
          .apply(
              ParDo.of(
                  new DoFn<MyPojo, String>() {
                    // Descriptor registered under id "foo", referenced by the @FieldAccess
                    // annotation on the process() parameter below.
                    @FieldAccess("foo")
                    final FieldAccessDescriptor fieldAccess = FieldAccessDescriptor.withAllFields();

                    @ProcessElement
                    public void process(@FieldAccess("foo") Row row, OutputReceiver<String> r) {
                      r.output(row.getString(0) + ":" + row.getInt32(1));
                    }
                  }));
  PAssert.that(output).containsInAnyOrder("a:1", "b:2", "c:3");
  pipeline.run();
}
@Test public void testUnnestNamedLiteral() { PCollection<Row> input = pipeline.apply( "boundedInput1", Create.empty(TypeDescriptor.of(Row.class)) .withSchema( INPUT_SCHEMA, SerializableFunctions.identity(), SerializableFunctions.identity())); // Because we have a multi-part FROM the DSL considers it multi-input TupleTag<Row> mainTag = new TupleTag<Row>("main") {}; PCollectionTuple inputTuple = PCollectionTuple.of(mainTag, input); Schema resultType = Schema.builder().addStringField("f_string").build(); PCollection<Row> result = inputTuple.apply( "sqlQuery", SqlTransform.query("SELECT * FROM UNNEST (ARRAY ['a', 'b', 'c']) AS t(f_string)")); PAssert.that(result) .containsInAnyOrder( Row.withSchema(resultType).addValues("a").build(), Row.withSchema(resultType).addValues("b").build(), Row.withSchema(resultType).addValues("c").build()); pipeline.run(); }
// CASTs a yyyyMMdd string to DATE by first rewriting it into ISO yyyy-MM-dd form via
// SUBSTRING/TRIM concatenation, then asserting the resulting DateTime value.
@Test
public void testCastToDate() {
  PCollection<Row> input =
      pipeline.apply(
          Create.of(Row.withSchema(INPUT_ROW_SCHEMA).addValues(1).addValue("20181018").build())
              .withSchema(
                  INPUT_ROW_SCHEMA,
                  SerializableFunctions.identity(),
                  SerializableFunctions.identity()));

  Schema resultType =
      Schema.builder().addInt32Field("f_int").addNullableField("f_date", DATETIME).build();

  PCollection<Row> result =
      input.apply(
          SqlTransform.query(
              // Rebuild "20181018" as "2018-10-18" before the CAST, since the ISO form is
              // what the DATE cast accepts.
              "SELECT f_int, \n"
                  + " CAST( \n"
                  + " SUBSTRING(TRIM(f_string) FROM 1 FOR 4) \n"
                  + " ||'-' \n"
                  + " ||SUBSTRING(TRIM(f_string) FROM 5 FOR 2) \n"
                  + " ||'-' \n"
                  + " ||SUBSTRING(TRIM(f_string) FROM 7 FOR 2) as DATE) \n"
                  + "FROM PCOLLECTION"));

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(resultType).addValues(1, new DateTime(2018, 10, 18, 0, 0)).build());
  pipeline.run();
}
/** End-to-end check that a schema'd Create feeds a DoFn consuming elements as Rows. */
@Test
@Category({ValidatesRunner.class, UsesSchema.class})
public void testSimpleSchemaPipeline() {
  List<MyPojo> inputPojos =
      Lists.newArrayList(new MyPojo("a", 1), new MyPojo("b", 2), new MyPojo("c", 3));
  Schema pojoSchema =
      Schema.builder().addStringField("string_field").addInt32Field("integer_field").build();

  PCollection<String> formatted =
      pipeline
          .apply(
              Create.of(inputPojos)
                  .withSchema(
                      pojoSchema,
                      // to-Row conversion for MyPojo
                      o ->
                          Row.withSchema(pojoSchema)
                              .addValues(o.stringField, o.integerField)
                              .build(),
                      // from-Row conversion back to MyPojo
                      r -> new MyPojo(r.getString("string_field"), r.getInt32("integer_field"))))
          .apply(
              ParDo.of(
                  new DoFn<MyPojo, String>() {
                    @ProcessElement
                    public void process(@Element Row row, OutputReceiver<String> out) {
                      out.output(row.getString(0) + ":" + row.getInt32(1));
                    }
                  }));

  PAssert.that(formatted).containsInAnyOrder("a:1", "b:2", "c:3");
  pipeline.run();
}
/**
 * Verifies that declaring a {@code @FieldAccess} descriptor referencing a field ("baad") that
 * does not exist in the element schema fails pipeline construction with
 * {@link IllegalArgumentException}.
 */
@Test
@Category({ValidatesRunner.class, UsesSchema.class})
public void testUnmatchedSchema() {
  List<MyPojo> pojoList =
      Lists.newArrayList(new MyPojo("a", 1), new MyPojo("b", 2), new MyPojo("c", 3));
  Schema schema =
      Schema.builder().addStringField("string_field").addInt32Field("integer_field").build();

  // Expect failure at graph-construction time, before the pipeline is run.
  thrown.expect(IllegalArgumentException.class);
  pipeline
      .apply(
          Create.of(pojoList)
              .withSchema(
                  schema,
                  // to-Row conversion for MyPojo
                  o -> Row.withSchema(schema).addValues(o.stringField, o.integerField).build(),
                  // from-Row conversion back to MyPojo
                  r -> new MyPojo(r.getString("string_field"), r.getInt32("integer_field"))))
      .apply(
          ParDo.of(
              new DoFn<MyPojo, Void>() {
                // final for consistency with the other @FieldAccess tests in this file: the
                // descriptor is a constant and should never be reassigned.
                @FieldAccess("a")
                final FieldAccessDescriptor fieldAccess =
                    FieldAccessDescriptor.withFieldNames("baad");

                @ProcessElement
                public void process(@FieldAccess("a") Row row) {}
              }));
}
/** CASTs a yyyyMMdd string column directly to DATE and checks the resulting value. */
@Test
public void testCastToDate2() {
  PCollection<Row> rows =
      pipeline.apply(
          Create.of(Row.withSchema(INPUT_ROW_SCHEMA).addValues(1).addValue("20181018").build())
              .withSchema(
                  INPUT_ROW_SCHEMA,
                  SerializableFunctions.identity(),
                  SerializableFunctions.identity()));

  Schema outputSchema =
      Schema.builder().addInt32Field("f_int").addNullableField("f_date", DATETIME).build();

  PCollection<Row> castResult =
      rows.apply(
          SqlTransform.query(
              "SELECT f_int, \n" + " CAST( \n" + " f_string AS DATE) \n" + "FROM PCOLLECTION"));

  Row expected =
      Row.withSchema(outputSchema).addValues(1, new DateTime(2018, 10, 18, 0, 0)).build();
  PAssert.that(castResult).containsInAnyOrder(expected);
  pipeline.run();
}
@Test public void testUnnestLiteral() { PCollection<Row> input = pipeline.apply( "boundedInput1", Create.empty(TypeDescriptor.of(Row.class)) .withSchema( INPUT_SCHEMA, SerializableFunctions.identity(), SerializableFunctions.identity())); // Because we have a multi-part FROM the DSL considers it multi-input TupleTag<Row> mainTag = new TupleTag<Row>("main") {}; PCollectionTuple inputTuple = PCollectionTuple.of(mainTag, input); Schema resultType = Schema.builder().addStringField("f_string").build(); PCollection<Row> result = inputTuple.apply( "sqlQuery", SqlTransform.query("SELECT * FROM UNNEST (ARRAY ['a', 'b', 'c'])")); PAssert.that(result) .containsInAnyOrder( Row.withSchema(resultType).addValues("a").build(), Row.withSchema(resultType).addValues("b").build(), Row.withSchema(resultType).addValues("c").build()); pipeline.run(); }
/** Seeds {@code boundedInput} with five six-column rows (three doubles, three ints). */
@Before
public void setUp() {
  Schema rowSchema =
      Schema.builder()
          .addDoubleField("f_double1")
          .addDoubleField("f_double2")
          .addDoubleField("f_double3")
          .addInt32Field("f_int1")
          .addInt32Field("f_int2")
          .addInt32Field("f_int3")
          .build();

  // One row per line: f_double1..3 followed by f_int1..3.
  List<Row> tableRows =
      TestUtils.RowsBuilder.of(rowSchema)
          .addRows(
              3.0, 1.0, 1.0, 3, 1, 0,
              4.0, 2.0, 2.0, 4, 2, 0,
              5.0, 3.0, 1.0, 5, 3, 0,
              6.0, 4.0, 2.0, 6, 4, 0,
              8.0, 4.0, 1.0, 8, 4, 0)
          .getRows();

  boundedInput =
      pipeline.apply(
          Create.of(tableRows)
              .withSchema(
                  rowSchema, SerializableFunctions.identity(), SerializableFunctions.identity()));
}
/**
 * Evaluates {@code SELECT <expr>} against a single dummy row and asserts the expression
 * evaluates to {@code true}, reporting the expression text on failure.
 */
@Override
public PDone expand(PBegin begin) {
  PCollection<Boolean> evaluated =
      begin
          .apply(
              Create.of(DUMMY_ROW)
                  .withSchema(
                      DUMMY_SCHEMA,
                      SerializableFunctions.identity(),
                      SerializableFunctions.identity()))
          .apply(SqlTransform.query("SELECT " + expr))
          .apply(MapElements.into(TypeDescriptors.booleans()).via(row -> row.getBoolean(0)));

  PAssert.that(evaluated)
      .satisfies(
          input -> {
            assertTrue("Test expression is false: " + expr, Iterables.getOnlyElement(input));
            return null;
          });
  return PDone.in(begin.getPipeline());
}
}
/** ELEMENT() over a single-element array column extracts that element. */
@Test
public void testSingleElement() throws Exception {
  Row singleRow =
      Row.withSchema(INPUT_SCHEMA).addValues(1).addArray(Arrays.asList("111")).build();

  PCollection<Row> input =
      pipeline.apply(
          "boundedInput1",
          Create.of(singleRow)
              .withSchema(
                  INPUT_SCHEMA,
                  SerializableFunctions.identity(),
                  SerializableFunctions.identity()));

  Schema outputSchema = Schema.builder().addStringField("f_arrElem").build();

  PCollection<Row> result =
      input.apply("sqlQuery", SqlTransform.query("SELECT ELEMENT(f_stringArr) FROM PCOLLECTION"));

  PAssert.that(result).containsInAnyOrder(Row.withSchema(outputSchema).addValues("111").build());
  pipeline.run();
}
/** Builds the shared two-row input whose second column is an integer-valued map. */
private PCollection<Row> pCollectionOf2Elements() {
  Row rowOne =
      Row.withSchema(INPUT_ROW_TYPE)
          .addValues(1)
          .addValue(ImmutableMap.of("key11", 11, "key22", 22))
          .build();
  Row rowTwo =
      Row.withSchema(INPUT_ROW_TYPE)
          .addValues(2)
          .addValue(ImmutableMap.of("key33", 33, "key44", 44, "key55", 55))
          .build();
  return pipeline.apply(
      "boundedInput1",
      Create.of(rowOne, rowTwo)
          .withSchema(
              INPUT_ROW_TYPE,
              SerializableFunctions.identity(),
              SerializableFunctions.identity()));
}
}
/** Seeds {@code boundedInput} with rows containing nulls in the two nullable int columns. */
@Before
public void setUp() {
  Schema nullableSchema =
      Schema.builder()
          .addNullableField("f_int1", Schema.FieldType.INT32)
          .addNullableField("f_int2", Schema.FieldType.INT32)
          .addInt32Field("f_int3")
          .build();

  List<Row> testRows =
      TestUtils.RowsBuilder.of(nullableSchema)
          .addRows(1, 5, 1)
          .addRows(null, 1, 1)
          .addRows(2, 1, 1)
          .addRows(null, 1, 1)
          .addRows(null, null, 1)
          .addRows(null, null, 1)
          .addRows(3, 2, 1)
          .getRows();

  boundedInput =
      PBegin.in(pipeline)
          .apply(Create.of(testRows).withSchema(nullableSchema, identity(), identity()));
}
/** Seeds {@code boundedInput} with seven three-column rows (int, double, int). */
@Before
public void setUp() {
  Schema rowSchema =
      Schema.builder()
          .addInt32Field("f_int")
          .addDoubleField("f_double")
          .addInt32Field("f_int2")
          .build();

  // One row per line: f_int, f_double, f_int2.
  List<Row> tableRows =
      TestUtils.RowsBuilder.of(rowSchema)
          .addRows(
              1, 1.0, 0,
              4, 4.0, 0,
              7, 7.0, 0,
              13, 13.0, 0,
              5, 5.0, 0,
              10, 10.0, 0,
              17, 17.0, 0)
          .getRows();

  boundedInput =
      pipeline.apply(
          Create.of(tableRows)
              .withSchema(
                  rowSchema, SerializableFunctions.identity(), SerializableFunctions.identity()));
}
/** Round-trip: a Row carrying the POJO's schema converts back into an equal {@code POJO1}. */
@Test
@Category(NeedsRunner.class)
public void testFromRows() {
  PCollection<POJO1> converted =
      pipeline
          .apply(
              Create.of(EXPECTED_ROW1)
                  .withSchema(
                      EXPECTED_SCHEMA1,
                      SerializableFunctions.identity(),
                      SerializableFunctions.identity()))
          .apply(Convert.fromRows(POJO1.class));
  PAssert.that(converted).containsInAnyOrder(new POJO1());
  pipeline.run();
}