/**
 * Converts an Avro {@link GenericRecord} into a Beam {@link Row} that uses the given schema.
 *
 * <p>Values are looked up in the record by Avro field name, walking the fields of the record's
 * own Avro schema in order. The value found for the Avro field at position {@code i} is converted
 * with the Beam field at position {@code i} of {@code schema}, so the two schemas are matched by
 * position, not by name.
 *
 * @param record the Avro record to read values from
 * @param schema the Beam schema of the resulting row
 * @return a row holding one converted value per field of the record's Avro schema
 */
public static Row toBeamRow(GenericRecord record, Schema schema) {
  // Resolve the Avro field list once instead of on every loop iteration.
  List<org.apache.avro.Schema.Field> avroFields = record.getSchema().getFields();
  // Typed (non-raw) list, presized to the number of values that will be added.
  List<Object> values = new ArrayList<>(avroFields.size());
  for (int i = 0; i < avroFields.size(); i++) {
    org.apache.avro.Schema.Field avroField = avroFields.get(i);
    values.add(AvroUtils.convertAvroFormat(schema.getField(i), record.get(avroField.name())));
  }
  return Row.withSchema(schema).addValues(values).build();
}
/**
 * Returns a {@link Collector} that accumulates the collected elements into a list and then
 * builds a {@link Row} from that list using the supplied {@code schema}.
 *
 * <p>The accumulation list is created with an initial capacity of {@code schema.getFieldCount()}.
 */
public static <T> Collector<T, List<Object>, Row> toRow(Schema schema) {
  return Collector.of(
      // Supplier: a fresh accumulation list.
      () -> new ArrayList<>(schema.getFieldCount()),
      // Accumulator: append each element.
      (accumulator, element) -> accumulator.add(element),
      // Combiner: fold the second partial result into the first.
      (first, second) -> {
        first.addAll(second);
        return first;
      },
      // Finisher: turn the accumulated values into a row.
      accumulated -> Row.withSchema(schema).addValues(accumulated).build());
}
/**
 * Merges the key row and the value row of the element into a single output row.
 *
 * <p>The output values are the key row's values followed by the value row's values. When
 * {@code windowStartFieldIndex} is not {@code -1}, the start of the window (cast to
 * {@link IntervalWindow}) is inserted at that index before the row is built with
 * {@code outputSchema}.
 */
@ProcessElement
public void processElement(
    @Element KV<Row, Row> kvRow, BoundedWindow window, OutputReceiver<Row> o) {
  Row keyRow = kvRow.getKey();
  Row valueRow = kvRow.getValue();

  int totalFields = keyRow.getValues().size() + valueRow.getValues().size();
  List<Object> merged = Lists.newArrayListWithCapacity(totalFields);
  merged.addAll(keyRow.getValues());
  merged.addAll(valueRow.getValues());

  if (windowStartFieldIndex != -1) {
    IntervalWindow intervalWindow = (IntervalWindow) window;
    merged.add(windowStartFieldIndex, intervalWindow.start());
  }

  Row outputRow = Row.withSchema(outputSchema).addValues(merged).build();
  o.output(outputRow);
}
};
/** Checks that {@code LENGTH(f_bytes)} yields the lengths 10, 0 and 2 for the matching rows. */
@Test
public void testLength() throws Exception {
  Schema resultType = Schema.builder().addInt64Field("field").build();
  Row lengthTen = Row.withSchema(resultType).addValues(10L).build();
  Row lengthZero = Row.withSchema(resultType).addValues(0L).build();
  Row lengthTwo = Row.withSchema(resultType).addValues(2L).build();

  String sql = "SELECT LENGTH(f_bytes) FROM PCOLLECTION WHERE f_func = 'LENGTH'";
  PCollection<Row> result = boundedInputBytes.apply("testUdf", SqlTransform.query(sql));

  PAssert.that(result).containsInAnyOrder(lengthTen, lengthZero, lengthTwo);
  pipeline.run().waitUntilFinish();
}
/**
 * Returns a single fixed row {@code (1, "SITE1")} built with {@link #getSchema()}; the
 * {@code lookupSubRow} argument is not consulted.
 */
@Override
public List<Row> seekRow(Row lookupSubRow) {
  Row fixedRow = Row.withSchema(getSchema()).addValues(1, "SITE1").build();
  return Arrays.asList(fixedRow);
}
}
/** Checks that indexing {@code f_stringArr[1]} selects "111" and "33" from the two input rows. */
@Test
public void testAccessArrayElement() {
  PCollection<Row> input = pCollectionOf2Elements();

  Schema resultType = Schema.builder().addStringField("f_arrElem").build();

  String sql = "SELECT f_stringArr[1] FROM PCOLLECTION";
  PCollection<Row> result = input.apply("sqlQuery", SqlTransform.query(sql));

  Row firstExpected = Row.withSchema(resultType).addValues("111").build();
  Row secondExpected = Row.withSchema(resultType).addValues("33").build();
  PAssert.that(result).containsInAnyOrder(firstExpected, secondExpected);

  pipeline.run();
}
/** Checks that selecting the row-typed column {@code col} yields the inner row itself. */
@Test
public void testBasicRow() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);

  String query = "SELECT col FROM basicRowTestTable";
  PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(query));

  Row expected = Row.withSchema(innerRowSchema).addValues("innerStr", 1L).build();
  PAssert.that(stream).containsInAnyOrder(expected);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
/** Checks that indexing into an array-of-rows column yields the selected inner row. */
@Test
public void testArrayWithRow() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);

  String query = "SELECT arrayWithRowTestTable.col[1] FROM arrayWithRowTestTable";
  PCollection<Row> stream = BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(query));

  Row expected = Row.withSchema(innerRowSchema).addValues("str", 1L).build();
  PAssert.that(stream).containsInAnyOrder(expected);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
/**
 * Runs a {@code GROUP BY f_int2} count over {@code input} and asserts that the only result row
 * is {@code (0, 4L)}.
 */
private void runAggregationWithoutWindow(PCollection<Row> input) throws Exception {
  // NOTE(review): the query aliases the count column as `getFieldCount`, while the expected
  // schema below names that field "size" - confirm the mismatch is intentional.
  String sql = "SELECT f_int2, COUNT(*) AS `getFieldCount` FROM PCOLLECTION GROUP BY f_int2";
  PCollection<Row> result =
      input.apply("testAggregationWithoutWindow", SqlTransform.query(sql));

  Schema resultType =
      Schema.builder()
          .addInt32Field("f_int2")
          .addInt64Field("size")
          .build();
  Row expected = Row.withSchema(resultType).addValues(0, 4L).build();

  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/**
 * Re-emits the input row under {@code schema}, copying the string at index 0 and the int32 at
 * index 1.
 */
@ProcessElement
public void process(@Element Row row, OutputReceiver<Row> r) {
  Row converted =
      Row.withSchema(schema).addValues(row.getString(0), row.getInt32(1)).build();
  r.output(converted);
}
}))
/** Checks that applying {@code Unnest} to rows of {@code SIMPLE_SCHEMA} leaves them unchanged. */
@Test
@Category(NeedsRunner.class)
public void testFlatSchema() {
  // Three rows: (0, "0"), (1, "1"), (2, "2").
  List<Row> rows =
      IntStream.range(0, 3)
          .mapToObj(n -> Row.withSchema(SIMPLE_SCHEMA).addValues(n, String.valueOf(n)).build())
          .collect(Collectors.toList());

  PCollection<Row> input = pipeline.apply(Create.of(rows).withRowSchema(SIMPLE_SCHEMA));
  PCollection<Row> unnested = input.apply(Unnest.create());

  PAssert.that(unnested).containsInAnyOrder(rows);
  pipeline.run();
}
/**
 * Builds a {@link Row} for {@code schema} in which every field value is {@code null}.
 *
 * @param schema the schema of the row; one {@code null} is supplied per field
 * @return the row built from {@code schema.getFieldCount()} null values
 */
public static Row nullRow(Schema schema) {
  final int fieldCount = schema.getFieldCount();
  return Row.withSchema(schema).addValues(Collections.nCopies(fieldCount, null)).build();
}
}
/** Checks that a JSON array of numbers is deserialized into an INT32 array field. */
@Test
public void testParsesArrayField() throws Exception {
  Schema schema =
      Schema.builder()
          .addInt32Field("f_int32")
          .addArrayField("f_intArray", FieldType.INT32)
          .build();

  String rowString =
      "{\n"
          + "\"f_int32\" : 32,\n"
          + "\"f_intArray\" : [ 1, 2, 3, 4, 5]\n"
          + "}";

  RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema);
  Row actual = newObjectMapperWith(deserializer).readValue(rowString, Row.class);

  Row expected = Row.withSchema(schema).addValues(32, Arrays.asList(1, 2, 3, 4, 5)).build();

  assertEquals(expected, actual);
}
/**
 * Convenience factory: builds a {@link Row} of the given schema from the supplied values.
 *
 * @param schema the schema of the row
 * @param values the field values, passed through to the row builder as given
 * @return the built row
 */
private Row row(Schema schema, Object... values) {
  return Row.withSchema(schema)
      .addValues(values)
      .build();
}
}
/**
 * Shorthand for building a {@link Row} with {@code schema} out of {@code values}.
 *
 * @param schema the schema the row is built with
 * @param values the values handed to the row builder
 * @return the resulting row
 */
private Row row(Schema schema, Object... values) {
  return Row
      .withSchema(schema)
      .addValues(values)
      .build();
}
}
/**
 * Builds a row with {@code messageSchema()} from the element's field values and outputs it.
 *
 * <p>If an {@link UnsupportedRowJsonException} is raised, the original element is sent to
 * {@code DLQ_TAG} when {@code useDlq()} is true; otherwise the failure is rethrown wrapped in a
 * {@link RuntimeException}.
 */
@DoFn.ProcessElement
public void processElement(ProcessContext context) {
  try {
    List<Object> parsedValues = getFieldValues(context);
    Row message = Row.withSchema(messageSchema()).addValues(parsedValues).build();
    context.output(message);
  } catch (UnsupportedRowJsonException jsonException) {
    if (!useDlq()) {
      throw new RuntimeException("Error parsing message", jsonException);
    }
    // Dead-letter output is enabled: forward the unparsed element instead of failing.
    context.output(DLQ_TAG, context.element());
  }
}
/**
 * Registers {@code input} as {@code TABLE_A}, selects the literal {@code 1} from it and asserts
 * that the result contains the single row {@code (1)}.
 */
public void runLiteralField(PCollection<Row> input) throws Exception {
  String sql = "SELECT 1 as literal_field FROM TABLE_A";

  PCollectionTuple tables = PCollectionTuple.of(new TupleTag<>("TABLE_A"), input);
  PCollection<Row> result = tables.apply("testLiteralField", SqlTransform.query(sql));

  Schema resultType = Schema.builder().addInt32Field("literal_field").build();
  Row expected = Row.withSchema(resultType).addValues(1).build();

  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/**
 * Re-emits the input row under {@code schema2}, copying the string at index 0 and the int32 at
 * index 1.
 */
@ProcessElement
public void process(@Element Row row, OutputReceiver<Row> r) {
  Row rebuilt =
      Row.withSchema(schema2).addValues(row.getString(0), row.getInt32(1)).build();
  r.output(rebuilt);
}
}))
/**
 * Checks that building a row with two values against a three-field schema raises
 * {@link IllegalArgumentException}.
 */
@Test
public void testThrowsForIncorrectNumberOfFields() {
  Schema.Field intField = Schema.Field.of("f_int", FieldType.INT32);
  Schema.Field stringField = Schema.Field.of("f_str", FieldType.STRING);
  Schema.Field doubleField = Schema.Field.of("f_double", FieldType.DOUBLE);
  Schema type = Stream.of(intField, stringField, doubleField).collect(toSchema());

  thrown.expect(IllegalArgumentException.class);

  // Only two of the three declared fields are supplied.
  Row.withSchema(type).addValues(1, "2").build();
}
/**
 * Parses a one-field JSON object with a deserializer for a one-field schema of
 * {@code fieldType} and asserts that the parsed row equals a row holding
 * {@code expectedRowFieldValue}.
 *
 * @param fieldType the Beam type of the single field; its lower-cased type name, prefixed with
 *     {@code f_}, is used as the field name
 * @param jsonFieldValue the JSON text of the field value
 * @param expectedRowFieldValue the value the parsed row is expected to hold
 */
private void testSupportedConversion(
    FieldType fieldType, String jsonFieldValue, Object expectedRowFieldValue) throws Exception {
  String typeName = fieldType.getTypeName().name().toLowerCase();
  String fieldName = "f_" + typeName;

  Schema schema = schemaWithField(fieldName, fieldType);
  Row expected = Row.withSchema(schema).addValues(expectedRowFieldValue).build();

  RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema);
  ObjectMapper jsonParser = newObjectMapperWith(deserializer);
  Row actual = jsonParser.readValue(jsonObjectWith(fieldName, jsonFieldValue), Row.class);

  assertEquals(expected, actual);
}