/**
 * Converts an Avro {@link GenericRecord} into a Beam {@link Row} built with the given schema.
 *
 * <p>Fields are paired by position: the Avro field at index {@code i} of the record's own Avro
 * schema is read from the record by name and converted using the Beam field at index {@code i}
 * of {@code schema}.
 *
 * @param record the Avro record to read values from
 * @param schema the Beam schema used both for per-field conversion and for the resulting row
 * @return a row holding one converted value per field of the record's Avro schema
 */
public static Row toBeamRow(GenericRecord record, Schema schema) {
  // Resolve the Avro field list once instead of on every loop iteration.
  List<org.apache.avro.Schema.Field> avroFields = record.getSchema().getFields();
  // Parameterized (no raw type) and pre-sized to the known number of fields.
  List<Object> values = new ArrayList<>(avroFields.size());
  for (int i = 0; i < avroFields.size(); i++) {
    org.apache.avro.Schema.Field avroField = avroFields.get(i);
    values.add(AvroUtils.convertAvroFormat(schema.getField(i), record.get(avroField.name())));
  }
  return Row.withSchema(schema).addValues(values).build();
}
/** Applies {@code Unnest} to rows of {@code SIMPLE_SCHEMA} and expects the same rows back. */
@Test
@Category(NeedsRunner.class)
public void testFlatSchema() {
  // Three rows of the form (i, "i") for i = 0, 1, 2.
  List<Row> expected =
      IntStream.rangeClosed(0, 2)
          .mapToObj(n -> Row.withSchema(SIMPLE_SCHEMA).addValues(n, String.valueOf(n)).build())
          .collect(Collectors.toList());

  PCollection<Row> input = pipeline.apply(Create.of(expected).withRowSchema(SIMPLE_SCHEMA));
  PCollection<Row> unnested = input.apply(Unnest.create());

  PAssert.that(unnested).containsInAnyOrder(expected);
  pipeline.run();
}
/**
 * Expects an {@link IllegalArgumentException} when two values are supplied for a schema that
 * declares three fields.
 */
@Test
public void testThrowsForIncorrectNumberOfFields() {
  Schema threeFieldSchema =
      Stream.of(
              Schema.Field.of("f_int", FieldType.INT32),
              Schema.Field.of("f_str", FieldType.STRING),
              Schema.Field.of("f_double", FieldType.DOUBLE))
          .collect(toSchema());

  // Register the expectation before any Row.Builder call so it covers the whole construction.
  thrown.expect(IllegalArgumentException.class);
  Row.Builder tooFewValues = Row.withSchema(threeFieldSchema).addValues(1, "2");
  tooFewValues.build();
}
/**
 * Emits a row of {@code schema} holding the string at index 0 and the INT32 at index 1 of the
 * input row.
 */
@ProcessElement
public void process(@Element Row row, OutputReceiver<Row> r) {
  Row converted = Row.withSchema(schema).addValues(row.getString(0), row.getInt32(1)).build();
  r.output(converted);
}
}))
/** Builds a row with an array-of-INT32-arrays field and reads the nested lists back. */
@Test
public void testCreatesArrayArray() {
  List<List<Integer>> nested = Lists.<List<Integer>>newArrayList(Lists.newArrayList(1, 2, 3, 4));

  FieldType arrayOfIntArrays = FieldType.array(FieldType.array(FieldType.INT32));
  Schema schema = Stream.of(Schema.Field.of("array", arrayOfIntArrays)).collect(toSchema());

  Row built = Row.withSchema(schema).addArray(nested).build();
  assertEquals(nested, built.getArray("array"));
}
/**
 * Registers {@code STRING_SCHEMA} for {@link String} in the pipeline's schema registry and checks
 * that {@code Create.of} output then carries a {@code SchemaCoder}.
 */
@Test
public void testCreateRegisteredSchema() {
  p.getSchemaRegistry()
      .registerSchemaForClass(
          String.class,
          STRING_SCHEMA,
          // String -> Row
          value -> Row.withSchema(STRING_SCHEMA).addValue(value).build(),
          // Row -> String
          row -> row.getString("field"));

  PCollection<String> created = p.apply(Create.of("a", "b", "c", "d"));
  assertThat(created.getCoder(), instanceOf(SchemaCoder.class));
}
/**
 * Returns a to-row function only for {@code TestSchemaClass}; every other type yields
 * {@code null}.
 *
 * <p>The returned function ignores its argument and always produces a row of
 * {@code EMPTY_SCHEMA} with no values added.
 */
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  boolean isTestSchemaClass = typeDescriptor.equals(TypeDescriptor.of(TestSchemaClass.class));
  if (!isTestSchemaClass) {
    return null;
  }
  return ignored -> Row.withSchema(EMPTY_SCHEMA).build();
}
/**
 * Convenience factory: builds a {@link Row} of {@code schema} from the given values, in order.
 */
private Row row(Schema schema, Object... values) {
  Row.Builder populated = Row.withSchema(schema).addValues(values);
  return populated.build();
}
}
/**
 * Convenience factory: builds a {@link Row} of {@code schema} from the given values, in order.
 */
private Row row(Schema schema, Object... values) {
  Row.Builder populated = Row.withSchema(schema).addValues(values);
  return populated.build();
}
}
/**
 * Builds a row of {@code messageSchema()} from the field values of the current element and
 * outputs it.
 *
 * <p>If an {@code UnsupportedRowJsonException} is raised along the way, the original element is
 * sent to {@code DLQ_TAG} when {@code useDlq()} is true; otherwise the failure is rethrown
 * wrapped in a {@link RuntimeException}.
 */
@DoFn.ProcessElement
public void processElement(ProcessContext context) {
  try {
    List<Object> fieldValues = getFieldValues(context);
    Row parsed = Row.withSchema(messageSchema()).addValues(fieldValues).build();
    context.output(parsed);
  } catch (UnsupportedRowJsonException jsonException) {
    if (!useDlq()) {
      throw new RuntimeException("Error parsing message", jsonException);
    }
    context.output(DLQ_TAG, context.element());
  }
}
/** Deserializes a JSON object with an INT32 field and an INT32 array field into a row. */
@Test
public void testParsesArrayField() throws Exception {
  Schema schema =
      Schema.builder()
          .addInt32Field("f_int32")
          .addArrayField("f_intArray", FieldType.INT32)
          .build();
  Row expectedRow = Row.withSchema(schema).addValues(32, Arrays.asList(1, 2, 3, 4, 5)).build();

  String json = "{\n" + "\"f_int32\" : 32,\n" + "\"f_intArray\" : [ 1, 2, 3, 4, 5]\n" + "}";
  Row parsedRow =
      newObjectMapperWith(RowJsonDeserializer.forSchema(schema)).readValue(json, Row.class);

  assertEquals(expectedRow, parsedRow);
}
/**
 * Returns a function that wraps a value as the only added value of a row whose schema is
 * {@code schemaFor(typeDescriptor)}.
 *
 * <p>The schema lookup happens inside the returned function, i.e. on every invocation.
 */
@Override
public <T> SerializableFunction<T, Row> toRowFunction(TypeDescriptor<T> typeDescriptor) {
  return value -> {
    Row.Builder builder = Row.withSchema(schemaFor(typeDescriptor));
    return builder.addValue(value).build();
  };
}
/**
 * Runs {@code SELECT 1 as literal_field FROM TABLE_A} over {@code input} and asserts that the
 * result contains exactly one row holding the INT32 value 1.
 *
 * @param input the collection registered as {@code TABLE_A}
 */
public void runLiteralField(PCollection<Row> input) throws Exception {
  Schema expectedSchema = Schema.builder().addInt32Field("literal_field").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues(1).build();

  String query = "SELECT 1 as literal_field FROM TABLE_A";
  PCollectionTuple tables = PCollectionTuple.of(new TupleTag<>("TABLE_A"), input);
  PCollection<Row> result = tables.apply("testLiteralField", SqlTransform.query(query));

  PAssert.that(result).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish();
}
/**
 * Outputs one row of {@code schema} per element of the array stored at index 0 of the input
 * row; each output row has that element as its added value.
 */
@ProcessElement
public void process(@Element Row inputRow, OutputReceiver<Row> output) {
  inputRow
      .getArray(0)
      .forEach(element -> output.output(Row.withSchema(schema).addValue(element).build()));
}
}
/**
 * Emits a row of {@code schema2} holding the string at index 0 and the INT32 at index 1 of the
 * input row.
 */
@ProcessElement
public void process(@Element Row row, OutputReceiver<Row> r) {
  Row converted = Row.withSchema(schema2).addValues(row.getString(0), row.getInt32(1)).build();
  r.output(converted);
}
}))
@Override public Row extractOutput(Object[] accumulator) { // Build a row containing a field for every aggregate that was registered. CoCombineResult coCombineResult = getComposedCombineFn().extractOutput(accumulator); Row.Builder output = Row.withSchema(getOutputSchema()); for (FieldAggregation fieldAggregation : getFieldAggregations()) { Object aggregate = coCombineResult.get(fieldAggregation.combineTag); output.addValue(aggregate); } return output.build(); } }
/**
 * Asserts that a single-field JSON object deserializes into the expected single-field row.
 *
 * <p>The field is named {@code "f_"} followed by the lower-cased type name of {@code fieldType}.
 *
 * @param fieldType type of the single schema field
 * @param jsonFieldValue value passed to {@code jsonObjectWith} for that field
 * @param expectedRowFieldValue value the parsed row is expected to hold for that field
 */
private void testSupportedConversion(
    FieldType fieldType, String jsonFieldValue, Object expectedRowFieldValue) throws Exception {
  // Locale.ROOT keeps the generated name independent of the JVM default locale
  // (e.g. a Turkish default would otherwise lower-case "I" to a dotless "ı").
  String fieldName = "f_" + fieldType.getTypeName().name().toLowerCase(java.util.Locale.ROOT);
  Schema schema = schemaWithField(fieldName, fieldType);
  Row expectedRow = Row.withSchema(schema).addValues(expectedRowFieldValue).build();

  ObjectMapper jsonParser = newObjectMapperWith(RowJsonDeserializer.forSchema(schema));
  Row parsedRow = jsonParser.readValue(jsonObjectWith(fieldName, jsonFieldValue), Row.class);

  assertEquals(expectedRow, parsedRow);
}
/**
 * Builds a row with an array-of-arrays field whose outer and inner arrays both contain nulls,
 * and reads the nested lists back.
 */
@Test
public void testCreatesArrayArrayWithNullElement() {
  List<List<Integer>> nested =
      Lists.<List<Integer>>newArrayList(Lists.newArrayList(1, null, 3, null), null);

  // The `true` flag is passed to FieldType.array at both the inner and the outer level.
  FieldType arrayOfIntArrays = FieldType.array(FieldType.array(FieldType.INT32, true), true);
  Schema schema = Stream.of(Schema.Field.of("array", arrayOfIntArrays)).collect(toSchema());

  Row built = Row.withSchema(schema).addArray(nested).build();
  assertEquals(nested, built.getArray("array"));
}
/**
 * Supplies {@code STRING_SCHEMA} and its conversion functions directly on {@code Create.of} and
 * checks that the output carries a {@code SchemaCoder}.
 */
@Test
public void testCreateExplicitSchema() {
  PCollection<String> created =
      p.apply(
          Create.of("a", "b", "c", "d")
              .withSchema(
                  STRING_SCHEMA,
                  // String -> Row
                  value -> Row.withSchema(STRING_SCHEMA).addValue(value).build(),
                  // Row -> String
                  row -> row.getString("field")));

  assertThat(created.getCoder(), instanceOf(SchemaCoder.class));
}
/**
 * Unnests {@code input} into a new row of {@code unnestedSchema}.
 *
 * <p>Creates a builder for the target schema, hands it to the builder-taking
 * {@code unnestRow} overload together with the input, and builds the result.
 */
static Row unnestRow(Row input, Schema unnestedSchema) {
  Row.Builder flattened = Row.withSchema(unnestedSchema);
  unnestRow(input, flattened);
  return flattened.build();
}