/**
 * Pairs the input row with a key row built from its join columns.
 *
 * @param input the row to key
 * @return a {@link KV} whose key is the row collected from the values that {@code joinColumns}
 *     selects out of {@code input}, and whose value is {@code input} itself
 */
@Override
public KV<Row, Row> apply(Row input) {
  // Look up every join column in the input and assemble the looked-up values into the key row.
  Row joinKey =
      joinColumns
          .stream()
          .map(input::getValue)
          .collect(toRow(schema));

  return KV.of(joinKey, input);
}
/**
 * Builds a list of {@link Row}s from one flat list of field values.
 *
 * <p>The values are split into consecutive groups of {@code type.getFieldCount()} elements and
 * each group is collected into one row of the given schema, e.g.
 *
 * <pre>{@code
 * buildRows(
 *     schema,
 *     Arrays.asList(
 *         1, 1, 1, // the first row
 *         2, 2, 2, // the second row
 *         ...
 *     ))
 * }</pre>
 *
 * @param type the schema every resulting row is collected against
 * @param rowsValues the field values of all rows, concatenated in row order
 * @return one row per group of values, in the order the groups appear in {@code rowsValues}
 */
public static List<Row> buildRows(Schema type, List<?> rowsValues) {
  int valuesPerRow = type.getFieldCount();

  return Lists.partition(rowsValues, valuesPerRow)
      .stream()
      .map(rowValues -> rowValues.stream().collect(toRow(type)))
      .collect(toList());
}
}
/**
 * Converts one tuple of literals into a {@link Row} of the given schema.
 *
 * <p>The literal at each position is unwrapped with {@code getValue()} and handed to {@code
 * autoCastField} together with the schema field at the same position; the results are collected
 * in positional order.
 *
 * @param schema the schema the resulting row is collected against
 * @param tuple the literals of a single row, one per schema field position
 * @return the row assembled from the auto-cast literal values
 */
private Row tupleToRow(Schema schema, ImmutableList<RexLiteral> tuple) {
  int literalCount = tuple.size();

  return IntStream.range(0, literalCount)
      .mapToObj(
          position ->
              autoCastField(schema.getField(position), tuple.get(position).getValue()))
      .collect(toRow(schema));
}
}
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 *
 * <p>Cells are matched to Beam fields by name: for every field of {@code rowSchema}, the position
 * of the same-named field in {@code bqSchema} selects the cell that is read from {@code
 * jsonBqRow}. The raw cell value is then converted with {@code toBeamValue} using the Beam field
 * type.
 *
 * @param rowSchema the Beam schema of the row to build; decides which cells are read and in which
 *     order
 * @param bqSchema the BigQuery table schema describing the cell order of {@code jsonBqRow}
 * @param jsonBqRow the BigQuery row whose cells are converted
 * @return the Beam row collected from the converted cell values
 * @throws IllegalArgumentException if a field of {@code rowSchema} has no same-named field in
 *     {@code bqSchema}
 */
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();

  // Position of every BigQuery field within a row, keyed by field name.
  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));

  // Pull the raw cell values out of the BigQuery row, in Beam schema order.
  List<Object> rawJsonValues =
      rowSchema
          .getFields()
          .stream()
          .map(
              field -> {
                Integer cellIndex = bqFieldIndices.get(field.getName());
                if (cellIndex == null) {
                  // Fail with the offending field name instead of an unexplained
                  // NullPointerException from unboxing the missing index.
                  throw new IllegalArgumentException(
                      "Field '"
                          + field.getName()
                          + "' of the Beam schema is not present in the BigQuery table schema");
                }
                return jsonBqRow.getF().get(cellIndex).getV();
              })
          .collect(toList());

  // mapToObj keeps the indices primitive; no need to box them just to map them.
  return IntStream.range(0, rowSchema.getFieldCount())
      .mapToObj(
          fieldIndex ->
              toBeamValue(
                  rowSchema.getField(fieldIndex).getType(), rawJsonValues.get(fieldIndex)))
      .collect(toRow(rowSchema));
}
// For every field index of the schema, auto-cast the raw record value at that index against the
// schema field at the same index, then collect the results with toRow(schema).
IntStream.range(0, schema.getFieldCount()) .mapToObj(idx -> autoCastField(schema.getField(idx), rawRecord.get(idx))) .collect(toRow(schema)));
/** Checks that rows added to a created external table are returned by a plain SELECT. */
@Test
public void testSelectsFromExistingTable() throws Exception {
  TestTableProvider tableProvider = new TestTableProvider();
  Connection connection = JdbcDriver.connect(tableProvider);

  String createPerson = "CREATE EXTERNAL TABLE person (id BIGINT, name VARCHAR) TYPE 'test'";
  connection.createStatement().executeUpdate(createPerson);

  tableProvider.addRows("person", row(1L, "aaa"), row(2L, "bbb"));

  ResultSet queryResult =
      connection.createStatement().executeQuery("SELECT id, name FROM person");

  // Re-assemble every returned record into a Row so it can be compared with the added rows.
  List<Row> actualRows =
      readResultSet(queryResult)
          .stream()
          .map(columns -> columns.stream().collect(toRow(BASIC_SCHEMA)))
          .collect(Collectors.toList());

  assertThat(actualRows, containsInAnyOrder(row(1L, "aaa"), row(2L, "bbb")));
}
/**
 * Checks that {@code INSERT INTO ... SELECT} copies the rows of a source table into a created
 * target table, by selecting them back from the target.
 */
@Test
public void testInsertIntoCreatedTable() throws Exception {
  TestTableProvider tableProvider = new TestTableProvider();
  Connection connection = JdbcDriver.connect(tableProvider);

  // Target table first, then the source table, each through its own statement.
  String createTarget = "CREATE EXTERNAL TABLE person (id BIGINT, name VARCHAR) TYPE 'test'";
  connection.createStatement().executeUpdate(createTarget);

  String createSource = "CREATE EXTERNAL TABLE person_src (id BIGINT, name VARCHAR) TYPE 'test'";
  connection.createStatement().executeUpdate(createSource);

  tableProvider.addRows("person_src", row(1L, "aaa"), row(2L, "bbb"));

  connection.createStatement().execute("INSERT INTO person SELECT id, name FROM person_src");

  ResultSet queryResult =
      connection.createStatement().executeQuery("SELECT id, name FROM person");

  // Re-assemble every returned record into a Row so it can be compared with the source rows.
  List<Row> actualRows =
      readResultSet(queryResult)
          .stream()
          .map(columns -> columns.stream().collect(toRow(BASIC_SCHEMA)))
          .collect(Collectors.toList());

  assertThat(actualRows, containsInAnyOrder(row(1L, "aaa"), row(2L, "bbb")));
}
/**
 * Converts a field value holding a JSON object into a {@link Row}.
 *
 * <p>For every field of the value's row schema, the same-named JSON field is wrapped into a new
 * {@code FieldValue} and run through {@code extractJsonNodeValue}; the extracted values are
 * collected into a row of that schema.
 *
 * @param rowFieldValue the field value to convert
 * @return the row built from the JSON object's fields
 * @throws UnsupportedRowJsonException if {@code rowFieldValue} is not a JSON object
 */
private static Row jsonObjectToRow(FieldValue rowFieldValue) {
  if (rowFieldValue.isJsonObject()) {
    return rowFieldValue
        .rowSchema()
        .getFields()
        .stream()
        .map(
            field ->
                extractJsonNodeValue(
                    FieldValue.of(
                        field.getName(),
                        field.getType(),
                        rowFieldValue.jsonFieldValue(field.getName()))))
        .collect(toRow(rowFieldValue.rowSchema()));
  }

  // Anything other than a JSON object cannot be mapped onto a row schema.
  throw new UnsupportedRowJsonException(
      "Expected JSON object for field '"
          + rowFieldValue.name()
          + "'. Unable to convert '"
          + rowFieldValue.jsonValue().asText()
          + "' to Beam Row, it is not a JSON object. "
          + "Currently only JSON objects can be parsed to Beam Rows");
}
/**
 * Checks that the fields of a nested {@code ROW} column can be selected individually from a
 * created external table.
 */
@Test
public void testSelectsFromExistingComplexTable() throws Exception {
  TestTableProvider tableProvider = new TestTableProvider();
  Connection connection = JdbcDriver.connect(tableProvider);

  String createPerson =
      "CREATE EXTERNAL TABLE person ( \n"
          + "description VARCHAR, \n"
          + "nestedRow ROW< \n"
          + "              id BIGINT, \n"
          + "              name VARCHAR> \n"
          + ") \n"
          + "TYPE 'test'";
  connection.createStatement().executeUpdate(createPerson);

  tableProvider.addRows(
      "person",
      row(COMPLEX_SCHEMA, "description1", row(1L, "aaa")),
      row(COMPLEX_SCHEMA, "description2", row(2L, "bbb")));

  String selectNestedFields = "SELECT person.nestedRow.id, person.nestedRow.name FROM person";
  ResultSet queryResult = connection.createStatement().executeQuery(selectNestedFields);

  // The query flattens the nested row, so the records are re-assembled under the basic schema.
  List<Row> actualRows =
      readResultSet(queryResult)
          .stream()
          .map(columns -> columns.stream().collect(toRow(BASIC_SCHEMA)))
          .collect(Collectors.toList());

  assertThat(actualRows, containsInAnyOrder(row(1L, "aaa"), row(2L, "bbb")));
}
/**
 * Checks that values collected with {@code toRow} can be read back by the field names of a schema
 * that was itself collected with {@code toSchema}.
 */
@Test
public void testCollector() {
  Schema.Field intField = Schema.Field.of("f_int", FieldType.INT32);
  Schema.Field stringField = Schema.Field.of("f_str", FieldType.STRING);
  Schema.Field doubleField = Schema.Field.of("f_double", FieldType.DOUBLE);
  Schema schema = Stream.of(intField, stringField, doubleField).collect(toSchema());

  Row collected = Stream.of(1, "2", 3.0d).collect(toRow(schema));

  // The explicit <Object> type argument is kept from the original assertions.
  assertEquals(1, collected.<Object>getValue("f_int"));
  assertEquals("2", collected.getValue("f_str"));
  assertEquals(3.0d, collected.<Object>getValue("f_double"));
}