/**
 * Creates a {@code Read} by delegating to the single-argument constructor with a typed read
 * built from {@code TableRowParser.INSTANCE} and configured with {@code TableRowJsonCoder.of()}.
 */
Read() { this(BigQueryIO.read(TableRowParser.INSTANCE).withCoder(TableRowJsonCoder.of())); }
/**
 * Variant of {@link #read(SerializableFunction)} that yields every row as a {@link TableRow}.
 *
 * <p>This can be handier in some situations, but it usually performs significantly worse than
 * calling {@link #read(SerializableFunction)} directly and parsing into a domain-specific type,
 * because of the overhead of converting each row to a {@link TableRow}.
 */
public static TypedRead<TableRow> readTableRows() {
  TypedRead<TableRow> tableRowRead = read(new TableRowParser());
  return tableRowRead.withCoder(TableRowJsonCoder.of());
}
/**
 * Applies a BigQuery read to the input's pipeline.
 *
 * <p>The source options are parsed from the import spec's options map, the import spec is
 * required to list exactly one entity, and the table is addressed as
 * {@code project:dataset.table} built from the parsed options. Rows are converted with a
 * {@code BigQueryToFeatureRowFn} constructed from the import spec.
 *
 * @param input the input whose pipeline the read is applied to
 * @return the collection produced by the BigQuery read
 */
@Override
public PCollection<FeatureRow> expand(PInput input) {
  BigQuerySourceOptions sourceOptions =
      OptionsParser.parse(importSpec.getOptionsMap(), BigQuerySourceOptions.class);

  List<String> entityNames = importSpec.getEntitiesList();
  boolean hasSingleEntity = entityNames.size() == 1;
  Preconditions.checkArgument(
      hasSingleEntity, "exactly 1 entity must be set for BigQuery import");

  String tableSpec =
      String.format(
          "%s:%s.%s", sourceOptions.project, sourceOptions.dataset, sourceOptions.table);

  return input
      .getPipeline()
      .apply(BigQueryIO.read(new BigQueryToFeatureRowFn(importSpec)).from(tableSpec));
}
@Override public PCollection<Row> buildIOReader(PBegin begin) { // TODO: make this more generic. return begin .apply(BigQueryIO.read(BigQueryUtils.toBeamRow(schema)).from(tableSpec)) .setRowSchema(getSchema()); }
/** A fully qualified table spec is split into project, dataset and table. */
@Test
public void testBuildTableBasedSource() {
  String tableSpec = "foo.com:project:somedataset.sometable";
  BigQueryIO.Read tableRead = BigQueryIO.read().from(tableSpec);

  checkReadTableObject(tableRead, "foo.com:project", "somedataset", "sometable");
}
/** A read built from a query is checked against that same query. */
@Test
public void testBuildQueryBasedSource() {
  String query = "foo_query";
  BigQueryIO.Read queryRead = BigQueryIO.read().fromQuery(query);

  checkReadQueryObject(queryRead, "foo_query");
}
/** A table spec without a project is checked with a {@code null} expected project. */
@Test
public void testBuildTableBasedSourceWithDefaultProject() {
  String tableSpec = "somedataset.sometable";
  BigQueryIO.Read tableRead = BigQueryIO.read().from(tableSpec);

  checkReadTableObject(tableRead, null, "somedataset", "sometable");
}
@Test public void testBuildTableBasedSourceWithoutValidation() { // This test just checks that using withoutValidation will not trigger object // construction errors. BigQueryIO.Read read = BigQueryIO.read().from("foo.com:project:somedataset.sometable").withoutValidation(); checkReadTableObjectWithValidate(read, "foo.com:project", "somedataset", "sometable", false); }
@Test public void testBuildQueryBasedSourceWithoutValidation() { // This test just checks that using withoutValidation will not trigger object // construction errors. BigQueryIO.Read read = BigQueryIO.read().fromQuery("some_query").withoutValidation(); checkReadQueryObjectWithValidate(read, "some_query", false); }
/** The name reported by a table-based read is {@code "BigQueryIO.Read"}. */
@Test
public void testBigQueryIOGetName() {
  String actualName = BigQueryIO.read().from("somedataset.sometable").getName();

  assertEquals("BigQueryIO.Read", actualName);
}
@Test public void testRuntimeOptionsNotCalledInApplyInputQuery() { BigQueryIO.Read read = BigQueryIO.read().fromQuery(p.newProvider("")).withoutValidation(); // Test that this doesn't throw. DisplayData.from(read); }
@Test public void testRuntimeOptionsNotCalledInApplyInputTable() { BigQueryIO.Read read = BigQueryIO.read().from(p.newProvider("")).withoutValidation(); // Test that this doesn't throw. DisplayData.from(read); }
/**
 * Expects an {@link IllegalArgumentException} with the SQL-dialect message when a table-based
 * read that also sets a SQL dialect preference is applied and the pipeline is run.
 */
@Test
public void testBuildSourceWithTableAndSqlDialect() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a SQL dialect preference,"
          + " which only applies to queries");

  BigQueryIO.Read tableReadWithDialect =
      BigQueryIO.read().from("foo.com:project:somedataset.sometable").usingStandardSql();
  p.apply(tableReadWithDialect);
  p.run();
}
/** A read built from a {@link TableReference} is checked against that reference's fields. */
@Test
public void testBuildSourceWithTableReference() {
  TableReference tableRef = new TableReference();
  tableRef.setProjectId("foo.com:project");
  tableRef.setDatasetId("somedataset");
  tableRef.setTableId("sometable");

  BigQueryIO.Read tableRead = BigQueryIO.read().from(tableRef);

  checkReadTableObject(tableRead, "foo.com:project", "somedataset", "sometable");
}
/**
 * Expects an {@link IllegalArgumentException} with the result-flattening message when a
 * table-based read that also sets a flattening preference is applied and the pipeline is run.
 */
@Test
public void testBuildSourceWithTableAndFlatten() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference,"
          + " which only applies to queries");

  BigQueryIO.Read tableReadWithFlatten =
      BigQueryIO.read().from("foo.com:project:somedataset.sometable").withoutResultFlattening();
  p.apply("ReadMyTable", tableReadWithFlatten);
  p.run();
}
@Test public void testCoderInference() { // Lambdas erase too much type information - use an anonymous class here. SerializableFunction<SchemaAndRecord, KV<ByteString, Mutation>> parseFn = new SerializableFunction<SchemaAndRecord, KV<ByteString, Mutation>>() { @Override public KV<ByteString, Mutation> apply(SchemaAndRecord input) { return null; } }; assertEquals( KvCoder.of(ByteStringCoder.of(), ProtoCoder.of(Mutation.class)), BigQueryIO.read(parseFn).inferCoder(CoderRegistry.createDefault())); } }
/**
 * Expects an {@link IllegalArgumentException} with the result-flattening message even when
 * validation is disabled on a table-based read that sets a flattening preference.
 */
@Test
public void testBuildSourceWithTableAndFlattenWithoutValidation() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference,"
          + " which only applies to queries");

  BigQueryIO.Read unvalidatedReadWithFlatten =
      BigQueryIO.read()
          .from("foo.com:project:somedataset.sometable")
          .withoutValidation()
          .withoutResultFlattening();
  p.apply(unvalidatedReadWithFlatten);
  p.run();
}
/**
 * Creates a pipeline from {@code options}, reads the table named by
 * {@code options.getInputTable()}, counts the elements globally, asserts that the single count
 * equals {@code options.getNumRecords()}, then runs the pipeline and waits for it to finish.
 */
private void runBigQueryIOReadPipeline() {
  Pipeline pipeline = Pipeline.create(options);

  PCollection<Long> recordCount =
      pipeline
          .apply("Read", BigQueryIO.read().from(options.getInputTable()))
          .apply("Count", Count.globally());

  PAssert.thatSingleton(recordCount).isEqualTo(options.getNumRecords());

  pipeline.run().waitUntilFinish();
}
/**
 * Display data of a table-based read exposes the table spec and the flattening, SQL dialect
 * and validation settings.
 */
@Test
public void testBuildSourceDisplayDataTable() {
  String tableSpec = "project:dataset.tableid";

  BigQueryIO.Read tableRead =
      BigQueryIO.read()
          .from(tableSpec)
          .withoutResultFlattening()
          .usingStandardSql()
          .withoutValidation();
  DisplayData readDisplayData = DisplayData.from(tableRead);

  assertThat(readDisplayData, hasDisplayItem("table", tableSpec));
  assertThat(readDisplayData, hasDisplayItem("flattenResults", false));
  assertThat(readDisplayData, hasDisplayItem("useLegacySql", false));
  assertThat(readDisplayData, hasDisplayItem("validation", false));
}
/**
 * Display data of a query-based read exposes the query and the flattening, SQL dialect and
 * validation settings.
 */
@Test
public void testBuildSourceDisplayDataQuery() {
  BigQueryIO.Read queryRead =
      BigQueryIO.read()
          .fromQuery("myQuery")
          .withoutResultFlattening()
          .usingStandardSql()
          .withoutValidation();
  DisplayData readDisplayData = DisplayData.from(queryRead);

  assertThat(readDisplayData, hasDisplayItem("query", "myQuery"));
  assertThat(readDisplayData, hasDisplayItem("flattenResults", false));
  assertThat(readDisplayData, hasDisplayItem("useLegacySql", false));
  assertThat(readDisplayData, hasDisplayItem("validation", false));
}