/** Verifies that the table spec shows up in the primitive source transforms' display data. */
@Test
public void testTableSourcePrimitiveDisplayData() throws IOException, InterruptedException {
  DisplayDataEvaluator dataEvaluator = DisplayDataEvaluator.create();

  // Fake services so no real BigQuery calls are made while evaluating display data.
  FakeBigQueryServices fakeServices =
      new FakeBigQueryServices()
          .withDatasetService(new FakeDatasetService())
          .withJobService(new FakeJobService());

  BigQueryIO.Read readTransform =
      BigQueryIO.read()
          .from("project:dataset.tableId")
          .withTestServices(fakeServices)
          .withoutValidation();

  Set<DisplayData> primitiveDisplayData =
      dataEvaluator.displayDataForPrimitiveSourceTransforms(readTransform);
  assertThat(
      "BigQueryIO.Read should include the table spec in its primitive display data",
      primitiveDisplayData,
      hasItem(hasDisplayItem("table")));
}
/** Verifies that the query string shows up in the primitive source transforms' display data. */
@Test
public void testQuerySourcePrimitiveDisplayData() throws IOException, InterruptedException {
  DisplayDataEvaluator dataEvaluator = DisplayDataEvaluator.create();

  // Fake services so no real BigQuery calls are made while evaluating display data.
  FakeBigQueryServices fakeServices =
      new FakeBigQueryServices()
          .withDatasetService(new FakeDatasetService())
          .withJobService(new FakeJobService());

  BigQueryIO.Read readTransform =
      BigQueryIO.read()
          .fromQuery("foobar")
          .withTestServices(fakeServices)
          .withoutValidation();

  Set<DisplayData> primitiveDisplayData =
      dataEvaluator.displayDataForPrimitiveSourceTransforms(readTransform);
  assertThat(
      "BigQueryIO.Read should include the query in its primitive display data",
      primitiveDisplayData,
      hasItem(hasDisplayItem("query")));
}
/**
 * Creates a {@link Read} transform for reading {@code TableRow}s from BigQuery.
 *
 * @deprecated Use {@link #read(SerializableFunction)} or {@link #readTableRows} instead. {@link
 *     #readTableRows()} does exactly the same as {@link #read}, however {@link
 *     #read(SerializableFunction)} performs better.
 */
@Deprecated
public static Read read() {
  return new Read();
}
p.apply(BigQueryIO.read().from(tableRef).withTestServices(fakeBqServices)) .apply( ParDo.of(
BigQueryIO.Read read = BigQueryIO.read() .from("non-executing-project:somedataset.sometable") .withTestServices(fakeBqServices) .withoutValidation(); readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read; } else { BigQueryIO.TypedRead<TableRow> read =
.apply(BigQueryIO.read().from("ReadSourceTable").fromQuery(options.getBqQuery()) .usingStandardSql()) .apply(ParDo.of(MUTATION_TRANSFORM)) .apply(CloudBigtableIO.writeToTable(config));
public static void main(String[] args) { Options options = getOptions(args); String projectId = options.getProject(); String datasetId = options.getInputBigQueryDataset(); String tableId = options.getInputBigQueryTable(); String namespace = options.getOutputDatastoreNamespace(); String kind = options.getOutputDatastoreKind(); String keyColumn = options.getKeyColumn(); LinkedHashMap<String, String> parents = parseParentPaths(options.getParentPaths()); List<String> indexedColumns = parseIndexedColumns(options.getIndexedColumns()); // Input TableReference tableRef = new TableReference().setDatasetId(datasetId).setTableId(tableId); BigQueryIO.Read reader = BigQueryIO.read().from(tableRef); // Output DatastoreV1.Write writer = DatastoreIO.v1().write().withProjectId(projectId); // Build and run pipeline TableRow2EntityFn fn = new TableRow2EntityFn(projectId, namespace, parents, kind, keyColumn, indexedColumns); Pipeline pipeline = Pipeline.create(options); pipeline .apply(reader) .apply(ParDo.of(fn)) .apply(writer); pipeline.run(); }
/**
 * Runs a pipeline that reads rows via a BigQuery query (optionally using standard SQL and an
 * intermediate reshuffle) and writes them to the configured output table.
 */
private void runBigQueryToTablePipeline() {
  Pipeline pipeline = Pipeline.create(options);

  BigQueryIO.Read source = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    source = source.usingStandardSql();
  }

  PCollection<TableRow> rows = pipeline.apply(source);
  if (options.getReshuffle()) {
    // Key each row with a null Void key so it can pass through Reshuffle, then drop the keys.
    rows =
        rows.apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }

  rows.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  pipeline.run().waitUntilFinish();
}
/** A table-based Read reports its table spec, flattening, SQL dialect, and validation flags. */
@Test
public void testBuildSourceDisplayDataTable() {
  String tableSpec = "project:dataset.tableid";
  BigQueryIO.Read readTransform =
      BigQueryIO.read()
          .from(tableSpec)
          .withoutResultFlattening()
          .usingStandardSql()
          .withoutValidation();

  DisplayData data = DisplayData.from(readTransform);

  assertThat(data, hasDisplayItem("table", tableSpec));
  assertThat(data, hasDisplayItem("flattenResults", false));
  assertThat(data, hasDisplayItem("useLegacySql", false));
  assertThat(data, hasDisplayItem("validation", false));
}
/** A query-based Read reports its query, flattening, SQL dialect, and validation flags. */
@Test
public void testBuildSourceDisplayDataQuery() {
  BigQueryIO.Read readTransform =
      BigQueryIO.read()
          .fromQuery("myQuery")
          .withoutResultFlattening()
          .usingStandardSql()
          .withoutValidation();

  DisplayData data = DisplayData.from(readTransform);

  assertThat(data, hasDisplayItem("query", "myQuery"));
  assertThat(data, hasDisplayItem("flattenResults", false));
  assertThat(data, hasDisplayItem("useLegacySql", false));
  assertThat(data, hasDisplayItem("validation", false));
}
/** Combining a table source with a result-flattening preference must be rejected at run time. */
@Test
public void testBuildSourceWithTableAndFlattenWithoutValidation() {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Invalid BigQueryIO.Read: Specifies a table with a result flattening preference,"
          + " which only applies to queries");

  p.apply(
      BigQueryIO.read()
          .from("foo.com:project:somedataset.sometable")
          .withoutValidation()
          .withoutResultFlattening());
  p.run();
}
/** Same as {@code fromQuery(String)}, but the query is supplied via a {@link ValueProvider}. */
public Read fromQuery(ValueProvider<String> query) {
  // Delegate to the wrapped transform and re-wrap the result.
  return new Read(inner.fromQuery(query));
}
/** Same as {@code from(String)}, but the table spec is supplied via a {@link ValueProvider}. */
public Read from(ValueProvider<String> tableSpec) {
  // Delegate to the wrapped transform and re-wrap the result.
  return new Read(inner.from(tableSpec));
}
/** Returns a new {@code Read} whose wrapped transform uses the given test services. */
Read withTestServices(BigQueryServices testServices) {
  return new Read(inner.withTestServices(testServices));
}
/** A table spec with no project component leaves the project unset (null) on the source. */
@Test
public void testBuildTableBasedSourceWithDefaultProject() {
  BigQueryIO.Read readTransform = BigQueryIO.read().from("somedataset.sometable");
  checkReadTableObject(readTransform, null, "somedataset", "sometable");
}
/** Reads the input table and asserts that the global record count matches the expected value. */
private void runBigQueryIOReadPipeline() {
  Pipeline pipeline = Pipeline.create(options);

  PCollection<Long> recordCount =
      pipeline
          .apply("Read", BigQueryIO.read().from(options.getInputTable()))
          .apply("Count", Count.globally());
  PAssert.thatSingleton(recordCount).isEqualTo(options.getNumRecords());

  pipeline.run().waitUntilFinish();
}
/**
 * Use new template-compatible source implementation.
 *
 * <p>The new implementation is compatible with repeated template invocations. It does not
 * support dynamic work rebalancing.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public Read withTemplateCompatibility() {
  return new Read(inner.withTemplateCompatibility());
}
}
/**
 * Reads results received after executing the given query.
 *
 * <p>By default, the query results will be flattened -- see "flattenResults" in the <a
 * href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">Jobs documentation</a> for
 * more information. To disable flattening, use {@link Read#withoutResultFlattening}.
 *
 * <p>By default, the query will use BigQuery's legacy SQL dialect. To use the BigQuery Standard
 * SQL dialect, use {@link Read#usingStandardSql}.
 */
public Read fromQuery(String query) {
  // Delegate to the wrapped transform and re-wrap the result.
  return new Read(inner.fromQuery(query));
}
@Test public void testRuntimeOptionsNotCalledInApplyInputTable() { BigQueryIO.Read read = BigQueryIO.read().from(p.newProvider("")).withoutValidation(); // Test that this doesn't throw. DisplayData.from(read); }