@Override
public WriteResult expand(PCollection<FailsafeElement<String, String>> failedRecords) {
  // Dead-letter path: turn each failed element into a TableRow, then append the
  // rows to the configured error table, creating it on demand from the JSON schema.
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedStringToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND)
              .withJsonSchema(getErrorRecordsTableSchema())
              .to(getErrorRecordsTable()));
}
@Override
public WriteResult expand(PCollection<FailsafeElement<PubsubMessage, String>> failedRecords) {
  // Convert failed Pub/Sub messages to TableRows and append them to the error
  // table; the table is created if missing, using the configured JSON schema.
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedPubsubMessageToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND)
              .withJsonSchema(getErrorRecordsTableSchema())
              .to(getErrorRecordsTable()));
}
@Override
public WriteResult expand(
    PCollection<FailsafeElement<KV<String, String>, String>> failedRecords) {
  // Map each failed key/value message to a TableRow, then append the rows to the
  // error-records table (created if needed from the configured JSON schema).
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedMessageToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND)
              .withJsonSchema(getErrorRecordsTableSchema())
              .to(getErrorRecordsTable()));
}
}
static void runBigQueryTornadoes(Options options) { Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new CountTornadoes()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); }
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  // Build rows from the scores and append them to the target table,
  // creating the table from getSchema() if it does not exist yet.
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND)
              .to(getTable(projectId, datasetId, tableName)));
  return PDone.in(teamAndScore.getPipeline());
}
@Override
public PDone expand(PCollection<T> teamAndScore) {
  // Convert the input to TableRows and append them to the destination table;
  // the table is created on demand using getSchema().
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND)
              .to(getTable(projectId, datasetId, tableName)));
  return PDone.in(teamAndScore.getPipeline());
}
}
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("word").setType("STRING")); fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new PlaysForWord()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new MaxMeanTemp()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
@Override
public PDone expand(PCollection<IndexedRecord> in) {
  // Destination table is assembled from the datastore/dataset properties.
  TableReference targetTable =
      new TableReference()
          .setProjectId(datastore.projectName.getValue())
          .setDatasetId(dataset.bqDataset.getValue())
          .setTableId(dataset.tableName.getValue());

  // Start from a plain write and let the configured table/write operations refine it.
  BigQueryIO.Write write = BigQueryIO.writeTableRows().to(targetTable);
  write = setTableOperation(write);
  write = setWriteOperation(write);

  in.apply(ParDo.of(new IndexedRecordToTableRowFn())).apply(write);
  return PDone.in(in.getPipeline());
}
@Override
public PDone expand(PCollection<IndexedRecord> in) {
  // Resolve the output table from the component's datastore and dataset settings.
  TableReference outputTable = new TableReference();
  outputTable.setProjectId(datastore.projectName.getValue());
  outputTable.setDatasetId(dataset.bqDataset.getValue());
  outputTable.setTableId(dataset.tableName.getValue());

  // Apply the configured table and write dispositions on top of the base write.
  BigQueryIO.Write writeTransform =
      setWriteOperation(setTableOperation(BigQueryIO.writeTableRows().to(outputTable)));

  in.apply(ParDo.of(new IndexedRecordToTableRowFn())).apply(writeTransform);
  return PDone.in(in.getPipeline());
}
@Test
public void testWriteValidateFailsCreateNoSchema() {
  // The pipeline is never run; only graph construction matters here.
  p.enableAbandonedNodeEnforcement(false);

  // CREATE_IF_NEEDED without any schema must be rejected when the sink is applied.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("no schema was provided");

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("dataset.table")
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED);
  p.apply(Create.empty(TableRowJsonCoder.of())).apply(write);
}
@Test
public void testWriteEmptyPCollection() throws Exception {
  TableFieldSchema numberField = new TableFieldSchema().setName("number").setType("INTEGER");
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(numberField));

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withTestServices(fakeBqServices)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withSchema(schema)
          .withoutValidation();
  p.apply(Create.empty(TableRowJsonCoder.of())).apply(write);
  p.run();

  // Even with zero input elements, the destination table must have been created.
  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableSchema projectionSchema = buildWeatherSchemaProjection();

  // Project the relevant fields, keep readings below the global mean for the
  // requested month, and overwrite the output table with the result.
  pipeline
      .apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(ParDo.of(new ProjectionFn()))
      .apply(new BelowGlobalMean(options.getMonthFilter()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(projectionSchema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  pipeline.run().waitUntilFinish();
}
}
@Test
public void testWriteWithBrokenGetTable() throws Exception {
  // A table function that returns null is illegal; the failure surfaces at run time.
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(input -> null)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withTestServices(fakeBqServices)
          .withoutValidation();
  p.apply(Create.<TableRow>of(new TableRow().set("foo", "bar"))).apply(write);

  // The error should name both the broken function and the offending element.
  thrown.expectMessage("result of tableFunction can not be null");
  thrown.expectMessage("foo");
  p.run();
}
@Test
public void testBuildWriteDefaultProject() {
  // A bare "dataset.table" spec leaves the project unset so the default is used.
  BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows().to("somedataset.sometable");
  TableReference parsed = write.getTable().get();
  assertEquals(null, parsed.getProjectId());
  assertEquals("somedataset", parsed.getDatasetId());
  assertEquals("sometable", parsed.getTableId());
}
@Test public void testRuntimeOptionsNotCalledInApplyOutput() { p.enableAbandonedNodeEnforcement(false); /* pipeline is built but never run */ BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows() .to(p.newProvider("some-table")) .withSchema( ValueProvider.NestedValueProvider.of( p.newProvider("some-schema"), new BigQueryHelpers.JsonSchemaToTableSchema())) .withoutValidation(); p.apply(Create.empty(TableRowJsonCoder.of())).apply(write); // Test that this doesn't throw: table and schema are runtime ValueProviders, // so rendering DisplayData at graph-construction time must not evaluate them. DisplayData.from(write); }
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow rowA = new TableRow().set("name", "a").set("number", "1");
  TableRow rowB = new TableRow().set("name", "b").set("number", "2");

  TableFieldSchema numberField = new TableFieldSchema().setName("number").setType("INTEGER");
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(numberField));

  // Streaming inserts into a partition decorator ("$20171127") on the table spec.
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id$20171127")
          .withTestServices(fakeBqServices)
          .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
          .withSchema(schema)
          .withoutValidation();
  p.apply(Create.of(rowA, rowB)).apply(write);
  p.run();
}
@Test
public void testBuildWriteWithTableReference() {
  TableReference table =
      new TableReference()
          .setProjectId("foo.com:project")
          .setDatasetId("somedataset")
          .setTableId("sometable");
  BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows().to(table);

  // The supplied reference must round-trip through the sink unchanged.
  TableReference actual = write.getTable().get();
  assertEquals("foo.com:project", actual.getProjectId());
  assertEquals("somedataset", actual.getDatasetId());
  assertEquals("sometable", actual.getTableId());
}
@Test
public void testWriteWithMissingSchemaFromView() throws Exception {
  // The schema side input has no entry for the destination table.
  PCollectionView<Map<String, String>> view =
      p.apply("Create schema view", Create.of(KV.of("foo", "bar"), KV.of("bar", "boo")))
          .apply(View.asMap());

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("dataset-id.table-id")
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withSchemaFromView(view)
          .withTestServices(fakeBqServices)
          .withoutValidation();
  p.apply(Create.empty(TableRowJsonCoder.of())).apply(write);

  // The missing mapping must be reported when the pipeline actually runs.
  thrown.expectMessage("does not contain data for table destination dataset-id.table-id");
  p.run();
}
@Test
public void testWriteUnknown() throws Exception {
  // CREATE_NEVER against the fake services: the write is expected to fail at run time.
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to("project-id:dataset-id.table-id")
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withTestServices(fakeBqServices)
          .withoutValidation();
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(write);

  // Expectations must be registered before run() so the failure message is checked.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job");
  p.run();
}