static void runBigQueryTornadoes(Options options) {
  Pipeline p = Pipeline.create(options);

  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
  TableSchema schema = new TableSchema().setFields(fields);

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(new CountTornadoes())
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(schema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("word").setType("STRING")); fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new PlaysForWord()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new MaxMeanTemp()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
BigQueryIO.writeTableRows() .to("project-id:dataset-id.table-id") .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withSchema( new TableSchema()
@Test
public void testBuildWriteDisplayData() {
  String tableSpec = "project:dataset.table";
  TableSchema schema = new TableSchema().set("col1", "type1").set("col2", "type2");
  final String tblDescription = "foo bar table";

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(tableSpec)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withTableDescription(tblDescription)
          .withoutValidation();

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("table"));
  assertThat(displayData, hasDisplayItem("schema"));
  assertThat(
      displayData,
      hasDisplayItem(
          "createDisposition", BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED.toString()));
  assertThat(
      displayData,
      hasDisplayItem(
          "writeDisposition", BigQueryIO.Write.WriteDisposition.WRITE_APPEND.toString()));
  assertThat(displayData, hasDisplayItem("tableDescription", tblDescription));
  assertThat(displayData, hasDisplayItem("validation", false));
}
@Test
public void testCreateNeverWithStreaming() throws Exception {
  // The pipeline is only constructed, never executed, so the stub mapper below is never invoked.
  p.enableAbandonedNodeEnforcement(false);

  TableReference tableRef = new TableReference();
  tableRef.setDatasetId("dataset");
  tableRef.setTableId("sometable");

  PCollection<TableRow> tableRows =
      p.apply(GenerateSequence.from(0))
          .apply(
              MapElements.via(
                  new SimpleFunction<Long, TableRow>() {
                    @Override
                    public TableRow apply(Long input) {
                      return null;
                    }
                  }))
          .setCoder(TableRowJsonCoder.of());

  tableRows.apply(
      BigQueryIO.writeTableRows()
          .to(tableRef)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
          .withoutValidation());
}
private void runBigQueryToTablePipeline() {
  Pipeline p = Pipeline.create(options);

  BigQueryIO.Read bigQueryRead = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    bigQueryRead = bigQueryRead.usingStandardSql();
  }
  PCollection<TableRow> input = p.apply(bigQueryRead);

  if (options.getReshuffle()) {
    input =
        input
            .apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }

  input.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  p.run().waitUntilFinish();
}
@Test
public void testWrite() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
}
@Test
public void testWriteFailedJobs() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // The write should fail once job creation has exhausted its retries.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job with prefix");
  thrown.expectMessage("reached max retries");
  thrown.expectMessage("last failed job");

  p.run();
}
@Test
public void testWriteEmptyPCollection() throws Exception {
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(schema)
              .withoutValidation());
  p.run();

  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
@Test
public void testWriteUnknown() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // Expect the write to surface a job creation failure at run time.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Failed to create job");

  p.run();
}
@Test
public void testWriteWithMissingSchemaFromView() throws Exception {
  PCollectionView<Map<String, String>> view =
      p.apply("Create schema view", Create.of(KV.of("foo", "bar"), KV.of("bar", "boo")))
          .apply(View.asMap());
  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(view)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // The schema view has no entry for the destination table, so the write must fail.
  thrown.expectMessage("does not contain data for table destination dataset-id.table-id");
  p.run();
}
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  TableSchema schema = buildWeatherSchemaProjection();

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(ParDo.of(new ProjectionFn()))
      .apply(new BelowGlobalMean(options.getMonthFilter()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(schema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
@Override
public WriteResult expand(
    PCollection<FailsafeElement<KV<String, String>, String>> failedRecords) {
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedMessageToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .to(getErrorRecordsTable())
              .withJsonSchema(getErrorRecordsTableSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
}
@Override
public PDone expand(PCollection<T> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
@Test
public void testWriteWithBrokenGetTable() throws Exception {
  p.apply(Create.<TableRow>of(new TableRow().set("foo", "bar")))
      .apply(
          BigQueryIO.writeTableRows()
              .to(input -> null)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
              .withTestServices(fakeBqServices)
              .withoutValidation());

  // A table function that returns null must fail the write.
  thrown.expectMessage("result of tableFunction can not be null");
  thrown.expectMessage("foo");
  p.run();
}
@Override
public WriteResult expand(PCollection<FailsafeElement<String, String>> failedRecords) {
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedStringToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .to(getErrorRecordsTable())
              .withJsonSchema(getErrorRecordsTableSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
}
@Override
public WriteResult expand(PCollection<FailsafeElement<PubsubMessage, String>> failedRecords) {
  return failedRecords
      .apply("FailedRecordToTableRow", ParDo.of(new FailedPubsubMessageToTableRowFn()))
      .apply(
          "WriteFailedRecordsToBigQuery",
          BigQueryIO.writeTableRows()
              .to(getErrorRecordsTable())
              .withJsonSchema(getErrorRecordsTableSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
}
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
@Test
public void testWriteValidateFailsCreateNoSchema() {
  p.enableAbandonedNodeEnforcement(false);

  // CREATE_IF_NEEDED requires a schema, so applying the write fails at construction time.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("no schema was provided");
  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset.table")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));
}