static void runBigQueryTornadoes(Options options) {
  Pipeline p = Pipeline.create(options);

  // Build the table schema for the output table.
  List<TableFieldSchema> fields = new ArrayList<>();
  fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
  fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
  TableSchema schema = new TableSchema().setFields(fields);

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(new CountTornadoes())
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(schema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new MaxMeanTemp()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("word").setType("STRING")); fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new PlaysForWord()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
.to("project-id:dataset-id.table-id") .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withSchema( new TableSchema() .setFields(
.withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
    .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
    .withSchema(
        new TableSchema()
            .setFields(
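The two fragments above break off at .setFields(. A minimal sketch of how such a write is typically completed follows; the input PCollection named rows and the name/number fields are illustrative assumptions (mirroring the other snippets here), not part of the fragments themselves.

// Minimal sketch, assuming a PCollection<TableRow> called "rows"; the field
// names below are illustrative and not taken from the original fragments.
rows.apply(
    BigQueryIO.writeTableRows()
        .to("project-id:dataset-id.table-id")
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
        .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
        .withSchema(
            new TableSchema()
                .setFields(
                    ImmutableList.of(
                        new TableFieldSchema().setName("name").setType("STRING"),
                        new TableFieldSchema().setName("number").setType("INTEGER")))));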
@Test
public void testBuildWriteDisplayData() {
  String tableSpec = "project:dataset.table";
  TableSchema schema = new TableSchema().set("col1", "type1").set("col2", "type2");
  final String tblDescription = "foo bar table";

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(tableSpec)
          .withSchema(schema)
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
          .withTableDescription(tblDescription)
          .withoutValidation();

  DisplayData displayData = DisplayData.from(write);

  assertThat(displayData, hasDisplayItem("table"));
  assertThat(displayData, hasDisplayItem("schema"));
  assertThat(
      displayData,
      hasDisplayItem(
          "createDisposition", BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED.toString()));
  assertThat(
      displayData,
      hasDisplayItem(
          "writeDisposition", BigQueryIO.Write.WriteDisposition.WRITE_APPEND.toString()));
  assertThat(displayData, hasDisplayItem("tableDescription", tblDescription));
  assertThat(displayData, hasDisplayItem("validation", false));
}
public static void runTrafficRoutes(TrafficRoutesOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractStationSpeedFn()))
      // Map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new TrackSpeed())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatStatsFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
public static void runTrafficMaxLaneFlow(TrafficMaxLaneFlowOptions options) throws IOException {
  // Using ExampleUtils to set up required resources.
  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);
  TableReference tableRef = new TableReference();
  tableRef.setProjectId(options.getProject());
  tableRef.setDatasetId(options.getBigQueryDataset());
  tableRef.setTableId(options.getBigQueryTable());

  pipeline
      .apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
      // row... => <station route, station speed> ...
      .apply(ParDo.of(new ExtractFlowInfoFn()))
      // Map the incoming data stream into sliding windows.
      .apply(
          Window.into(
              SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
                  .every(Duration.standardMinutes(options.getWindowSlideEvery()))))
      .apply(new MaxLaneFlow())
      .apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(FormatMaxesFn.getSchema()));

  // Run the pipeline.
  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
public void testTimePartitioning(BigQueryIO.Write.Method insertMethod) throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");
  TimePartitioning timePartitioning =
      new TimePartitioning().setType("DAY").setExpirationMs(1000L);
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withMethod(insertMethod)
              .withSchema(schema)
              .withTimePartitioning(timePartitioning)
              .withoutValidation());
  p.run();

  Table table =
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id"));
  assertEquals(schema, table.getSchema());
  assertEquals(timePartitioning, table.getTimePartitioning());
}
public static void main(String[] args) throws Exception {
  TrafficFlowOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(TrafficFlowOptions.class);
  options.setStreaming(true);
  options.setBigQuerySchema(getSchema());

  ExampleUtils exampleUtils = new ExampleUtils(options);
  exampleUtils.setup();

  Pipeline pipeline = Pipeline.create(options);

  TableReference tableRef =
      getTableReference(
          options.getProject(), options.getBigQueryDataset(), options.getBigQueryTable());

  PCollectionList<TableRow> resultList =
      pipeline
          .apply("ReadMyFile", TextIO.read().from(options.getInput()))
          .apply("InsertRandomDelays", ParDo.of(new InsertDelays()))
          .apply(ParDo.of(new ExtractFlowInfo()))
          .apply(new CalculateTotalFlow(options.getWindowDuration()));

  for (int i = 0; i < resultList.size(); i++) {
    resultList.get(i).apply(BigQueryIO.writeTableRows().to(tableRef).withSchema(getSchema()));
  }

  PipelineResult result = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(result);
}
@Test
public void testWrite() throws Exception {
  p.apply(
          Create.of(
                  new TableRow().set("name", "a").set("number", 1),
                  new TableRow().set("name", "b").set("number", 2),
                  new TableRow().set("name", "c").set("number", 3))
              .withCoder(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(
                  new TableSchema()
                      .setFields(
                          ImmutableList.of(
                              new TableFieldSchema().setName("name").setType("STRING"),
                              new TableFieldSchema().setName("number").setType("INTEGER"))))
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
}
private void runBigQueryToTablePipeline() {
  Pipeline p = Pipeline.create(options);
  BigQueryIO.Read bigQueryRead = BigQueryIO.read().fromQuery(options.getQuery());
  if (options.getUsingStandardSql()) {
    bigQueryRead = bigQueryRead.usingStandardSql();
  }
  PCollection<TableRow> input = p.apply(bigQueryRead);
  if (options.getReshuffle()) {
    input =
        input
            .apply(WithKeys.<Void, TableRow>of((Void) null))
            .setCoder(KvCoder.of(VoidCoder.of(), TableRowJsonCoder.of()))
            .apply(Reshuffle.<Void, TableRow>of())
            .apply(Values.<TableRow>create());
  }
  input.apply(
      BigQueryIO.writeTableRows()
          .to(options.getOutput())
          .withSchema(options.getOutputSchema())
          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED));

  p.run().waitUntilFinish();
}
@Test
public void testWriteBuilderMethods() {
  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows().to("foo.com:project:somedataset.sometable");
  assertEquals("foo.com:project", write.getTable().get().getProjectId());
  assertEquals("somedataset", write.getTable().get().getDatasetId());
  assertEquals("sometable", write.getTable().get().getTableId());
  assertNull(write.getJsonSchema());
  assertNull(write.getSchemaFromView());
  assertEquals(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED, write.getCreateDisposition());
  assertEquals(BigQueryIO.Write.WriteDisposition.WRITE_EMPTY, write.getWriteDisposition());
  assertNull(write.getTableDescription());
  assertTrue(write.getValidate());
  assertFalse(write.withoutValidation().getValidate());

  TableSchema schema = new TableSchema();
  assertEquals(
      schema,
      BigQueryHelpers.fromJsonString(
          write.withSchema(schema).getJsonSchema().get(), TableSchema.class));
}
@Test
public void testWriteEmptyPCollection() throws Exception {
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));

  p.apply(Create.empty(TableRowJsonCoder.of()))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id")
              .withTestServices(fakeBqServices)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(schema)
              .withoutValidation());
  p.run();

  checkNotNull(
      fakeDatasetService.getTable(
          BigQueryHelpers.parseTableSpec("project-id:dataset-id.table-id")));
}
@Test
public void testWriteToTableDecorator() throws Exception {
  TableRow row1 = new TableRow().set("name", "a").set("number", "1");
  TableRow row2 = new TableRow().set("name", "b").set("number", "2");

  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(new TableFieldSchema().setName("number").setType("INTEGER")));
  p.apply(Create.of(row1, row2))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  Pipeline p = Pipeline.create(options);

  TableSchema schema = buildWeatherSchemaProjection();

  p.apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(ParDo.of(new ProjectionFn()))
      .apply(new BelowGlobalMean(options.getMonthFilter()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(schema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  p.run().waitUntilFinish();
}
}
@Test
public void testRuntimeOptionsNotCalledInApplyOutput() {
  p.enableAbandonedNodeEnforcement(false);

  BigQueryIO.Write<TableRow> write =
      BigQueryIO.writeTableRows()
          .to(p.newProvider("some-table"))
          .withSchema(
              ValueProvider.NestedValueProvider.of(
                  p.newProvider("some-schema"), new BigQueryHelpers.JsonSchemaToTableSchema()))
          .withoutValidation();
  p.apply(Create.empty(TableRowJsonCoder.of())).apply(write);

  // Test that this doesn't throw.
  DisplayData.from(write);
}
@Override
public PDone expand(PCollection<T> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}
}
@Override
public POutput buildIOWriter(PCollection<Row> input) {
  return input.apply(
      BigQueryIO.<Row>write()
          .withSchema(BigQueryUtils.toTableSchema(getSchema()))
          .withFormatFunction(BigQueryUtils.toTableRow())
          .to(tableSpec));
}
@Override
public PDone expand(PCollection<InputT> teamAndScore) {
  teamAndScore
      .apply("ConvertToRow", ParDo.of(new BuildRowFn()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(getTable(projectId, datasetId, tableName))
              .withSchema(getSchema())
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  return PDone.in(teamAndScore.getPipeline());
}