/**
 * Expands this transform into a read of a single BigQuery table, producing one
 * {@code FeatureRow} per table row via {@code BigQueryToFeatureRowFn}.
 *
 * <p>Exactly one entity must be declared on the import spec; the source table is
 * addressed as {@code project:dataset.table} from the parsed options.
 */
@Override
public PCollection<FeatureRow> expand(PInput input) {
  BigQuerySourceOptions options =
      OptionsParser.parse(importSpec.getOptionsMap(), BigQuerySourceOptions.class);

  // BigQuery import maps one table onto one entity; reject anything else up front.
  List<String> entities = importSpec.getEntitiesList();
  Preconditions.checkArgument(
      entities.size() == 1, "exactly 1 entity must be set for BigQuery import");

  String tableSpec = String.format("%s:%s.%s", options.project, options.dataset, options.table);
  return input
      .getPipeline()
      .apply(BigQueryIO.read(new BigQueryToFeatureRowFn(importSpec)).from(tableSpec));
}
static void runBigQueryTornadoes(Options options) { Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new CountTornadoes()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("word").setType("STRING")); fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new PlaysForWord()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
// Hermetic table read for testing: the project name is non-executing, fake BigQuery
// services stand in for the real backend, and validation is skipped so no network
// calls are made. NOTE(review): fragment — enclosing test method is outside this view.
BigQueryIO.TypedRead<TableRow> read = BigQueryIO.readTableRows() .from("non-executing-project:somedataset.sometable") .withTestServices(fakeBqServices) .withoutValidation();
bigQueryIOPTransform = BigQueryIO.readTableRows().from(table); break;
bigQueryIOPTransform = BigQueryIO.readTableRows().from(table); break;
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new MaxMeanTemp()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
/**
 * Entry point: reads the input table, projects each row to the fields of
 * interest, keeps readings below the global mean for the requested month, and
 * truncate-writes the survivors to the output table.
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableSchema outputSchema = buildWeatherSchemaProjection();

  pipeline
      .apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(ParDo.of(new ProjectionFn()))
      .apply(new BelowGlobalMean(options.getMonthFilter()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(outputSchema)
              // Create the table if absent; replace its contents on each run.
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  pipeline.run().waitUntilFinish();
}
}
/**
 * Reads the table identified by {@code tableSpec}; see {@link Read#from(String)}.
 *
 * <p>The spec is wrapped in a static {@link ValueProvider} and handed to the
 * provider-based overload, so both entry points share one implementation.
 */
public TypedRead<T> from(String tableSpec) {
  ValueProvider<String> staticSpec = StaticValueProvider.of(tableSpec);
  return from(staticSpec);
}
/**
 * Same as {@code from(String)}, but the table spec is supplied lazily through a
 * {@link ValueProvider} (e.g. resolved from template parameters at run time).
 */
public Read from(ValueProvider<String> tableSpec) {
  // Delegate to the wrapped reader and re-wrap so the fluent type is preserved.
  return new Read(inner.from(tableSpec));
}
@Override public PCollection<Row> buildIOReader(PBegin begin) { // TODO: make this more generic. return begin .apply(BigQueryIO.read(BigQueryUtils.toBeamRow(schema)).from(tableSpec)) .setRowSchema(getSchema()); }
/**
 * Reads the table identified by {@code table}; see {@link Read#from(TableReference)}.
 *
 * <p>The reference is rendered to its canonical string spec and routed through
 * the string-based overload.
 */
public TypedRead<T> from(TableReference table) {
  String spec = BigQueryHelpers.toTableSpec(table);
  return from(StaticValueProvider.of(spec));
}
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // the following two 'applys' create multiple inputs to our pipeline, one for each // of our two input sources. PCollection<TableRow> eventsTable = p.apply(BigQueryIO.readTableRows().from(GDELT_EVENTS_TABLE)); PCollection<TableRow> countryCodes = p.apply(BigQueryIO.readTableRows().from(COUNTRY_CODES)); PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes); formattedResults.apply(TextIO.write().to(options.getOutput())); p.run().waitUntilFinish(); } }
/**
 * Reads from the table addressed by the given {@link TableReference}.
 */
public Read from(TableReference table) {
  // Delegate to the wrapped reader and re-wrap so the fluent type is preserved.
  return new Read(inner.from(table));
}
/**
 * Reads a BigQuery table specified as {@code "[project_id]:[dataset_id].[table_id]"},
 * or as {@code "[dataset_id].[table_id]"} for a table within the current project.
 */
public Read from(String tableSpec) {
  // Delegate to the wrapped reader and re-wrap so the fluent type is preserved.
  return new Read(inner.from(tableSpec));
}