p.apply(BigQueryIO.readTableRows().from(tableSpec)) BigQueryIO.read( (SchemaAndRecord elem) -> (Double) elem.getRecord().get("max_temperature")) .from(tableSpec) .withCoder(DoubleCoder.of())); BigQueryIO.read( (SchemaAndRecord elem) -> (Double) elem.getRecord().get("max_temperature")) .fromQuery( "SELECT max_temperature FROM [clouddataflow-readonly:samples.weather_stations]") .withCoder(DoubleCoder.of())); BigQueryIO.read( (SchemaAndRecord elem) -> (Double) elem.getRecord().get("max_temperature")) .fromQuery( "SELECT max_temperature FROM `clouddataflow-readonly.samples.weather_stations`") .usingStandardSql() .withCoder(DoubleCoder.of())); (Double) record.get("max_temperature")); }) .fromQuery( "SELECT year, month, day, max_temperature " + "FROM [clouddataflow-readonly:samples.weather_stations] " + "WHERE year BETWEEN 2007 AND 2009") .withCoder(AvroCoder.of(WeatherData.class)));
@Override public PCollection<T> expand(PBegin input) { ValueProvider<TableReference> table = getTableProvider(); checkArgument(getQuery() == null, "from() and fromQuery() are exclusive"); checkArgument( getQueryPriority() == null, "withQueryPriority() can only be specified when using fromQuery()"); checkArgument( getFlattenResults() == null, "Invalid BigQueryIO.Read: Specifies a table with a result flattening" + " preference, which only applies to queries"); checkArgument( getUseLegacySql() == null, "Invalid BigQueryIO.Read: Specifies a table with a SQL dialect" + " preference, which only applies to queries"); checkArgument(getQuery() != null, "Either from() or fromQuery() is required"); checkArgument( getFlattenResults() != null, "flattenResults should not be null if query is set"); checkArgument(getUseLegacySql() != null, "useLegacySql should not be null if query is set"); checkArgument(getParseFn() != null, "A parseFn is required"); final Coder<T> coder = inferCoder(p.getCoderRegistry()); final PCollectionView<String> jobIdTokenView; PCollection<String> jobIdTokenCollection; PCollection<T> rows; if (!getWithTemplateCompatibility()) {
p.apply("ReadFlattened", BigQueryIO.readTableRows().fromQuery(options.getInput())); PCollection<TableRow> nonFlattenedCollection = p.apply( "ReadNonFlattened", BigQueryIO.readTableRows().fromQuery(options.getInput()).withoutResultFlattening()); PCollection<TableRow> unflattenableCollection = p.apply( "ReadUnflattenable", BigQueryIO.readTableRows() .fromQuery(options.getUnflattenableInput()) .withoutResultFlattening());
BigQueryIO.TypedRead<TableRow> read = BigQueryIO.readTableRows() .from("non-executing-project:somedataset.sometable") .withTestServices(fakeBqServices) .withoutValidation(); readTransform = useTemplateCompatibility ? read.withTemplateCompatibility() : read;
bigQueryIOPTransform = BigQueryIO.readTableRows().from(table); break; bigQueryIOPTransform = BigQueryIO.readTableRows().fromQuery(dataset.query.getValue()); if (!dataset.useLegacySql.getValue()) { bigQueryIOPTransform = bigQueryIOPTransform.usingStandardSql(); } else {
bigQueryIOPTransform = BigQueryIO.readTableRows().from(table); break; bigQueryIOPTransform = BigQueryIO.readTableRows().fromQuery(dataset.query.getValue()); if (!dataset.useLegacySql.getValue()) { bigQueryIOPTransform = bigQueryIOPTransform.usingStandardSql(); } else {
@Override
public PCollectionTuple expand(PBegin begin) {
  // Runs a standard-SQL BigQuery query and parses each resulting Avro record
  // with an AvroToEntity function (presumably producing Datastore entities
  // keyed by uniqueNameColumn() in namespace() — confirm against AvroToEntity).
  // Validation is skipped and template compatibility is enabled so the read
  // can be re-executed from a Dataflow template.
  return begin
      .apply(
          "AvroToEntity",
          BigQueryIO.read(
                  AvroToEntity.newBuilder()
                      .setEntityKind(entityKind())
                      .setUniqueNameColumn(uniqueNameColumn())
                      .setNamespace(namespace())
                      .build())
              .fromQuery(query())
              .withoutValidation()
              .withTemplateCompatibility()
              .usingStandardSql())
      // Splits the output into successTag()/failureTag() streams; the name
      // suggests rows without a usable key go to the failure tag — verify
      // against CheckNoKey's implementation.
      .apply(
          "CheckNoKey",
          CheckNoKey.newBuilder()
              .setFailureTag(failureTag())
              .setSuccessTag(successTag())
              .build());
}
}
static void runBigQueryTornadoes(Options options) { Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new CountTornadoes()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new MaxMeanTemp()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // Build the table schema for the output table. List<TableFieldSchema> fields = new ArrayList<>(); fields.add(new TableFieldSchema().setName("word").setType("STRING")); fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); TableSchema schema = new TableSchema().setFields(fields); p.apply(BigQueryIO.readTableRows().from(options.getInput())) .apply(new PlaysForWord()) .apply( BigQueryIO.writeTableRows() .to(options.getOutput()) .withSchema(schema) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); p.run().waitUntilFinish(); } }
/**
 * Entry point: reads weather rows from the input BigQuery table, projects the
 * relevant fields, filters to readings below the global mean for the configured
 * month, and writes the survivors to the output table (created if needed,
 * truncated if present).
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  TableSchema projectionSchema = buildWeatherSchemaProjection();

  pipeline
      .apply(BigQueryIO.readTableRows().from(options.getInput()))
      .apply(ParDo.of(new ProjectionFn()))
      .apply(new BelowGlobalMean(options.getMonthFilter()))
      .apply(
          BigQueryIO.writeTableRows()
              .to(options.getOutput())
              .withSchema(projectionSchema)
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));

  pipeline.run().waitUntilFinish();
}
}
/** See {@link Read#from(String)}. */
public TypedRead<T> from(String tableSpec) {
  // Wrap the fixed spec in a static ValueProvider and delegate to the
  // provider-based overload.
  ValueProvider<String> staticSpec = StaticValueProvider.of(tableSpec);
  return from(staticSpec);
}
/** Same as {@code from(String)}, but with a {@link ValueProvider}. */
public Read from(ValueProvider<String> tableSpec) {
  // Delegate to the wrapped read and re-wrap the configured result.
  Read configured = new Read(inner.from(tableSpec));
  return configured;
}
/** Same as {@code fromQuery(String)}, but with a {@link ValueProvider}. */
public Read fromQuery(ValueProvider<String> query) {
  // Delegate to the wrapped read and re-wrap the configured result.
  Read configured = new Read(inner.fromQuery(query));
  return configured;
}
@Test public void testCoderInference() { // Lambdas erase too much type information - use an anonymous class here. SerializableFunction<SchemaAndRecord, KV<ByteString, Mutation>> parseFn = new SerializableFunction<SchemaAndRecord, KV<ByteString, Mutation>>() { @Override public KV<ByteString, Mutation> apply(SchemaAndRecord input) { return null; } }; assertEquals( KvCoder.of(ByteStringCoder.of(), ProtoCoder.of(Mutation.class)), BigQueryIO.read(parseFn).inferCoder(CoderRegistry.createDefault())); } }
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  // TODO: make this more generic.
  // Reads the configured table and converts each SchemaAndRecord into a Beam
  // Row via the converter built from `schema`, then attaches getSchema() to
  // the output so downstream schema-aware transforms can use it.
  return begin
      .apply(BigQueryIO.read(BigQueryUtils.toBeamRow(schema)).from(tableSpec))
      .setRowSchema(getSchema());
}
/**
 * Disable validation that the table exists or the query succeeds prior to pipeline submission.
 * Basic validation (such as ensuring that a query or table is specified) still occurs.
 */
public Read withoutValidation() {
  // Delegate to the wrapped read and re-wrap the unvalidated result.
  Read unvalidated = new Read(inner.withoutValidation());
  return unvalidated;
}
// Default configuration: parse each record with the standard TableRowParser
// and encode the resulting TableRows with TableRowJsonCoder.
Read() {
  this(BigQueryIO.read(TableRowParser.INSTANCE).withCoder(TableRowJsonCoder.of()));
}
/** Returns a copy of this read configured to use the supplied {@link BigQueryServices}. */
Read withTestServices(BigQueryServices testServices) {
  // Delegate to the wrapped read and re-wrap the reconfigured result.
  Read configured = new Read(inner.withTestServices(testServices));
  return configured;
}
/**
 * Expands to a BigQuery read that parses each record into a FeatureRow.
 *
 * <p>The source table is addressed as {@code project:dataset.table}, built from
 * the options parsed out of the import spec. Exactly one entity must be listed
 * in the import spec.
 */
@Override
public PCollection<FeatureRow> expand(PInput input) {
  BigQuerySourceOptions sourceOptions =
      OptionsParser.parse(importSpec.getOptionsMap(), BigQuerySourceOptions.class);

  // This source only supports a single entity per import.
  Preconditions.checkArgument(
      importSpec.getEntitiesList().size() == 1,
      "exactly 1 entity must be set for BigQuery import");

  String tableSpec =
      String.format("%s:%s.%s", sourceOptions.project, sourceOptions.dataset, sourceOptions.table);

  return input
      .getPipeline()
      .apply(BigQueryIO.read(new BigQueryToFeatureRowFn(importSpec)).from(tableSpec));
}