/**
 * Variant of {@link #read(SerializableFunction)} that yields every row as a {@link TableRow},
 * parsed by a {@link TableRowParser} and encoded with {@link TableRowJsonCoder}.
 *
 * <p>This form can be more convenient, but it usually performs significantly worse than calling
 * {@link #read(SerializableFunction)} directly with a parser that produces a domain-specific
 * type, because each row has to be converted to a {@link TableRow}.
 *
 * @return a read transform producing {@link TableRow} elements
 */
public static TypedRead<TableRow> readTableRows() {
  TableRowParser rowParser = new TableRowParser();
  return read(rowParser).withCoder(TableRowJsonCoder.of());
}
/**
 * Creates an instance by storing each argument in the field of the same name.
 *
 * @param bigQueryServices value stored in {@code bigQueryServices}
 * @param retryPolicy value stored in {@code retryPolicy}
 * @param extendedErrorInfo value stored in {@code extendedErrorInfo}
 * @param skipInvalidRows value stored in {@code skipInvalidRows}
 * @param ignoreUnknownValues value stored in {@code ignoreUnknownValues}
 */
private StreamingWriteTables(
    BigQueryServices bigQueryServices,
    InsertRetryPolicy retryPolicy,
    boolean extendedErrorInfo,
    boolean skipInvalidRows,
    boolean ignoreUnknownValues) {
  // Boolean switches.
  this.ignoreUnknownValues = ignoreUnknownValues;
  this.skipInvalidRows = skipInvalidRows;
  this.extendedErrorInfo = extendedErrorInfo;

  // Collaborators.
  this.retryPolicy = retryPolicy;
  this.bigQueryServices = bigQueryServices;
}
/**
 * Turns on extended error information, which enables {@link
 * WriteResult#getFailedInsertsWithErr()}.
 *
 * <p>At the moment this only works when using {@link Method#STREAMING_INSERTS}; see {@link
 * Write#withMethod(Method)}.
 *
 * @return a copy of this transform with extended error info set to {@code true}
 */
public Write<T> withExtendedErrorInfo() {
  return toBuilder()
      .setExtendedErrorInfo(true)
      .build();
}
/** Builds a read from a query string and checks the resulting object against that query. */
@Test
public void testBuildQueryBasedSource() {
  String query = "foo_query";
  BigQueryIO.Read queryRead = BigQueryIO.read().fromQuery(query);
  checkReadQueryObject(queryRead, query);
}
/**
 * Converts the record carried by {@code schemaAndRecord} into a {@link TableRow}, delegating to
 * {@code BigQueryAvroUtils.convertGenericRecordToTableRow} with the record and its table schema.
 *
 * @param schemaAndRecord supplies the record ({@code getRecord()}) and the table schema
 *     ({@code getTableSchema()}) passed to the conversion
 * @return the row produced by the conversion
 */
@Override public TableRow apply(SchemaAndRecord schemaAndRecord) { return BigQueryAvroUtils.convertGenericRecordToTableRow( schemaAndRecord.getRecord(), schemaAndRecord.getTableSchema()); } }
/**
 * Configures this read to take its input from the given query; see {@link
 * Read#fromQuery(ValueProvider)}.
 *
 * <p>{@code ensureFromNotCalledYet()} runs first. The returned transform also has result
 * flattening and legacy SQL both set to {@code true}.
 *
 * @param query the query to read from
 * @return a copy of this transform configured with {@code query}
 */
public TypedRead<T> fromQuery(ValueProvider<String> query) {
  ensureFromNotCalledYet();
  return toBuilder()
      .setQuery(query)
      .setFlattenResults(true)
      .setUseLegacySql(true)
      .build();
}
/**
 * Switches query reads to BigQuery's Standard SQL dialect.
 *
 * <p>This is only valid together with a query ({@link #fromQuery}); using it while reading from
 * a table causes an error during validation.
 *
 * @return a new {@code Read} wrapping the inner transform with Standard SQL enabled
 */
public Read usingStandardSql() {
  return new Read(inner.usingStandardSql());
}
/**
 * Returns the statistics held in {@code dryRunJobStats}, performing a dry run of the query first
 * when nothing has been stored there yet.
 *
 * <p>The dry run uses the project from {@code bqOptions}, the configuration from {@code
 * createBasicQueryConfig()} and this object's {@code location}. Its result is stored only if
 * {@code dryRunJobStats} is still empty.
 *
 * @param bqOptions options used to obtain the job service and the project id
 * @return the value held in {@code dryRunJobStats} after the optional dry run
 * @throws InterruptedException declared by the method; not thrown directly by this body
 * @throws IOException declared by the method; not thrown directly by this body
 */
private synchronized JobStatistics dryRunQueryIfNeeded(BigQueryOptions bqOptions)
    throws InterruptedException, IOException {
  // Already populated: hand back the stored statistics without touching the service.
  if (dryRunJobStats.get() != null) {
    return dryRunJobStats.get();
  }

  JobStatistics freshStats =
      bqServices
          .getJobService(bqOptions)
          .dryRunQuery(bqOptions.getProject(), createBasicQueryConfig(), this.location);
  // Keep whatever value got there first; only fill in when still unset.
  dryRunJobStats.compareAndSet(null, freshStats);
  return dryRunJobStats.get();
}
@Override public TableDestination getTable(Integer userId) { verifySideInputs(); // Each user in it's own table. return new TableDestination( "dataset-id.userid-" + userId + "$" + partitionDecorator, "table for userid " + userId); }
/**
 * Sets up the {@code datasetService} field from a new {@link BigQueryServicesImpl} and then
 * assigns {@code table} from {@code createTable(description)}.
 *
 * @param description passed through to {@code createTable}
 * @throws IOException declared by the method; not thrown directly by this body
 * @throws InterruptedException declared by the method; not thrown directly by this body
 */
private void initializeBigQuery(Description description)
    throws IOException, InterruptedException {
  BigQueryServicesImpl services = new BigQueryServicesImpl();
  datasetService = services.getDatasetService(pipelineOptions);
  // The dataset service is assigned before the table is created.
  table = createTable(description);
}
/**
 * Accepts rows containing values that do not match the schema; such unknown values are ignored.
 *
 * <p>The default is {@code false}, in which case unknown values are treated as errors.
 *
 * @return a copy of this transform with ignore-unknown-values set to {@code true}
 */
public Write<T> ignoreUnknownValues() {
  return toBuilder()
      .setIgnoreUnknownValues(true)
      .build();
}
/**
 * Turns off <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">flattening of
 * query results</a>.
 *
 * <p>This is only valid together with a query ({@link #fromQuery}); using it while reading from
 * a table causes an error during validation.
 *
 * @return a new {@code Read} wrapping the inner transform with result flattening disabled
 */
public Read withoutResultFlattening() {
  return new Read(inner.withoutResultFlattening());
}
/**
 * Inserts every valid row of a request even when the request also contains invalid rows.
 *
 * <p>Applies only when the write method is {@link Method#STREAMING_INSERTS}. The default is
 * {@code false}, under which the whole request fails if any row is invalid.
 *
 * @return a copy of this transform with skip-invalid-rows set to {@code true}
 */
public Write<T> skipInvalidRows() {
  return toBuilder()
      .setSkipInvalidRows(true)
      .build();
}
/**
 * Turns legacy SQL off for this read; see {@link Read#usingStandardSql()}.
 *
 * @return a copy of this transform with use-legacy-SQL set to {@code false}
 */
public TypedRead<T> usingStandardSql() {
  return toBuilder()
      .setUseLegacySql(false)
      .build();
}
/**
 * Sets the BigQuery geographic location in which the query <a
 * href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs">job</a> is executed.
 *
 * <p>When no location is given, Beam tries to work it out by examining the tables the query
 * references. A location must be given for queries that are not executed in US or EU. See <a
 * href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query">BigQuery Jobs:
 * query</a>.
 *
 * @param location the location to run the query job in
 * @return a copy of this transform with the query location set
 */
public TypedRead<T> withQueryLocation(String location) {
  return toBuilder()
      .setQueryLocation(location)
      .build();
}
/**
 * Prepares a Standard SQL query test: runs the legacy query setup, then replaces the query on
 * {@code options} and switches {@code options} to Standard SQL.
 */
private void setupStandardQueryTest() {
  setupLegacyQueryTest();

  String standardQuery =
      "SELECT * FROM (SELECT \"apple\" as fruit) UNION ALL (SELECT \"orange\" as fruit)";
  options.setQuery(standardQuery);
  options.setUsingStandardSql(true);
}
/**
 * Returns the rows of the container found for the given project, dataset and table ids. The
 * lookup and the {@code getRows()} call both happen while holding the lock on {@code tables}.
 *
 * @param projectId project id passed to {@code getTableContainer}
 * @param datasetId dataset id passed to {@code getTableContainer}
 * @param tableId table id passed to {@code getTableContainer}
 * @return the rows reported by the table container
 * @throws InterruptedException declared by the method; not thrown directly by this body
 * @throws IOException declared by the method; not thrown directly by this body
 */
public List<TableRow> getAllRows(String projectId, String datasetId, String tableId)
    throws InterruptedException, IOException {
  synchronized (tables) {
    List<TableRow> rows = getTableContainer(projectId, datasetId, tableId).getRows();
    return rows;
  }
}