Java code examples for org.apache.beam.sdk.io.gcp.bigquery

/**
 * Variant of {@link #read(SerializableFunction)} that yields every row as a {@link TableRow}.
 *
 * <p>While this form can be more convenient, it is usually considerably slower than calling
 * {@link #read(SerializableFunction)} and parsing straight into a domain-specific type, because
 * each row has to be converted to a {@link TableRow}.
 *
 * @return a read that parses rows with a {@code TableRowParser} and encodes them with {@code
 *     TableRowJsonCoder}
 */
public static TypedRead<TableRow> readTableRows() {
 TypedRead<TableRow> tableRowRead = read(new TableRowParser());
 return tableRowRead.withCoder(TableRowJsonCoder.of());
}

/**
 * Stores the supplied collaborators and flags on this instance without further processing.
 *
 * @param bigQueryServices the BigQuery services handle to keep
 * @param retryPolicy the insert retry policy to keep
 * @param extendedErrorInfo value for the extended-error-info flag
 * @param skipInvalidRows value for the skip-invalid-rows flag
 * @param ignoreUnknownValues value for the ignore-unknown-values flag
 */
private StreamingWriteTables(
  BigQueryServices bigQueryServices,
  InsertRetryPolicy retryPolicy,
  boolean extendedErrorInfo,
  boolean skipInvalidRows,
  boolean ignoreUnknownValues) {
 // Boolean switches.
 this.ignoreUnknownValues = ignoreUnknownValues;
 this.skipInvalidRows = skipInvalidRows;
 this.extendedErrorInfo = extendedErrorInfo;
 // Collaborators.
 this.retryPolicy = retryPolicy;
 this.bigQueryServices = bigQueryServices;
}

/**
 * Turns on extended error information, which enables {@link
 * WriteResult#getFailedInsertsWithErr()}.
 *
 * <p>At the moment this is only supported together with {@link Method#STREAMING_INSERTS}; see
 * {@link Write#withMethod(Method)}.
 *
 * @return a {@code Write} built from this one with the extended-error-info flag set to {@code
 *     true}
 */
public Write<T> withExtendedErrorInfo() {
 return this.toBuilder()
   .setExtendedErrorInfo(true)
   .build();
}

/** Builds a read from a query string and checks the resulting read object against that query. */
@Test
public void testBuildQueryBasedSource() {
 final String query = "foo_query";
 BigQueryIO.Read queryRead = BigQueryIO.read().fromQuery(query);
 checkReadQueryObject(queryRead, query);
}

 /**
  * Converts the record carried by {@code schemaAndRecord} into a {@link TableRow}, using the table
  * schema carried alongside it.
  *
  * @param schemaAndRecord holder supplying both the record and its table schema
  * @return the result of {@code BigQueryAvroUtils.convertGenericRecordToTableRow}
  */
 @Override
 public TableRow apply(SchemaAndRecord schemaAndRecord) {
  return BigQueryAvroUtils.convertGenericRecordToTableRow(
    schemaAndRecord.getRecord(),
    schemaAndRecord.getTableSchema());
 }
}

/**
 * Reads the BigQuery table named by {@code tableSpec}, given either as {@code
 * "[project_id]:[dataset_id].[table_id]"} or, for tables within the current project, as {@code
 * "[dataset_id].[table_id]"}.
 *
 * @param tableSpec the table specification string
 * @return a new {@code Read} wrapping the inner read configured with {@code tableSpec}
 */
public Read from(String tableSpec) {
 final Read fromTableSpec = new Read(inner.from(tableSpec));
 return fromTableSpec;
}

/**
 * See {@link Read#fromQuery(ValueProvider)}.
 *
 * <p>Calls {@code ensureFromNotCalledYet()} before building. The returned read has the query set
 * and both result flattening and legacy SQL switched on.
 *
 * @param query provider of the query string
 * @return a {@code TypedRead} built from this one with the query settings applied
 */
public TypedRead<T> fromQuery(ValueProvider<String> query) {
 ensureFromNotCalledYet();
 return this.toBuilder()
   .setQuery(query)
   .setFlattenResults(true)
   .setUseLegacySql(true)
   .build();
}

/**
 * Switches query reads to BigQuery's Standard SQL dialect.
 *
 * <p>This is only valid in combination with a query ({@link #fromQuery}); using it while reading
 * from a table causes an error during validation.
 *
 * @return a new {@code Read} wrapping the inner read with Standard SQL selected
 */
public Read usingStandardSql() {
 final Read standardSqlRead = new Read(inner.usingStandardSql());
 return standardSqlRead;
}

/**
 * Returns the dry-run statistics held in {@code dryRunJobStats}, running the dry-run query first
 * when no statistics are stored yet.
 *
 * @param bqOptions options used to obtain the job service and the project
 * @return the value held by {@code dryRunJobStats} after the optional dry run
 * @throws InterruptedException declared by this method; propagated from the calls it makes
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private synchronized JobStatistics dryRunQueryIfNeeded(BigQueryOptions bqOptions)
  throws InterruptedException, IOException {
 // Statistics already present: hand back whatever the reference currently holds.
 if (dryRunJobStats.get() != null) {
  return dryRunJobStats.get();
 }

 JobStatistics freshStats =
   bqServices
     .getJobService(bqOptions)
     .dryRunQuery(bqOptions.getProject(), createBasicQueryConfig(), this.location);
 // Only stores the new statistics if the reference is still empty.
 dryRunJobStats.compareAndSet(null, freshStats);
 return dryRunJobStats.get();
}

/**
 * Builds the destination for the given user after calling {@code verifySideInputs()}.
 *
 * @param userId the user id embedded in the table spec and in the description
 * @return a destination whose spec is {@code dataset-id.userid-<userId>$<partitionDecorator>}
 */
@Override
public TableDestination getTable(Integer userId) {
 verifySideInputs();
 // Every user gets its own table.
 String tableSpec = "dataset-id.userid-" + userId + "$" + partitionDecorator;
 String tableDescription = "table for userid " + userId;
 return new TableDestination(tableSpec, tableDescription);
}

/**
 * Obtains a dataset service from a fresh {@code BigQueryServicesImpl} using {@code
 * pipelineOptions}, then creates the table for {@code description}; both results are stored in
 * fields.
 *
 * @param description passed through to {@code createTable}
 * @throws IOException declared by this method; propagated from the calls it makes
 * @throws InterruptedException declared by this method; propagated from the calls it makes
 */
private void initializeBigQuery(Description description)
  throws IOException, InterruptedException {
 BigQueryServicesImpl services = new BigQueryServicesImpl();
 datasetService = services.getDatasetService(pipelineOptions);
 table = createTable(description);
}

/**
 * Accepts rows containing values that do not match the schema; such unknown values are ignored.
 * The default is false, which treats unknown values as errors.
 *
 * @return a {@code Write} built from this one with the ignore-unknown-values flag set to {@code
 *     true}
 */
public Write<T> ignoreUnknownValues() {
 return this.toBuilder()
   .setIgnoreUnknownValues(true)
   .build();
}

/**
 * Turns off <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">flattening of
 * query results</a>.
 *
 * <p>This is only valid in combination with a query ({@link #fromQuery}); using it while reading
 * from a table causes an error during validation.
 *
 * @return a new {@code Read} wrapping the inner read with result flattening disabled
 */
public Read withoutResultFlattening() {
 final Read unflattenedRead = new Read(inner.withoutResultFlattening());
 return unflattenedRead;
}

/**
 * Specifies the {@link Coder} for the output of the parse function. Supplying one may be
 * necessary when a coder can not be inferred automatically.
 *
 * @param coder the coder to set
 * @return a {@code TypedRead} built from this one with {@code coder} set
 */
public TypedRead<T> withCoder(Coder<T> coder) {
 return this.toBuilder()
   .setCoder(coder)
   .build();
}

/**
 * Inserts all valid rows of a request even when the request also contains invalid rows. Only
 * applicable when the write method is {@link Method#STREAMING_INSERTS}. The default is false, in
 * which case the entire request fails if any invalid rows exist.
 *
 * @return a {@code Write} built from this one with the skip-invalid-rows flag set to {@code true}
 */
public Write<T> skipInvalidRows() {
 return this.toBuilder()
   .setSkipInvalidRows(true)
   .build();
}

/**
 * See {@link Read#usingStandardSql()}.
 *
 * @return a {@code TypedRead} built from this one with the use-legacy-SQL flag set to {@code
 *     false}
 */
public TypedRead<T> usingStandardSql() {
 return this.toBuilder()
   .setUseLegacySql(false)
   .build();
}

/**
 * Sets the BigQuery geographic location in which the query <a
 * href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs">job</a> will run. When no
 * location is given, Beam tries to work it out by examining the tables the query references. A
 * location must be specified for queries not executed in US or EU. See <a
 * href="https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query">BigQuery Jobs:
 * query</a>.
 *
 * @param location the query location to set
 * @return a {@code TypedRead} built from this one with the query location set
 */
public TypedRead<T> withQueryLocation(String location) {
 return this.toBuilder()
   .setQueryLocation(location)
   .build();
}

/**
 * Runs the legacy query test setup, then overrides the query on {@code options} and switches
 * {@code options} to Standard SQL.
 */
private void setupStandardQueryTest() {
 setupLegacyQueryTest();
 final String standardQuery =
   "SELECT * FROM (SELECT \"apple\" as fruit) UNION ALL (SELECT \"orange\" as fruit)";
 options.setQuery(standardQuery);
 options.setUsingStandardSql(true);
}

/**
 * Returns the rows of the table container looked up by project, dataset and table id. Both the
 * lookup and the row retrieval run while holding the lock on {@code tables}.
 *
 * @param projectId project id used for the lookup
 * @param datasetId dataset id used for the lookup
 * @param tableId table id used for the lookup
 * @return the rows reported by the table container
 * @throws InterruptedException declared by this method; propagated from the lookup
 * @throws IOException declared by this method; propagated from the lookup
 */
public List<TableRow> getAllRows(String projectId, String datasetId, String tableId)
  throws InterruptedException, IOException {
 synchronized (tables) {
  List<TableRow> rows = getTableContainer(projectId, datasetId, tableId).getRows();
  return rows;
 }
}

/**
 * Reads from the table identified by a {@link TableReference}.
 *
 * @param table reference to the table to read
 * @return a new {@code Read} wrapping the inner read configured with {@code table}
 */
public Read from(TableReference table) {
 final Read fromReference = new Read(inner.from(table));
 return fromReference;
}

How to use org.apache.beam.sdk.io.gcp.bigquery

Best Java code snippets using org.apache.beam.sdk.io.gcp.bigquery (Showing top 20 results out of 315)