/**
 * Sets how often load jobs are started when writing an unbounded {@link PCollection} with
 * {@link Method#FILE_LOADS}.
 *
 * <p>Each time this duration elapses, a single BigQuery load job is issued covering all data
 * written since the previous job. BigQuery limits how many load jobs may run per day, so a very
 * small duration risks exhausting that quota; values of 5 or 10 minutes are typical and keep a
 * project comfortably under the limit. See
 * <a href="https://cloud.google.com/bigquery/quota-policy">Quota Policy</a> for details on
 * BigQuery quotas.
 */
public Write<T> withTriggeringFrequency(Duration triggeringFrequency) {
  if (triggeringFrequency == null) {
    throw new IllegalArgumentException("triggeringFrequency can not be null");
  }
  return toBuilder().setTriggeringFrequency(triggeringFrequency).build();
}
/**
 * Sets the {@link BigQueryServices} implementation used by this transform. This method is for
 * test usage only.
 */
// Fix: the doc comment was previously placed after @VisibleForTesting; a Javadoc comment that
// does not immediately precede the annotated declaration is ignored by the javadoc tool.
@VisibleForTesting
public Write<T> withTestServices(BigQueryServices testServices) {
  checkArgument(testServices != null, "testServices can not be null");
  return toBuilder().setBigQueryServices(testServices).build();
}
/** Specifies whether the destination table should be created when it does not already exist. */
public Write<T> withCreateDisposition(CreateDisposition createDisposition) {
  if (createDisposition == null) {
    throw new IllegalArgumentException("createDisposition can not be null");
  }
  return toBuilder().setCreateDisposition(createDisposition).build();
}
// Default builder configuration for Write: validation enabled, production BigQuery services,
// CREATE_IF_NEEDED / WRITE_EMPTY dispositions, default write method, and all streaming-insert
// leniency flags (extended errors, skip-invalid-rows, ignore-unknown-values) off.
// NOTE(review): numFileShards is initialized to 0 here — presumably meaning "unset / let the
// runner decide"; the javadoc on withNumFileShards quotes a different default (1000). Confirm
// which is authoritative.
return new AutoValue_BigQueryIO_Write.Builder<T>() .setValidate(true) .setBigQueryServices(new BigQueryServicesImpl()) .setCreateDisposition(Write.CreateDisposition.CREATE_IF_NEEDED) .setWriteDisposition(Write.WriteDisposition.WRITE_EMPTY) .setNumFileShards(0) .setMethod(Write.Method.DEFAULT) .setExtendedErrorInfo(false) .setSkipInvalidRows(false) .setIgnoreUnknownValues(false) .build();
/** For testing only: caps how many files a single bundle may write. Must be positive. */
@VisibleForTesting
Write<T> withMaxFilesPerBundle(int maxFilesPerBundle) {
  boolean isPositive = maxFilesPerBundle > 0;
  checkArgument(isPositive, "maxFilesPerBundle must be > 0, but was: %s", maxFilesPerBundle);
  return toBuilder().setMaxFilesPerBundle(maxFilesPerBundle).build();
}
/**
 * Requests that BigQuery insert every valid row of a request even when other rows in the same
 * request are invalid. Only applicable when the write method is
 * {@link Method#STREAMING_INSERTS}. When left at the default (false), the entire request fails
 * if any row is invalid.
 */
public Write<T> skipInvalidRows() {
  return toBuilder().setSkipInvalidRows(true).build();
}
/**
 * Selects the mechanism used to write data to BigQuery. The Javadoc on {@link Method}
 * describes each mechanism and its restrictions.
 */
public Write<T> withMethod(Method method) {
  if (method == null) {
    throw new IllegalArgumentException("method can not be null");
  }
  return toBuilder().setMethod(method).build();
}
/**
 * Lets per-table schemas be computed inside the pipeline itself.
 *
 * <p>The supplied {@link PCollectionView} must map string tablespecs (in the same format
 * accepted by {@link #to(String)}) to JSON-serialized {@link TableSchema}s.
 */
public Write<T> withSchemaFromView(PCollectionView<Map<String, String>> view) {
  if (view == null) {
    throw new IllegalArgumentException("view can not be null");
  }
  return toBuilder().setSchemaFromView(view).build();
}
/** Sets the description attached to the destination table. */
public Write<T> withTableDescription(String tableDescription) {
  if (tableDescription == null) {
    throw new IllegalArgumentException("tableDescription can not be null");
  }
  return toBuilder().setTableDescription(tableDescription).build();
}
/** Identical to {@link #withTimePartitioning}, but accepts a JSON-serialized object. */
public Write<T> withJsonTimePartitioning(ValueProvider<String> partitioning) {
  if (partitioning == null) {
    throw new IllegalArgumentException("partitioning can not be null");
  }
  return toBuilder().setJsonTimePartitioning(partitioning).build();
}
/**
 * Writes each element to the table chosen by the given function. Because the function receives
 * a {@link ValueInSingleWindow}, the destination may depend on the element's value, its window,
 * or both.
 */
public Write<T> to(
    SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction) {
  if (tableFunction == null) {
    throw new IllegalArgumentException("tableFunction can not be null");
  }
  return toBuilder().setTableFunction(tableFunction).build();
}
/** Like {@link #withJsonSchema(String)}, but the schema is supplied by a deferred {@link ValueProvider}. */
public Write<T> withJsonSchema(ValueProvider<String> jsonSchema) {
  if (jsonSchema == null) {
    throw new IllegalArgumentException("jsonSchema can not be null");
  }
  return toBuilder().setJsonSchema(jsonSchema).build();
}
/** Writes to the table and schema described by the supplied {@link DynamicDestinations}. */
public Write<T> to(DynamicDestinations<T, ?> dynamicDestinations) {
  if (dynamicDestinations == null) {
    throw new IllegalArgumentException("dynamicDestinations can not be null");
  }
  return toBuilder().setDynamicDestinations(dynamicDestinations).build();
}
/** Turns off validation of the destination BigQuery table. */
public Write<T> withoutValidation() {
  return toBuilder().setValidate(false).build();
}
/** Supplies the function that converts each element of type {@code T} into a BigQuery {@link TableRow}. */
public Write<T> withFormatFunction(SerializableFunction<T, TableRow> formatFunction) {
  if (formatFunction == null) {
    throw new IllegalArgumentException("formatFunction can not be null");
  }
  return toBuilder().setFormatFunction(formatFunction).build();
}
/** Specifies the project id to use for BigQuery load jobs, via a deferred {@link ValueProvider}. */
public Write<T> withLoadJobProjectId(ValueProvider<String> loadJobProjectId) {
  if (loadJobProjectId == null) {
    throw new IllegalArgumentException("loadJobProjectId can not be null");
  }
  return toBuilder().setLoadJobProjectId(loadJobProjectId).build();
}
/** For testing only: caps the size of each written file, in bytes. Must be positive. */
@VisibleForTesting
Write<T> withMaxFileSize(long maxFileSize) {
  boolean isPositive = maxFileSize > 0;
  checkArgument(isPositive, "maxFileSize must be > 0, but was: %s", maxFileSize);
  return toBuilder().setMaxFileSize(maxFileSize).build();
}
/**
 * Control how many file shards are written when using BigQuery load jobs. Applicable only when
 * also setting {@link #withTriggeringFrequency}.
 *
 * <p>NOTE(review): this javadoc previously claimed a default of 1000, but the builder's
 * defaults initialize numFileShards to 0 — presumably meaning "unset"; confirm what value is
 * actually used when this method is never called.
 */
@Experimental
public Write<T> withNumFileShards(int numFileShards) {
  checkArgument(numFileShards > 0, "numFileShards must be > 0, but was: %s", numFileShards);
  return toBuilder().setNumFileShards(numFileShards).build();
}
/** Chooses how existing data in the destination table is handled when the table already exists. */
public Write<T> withWriteDisposition(WriteDisposition writeDisposition) {
  if (writeDisposition == null) {
    throw new IllegalArgumentException("writeDisposition can not be null");
  }
  return toBuilder().setWriteDisposition(writeDisposition).build();
}
/** Like {@link #to(String)}, but the tablespec is supplied by a deferred {@link ValueProvider}. */
public Write<T> to(ValueProvider<String> tableSpec) {
  if (tableSpec == null) {
    throw new IllegalArgumentException("tableSpec can not be null");
  }
  // Parse the tablespec into a TableReference lazily, then serialize it back to JSON for storage.
  ValueProvider<TableReference> tableRef =
      NestedValueProvider.of(tableSpec, new TableSpecToTableRef());
  return toBuilder()
      .setJsonTableRef(NestedValueProvider.of(tableRef, new TableRefToJson()))
      .build();
}