/**
 * Creates a wrapper bound to the given pipeline options, backed by the concrete
 * {@link BigQueryServicesImpl}.
 *
 * @param bqOptions options used for all subsequent BigQuery service calls
 */
public BigQueryServicesWrapper(BigQueryOptions bqOptions) {
  this.bqOptions = bqOptions;
  this.bqServices = new BigQueryServicesImpl();
}
/**
 * Prepares the BigQuery fixtures for a test run: obtains a fresh dataset service and
 * creates the table derived from the test {@code description}.
 *
 * @param description identifies the running test; used to name the created table
 * @throws IOException if the service or table creation fails
 * @throws InterruptedException if interrupted while talking to BigQuery
 */
private void initializeBigQuery(Description description) throws IOException, InterruptedException {
  BigQueryServicesImpl services = new BigQueryServicesImpl();
  this.datasetService = services.getDatasetService(pipelineOptions);
  this.table = createTable(description);
}
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void deleteDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  executeWithRetries(
      client.datasets().delete(projectId, datasetId),
      // Fixed copy-paste error: this message previously said "table" although the RPC
      // deletes a dataset (the table wording belongs to deleteTable).
      String.format(
          "Unable to delete dataset: %s, aborting after %d retries.",
          datasetId, MAX_RPC_RETRIES),
      Sleeper.DEFAULT,
      createDefaultBackoff(),
      ALWAYS_RETRY);
}
/**
 * Executes {@code request}, retrying on {@link IOException} until either {@code backoff}
 * is exhausted or {@code shouldRetry} rejects the failure.
 *
 * @param request the RPC to execute
 * @param errorMessage message for the {@link IOException} thrown when all attempts fail
 * @param sleeper used to pause between attempts
 * @param backoff supplies the delay schedule; a STOP value ends retrying
 * @param shouldRetry predicate deciding whether a given failure is retriable
 * @return the RPC result from the first successful attempt
 * @throws IOException wrapping the last failure, once retries are exhausted or rejected
 * @throws InterruptedException if interrupted while sleeping between attempts
 */
@VisibleForTesting
static <T> T executeWithRetries(
    AbstractGoogleClientRequest<T> request,
    String errorMessage,
    Sleeper sleeper,
    BackOff backoff,
    SerializableFunction<IOException, Boolean> shouldRetry)
    throws IOException, InterruptedException {
  // Narrowed from Exception: only IOExceptions are ever captured here, and the narrower
  // type documents exactly what can end up as the cause of the final failure.
  IOException lastException = null;
  do {
    try {
      return request.execute();
    } catch (IOException e) {
      lastException = e;
      if (!shouldRetry.apply(e)) {
        // Non-retriable per policy: fall through to the wrapping throw below.
        break;
      }
      LOG.info("Ignore the error and retry the request.", e);
    }
  } while (nextBackOff(sleeper, backoff));
  // Preserve the last failure as the cause so callers keep the full stack trace.
  throw new IOException(errorMessage, lastException);
}
/** Returns a BigQuery client builder using the specified {@link BigQueryOptions}. */ private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { RetryHttpRequestInitializer httpRequestInitializer = new RetryHttpRequestInitializer(ImmutableList.of(404)); httpRequestInitializer.setWriteTimeout(options.getHTTPWriteTimeout()); return new Bigquery.Builder( Transport.getTransport(), Transport.getJsonFactory(), chainHttpRequestInitializer( options.getGcpCredential(), // Do not log 404. It clutters the output and is possibly even required by the caller. httpRequestInitializer)) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); }
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public Dataset getDataset(String projectId, String datasetId)
    throws IOException, InterruptedException {
  String failureMessage =
      String.format(
          "Unable to get dataset: %s, aborting after %d retries.", datasetId, MAX_RPC_RETRIES);
  // NOT_FOUND is surfaced to the caller immediately rather than retried.
  return executeWithRetries(
      client.datasets().get(projectId, datasetId),
      failureMessage,
      Sleeper.DEFAULT,
      createDefaultBackoff(),
      DONT_RETRY_NOT_FOUND);
}
/**
 * Returns whether the given table currently contains no rows, using a single
 * {@code tabledata.list} page (retried per {@code backoff}/{@code sleeper}).
 */
@VisibleForTesting
boolean isTableEmpty(TableReference tableRef, BackOff backoff, Sleeper sleeper)
    throws IOException, InterruptedException {
  String failureMessage =
      String.format(
          "Unable to list table data: %s, aborting after %d retries.",
          tableRef.getTableId(), MAX_RPC_RETRIES);
  TableDataList dataList =
      executeWithRetries(
          client
              .tabledata()
              .list(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
          failureMessage,
          sleeper,
          backoff,
          DONT_RETRY_NOT_FOUND);
  // An absent rows field and an empty rows list both mean the table has no data.
  List<TableRow> rows = dataList.getRows();
  return rows == null || rows.isEmpty();
}
/**
 * Creates a {@code CreateTables} transform using the production
 * {@link BigQueryServicesImpl}; delegates to the full constructor.
 *
 * @param createDisposition whether tables may be created if missing
 * @param dynamicDestinations supplies the per-element destination tables
 */
public CreateTables(
    CreateDisposition createDisposition,
    DynamicDestinations<?, DestinationT> dynamicDestinations) {
  this(createDisposition, new BigQueryServicesImpl(), dynamicDestinations);
}
/**
 * Fetches the table metadata, retrying per {@code backoff}/{@code sleeper}; returns
 * {@code null} when the table does not exist.
 */
@VisibleForTesting
@Nullable
Table getTable(TableReference ref, BackOff backoff, Sleeper sleeper)
    throws IOException, InterruptedException {
  String failureMessage =
      String.format(
          "Unable to get table: %s, aborting after %d retries.", ref.getTableId(), MAX_RPC_RETRIES);
  try {
    return executeWithRetries(
        client.tables().get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()),
        failureMessage,
        sleeper,
        backoff,
        DONT_RETRY_NOT_FOUND);
  } catch (IOException e) {
    // A missing table is an expected outcome, reported as null; anything else propagates.
    if (!errorExtractor.itemNotFound(e)) {
      throw e;
    }
    return null;
  }
}
/**
 * Constructs a {@code BatchLoads} write stage backed by the production
 * {@link BigQueryServicesImpl}.
 *
 * @param writeDisposition how to treat existing table data (append/truncate/empty)
 * @param createDisposition whether destination tables may be created if missing
 * @param singletonTable whether all data goes to a single known table
 * @param dynamicDestinations supplies the per-element destination tables
 * @param destinationCoder coder for the destination type
 * @param customGcsTempLocation GCS prefix for temporary load files
 * @param loadJobProjectId optional project to bill load jobs to, or null for the default
 * @param ignoreUnknownValues whether load jobs should drop fields absent from the schema
 */
BatchLoads(
    WriteDisposition writeDisposition,
    CreateDisposition createDisposition,
    boolean singletonTable,
    DynamicDestinations<?, DestinationT> dynamicDestinations,
    Coder<DestinationT> destinationCoder,
    ValueProvider<String> customGcsTempLocation,
    @Nullable ValueProvider<String> loadJobProjectId,
    boolean ignoreUnknownValues) {
  bigQueryServices = new BigQueryServicesImpl();
  this.writeDisposition = writeDisposition;
  this.createDisposition = createDisposition;
  this.singletonTable = singletonTable;
  this.dynamicDestinations = dynamicDestinations;
  this.destinationCoder = destinationCoder;
  // Tuning knobs start at their defaults; setters elsewhere may override them.
  this.maxNumWritersPerBundle = DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE;
  this.maxFileSize = DEFAULT_MAX_FILE_SIZE;
  this.numFileShards = DEFAULT_NUM_FILE_SHARDS;
  // No triggering frequency by default: batch (non-streaming) load behavior.
  this.triggeringFrequency = null;
  this.customGcsTempLocation = customGcsTempLocation;
  this.loadJobProjectId = loadJobProjectId;
  this.ignoreUnknownValues = ignoreUnknownValues;
}
/**
 * Updates only the description of the given table via a PATCH request, retrying until
 * the default backoff is exhausted.
 */
@Override
public Table patchTableDescription(
    TableReference tableReference, @Nullable String tableDescription)
    throws IOException, InterruptedException {
  // PATCH semantics: only the fields set on this sparse Table object are modified.
  Table descriptionOnly = new Table();
  descriptionOnly.setDescription(tableDescription);
  String failureMessage =
      String.format(
          "Unable to patch table description: %s, aborting after %d retries.",
          tableReference, MAX_RPC_RETRIES);
  return executeWithRetries(
      client
          .tables()
          .patch(
              tableReference.getProjectId(),
              tableReference.getDatasetId(),
              tableReference.getTableId(),
              descriptionOnly),
      failureMessage,
      Sleeper.DEFAULT,
      createDefaultBackoff(),
      ALWAYS_RETRY);
}
}
/**
 * Constructs the streaming-insert write stage.
 *
 * @param bigQueryServices services facade used to issue the inserts
 * @param retryPolicy decides which failed rows are retried
 * @param extendedErrorInfo whether failed rows carry full error details
 * @param skipInvalidRows whether BigQuery should skip rows that fail validation
 * @param ignoreUnknownValues whether BigQuery should drop fields absent from the schema
 */
private StreamingWriteTables(
    BigQueryServices bigQueryServices,
    InsertRetryPolicy retryPolicy,
    boolean extendedErrorInfo,
    boolean skipInvalidRows,
    boolean ignoreUnknownValues) {
  this.bigQueryServices = bigQueryServices;
  this.retryPolicy = retryPolicy;
  this.extendedErrorInfo = extendedErrorInfo;
  this.skipInvalidRows = skipInvalidRows;
  this.ignoreUnknownValues = ignoreUnknownValues;
}
/**
 * {@inheritDoc}
 *
 * <p>Tries executing the RPC for at most {@code MAX_RPC_RETRIES} times until it succeeds.
 *
 * @throws IOException if it exceeds {@code MAX_RPC_RETRIES} attempts.
 */
@Override
public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
  String failureMessage =
      String.format(
          "Unable to delete table: %s, aborting after %d retries.",
          tableRef.getTableId(), MAX_RPC_RETRIES);
  executeWithRetries(
      client
          .tables()
          .delete(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId()),
      failureMessage,
      Sleeper.DEFAULT,
      createDefaultBackoff(),
      ALWAYS_RETRY);
}
/**
 * Constructor. Uses the production {@link BigQueryServicesImpl}, an always-retry insert
 * policy, and disables extended errors, skip-invalid-rows, and ignore-unknown-values;
 * delegates to the full constructor.
 *
 * @param createDisposition whether destination tables may be created if missing
 * @param dynamicDestinations supplies the per-element destination tables
 */
public StreamingInserts(
    CreateDisposition createDisposition,
    DynamicDestinations<?, DestinationT> dynamicDestinations) {
  this(
      createDisposition,
      dynamicDestinations,
      new BigQueryServicesImpl(),
      InsertRetryPolicy.alwaysRetry(),
      false,
      false,
      false);
}
/**
 * Verifies that {@link BigQueryServicesImpl#executeWithRetries} returns the parsed response
 * on a first-attempt success, making exactly one HTTP round trip.
 */
@Test
public void testExecuteWithRetries() throws IOException, InterruptedException {
  Table testTable = new Table();

  // Stub a single successful HTTP 200 response whose body is the serialized table.
  when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
  when(response.getStatusCode()).thenReturn(200);
  when(response.getContent()).thenReturn(toStream(testTable));

  Table table =
      BigQueryServicesImpl.executeWithRetries(
          bigquery.tables().get("projectId", "datasetId", "tableId"),
          "Failed to get table.",
          Sleeper.DEFAULT,
          // STOP_BACKOFF means no retries would occur — the single attempt must succeed.
          BackOff.STOP_BACKOFF,
          BigQueryServicesImpl.ALWAYS_RETRY);

  assertEquals(testTable, table);
  // Exactly one request: each response accessor consumed once proves no retry happened.
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
/**
 * Reads from a BigQuery table or query and returns a {@link PCollection} with one element per
 * each row of the table or query result, parsed from the BigQuery AVRO format using the specified
 * function.
 *
 * <p>Each {@link SchemaAndRecord} contains a BigQuery {@link TableSchema} and a {@link
 * GenericRecord} representing the row, indexed by column name. Here is a sample parse function
 * that parses click events from a table.
 *
 * <pre>{@code
 * class ClickEvent { long userId; String url; ... }
 *
 * p.apply(BigQueryIO.read(new SerializableFunction<SchemaAndRecord, ClickEvent>() {
 *   public ClickEvent apply(SchemaAndRecord record) {
 *     GenericRecord r = record.getRecord();
 *     return new ClickEvent((Long) r.get("userId"), (String) r.get("url"));
 *   }
 * }).from("...");
 * }</pre>
 */
public static <T> TypedRead<T> read(SerializableFunction<SchemaAndRecord, T> parseFn) {
  // Defaults: validation on, template compatibility off, production services.
  AutoValue_BigQueryIO_TypedRead.Builder<T> builder =
      new AutoValue_BigQueryIO_TypedRead.Builder<T>();
  builder
      .setValidate(true)
      .setWithTemplateCompatibility(false)
      .setBigQueryServices(new BigQueryServicesImpl())
      .setParseFn(parseFn);
  return builder.build();
}
/**
 * Submits the query as a dry-run job (no data processed) and returns the statistics
 * BigQuery estimates for it, retrying the insert RPC until the default backoff is exhausted.
 */
@Override
public JobStatistics dryRunQuery(
    String projectId, JobConfigurationQuery queryConfig, String location)
    throws InterruptedException, IOException {
  // setDryRun(true) makes BigQuery validate and estimate the query without running it.
  JobConfiguration dryRunConfig = new JobConfiguration().setQuery(queryConfig).setDryRun(true);
  JobReference jobRef = new JobReference().setLocation(location).setProjectId(projectId);
  Job job = new Job().setJobReference(jobRef).setConfiguration(dryRunConfig);
  String failureMessage =
      String.format(
          "Unable to dry run query: %s, aborting after %d retries.",
          queryConfig, MAX_RPC_RETRIES);
  Job inserted =
      executeWithRetries(
          client.jobs().insert(projectId, job),
          failureMessage,
          Sleeper.DEFAULT,
          createDefaultBackoff(),
          ALWAYS_RETRY);
  return inserted.getStatistics();
}
return new AutoValue_BigQueryIO_Write.Builder<T>() .setValidate(true) .setBigQueryServices(new BigQueryServicesImpl()) .setCreateDisposition(Write.CreateDisposition.CREATE_IF_NEEDED) .setWriteDisposition(Write.WriteDisposition.WRITE_EMPTY)