/** Returns a BigQuery client builder using the specified {@link BigQueryOptions}. */ private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { RetryHttpRequestInitializer httpRequestInitializer = new RetryHttpRequestInitializer(ImmutableList.of(404)); httpRequestInitializer.setWriteTimeout(options.getHTTPWriteTimeout()); return new Bigquery.Builder( Transport.getTransport(), Transport.getJsonFactory(), chainHttpRequestInitializer( options.getGcpCredential(), // Do not log 404. It clutters the output and is possibly even required by the caller. httpRequestInitializer)) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); }
/**
 * Deletes the temporary table that holds the query results, then the dataset that contains it.
 *
 * <p>The temporary table reference is rebuilt from the project in {@code bqOptions} and the job
 * id token derived from the job name and {@code stepUuid}.
 *
 * @param bqOptions options providing the project, the job name and the dataset service
 * @throws Exception if either delete call fails
 */
@Override
protected void cleanupTempResource(BigQueryOptions bqOptions) throws Exception {
  String jobIdToken = createJobIdToken(bqOptions.getJobName(), stepUuid);
  TableReference tempTable = createTempTableReference(bqOptions.getProject(), jobIdToken);
  DatasetService datasetService = bqServices.getDatasetService(bqOptions);

  // The table is removed first, then the dataset it lives in.
  LOG.info("Deleting temporary table with query results {}", tempTable);
  datasetService.deleteTable(tempTable);

  String tempDatasetId = tempTable.getDatasetId();
  LOG.info("Deleting temporary dataset with query results {}", tempDatasetId);
  datasetService.deleteDataset(tempTable.getProjectId(), tempTable.getDatasetId());
}
private static Bigquery newBigQueryClient(BigQueryOptions options) { return new Bigquery.Builder( Transport.getTransport(), Transport.getJsonFactory(), chainHttpRequestInitializer( options.getGcpCredential(), // Do not log 404. It clutters the output and is possibly even required by the // caller. new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()) .build(); }
bqOptions.setProject("project"); String stepUuid = "testStepUuid"; bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid)); fakeDatasetService.createDataset( bqOptions.getProject(), tempTableReference.getDatasetId(), "", "", null); fakeDatasetService.createTable( new Table() .setProjectId(bqOptions.getProject()) .setDatasetId(tempTableReference.getDatasetId()) .setTableId(tempTableReference.getTableId()); bqOptions.getProject(), query, new JobStatistics()
/**
 * Removes the files produced by the extract job associated with this pipeline job.
 *
 * <p>The extract job is looked up by a job reference built from the executing project and the
 * extract job id derived from the job name and the job UUID. Nothing is deleted when the job
 * lookup returns {@code null} or when no extract file paths are found.
 *
 * @param c container exposing the pipeline options and the job id
 * @throws Exception if looking up the job, resolving file paths or deleting files fails
 */
@Override
void cleanup(PassThroughThenCleanup.ContextContainer c) throws Exception {
  BigQueryOptions bqOptions = c.getPipelineOptions().as(BigQueryOptions.class);
  String jobUuid = c.getJobId();

  final String extractDestinationDir =
      resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", jobUuid);
  final String executingProject = bqOptions.getProject();

  String extractJobId = getExtractJobId(createJobIdToken(bqOptions.getJobName(), jobUuid));
  JobReference jobRef = new JobReference().setProjectId(executingProject).setJobId(extractJobId);

  Job extractJob = getBigQueryServices().getJobService(bqOptions).getJob(jobRef);
  if (extractJob == null) {
    // No extract job was found for this reference, so there is nothing to clean up.
    return;
  }

  List<ResourceId> extractFiles = getExtractFilePaths(extractDestinationDir, extractJob);
  if (extractFiles == null || extractFiles.isEmpty()) {
    return;
  }

  // Files that are already gone are ignored rather than treated as an error.
  FileSystems.delete(extractFiles, MoveOptions.StandardMoveOptions.IGNORE_MISSING_FILES);
}
// Closes an enclosing declaration that is opened outside this excerpt.
};
BigqueryClient bigQueryClient = new BigqueryClient(bigQueryOptions.getAppName()); .queryWithRetries(options.getInput(), bigQueryOptions.getProject(), true) .getRows() .get(0); .queryUnflattened(options.getInput(), bigQueryOptions.getProject(), true) .get(0); .queryUnflattened(options.getUnflattenableInput(), bigQueryOptions.getProject(), true) .get(0);
/**
 * Runs an extract of the source table into the temp location and returns the table schema
 * together with the extracted files.
 *
 * @param options pipeline options, viewed as {@link BigQueryOptions} for project, temp location
 *     and service lookup
 * @return the schema of the extracted table and the files returned by the extract
 * @throws IOException if the dataset service returns no table for the table to extract
 * @throws Exception if any of the service calls or the extract itself fails
 */
protected ExtractResult extractFiles(PipelineOptions options) throws Exception {
  BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
  TableReference sourceTable = getTableToExtract(bqOptions);

  BigQueryServices.DatasetService datasetService = bqServices.getDatasetService(bqOptions);
  Table tableMetadata = datasetService.getTable(sourceTable);
  if (tableMetadata == null) {
    String tableSpec = BigQueryHelpers.toTableSpec(sourceTable);
    throw new IOException(
        String.format("Cannot start an export job since table %s does not exist", tableSpec));
  }
  TableSchema schema = tableMetadata.getSchema();

  JobService jobService = bqServices.getJobService(bqOptions);
  String jobIdToken = createJobIdToken(options.getJobName(), stepUuid);
  String extractJobId = getExtractJobId(jobIdToken);
  final String extractDestinationDir =
      resolveTempLocation(bqOptions.getTempLocation(), "BigQueryExtractTemp", stepUuid);

  // The location passed to the extract is looked up from the dataset of the source table.
  String bqLocation =
      BigQueryHelpers.getDatasetLocation(
          datasetService, sourceTable.getProjectId(), sourceTable.getDatasetId());

  List<ResourceId> extractedFiles =
      executeExtract(
          extractJobId,
          sourceTable,
          jobService,
          bqOptions.getProject(),
          extractDestinationDir,
          bqLocation);

  return new ExtractResult(schema, extractedFiles);
}
/**
 * Checks that a query source reports, as its estimated size, the total bytes processed that the
 * fake job service is told to return for the dry run of the query.
 */
@Test
public void testBigQueryQuerySourceEstimatedSize() throws Exception {
  TableRow rowA = new TableRow().set("name", "A").set("number", 10L);
  TableRow rowB = new TableRow().set("name", "B").set("number", 11L);
  TableRow rowC = new TableRow().set("name", "C").set("number", 12L);
  List<TableRow> rows = ImmutableList.of(rowA, rowB, rowC);

  BigQueryOptions bqOptions = PipelineOptionsFactory.create().as(BigQueryOptions.class);
  bqOptions.setProject("project");

  String stepUuid = "testStepUuid";
  String encodedQuery = FakeBigQueryServices.encodeQuery(rows);

  BigQueryQuerySource<TableRow> querySource =
      BigQueryQuerySource.create(
          stepUuid,
          ValueProvider.StaticValueProvider.of(encodedQuery),
          true /* flattenResults */,
          true /* useLegacySql */,
          fakeBqServices,
          TableRowJsonCoder.of(),
          BigQueryIO.TableRowParser.INSTANCE,
          QueryPriority.BATCH,
          null);

  // Register the statistics the fake job service should hand back for the dry run.
  JobStatistics dryRunStats =
      new JobStatistics().setQuery(new JobStatistics2().setTotalBytesProcessed(100L));
  fakeJobService.expectDryRunQuery(bqOptions.getProject(), encodedQuery, dryRunStats);

  assertEquals(100, querySource.getEstimatedSizeBytes(bqOptions));
}
/**
 * Fills in the {@link TableReference#projectId} of the given table reference with the project
 * from {@code bqOptions} when the reference has no project ID of its own.
 *
 * <p>The reference is modified in place and the same instance is returned. A reference that
 * already carries a project ID is returned untouched.
 *
 * @param bqOptions options supplying the default project
 * @param tableReference reference to complete
 * @return {@code tableReference}
 * @throws IllegalStateException if neither the reference nor the options provide a project ID
 */
private TableReference setDefaultProjectIfAbsent(
    BigQueryOptions bqOptions, TableReference tableReference) {
  if (!Strings.isNullOrEmpty(tableReference.getProjectId())) {
    return tableReference;
  }

  String tableRefName = TableReference.class.getSimpleName();
  String optionsName = BigQueryOptions.class.getSimpleName();
  String defaultProject = bqOptions.getProject();

  checkState(
      !Strings.isNullOrEmpty(defaultProject),
      "No project ID set in %s or %s, cannot construct a complete %s",
      tableRefName,
      optionsName,
      tableRefName);
  LOG.info("Project ID not set in {}. Using default project from {}.", tableRefName, optionsName);

  tableReference.setProjectId(defaultProject);
  return tableReference;
}
/**
 * Prepares the BigQuery environment for a test: picks a dataset id, builds the test pipeline
 * options, creates the dataset and computes the output table spec.
 *
 * <p>The dataset id is {@code bq_query_to_table_<millis>_<n>}, where {@code <millis>} is the
 * current time in milliseconds and {@code <n>} is a value in {@code [100, 999]} drawn from a
 * {@link Random} seeded with that same time.
 */
@Before
public void setupBqEnvironment() {
  long timeSeed = System.currentTimeMillis();
  int suffix = new Random(timeSeed).nextInt(900) + 100;
  this.bigQueryDatasetId = "bq_query_to_table_" + timeSeed + "_" + suffix;

  PipelineOptionsFactory.register(BigQueryToTableOptions.class);
  options = TestPipeline.testingPipelineOptions().as(BigQueryToTableOptions.class);
  options.setTempLocation(options.getTempRoot() + "/bq_it_temp");
  project = TestPipeline.testingPipelineOptions().as(GcpOptions.class).getProject();

  bqOption = options.as(BigQueryOptions.class);
  bqClient = new BigqueryClient(bqOption.getAppName());
  bqClient.createNewDataset(project, this.bigQueryDatasetId);

  // Table spec in the form <project>:<dataset>.<table>.
  outputTable =
      String.format(
          "%s:%s.%s", project, this.bigQueryDatasetId, BigQueryToTableIT.OUTPUT_TABLE_NAME);
}
@Test public void testModelBigQueryIO() { // We cannot test BigQueryIO functionality in unit tests, therefore we limit ourselves // to making sure the pipeline containing BigQuery sources and sinks can be built. // // To run locally, set `runLocally` to `true`. You will have to set `project`, `dataset` and // `table` to the BigQuery table the test will write into. boolean runLocally = false; if (runLocally) { String project = "my-project"; String dataset = "samples"; // this must already exist String table = "modelBigQueryIO"; // this will be created if needed BigQueryOptions options = PipelineOptionsFactory.create().as(BigQueryOptions.class); options.setProject(project); options.setTempLocation("gs://" + project + "/samples/temp/"); Pipeline p = Pipeline.create(options); Snippets.modelBigQueryIO(p, project, dataset, table); p.run(); } else { Pipeline p = Pipeline.create(); Snippets.modelBigQueryIO(p); } }
// Point the temp location at the absolute path of the test folder's root, then view the options
// as BigQueryOptions and set the project to the fixed id "project".
options.setTempLocation(testFolder.getRoot().getAbsolutePath()); BigQueryOptions bqOptions = options.as(BigQueryOptions.class); bqOptions.setProject("project");
/** Returns a BigQuery client builder using the specified {@link BigQueryOptions}. */ private static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { return new Bigquery.Builder( Transport.getTransport(), Transport.getJsonFactory(), chainHttpRequestInitializer( options.getGcpCredential(), // Do not log 404. It clutters the output and is possibly even required by the caller. new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); }
String tempLocation = bqOptions.getTempLocation(); checkArgument( !Strings.isNullOrEmpty(tempLocation), try { jobService.dryRunQuery( bqOptions.getProject(), new JobConfigurationQuery() .setQuery(getQuery().get())
/**
 * Returns the dry-run statistics for the query, issuing the dry run only when no statistics are
 * stored yet in {@code dryRunJobStats}.
 *
 * @param bqOptions options providing the job service and the project the dry run is issued in
 * @return the value held by {@code dryRunJobStats} after the optional dry run
 * @throws InterruptedException declared for the dry-run call
 * @throws IOException declared for the dry-run call
 */
private synchronized JobStatistics dryRunQueryIfNeeded(BigQueryOptions bqOptions)
    throws InterruptedException, IOException {
  if (dryRunJobStats.get() != null) {
    // Statistics are already stored; skip the dry run.
    return dryRunJobStats.get();
  }

  JobStatistics freshStats =
      bqServices
          .getJobService(bqOptions)
          .dryRunQuery(bqOptions.getProject(), createBasicQueryConfig(), this.location);
  // Store the result only if the holder is still empty.
  dryRunJobStats.compareAndSet(null, freshStats);
  return dryRunJobStats.get();
}
String jobIdToken = createJobIdToken(bqOptions.getJobName(), stepUuid); TableReference tableToExtract = createTempTableReference(bqOptions.getProject(), jobIdToken); tableToExtract.getDatasetId(), location, "Temporary tables for query results of job " + bqOptions.getJobName(), bqOptions.getProject(), tableToExtract, bqServices.getJobService(bqOptions),
/** * Returns the table to write, or {@code null} if writing with {@code tableFunction}. * * <p>If the table's project is not specified, use the executing project. */ @Nullable ValueProvider<TableReference> getTableWithDefaultProject(BigQueryOptions bqOptions) { ValueProvider<TableReference> table = getTable(); if (table == null) { return table; } if (!table.isAccessible()) { LOG.info( "Using a dynamic value for table input. This must contain a project" + " in the table reference: {}", table); return table; } if (Strings.isNullOrEmpty(table.get().getProjectId())) { // If user does not specify a project we assume the table to be located in // the default project. TableReference tableRef = table.get(); tableRef.setProjectId(bqOptions.getProject()); return NestedValueProvider.of( StaticValueProvider.of(BigQueryHelpers.toJsonString(tableRef)), new JsonTableRefToTableRef()); } return table; }