/**
 * Stages the given bytes under {@code baseName} at the configured staging location.
 *
 * <p>Delegates to {@code PackageUtil#stageToFile} on a {@link PackageUtil} backed by the default
 * thread pool; the utility is closed before this method returns.
 *
 * @param bytes the content to stage
 * @param baseName the base name handed to the package utility for the staged file
 * @return the package description produced by the package utility
 */
@Override
public DataflowPackage stageToFile(byte[] bytes, String baseName) {
  try (PackageUtil stagingUtil = PackageUtil.withDefaultThreadPool()) {
    // Resolved inside the try block so the utility is already open, as before.
    String stagingLocation = options.getStagingLocation();
    DataflowPackage stagedPackage =
        stagingUtil.stageToFile(bytes, baseName, stagingLocation, buildCreateOptions());
    return stagedPackage;
  }
}
/**
 * Uploads the given files to {@link DataflowPipelineOptions#getStagingLocation()}. Each staged
 * file name is suffixed with the file's md5 hash so that distinct contents do not collide.
 *
 * <p>Uses {@link DataflowPipelineOptions#getGcsUploadBufferSizeBytes()}.
 *
 * @param filesToStage the classpath elements to stage
 * @return the packages produced by the package utility for the staged elements
 */
@Override
public List<DataflowPackage> stageFiles(List<String> filesToStage) {
  try (PackageUtil stagingUtil = PackageUtil.withDefaultThreadPool()) {
    // Resolved inside the try block so the utility is already open, as before.
    String stagingLocation = options.getStagingLocation();
    List<DataflowPackage> stagedPackages =
        stagingUtil.stageClasspathElements(filesToStage, stagingLocation, buildCreateOptions());
    return stagedPackages;
  }
}
/**
 * Stages {@link DataflowPipelineOptions#getFilesToStage()}, which defaults to every file on the
 * classpath unless overridden, as well as {@link
 * DataflowPipelineDebugOptions#getOverrideWindmillBinary()} and the Dataflow worker jar if
 * specified.
 *
 * <p>The configured file list is copied before the extra entries are appended, so the pipeline
 * options are never modified: repeated calls do not accumulate duplicate entries, and an
 * unmodifiable configured list is supported.
 *
 * @return the packages returned by {@link #stageFiles(List)} for the combined file list
 * @throws NullPointerException if the staging location or the configured file list is null
 * @see #stageFiles(List)
 */
@Override
public List<DataflowPackage> stageDefaultFiles() {
  checkNotNull(options.getStagingLocation());
  String windmillBinary =
      options.as(DataflowPipelineDebugOptions.class).getOverrideWindmillBinary();
  String dataflowWorkerJar = options.getDataflowWorkerJar();

  // Work on a private copy: appending to the list returned by the options would leak the
  // extra entries into the shared pipeline options and duplicate them on every call.
  List<String> filesToStage = new java.util.ArrayList<>(options.getFilesToStage());

  if (windmillBinary != null) {
    filesToStage.add("windmill_main=" + windmillBinary);
  }
  if (dataflowWorkerJar != null && !dataflowWorkerJar.isEmpty()) {
    filesToStage.add("dataflow-worker.jar=" + dataflowWorkerJar);
  }
  return stageFiles(filesToStage);
}
stagingLocation = dataflowOptions.getStagingLocation(); } catch (Exception e) { throw new IllegalArgumentException(
/**
 * A {@code gcpTempLocation} that is not a GCS path must make the default staging location fail
 * with an {@link IllegalArgumentException} whose cause mentions the expected {@code gs://} form.
 */
@Test
public void testDefaultInvalidGcpTempLocation() {
  String expectedMessage =
      "Error constructing default value for stagingLocation: gcpTempLocation is not"
          + " a valid GCS path";

  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  dataflowOptions.setGcpTempLocation("file://temp_location");

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(expectedMessage);
  thrown.expectCause(hasMessage(containsString("Expected a valid 'gs://' path")));

  // Computing the default is what triggers the failure.
  dataflowOptions.getStagingLocation();
}
/**
 * With only a non-GCS {@code tempLocation} set, resolving the default staging location must fail
 * with an {@link IllegalArgumentException} caused by the failed {@code gcpTempLocation} default.
 */
@Test
public void testDefaultNoneGcsTempLocation() {
  String expectedMessage =
      "Error constructing default value for stagingLocation: "
          + "failed to retrieve gcpTempLocation.";
  String expectedCauseFragment = "Error constructing default value for gcpTempLocation";

  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  dataflowOptions.setTempLocation("file://temp_location");

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(expectedMessage);
  thrown.expectCause(hasMessage(containsString(expectedCauseFragment)));

  // Computing the default is what triggers the failure.
  dataflowOptions.getStagingLocation();
}
/**
 * With an empty project and no temp or staging location configured, resolving the default
 * staging location must fail with an {@link IllegalArgumentException}.
 */
@Test
public void testDefaultStagingLocationUnset() {
  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  dataflowOptions.setProject("");

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Error constructing default value for stagingLocation");

  // Computing the default is what triggers the failure.
  dataflowOptions.getStagingLocation();
}
}
LOG.info("Staging pipeline description to {}", options.getStagingLocation()); byte[] serializedProtoPipeline = jobSpecification.getPipelineProto().toByteArray(); DataflowPackage stagedPipeline =
/**
 * When both {@code tempLocation} and {@code gcpTempLocation} are set, the default staging
 * location is derived from {@code gcpTempLocation}.
 */
@Test
public void testDefaultToGcpTempLocation() {
  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  FileSystems.setDefaultPipelineOptions(dataflowOptions);
  dataflowOptions.setPathValidatorClass(NoopPathValidator.class);
  dataflowOptions.setTempLocation("gs://temp_location/");
  dataflowOptions.setGcpTempLocation("gs://gcp_temp_location/");

  String expectedStaging = "gs://gcp_temp_location/staging/";
  assertEquals(expectedStaging, dataflowOptions.getStagingLocation());
}
/**
 * An explicitly set staging location is returned as-is, while {@code gcpTempLocation} falls back
 * to the configured {@code tempLocation}.
 */
@Test
public void testStagingLocation() {
  String tempLocation = "gs://temp_location";
  String stagingLocation = "gs://staging_location";

  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  dataflowOptions.setPathValidatorClass(NoopPathValidator.class);
  dataflowOptions.setTempLocation(tempLocation);
  dataflowOptions.setStagingLocation(stagingLocation);

  assertEquals(tempLocation, dataflowOptions.getGcpTempLocation());
  assertEquals(stagingLocation, dataflowOptions.getStagingLocation());
}
/**
 * With only a GCS {@code tempLocation} set, both {@code gcpTempLocation} and the default staging
 * location are derived from it.
 */
@Test
public void testDefaultToTempLocation() {
  DataflowPipelineOptions dataflowOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  FileSystems.setDefaultPipelineOptions(dataflowOptions);
  dataflowOptions.setPathValidatorClass(NoopPathValidator.class);
  dataflowOptions.setTempLocation("gs://temp_location/");

  String expectedGcpTemp = "gs://temp_location/";
  assertEquals(expectedGcpTemp, dataflowOptions.getGcpTempLocation());

  String expectedStaging = "gs://temp_location/staging/";
  assertEquals(expectedStaging, dataflowOptions.getStagingLocation());
}
@Test public void testGcsStagingLocationInitialization() throws Exception { // Set temp location (required), and check that staging location is set. DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setTempLocation(VALID_TEMP_BUCKET); options.setProject(PROJECT_ID); options.setGcpCredential(new TestCredential()); options.setGcsUtil(mockGcsUtil); options.setRunner(DataflowRunner.class); DataflowRunner.fromOptions(options); assertNotNull(options.getStagingLocation()); }
assertThat(optionsMap, hasEntry("jobName", (Object) options.getJobName())); assertThat(optionsMap, hasEntry("tempLocation", (Object) options.getTempLocation())); assertThat(optionsMap, hasEntry("stagingLocation", (Object) options.getStagingLocation())); assertThat( optionsMap,