private static int getDesiredNumUnboundedSourceSplits(DataflowPipelineOptions options) {
  int cores = 4; // TODO: decide at runtime?
  if (options.getMaxNumWorkers() > 0) {
    return options.getMaxNumWorkers() * cores;
  } else if (options.getNumWorkers() > 0) {
    return options.getNumWorkers() * cores;
  } else {
    // Neither a worker count nor a worker ceiling is configured; assume 5 workers.
    return 5 * cores;
  }
}
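// Worked example of the fallback chain above (hypothetical option values):
//   maxNumWorkers = 10            -> 10 * 4 = 40 desired splits
//   numWorkers = 3 (no max set)   ->  3 * 4 = 12 desired splits
//   neither set                   ->  5 * 4 = 20 desired splits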
/** Get the project this job exists in. */
public String getProjectId() {
  return dataflowOptions.getProject();
}
/** Get the region this job exists in. */
public String getRegion() {
  return dataflowOptions.getRegion();
}
private Pipeline createTestStreamingRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setStreaming(true);
  options.setProject("someproject");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
@Test
public void testStagingLocation() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setTempLocation("gs://temp_location");
  options.setStagingLocation("gs://staging_location");
  assertEquals("gs://temp_location", options.getGcpTempLocation());
  assertEquals("gs://staging_location", options.getStagingLocation());
}
@Test
public void testInvalidNumberOfWorkerHarnessThreads() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  FileSystems.setDefaultPipelineOptions(options);
  options.setRunner(DataflowRunner.class);
  options.setProject("foo-12345");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcsUtil(mockGcsUtil);
  options.as(DataflowPipelineDebugOptions.class).setNumberOfWorkerHarnessThreads(-1);

  // `thrown` is the test class's JUnit ExpectedException rule.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Number of worker harness threads");
  thrown.expectMessage("Please make sure the value is non-negative.");

  DataflowRunner.fromOptions(options);
}
@Override
public String toString() {
  return "DataflowRunner#" + options.getJobName();
}
/**
 * Overrides the default log level for the passed-in package.
 *
 * <p>This is equivalent to calling {@link #addOverrideForName(String,
 * DataflowWorkerLoggingOptions.Level)} with the {@link Package#getName() package name}.
 */
public WorkerLogLevelOverrides addOverrideForPackage(Package pkg, Level level) {
  checkNotNull(pkg, "Expected package to be not null.");
  addOverrideForName(pkg.getName(), level);
  return this;
}
@Override
public DataflowPackage stageToFile(byte[] bytes, String baseName) {
  try (PackageUtil packageUtil = PackageUtil.withDefaultThreadPool()) {
    return packageUtil.stageToFile(
        bytes, baseName, options.getStagingLocation(), buildCreateOptions());
  }
}
/**
 * Overrides the default log level for the passed-in name.
 *
 * <p>Note that because of the hierarchical nature of logger names, this overrides the log level
 * of every logger whose name is the passed-in name or has a parent logger with that name.
 */
public WorkerLogLevelOverrides addOverrideForName(String name, Level level) {
  checkNotNull(name, "Expected name to be not null.");
  checkNotNull(level, "Expected level to be one of %s.", Arrays.toString(Level.values()));
  put(name, level);
  return this;
}
/** Creates a {@code DataflowClient} backed by the Dataflow service client set on the options. */
public static DataflowClient create(DataflowPipelineOptions options) {
  return new DataflowClient(options.getDataflowClient(), options);
}
/** Returns true if the {@code beam_fn_api} experiment is enabled in the given options. */
private static boolean isFnApi(DataflowPipelineOptions options) {
  List<String> experiments = options.getExperiments();
  return experiments != null && experiments.contains("beam_fn_api");
}
/** Returns true if the specified experiment is enabled, handling null experiments. */
public static boolean hasExperiment(DataflowPipelineDebugOptions options, String experiment) {
  List<String> experiments =
      firstNonNull(options.getExperiments(), Collections.<String>emptyList());
  return experiments.contains(experiment);
}
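// Minimal usage sketch, assuming `options` is an existing PipelineOptions
// instance: guard an experimental code path behind an experiment flag. The
// experiment name matches the one checked by the isFnApi helpers in this file.
DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
if (hasExperiment(debugOptions, "beam_fn_api")) {
  // Take the portable (Fn API) execution path.
}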
@Override
public Stager create(PipelineOptions options) {
  DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
  return InstanceBuilder.ofType(Stager.class)
      .fromClass(debugOptions.getStagerClass())
      .fromFactoryMethod("fromOptions")
      .withArg(PipelineOptions.class, options)
      .build();
}
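// Usage sketch: because the factory resolves the stager class from the debug
// options, a custom implementation can be swapped in before building. The
// CustomStager name is hypothetical; per the InstanceBuilder call above, it
// must implement Stager and expose a static fromOptions(PipelineOptions)
// factory method.
options.as(DataflowPipelineDebugOptions.class).setStagerClass(CustomStager.class);
// The create(...) factory above will then reflectively invoke
// CustomStager.fromOptions(options).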
@Test
public void testValidProfileLocation() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSaveProfilesToGcs(VALID_PROFILE_BUCKET);
  DataflowRunner.fromOptions(options);
}
private Pipeline createTestBatchRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
/**
 * Overrides the default log level for the passed-in class.
 *
 * <p>This is equivalent to calling {@link #addOverrideForName(String,
 * DataflowWorkerLoggingOptions.Level)} with the {@link Class#getName() class name}.
 */
public WorkerLogLevelOverrides addOverrideForClass(Class<?> klass, Level level) {
  checkNotNull(klass, "Expected class to be not null.");
  addOverrideForName(klass.getName(), level);
  return this;
}
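// Minimal usage sketch: addOverrideForPackage and addOverrideForClass both
// delegate to addOverrideForName, so overrides chain fluently. Assumes direct
// instantiation of WorkerLogLevelOverrides, e.g. when configuring
// DataflowWorkerLoggingOptions.
WorkerLogLevelOverrides overrides =
    new WorkerLogLevelOverrides()
        .addOverrideForName("org.apache.beam", Level.DEBUG)
        .addOverrideForClass(PackageUtil.class, Level.WARN);
// The "org.apache.beam" entry also applies to descendant loggers such as
// "org.apache.beam.sdk.io" unless a more specific override exists.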
/**
 * Stages files to {@link DataflowPipelineOptions#getStagingLocation()}, suffixed with their MD5
 * hash to avoid collisions.
 *
 * <p>Uses {@link DataflowPipelineOptions#getGcsUploadBufferSizeBytes()}.
 */
@Override
public List<DataflowPackage> stageFiles(List<String> filesToStage) {
  try (PackageUtil packageUtil = PackageUtil.withDefaultThreadPool()) {
    return packageUtil.stageClasspathElements(
        filesToStage, options.getStagingLocation(), buildCreateOptions());
  }
}
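// Minimal usage sketch, assuming `stager` is an instance of this class and a
// staging location is set on the options; getFilesToStage() lists the local
// classpath entries to upload.
List<DataflowPackage> packages = stager.stageFiles(options.getFilesToStage());
// Per the Javadoc above, each staged file's name carries its MD5 hash suffix,
// so distinct contents cannot collide at the staging location.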
/** Returns true if the {@code beam_fn_api} experiment is enabled in the pipeline options. */
default boolean isFnApi() {
  List<String> experiments = getPipelineOptions().getExperiments();
  return experiments != null && experiments.contains("beam_fn_api");
}
/**
 * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey} is not
 * expanded. This is used for verifying that even without expansion the proper errors show up.
 */
private Pipeline createTestServiceRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("someproject");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}