/**
 * Returns the GCS path under which temporary files are stored in GCP.
 *
 * <p>Defaults to {@link PipelineOptions#getTempLocation}; the default value is produced by
 * {@code GcpTempLocationFactory}. The result may be {@code null}.
 */
@Default.InstanceFactory(GcpTempLocationFactory.class)
@Description("A GCS path for storing temporary files in GCP.")
@Nullable
String getGcpTempLocation();
/**
 * Returns the resource stager instance used to stage resources.
 *
 * <p>When no stager has been set explicitly, {@code StagerFactory} supplies the default: it
 * constructs a resource stager based upon the currently set stagerClass.
 */
@JsonIgnore
@Default.InstanceFactory(StagerFactory.class)
@Description(
    "The resource stager instance that should be used to stage resources. "
        + "If no stager has been set explicitly, the default is to use the instance factory "
        + "that constructs a resource stager based upon the currently set stagerClass.")
Stager getStager();
/**
 * Returns the path validator instance used to validate paths.
 *
 * <p>When no path validator has been set explicitly, {@code PathValidatorFactory} supplies the
 * default: it constructs a path validator based upon the currently set pathValidatorClass.
 */
@JsonIgnore
@Default.InstanceFactory(PathValidatorFactory.class)
@Description(
    "The path validator instance that should be used to validate paths. "
        + "If no path validator has been set explicitly, the default is to use the instance factory "
        + "that constructs a path validator based upon the currently set pathValidatorClass.")
PathValidator getPathValidator();
/**
 * Returns the GDELT file date.
 *
 * <p>When no value has been set, {@code GDELTFileFactory} supplies the default.
 */
@Default.InstanceFactory(GDELTFileFactory.class)
@Description("GDELT file date")
String getDate();

/**
 * Sets the GDELT file date.
 *
 * @param value the date to use
 */
void setDate(String value);
/**
 * Returns the sink class to which the metrics will be pushed.
 *
 * <p>When no sink class has been set, {@code NoOpMetricsSink} supplies the default.
 */
@Override
@Default.InstanceFactory(NoOpMetricsSink.class)
@Description("The beam sink class to which the metrics will be pushed")
Class<? extends MetricsSink> getMetricsSink();
/**
 * Returns the watermark, as a time in milliseconds, that causes a pipeline reading from an
 * unbounded source to stop.
 *
 * <p>When unset, {@code DefaultStopPipelineWatermarkFactory} supplies the default.
 */
@Default.InstanceFactory(DefaultStopPipelineWatermarkFactory.class)
@Description(
    "A watermark (time in millis) that causes a pipeline that reads "
        + "from an unbounded source to stop.")
Long getStopPipelineWatermark();
/**
 * Returns the checkpoint directory used for streaming resilience; it is ignored in batch.
 *
 * <p>For durability, a reliable filesystem such as HDFS/S3/GS is necessary. When unset,
 * {@code TmpCheckpointDirFactory} supplies the default.
 */
@Default.InstanceFactory(TmpCheckpointDirFactory.class)
@Description(
    "A checkpoint directory for streaming resilience, ignored in batch. "
        + "For durability, a reliable filesystem such as HDFS/S3/GS is necessary.")
String getCheckpointDir();
/**
 * Returns the {@code GcsUtil} instance used to communicate with Google Cloud Storage.
 *
 * <p>When no instance has been set, {@code GcsUtil.GcsUtilFactory} supplies the default.
 */
@Hidden
@JsonIgnore
@Default.InstanceFactory(GcsUtil.GcsUtilFactory.class)
@Description("The GcsUtil instance that should be used to communicate with Google Cloud Storage.")
GcsUtil getGcsUtil();
/**
 * Returns the project id; it is required when running in the cloud.
 *
 * <p>The option is marked {@code @Validation.Required}. When no value has been set,
 * {@code DefaultProjectFactory} supplies the default. See
 * https://cloud.google.com/storage/docs/projects for further details.
 */
@Override
@Validation.Required
@Default.InstanceFactory(DefaultProjectFactory.class)
@Description(
    "Project id. Required when running a Dataflow in the cloud. "
        + "See https://cloud.google.com/storage/docs/projects for further details.")
String getProject();
/**
 * Returns the amount of target parallelism the DirectRunner will use.
 *
 * <p>The value must be greater than zero. Per the option description, the default is the greater
 * of the number of available processors and 3; it is produced by
 * {@code AvailableParallelismFactory}.
 */
@Description(
    "Controls the amount of target parallelism the DirectRunner will use. Defaults to"
        + " the greater of the number of available processors and 3. Must be a value greater"
        + " than zero.")
@Default.InstanceFactory(AvailableParallelismFactory.class)
int getTargetParallelism();
/**
 * Returns the project id to use when launching jobs.
 *
 * <p>A project id is required when using Google Cloud Platform services. When no value has been
 * set, {@code DefaultProjectFactory} supplies the default. See
 * https://cloud.google.com/storage/docs/projects for further details.
 */
@Default.InstanceFactory(DefaultProjectFactory.class)
@Description(
    "Project id. Required when using Google Cloud Platform services. "
        + "See https://cloud.google.com/storage/docs/projects for further details.")
String getProject();
/**
 * Returns the Dataflow client instance.
 *
 * <p>When no client has been set, {@code DataflowClientFactory} supplies the default by creating
 * a Dataflow client using the current set of options.
 */
@JsonIgnore
@Default.InstanceFactory(DataflowClientFactory.class)
@Description(
    "An instance of the Dataflow client. Defaults to creating a Dataflow client "
        + "using the current set of options.")
Dataflow getDataflowClient();
/**
 * Returns the Docker container image that executes the Dataflow worker harness, residing in
 * Google Container Registry.
 *
 * <p>When no image has been set, {@code WorkerHarnessContainerImageFactory} supplies the default.
 */
@Hidden
@Default.InstanceFactory(WorkerHarnessContainerImageFactory.class)
// The first literal no longer ends with a space: the second already starts with one, and keeping
// both rendered "Google  Container Registry" (double space) in the help output.
@Description(
    "Docker container image that executes Dataflow worker harness, residing in Google"
        + " Container Registry.")
String getWorkerHarnessContainerImage();
// NOTE(review): the trailing @SuppressWarnings("unused") at the end of this line annotates the
// declaration that follows it, not getBigtableProject().
/**
 * Returns the project that contains the table to export.
 *
 * <p>Per the option description this defaults to {@code --project}; the default value is produced
 * by {@code Utils.DefaultBigtableProjectFactory}. The option is marked
 * {@code @Validation.Required}.
 */
@Description("The project that contains the table to export. Defaults to --project.") @Default.InstanceFactory(Utils.DefaultBigtableProjectFactory.class) @Validation.Required String getBigtableProject(); @SuppressWarnings("unused")
/**
 * Returns the list of Hadoop configurations used to configure zero or more Hadoop filesystems.
 *
 * <p>Per the option description, configuration is loaded by default from {@code core-site.xml}
 * and {@code hdfs-site.xml} based upon the {@code HADOOP_CONF_DIR} and {@code YARN_CONF_DIR}
 * environment variables; the default value is produced by {@code ConfigurationLocator}. On the
 * command line the value is written as a JSON list of JSON maps, each map holding the entire
 * configuration for a single Hadoop filesystem.
 */
@Default.InstanceFactory(ConfigurationLocator.class)
@Description(
    "A list of Hadoop configurations used to configure zero or more Hadoop filesystems. "
        + "By default, Hadoop configuration is loaded from 'core-site.xml' and 'hdfs-site.xml' "
        + "based upon the HADOOP_CONF_DIR and YARN_CONF_DIR environment variables. "
        + "To specify configuration on the command-line, represent the value as a JSON list of JSON "
        + "maps, where each map represents the entire configuration for a single Hadoop filesystem. "
        + "For example --hdfsConfiguration='[{\"fs.default.name\": \"hdfs://localhost:9998\", ...},"
        + "{\"fs.default.name\": \"s3a://\", ...},...]'")
List<Configuration> getHdfsConfiguration();
/**
 * Returns the credential instance used to authenticate against GCP services.
 *
 * <p>When no credential has been set explicitly, {@code GcpUserCredentialsFactory} supplies the
 * default: it constructs a credential based upon the currently set credentialFactoryClass.
 */
@JsonIgnore
@Default.InstanceFactory(GcpUserCredentialsFactory.class)
@Description(
    "The credential instance that should be used to authenticate against GCP services. "
        + "If no credential has been set explicitly, the default is to use the instance factory "
        + "that constructs a credential based upon the currently set credentialFactoryClass.")
Credentials getGcpCredential();
/**
 * Returns the process-wide unique ID of this {@link PipelineOptions} object.
 *
 * <p>The ID is assigned at graph construction time; {@code AtomicLongFactory} supplies the value.
 */
@Default.InstanceFactory(AtomicLongFactory.class)
@Hidden
long getOptionsId();
/**
 * Returns the size, in bytes, of S3 upload chunks.
 *
 * <p>Per the option description, the maximum upload object size is this value multiplied by
 * 10000, and the value must be at least 5MB. The default is 64MB, or 5MB in memory-constrained
 * environments; it is produced by {@code S3UploadBufferSizeBytesFactory}.
 */
@Default.InstanceFactory(S3UploadBufferSizeBytesFactory.class)
// A space now follows "10000;": without it the help text rendered as "10000;default".
@Description(
    "Size of S3 upload chunks; max upload object size is this value multiplied by 10000; "
        + "default is 64MB, or 5MB in memory-constrained environments. Must be at least 5MB.")
Integer getS3UploadBufferSizeBytes();
/**
 * Returns the name of the pipeline execution.
 *
 * <p>The name must match the regular expression {@code [a-z]([-a-z0-9]{0,38}[a-z0-9])?}. Per the
 * option description it defaults to ApplicationName-UserName-Date-RandomInteger; the default is
 * produced by {@code JobNameFactory}.
 */
@Default.InstanceFactory(JobNameFactory.class)
// Each sentence literal now ends with a space: previously the concatenation rendered as
// "execution.It must match" and "?'.It defaults" in the help text.
@Description(
    "Name of the pipeline execution. "
        + "It must match the regular expression '[a-z]([-a-z0-9]{0,38}[a-z0-9])?'. "
        + "It defaults to ApplicationName-UserName-Date-RandomInteger")
String getJobName();
/**
 * Returns the pipeline runner class that will be used to execute the pipeline.
 *
 * <p>Registered runners can be specified by class name; any other runner needs its fully
 * qualified name. The option is marked {@code @Validation.Required}, and {@code DirectRunner}
 * is the instance factory that supplies the default.
 */
@Validation.Required
@Default.InstanceFactory(DirectRunner.class)
@Description(
    "The pipeline runner that will be used to execute the pipeline. "
        + "For registered runners, the class name can be specified, otherwise the fully "
        + "qualified name needs to be specified.")
Class<? extends PipelineRunner<?>> getRunner();