/**
 * Mock subclass of {@link QueryDatabaseTableRecord} that adds a package-private
 * hook for inserting entries into {@code columnTypeMap}.
 *
 * <p>{@code columnTypeMap} is not declared in this class; it is presumably
 * inherited from {@link QueryDatabaseTableRecord} (confirm against the parent).
 */
@Stateful(scopes = Scope.CLUSTER, description = "Mock for QueryDatabaseTableRecord processor")
private static class MockQueryDatabaseTableRecord extends QueryDatabaseTableRecord {

    /**
     * Stores {@code colType} under the key {@code colName} in {@code columnTypeMap}.
     *
     * @param colName the column name used as the map key
     * @param colType the type code to associate with the column
     *                (presumably a {@code java.sql.Types} constant; verify against callers)
     */
    void putColumnType(final String colName, final Integer colType) {
        columnTypeMap.put(colName, colType);
    }
}
} // closes the enclosing class, whose declaration starts before this excerpt
/**
 * Mock subclass of {@link QueryDatabaseTable} that adds a package-private
 * hook for inserting entries into {@code columnTypeMap}.
 *
 * <p>{@code columnTypeMap} is not declared in this class; it is presumably
 * inherited from {@link QueryDatabaseTable} (confirm against the parent).
 */
@Stateful(scopes = Scope.CLUSTER, description = "Mock for QueryDatabaseTable processor")
private static class MockQueryDatabaseTable extends QueryDatabaseTable {

    /**
     * Stores {@code colType} under the key {@code colName} in {@code columnTypeMap}.
     *
     * @param colName the column name used as the map key
     * @param colType the type code to associate with the column
     *                (presumably a {@code java.sql.Types} constant; verify against callers)
     */
    void putColumnType(final String colName, final Integer colType) {
        columnTypeMap.put(colName, colType);
    }
}
} // closes the enclosing class, whose declaration starts before this excerpt
@Tags({"Apache", "Solr", "Get", "Pull", "Records"}) @InputRequirement(Requirement.INPUT_FORBIDDEN) @CapabilityDescription("Queries Solr and outputs the results as a FlowFile in the format of XML or using a Record Writer") @Stateful(scopes = {Scope.CLUSTER}, description = "Stores latest date of Date Field so that the same data will not be fetched multiple times.") public class GetSolr extends SolrProcessor {
@Tags({"provenance", "lineage", "tracking", "site", "site to site"}) @CapabilityDescription("Publishes Provenance events using the Site To Site protocol.") @Stateful(scopes = Scope.LOCAL, description = "Stores the Reporting Task's last event Id so that on restart the task knows where it left off.") @Restricted( restrictions = {
@Stateful(scopes = {Scope.LOCAL, Scope.CLUSTER}, description = "Scripts can store and retrieve state using the State Management APIs. Consult the State Manager section of the Developer's Guide for more details.") @SeeAlso({InvokeScriptedProcessor.class})
@WritesAttribute(attribute = ROLLING_WINDOW_MEAN_KEY, description = "The mean of the FlowFiles seen in the rolling window.") }) @Stateful(scopes = {Scope.LOCAL}, description = "Store the values backing the rolling window. This includes storing the individual values and their time-stamps or the batches of values and their " + "counts.") public class AttributeRollingWindow extends AbstractProcessor {
@WritesAttribute(attribute = "mime.type", description = "The MIME Type of the FlowFile, as reported by the HTTP Content-Type header") }) @Stateful(scopes = {Scope.LOCAL}, description = "Stores Last Modified Time and ETag headers returned by server so that the same data will not be fetched multiple times.") public class GetHTTP extends AbstractSessionFactoryProcessor {
@WritesAttribute(attribute="splunk.latest.time", description = "The value of the latest time that was used when performing the query.") }) @Stateful(scopes = Scope.CLUSTER, description = "If using one of the managed Time Range Strategies, this processor will " + "store the values of the latest and earliest times from the previous execution so that the next execution of the " + "can pick up where the last execution left off. The state will be cleared and start over if the query is changed.")
expressionLanguageScope = ExpressionLanguageScope.VARIABLE_REGISTRY, description = "Adds the specified property name/value as a query parameter in the Elasticsearch URL used for processing") @Stateful(description = "After each successful scroll page, the latest scroll_id is persisted in scrollId as input for the next scroll call. " + "Once the entire query is complete, finishedQuery state will be set to true, and the processor will not execute unless this is cleared.", scopes = { Scope.LOCAL }) public class ScrollElasticsearchHttp extends AbstractElasticsearchHttpProcessor {
@WritesAttribute(attribute = "inactivityStartMillis", description = "The time at which Inactivity began, in the form of milliseconds since Epoch"), @WritesAttribute(attribute = "inactivityDurationMillis", description = "The number of milliseconds that the inactivity has spanned")}) @Stateful(scopes = Scope.CLUSTER, description = "MonitorActivity stores the last timestamp at each node as state, so that it can examine activity at cluster wide." + "If 'Copy Attribute' is set to true, then flow file attributes are also persisted.") public class MonitorActivity extends AbstractProcessor {
"processor's watch directories there will be a never ending flow of events. It is also important to be aware that this processor must consume all events. The filtering must happen within " + "the processor. This is because the HDFS admin's event notifications API does not have filtering.") @Stateful(scopes = Scope.CLUSTER, description = "The last used transaction id is stored. This is used ") @SeeAlso({GetHDFS.class, FetchHDFS.class, PutHDFS.class, ListHDFS.class}) public class GetHDFSEvents extends AbstractHadoopProcessor {
+ "a timer or cron expression, using the standard scheduling methods. This processor is intended to be run on the Primary Node only. FlowFile attribute " + "'querydbtable.row.count' indicates how many rows were selected.") @Stateful(scopes = Scope.CLUSTER, description = "After performing a query on the specified table, the maximum values for " + "the specified column(s) will be retained for use in future executions of the query. This allows the Processor " + "to fetch only those records that have max values greater than the retained values. This can be used for "
@WritesAttribute(attribute = "db.table.count", description = "Contains the number of rows in the table") }) @Stateful(scopes = {Scope.CLUSTER}, description = "After performing a listing of tables, the timestamp of the query is stored. " + "This allows the Processor to not re-list tables the next time that the Processor is run. Specifying the refresh interval in the processor properties will " + "indicate that when the processor detects the interval has elapsed, the state will be reset and tables will be re-listed as a result. "
+ "a timer or cron expression, using the standard scheduling methods. This processor is intended to be run on the Primary Node only. FlowFile attribute " + "'querydbtable.row.count' indicates how many rows were selected.") @Stateful(scopes = Scope.CLUSTER, description = "After performing a query on the specified table, the maximum values for " + "the specified column(s) will be retained for use in future executions of the query. This allows the Processor " + "to fetch only those records that have max values greater than the retained values. This can be used for "
@WritesAttribute(attribute = "path", description = "The fully qualified name of the directory on the SFTP Server from which the file was pulled"), }) @Stateful(scopes = {Scope.CLUSTER}, description = "After performing a listing of files, the timestamp of the newest file is stored. " + "This allows the Processor to list only files that have been added or modified after " + "this date the next time that the Processor is run. State is stored across the cluster so that this Processor can be run on Primary Node only and if "
@WritesAttribute(attribute = "path", description = "The fully qualified name of the directory on the SFTP Server from which the file was pulled"), }) @Stateful(scopes = {Scope.CLUSTER}, description = "After performing a listing of files, the timestamp of the newest file is stored. " + "This allows the Processor to list only files that have been added or modified after " + "this date the next time that the Processor is run. State is stored across the cluster so that this Processor can be run on Primary Node only and if "
@WritesAttribute(attribute = "lang", description = "Language code for the content"), @WritesAttribute(attribute = "azure.blobtype", description = "This is the type of blob and can be either page or block type") }) @Stateful(scopes = { Scope.CLUSTER }, description = "After performing a listing of blobs, the timestamp of the newest blob is stored. " + "This allows the Processor to list only blobs that have been added or modified after this date the next time that the Processor is run. State is " + "stored across the cluster so that this Processor can be run on Primary Node only and if a new Primary Node is selected, the new node can pick up " +
+ "in a cluster. If the primary node changes, the new Primary Node will pick up where the previous node left off without duplicating " + "all of the data.") @Stateful(scopes = Scope.CLUSTER, description = "After performing a listing of keys, the timestamp of the newest key is stored, " + "along with the keys that share that same timestamp. This allows the Processor to list only keys that have been added or modified after " + "this date the next time that the Processor is run. State is stored across the cluster so that this Processor can be run on Primary Node only and if a new Primary "
+ " - If incoming connection(s) are specified and a flow file is available to a processor task, the flow file's attributes may be used in Expression Language for such fields " + "as Table Name and others. However, the Max-Value Columns and Columns to Return fields must be empty or refer to columns that are available in each specified table.") @Stateful(scopes = Scope.CLUSTER, description = "After performing a query on the specified table, the maximum values for " + "the specified column(s) will be retained for use in future executions of the query. This allows the Processor " + "to fetch only those records that have max values greater than the retained values. This can be used for "
+ "in a cluster. If the primary node changes, the new Primary Node will pick up where the previous node left off without duplicating " + "all of the data.") @Stateful(scopes = Scope.CLUSTER, description = "After performing a listing of keys, the timestamp of the newest key is stored, " + "along with the keys that share that same timestamp. This allows the Processor to list only keys that have been added or modified after " + "this date the next time that the Processor is run. State is stored across the cluster so that this Processor can be run on Primary Node only and if a new Primary "