Provides persistent storage of Kafka Connect connector configurations in a Kafka topic.
This class manages both connector and task configurations. It tracks three types of configuration entries:
1. Connector config: map of string -> string configurations passed to the Connector class, with support for
expanding this format if necessary. (Kafka key: connector-[connector-id]).
These configs are *not* ephemeral. They represent the source of truth. If the entire Connect
cluster goes down, this is all that is really needed to recover.
2. Task configs: map of string -> string configurations passed to the Task class, with support for expanding
this format if necessary. (Kafka key: task-[connector-id]-[task-id]).
These configs are ephemeral; they are stored here to a) disseminate them to all workers while
ensuring agreement and b) to allow faster cluster/worker recovery since the common case
of recovery (restoring a connector) will simply result in the same configuration as before
the failure.
3. Task commit "configs": records indicating that previous task config entries should be committed and all task
configs for a connector can be applied. (Kafka key: commit-[connector-id]).
This config has two effects. First, it records the number of tasks the connector is currently
running (and can therefore increase/decrease parallelism). Second, because each task config
is stored separately but they need to be applied together to ensure each partition is assigned
to a single task, this record also indicates that task configs for the specified connector
can be "applied" or "committed".
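The three record key formats above can be sketched as simple string builders. `ConfigKeys` and its method names are hypothetical illustrations of the key scheme described here, not this class's actual API:

```java
// Hypothetical helpers illustrating the record key formats described above.
public class ConfigKeys {
    public static String connectorKey(String connectorId) {
        return "connector-" + connectorId;
    }

    public static String taskKey(String connectorId, int taskId) {
        return "task-" + connectorId + "-" + taskId;
    }

    public static String commitKey(String connectorId) {
        return "commit-" + connectorId;
    }

    public static void main(String[] args) {
        System.out.println(connectorKey("foo")); // connector-foo
        System.out.println(taskKey("foo", 1));   // task-foo-1
        System.out.println(commitKey("foo"));    // commit-foo
    }
}
```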
This configuration is expected to be stored in a *single partition* and *compacted* topic. Using a single partition
ensures we can enforce ordering on messages, allowing Kafka to be used as a write-ahead log. Compaction allows
us to clean up outdated configurations over time. However, this combination has some important implications for
the implementation of this class and the configuration state that it may expose.
Connector configurations are independent of all other configs, so they are handled easily. Writing a single record
is already atomic, so these can be applied as soon as they are read. One connector's config does not affect any
others, and it does not need to coordinate with the connector's task configurations at all.
The most obvious implication for task configs is the need for the commit messages. Because Kafka does not
currently have multi-record transactions or support atomic batch record writes, task commit messages are required
to ensure that readers do not end up using inconsistent configs. For example, consider if a connector wrote configs
for its tasks, then was reconfigured and only managed to write updated configs for half its tasks. If task configs
were applied immediately you could be using half the old configs and half the new configs. In that condition, some
partitions may be double-assigned because the old config and new config may use completely different assignments.
Therefore, when reading the log, we must buffer config updates for a connector's tasks and only apply them
atomically once a commit message has been read.
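The buffer-then-commit pattern can be sketched as follows. Class and method names are hypothetical; the real implementation also handles serialization, offsets, and error cases:

```java
import java.util.HashMap;
import java.util.Map;

// Sketch of the buffer-then-commit pattern: task configs are staged per
// connector and only become visible once the commit record is read.
public class TaskConfigBuffer {
    // Staged task configs per connector, invisible to readers until commit.
    private final Map<String, Map<Integer, Map<String, String>>> staged = new HashMap<>();
    // Applied task configs: the consistent state exposed to callers.
    private final Map<String, Map<Integer, Map<String, String>>> applied = new HashMap<>();

    // Reading task-[connector-id]-[task-id]: stage, but do not apply yet.
    public void onTaskConfig(String connector, int taskId, Map<String, String> config) {
        staged.computeIfAbsent(connector, c -> new HashMap<>()).put(taskId, config);
    }

    // Reading commit-[connector-id]: apply the staged set atomically, but only
    // if it is complete; otherwise keep the previously applied state.
    public void onCommit(String connector, int expectedTaskCount) {
        Map<Integer, Map<String, String>> buffered = staged.remove(connector);
        if (buffered == null || buffered.size() != expectedTaskCount)
            return; // incomplete set, e.g. records lost to compaction
        applied.put(connector, buffered);
    }

    public Map<Integer, Map<String, String>> taskConfigs(String connector) {
        return applied.get(connector);
    }

    public static void main(String[] args) {
        TaskConfigBuffer buf = new TaskConfigBuffer();
        buf.onTaskConfig("foo", 1, Map.of("file", "/tmp/a"));
        buf.onTaskConfig("foo", 2, Map.of("file", "/tmp/b"));
        System.out.println(buf.taskConfigs("foo")); // null: not yet committed
        buf.onCommit("foo", 2);
        System.out.println(buf.taskConfigs("foo").size()); // 2
    }
}
```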
However, there are also further challenges. This simple buffering approach would work fine as long as the entire log was
always available, but we would like to enable compaction so that our configuration topic does not grow
indefinitely. Compaction can break this approach because old entries will suddenly go missing. A new worker reading
from the beginning of the log in order to build up the full current configuration will see task commits, but some
records required for those commits will have been removed because the same keys have subsequently been rewritten.
For example, if you have a sequence of record keys [connector-foo-config, task-foo-1-config, task-foo-2-config,
commit-foo (2 tasks), task-foo-1-config, commit-foo (1 task)], we can end up with a compacted log containing
[connector-foo-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config, commit-foo (1 task)]. When read
back, the first commit will see an invalid state because the first task-foo-1-config has been cleaned up.
Compaction can further complicate things if writing new task configs fails mid-write. Consider a similar scenario
as the previous one, but in this case both the first and second update will write 2 task configs. However, the
second write fails halfway through:
[connector-foo-config, task-foo-1-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config]. Now compaction
occurs and we're left with
[connector-foo-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config]. At the first commit, we don't
have a complete set of configs. And because of the failure, there is no second commit. We are left in an inconsistent
state with no obvious way to resolve the issue -- we can try to keep on reading, but the failed node may never
recover and write the updated config. Meanwhile, other workers may have seen the entire log; they will see the second
task-foo-1-config waiting to be applied, but will otherwise think everything is ok -- they have a valid set of task
configs for connector "foo".
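A reader can recognize both scenarios with a simple commit-count check. This hypothetical `InconsistencyTracker` replays the two compacted logs above and flags the connector whose inconsistency is never resolved:

```java
import java.util.*;

// Sketch: each commit either applies a complete staged set of task configs or
// marks the connector inconsistent until a later complete commit arrives.
public class InconsistencyTracker {
    private final Map<String, Set<Integer>> staged = new HashMap<>();
    private final Set<String> inconsistent = new HashSet<>();

    public void onTaskConfig(String connector, int taskId) {
        staged.computeIfAbsent(connector, c -> new HashSet<>()).add(taskId);
    }

    public void onCommit(String connector, int taskCount) {
        Set<Integer> tasks = staged.remove(connector);
        if (tasks != null && tasks.size() == taskCount) {
            inconsistent.remove(connector); // complete set: apply and clear flag
        } else {
            inconsistent.add(connector);    // records lost, e.g. to compaction
        }
    }

    public Set<String> inconsistentConnectors() {
        return Collections.unmodifiableSet(inconsistent);
    }

    public static void main(String[] args) {
        // First example: compaction removed the original task-foo-1-config,
        // but the later commit-foo (1 task) resolves the state.
        InconsistencyTracker first = new InconsistencyTracker();
        first.onTaskConfig("foo", 2);
        first.onCommit("foo", 2);      // incomplete: flagged inconsistent
        first.onTaskConfig("foo", 1);
        first.onCommit("foo", 1);      // complete: flag cleared
        System.out.println(first.inconsistentConnectors()); // []

        // Second example: the write failed before the second commit, so the
        // inconsistency cannot be resolved from the log alone.
        InconsistencyTracker second = new InconsistencyTracker();
        second.onTaskConfig("foo", 2);
        second.onCommit("foo", 2);     // incomplete: flagged inconsistent
        second.onTaskConfig("foo", 1); // buffered, but no commit ever follows
        System.out.println(second.inconsistentConnectors()); // [foo]
    }
}
```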
Because we can encounter these inconsistencies and addressing them requires support from the rest of the system
(resolving the task configuration inconsistencies requires support from the connector instance to regenerate updated
configs), this class exposes not only the current set of configs, but also which connectors have inconsistent data.
This allows users of this class (i.e., Herder implementations) to take action to resolve any inconsistencies. These
inconsistencies should be rare: as described above, they require compaction combined with a leader failure in the
middle of updating task configurations.
Note that the expectation is that this config storage system has only a single writer at a time.
The caller (Herder) must ensure this is the case. In distributed mode this will require forwarding config change
requests to the leader in the cluster (i.e. the worker group coordinated by the Kafka broker).
Since processing of the config log occurs in a background thread, callers must take care when using accessors.
To simplify handling this correctly, this class only exposes a mechanism to snapshot the current state of the cluster.
Updates may continue to be applied (and callbacks invoked) in the background. Callers must take care that they are
using a consistent snapshot and only update when it is safe. In particular, if task configs are updated which require
synchronization across workers to commit offsets and update the configuration, callbacks and updates during the
rebalance must be deferred.
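The snapshot discipline can be sketched like this (names are hypothetical; the point is that callers read an immutable copy, never the live map the background thread mutates):

```java
import java.util.*;

// Sketch of the snapshot pattern: the background log-reader thread mutates
// state under a lock, while callers get an immutable copy, so concurrent
// updates cannot tear a caller's view of the configuration.
public class SnapshotStore {
    private final Object lock = new Object();
    private final Map<String, Map<String, String>> connectorConfigs = new HashMap<>();

    // Called from the background thread as config records are read.
    public void update(String connector, Map<String, String> config) {
        synchronized (lock) {
            connectorConfigs.put(connector, new HashMap<>(config));
        }
    }

    // Called by the Herder: returns a consistent, immutable snapshot.
    public Map<String, Map<String, String>> snapshot() {
        synchronized (lock) {
            Map<String, Map<String, String>> copy = new HashMap<>();
            connectorConfigs.forEach((c, cfg) -> copy.put(c, Map.copyOf(cfg)));
            return Collections.unmodifiableMap(copy);
        }
    }

    public static void main(String[] args) {
        SnapshotStore store = new SnapshotStore();
        store.update("foo", Map.of("tasks.max", "2"));
        Map<String, Map<String, String>> snap = store.snapshot();
        // A later background update does not alter the snapshot already taken:
        store.update("foo", Map.of("tasks.max", "4"));
        System.out.println(snap.get("foo").get("tasks.max")); // 2
    }
}
```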