private void setFetchThresholds() {
  // Get the thresholds, and set defaults if not defined.
  KafkaConfig kafkaConfig = new KafkaConfig(config);

  Option<String> fetchThresholdOption = kafkaConfig.getConsumerFetchThreshold(systemName);
  long fetchThreshold = FETCH_THRESHOLD;
  if (fetchThresholdOption.isDefined()) {
    fetchThreshold = Long.valueOf(fetchThresholdOption.get());
  }

  Option<String> fetchThresholdBytesOption = kafkaConfig.getConsumerFetchThresholdBytes(systemName);
  long fetchThresholdBytes = FETCH_THRESHOLD_BYTES;
  if (fetchThresholdBytesOption.isDefined()) {
    fetchThresholdBytes = Long.valueOf(fetchThresholdBytesOption.get());
  }

  int numPartitions = topicPartitionsToSSP.size();
  if (numPartitions != topicPartitionsToOffset.size()) {
    throw new SamzaException("topicPartitionsToSSP.size() doesn't match topicPartitionsToOffset.size()");
  }

  if (numPartitions > 0) {
    perPartitionFetchThreshold = fetchThreshold / numPartitions;
    if (fetchThresholdBytesEnabled) {
      // Currently this feature cannot be enabled, because we do not have the size of the messages available.
      // Messages get double buffered, hence divide by 2.
      perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numPartitions;
    }
  }

  LOG.info("{}: fetchThresholdBytes={}; fetchThreshold={}; numPartitions={}; perPartitionFetchThreshold={}; "
          + "perPartitionFetchThresholdBytes (0 if disabled)={}", this, fetchThresholdBytes, fetchThreshold,
      numPartitions, perPartitionFetchThreshold, perPartitionFetchThresholdBytes);
}
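// A hedged worked example of the division above; the concrete numbers are made up,
// only the arithmetic mirrors setFetchThresholds().
public class FetchThresholdExample {
  public static void main(String[] args) {
    long fetchThreshold = 50_000;           // total messages buffered across all partitions
    long fetchThresholdBytes = 100_000_000; // total bytes buffered across all partitions
    int numPartitions = 10;

    long perPartitionFetchThreshold = fetchThreshold / numPartitions;                 // 5_000 messages
    long perPartitionFetchThresholdBytes = (fetchThresholdBytes / 2) / numPartitions; // 5_000_000 bytes

    System.out.println(perPartitionFetchThreshold + " msgs/partition, "
        + perPartitionFetchThresholdBytes + " bytes/partition");
  }
}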
protected Properties createAdminClientProperties() {
  // Populate brokerList from either consumer or producer configs.
  Properties props = new Properties();
  // Include SSL settings if needed.
  props.putAll(config.subset(String.format("systems.%s.consumer.", systemName), true));

  // Validate brokerList.
  String brokerList = config.get(
      String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
  if (brokerList == null) {
    brokerList = config.get(
        String.format(KafkaConfig.PRODUCER_CONFIGS_CONFIG_KEY(), systemName, ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
  }
  if (brokerList == null) {
    throw new SamzaException(
        ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG + " is required for systemAdmin for system " + systemName);
  }
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);

  // kafka.admin.AdminUtils requires zkConnect.
  // This will change after we move to the new org.apache..AdminClient.
  String zkConnect =
      config.get(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName, ZOOKEEPER_CONNECT));
  if (StringUtils.isBlank(zkConnect)) {
    throw new SamzaException("Missing zookeeper.connect config for admin for system " + systemName);
  }
  props.put(ZOOKEEPER_CONNECT, zkConnect);

  return props;
}
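// A minimal sketch of the config createAdminClientProperties() reads, assuming a system
// named "kafka-system"; the broker and ZooKeeper addresses below are hypothetical.
import java.util.HashMap;
import java.util.Map;
import org.apache.samza.config.Config;
import org.apache.samza.config.MapConfig;

public class AdminClientConfigExample {
  public static Config buildConfig() {
    Map<String, String> cfg = new HashMap<>();
    // Consumer-scoped bootstrap servers (producer-scoped ones are used as a fallback).
    cfg.put("systems.kafka-system.consumer.bootstrap.servers", "broker1:9092,broker2:9092");
    // kafka.admin.AdminUtils still needs ZooKeeper, so zookeeper.connect is required as well.
    cfg.put("systems.kafka-system.consumer.zookeeper.connect", "zk1:2181");
    // createAdminClientProperties() would return Properties containing both entries,
    // plus any other systems.kafka-system.consumer.* settings (e.g. SSL).
    return new MapConfig(cfg);
  }
}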
/**
 * Filters out properties from the original config that are not supported by Kafka.
 * For example, we allow users to set replication.factor as a property of the stream
 * and then parse it out so we can pass it separately, as Kafka requires. Kafka
 * would otherwise throw if replication.factor were passed as a property on a new topic.
 *
 * @param originalConfig the original config to filter
 * @return the filtered config
 */
private static Map<String, String> filterUnsupportedProperties(Map<String, String> originalConfig) {
  Map<String, String> filteredConfig = new HashMap<>();
  for (Map.Entry<String, String> entry : originalConfig.entrySet()) {
    // Kafka requires a replication factor, but not as a topic property, so we have to filter it out.
    if (!KafkaConfig.TOPIC_REPLICATION_FACTOR().equals(entry.getKey())) {
      if (LogConfig.configNames().contains(entry.getKey())) {
        filteredConfig.put(entry.getKey(), entry.getValue());
      } else {
        LOG.warn("Property '{}' is not a valid Kafka topic config. It will be ignored.", entry.getKey());
      }
    }
  }
  return filteredConfig;
}
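// An illustrative call to the filter above; the keys are examples only. "cleanup.policy"
// is a real Kafka topic config and is kept, "replication.factor" is intentionally stripped,
// and the misspelled key is dropped with a warning.
Map<String, String> original = new HashMap<>();
original.put("cleanup.policy", "compact");
original.put("replication.factor", "3");
original.put("celanup.policy", "compact"); // typo: not a valid Kafka topic config

Map<String, String> filtered = filterUnsupportedProperties(original);
// filtered now contains only {cleanup.policy=compact}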
/**
 * Creates a KafkaSystemConsumer for the provided {@code systemName}.
 *
 * @param kafkaConsumer Kafka Consumer object to be used by this system consumer
 * @param systemName system name for which we create the consumer
 * @param config application config
 * @param clientId clientId of the Kafka consumer, also used in the KafkaConsumerProxy
 * @param metrics metrics for this KafkaSystemConsumer
 * @param clock system clock
 */
public KafkaSystemConsumer(Consumer<K, V> kafkaConsumer, String systemName, Config config, String clientId,
    KafkaSystemConsumerMetrics metrics, Clock clock) {
  super(metrics.registry(), clock, metrics.getClass().getName());

  this.kafkaConsumer = kafkaConsumer;
  this.clientId = clientId;
  this.systemName = systemName;
  this.config = config;
  this.metrics = metrics;

  fetchThresholdBytesEnabled = new KafkaConfig(config).isConsumerFetchThresholdBytesEnabled(systemName);

  // Create a sink for passing the messages between the proxy and the consumer.
  messageSink = new KafkaConsumerMessageSink();

  // Create the proxy to do the actual message reading.
  String metricName = String.format("%s-%s", systemName, clientId);
  proxy = new KafkaConsumerProxy(kafkaConsumer, systemName, clientId, messageSink, metrics, metricName);
  LOG.info("{}: Created KafkaConsumerProxy {}", this, proxy);
}
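// A hedged construction sketch; the consumer properties, system name, and clientId below
// are assumptions for illustration, not the production wiring (which normally goes through
// the system factory's getConsumer()).
Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
Consumer<byte[], byte[]> kafkaConsumer = new KafkaConsumer<>(consumerProps);

KafkaSystemConsumerMetrics metrics =
    new KafkaSystemConsumerMetrics("kafka-system", new MetricsRegistryMap());
KafkaSystemConsumer<byte[], byte[]> consumer = new KafkaSystemConsumer<>(
    kafkaConsumer, "kafka-system", config, "samza-consumer-1", metrics, SystemClock.instance());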
adminClient = AdminClient.create(props);

KafkaConfig kafkaConfig = new KafkaConfig(config);
coordinatorStreamReplicationFactor = Integer.valueOf(kafkaConfig.getCoordinatorReplicationFactor());
coordinatorStreamProperties = KafkaSystemAdminUtilsScala.getCoordinatorTopicProperties(kafkaConfig);

// Build changelog metadata for each store with a changelog-enabled topic.
Map<String, String> storeToChangelog =
    JavaConverters.mapAsJavaMapConverter(kafkaConfig.getKafkaChangelogEnabledStores()).asJava();
for (Map.Entry<String, String> e : storeToChangelog.entrySet()) {
  String storeName = e.getKey();
  String topicName = e.getValue();
  String replicationFactorStr = kafkaConfig.getChangelogStreamReplicationFactor(storeName);
  int replicationFactor =
      StringUtils.isEmpty(replicationFactorStr) ? DEFAULT_REPL_FACTOR : Integer.valueOf(replicationFactorStr);
  ChangelogInfo changelogInfo =
      new ChangelogInfo(replicationFactor, kafkaConfig.getChangelogKafkaProperties(storeName));
  LOG.info(String.format("Creating topic meta information for topic: %s with replication factor: %s", topicName,
      replicationFactor));
}
/**
 * Converts any StreamSpec to a KafkaStreamSpec.
 * If the original spec is already a KafkaStreamSpec, it is simply returned.
 *
 * @param originalSpec the StreamSpec instance to convert to KafkaStreamSpec
 * @return a KafkaStreamSpec instance
 */
public static KafkaStreamSpec fromSpec(StreamSpec originalSpec) {
  if (originalSpec instanceof KafkaStreamSpec) {
    return (KafkaStreamSpec) originalSpec;
  }

  int replicationFactor = Integer.parseInt(originalSpec.getOrDefault(
      KafkaConfig.TOPIC_REPLICATION_FACTOR(),
      KafkaConfig.TOPIC_DEFAULT_REPLICATION_FACTOR()));

  return new KafkaStreamSpec(
      originalSpec.getId(),
      originalSpec.getPhysicalName(),
      originalSpec.getSystemName(),
      originalSpec.getPartitionCount(),
      replicationFactor,
      mapToProperties(filterUnsupportedProperties(originalSpec.getConfig())));
}
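// A hedged usage sketch of fromSpec(); it assumes a StreamSpec constructor of the form
// (id, physicalName, systemName, partitionCount, config) and uses made-up names and values.
Map<String, String> streamConfig = new HashMap<>();
streamConfig.put("replication.factor", "3");   // parsed out and passed separately
streamConfig.put("cleanup.policy", "compact"); // kept as a regular Kafka topic property

StreamSpec spec = new StreamSpec("page-views", "PageViewEvent", "kafka-system", 16, streamConfig);
KafkaStreamSpec kafkaSpec = KafkaStreamSpec.fromSpec(spec);
// kafkaSpec.getReplicationFactor() == 3, and cleanup.policy survives in its topic properties.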
@Test
public void testGetCheckpointTopicProperties() {
  Map<String, String> config = new HashMap<>();
  Properties properties = new KafkaConfig(new MapConfig(config)).getCheckpointTopicProperties();
  assertEquals("compact", properties.getProperty("cleanup.policy"));
  assertEquals(String.valueOf(KafkaConfig.DEFAULT_CHECKPOINT_SEGMENT_BYTES()),
      properties.getProperty("segment.bytes"));

  config.put(ApplicationConfig.APP_MODE, ApplicationConfig.ApplicationMode.BATCH.name());
  properties = new KafkaConfig(new MapConfig(config)).getCheckpointTopicProperties();
  assertEquals("compact,delete", properties.getProperty("cleanup.policy"));
  assertEquals(String.valueOf(KafkaConfig.DEFAULT_CHECKPOINT_SEGMENT_BYTES()),
      properties.getProperty("segment.bytes"));
  assertEquals(String.valueOf(KafkaConfig.DEFAULT_RETENTION_MS_FOR_BATCH()),
      properties.getProperty("retention.ms"));
}
private Supplier<ZkUtils> getZkConnection() {
  String zkConnect =
      config.get(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName, ZOOKEEPER_CONNECT));
  if (StringUtils.isBlank(zkConnect)) {
    throw new SamzaException("Missing zookeeper.connect config for admin for system " + systemName);
  }
  // 6000 ms session timeout, 6000 ms connection timeout, ZK security disabled.
  return () -> ZkUtils.apply(zkConnect, 6000, 6000, false);
}
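// A hedged sketch of consuming the supplier; the AdminUtils call and the close() handling
// are assumptions about a typical call site, not taken from this class.
ZkUtils zkUtils = getZkConnection().get();
try {
  // e.g. a topic existence check via the old ZooKeeper-based admin API
  boolean exists = AdminUtils.topicExists(zkUtils, "some-topic");
  LOG.info("topic exists: {}", exists);
} finally {
  zkUtils.close();
}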
@Before
public void setUp() throws Exception {
  Map<String, String> configMap = new HashMap<>();
  configMap.put(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), TEST_SYSTEM,
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG), "localhost:123");
  configMap.put(String.format(KafkaConfig.CONSUMER_ZK_CONNECT_CONFIG_KEY(), TEST_SYSTEM), "localhost:124");
  configMap.put(JobConfig.JOB_NAME(), "jobName");
  configMap.put(JobConfig.JOB_ID(), "jobId");