protected final void startMessaging() throws IOException, InterruptedException { log.info("Starting local test Zookeeper server"); localZKServer = new LocalZKServer(localZKPort); localZKServer.start(); log.info("Starting local Kafka broker"); localKafkaBroker = new LocalKafkaBroker(localKafkaBrokerPort, localZKPort); localKafkaBroker.start(); String zkHostPort = "localhost:" + localZKPort; KafkaUtils.deleteTopic(zkHostPort, INPUT_TOPIC); KafkaUtils.deleteTopic(zkHostPort, UPDATE_TOPIC); KafkaUtils.maybeCreateTopic(zkHostPort, INPUT_TOPIC, 1); KafkaUtils.maybeCreateTopic(zkHostPort, UPDATE_TOPIC, 1); // Give topics a little time to elect leader try { Thread.sleep(5000); } catch (InterruptedException ie) { // continue } }
protected final JavaInputDStream<ConsumerRecord<K,M>> buildInputDStream( JavaStreamingContext streamingContext) { Preconditions.checkArgument( KafkaUtils.topicExists(inputTopicLockMaster, inputTopic), "Topic %s does not exist; did you create it?", inputTopic); if (updateTopic != null && updateTopicLockMaster != null) { Preconditions.checkArgument( KafkaUtils.topicExists(updateTopicLockMaster, updateTopic), "Topic %s does not exist; did you create it?", updateTopic); } String groupID = getGroupID(); Map<String,Object> kafkaParams = new HashMap<>(); kafkaParams.put("group.id", groupID); // Don't re-consume old messages from input by default kafkaParams.put("auto.offset.reset", "latest"); // Ignored by Kafka 0.10 Spark integration kafkaParams.put("bootstrap.servers", inputBroker); kafkaParams.put("key.deserializer", keyDecoderClass.getName()); kafkaParams.put("value.deserializer", messageDecoderClass.getName()); LocationStrategy locationStrategy = LocationStrategies.PreferConsistent(); ConsumerStrategy<K,M> consumerStrategy = ConsumerStrategies.Subscribe( Collections.singleton(inputTopic), kafkaParams, Collections.emptyMap()); return org.apache.spark.streaming.kafka010.KafkaUtils.createDirectStream( streamingContext, locationStrategy, consumerStrategy); }
/**
 * Records the latest consumed offsets after a batch completes.
 *
 * @param javaRDD RDD whose underlying RDD must be an instance of {@code HasOffsetRanges},
 *  such as {@code KafkaRDD}
 */
@Override
public void call(JavaRDD<T> javaRDD) {
  Map<Pair<String,Integer>,Long> newOffsets = new HashMap<>();
  // Each range reports the offset just past the last message consumed for
  // one (topic, partition); that is the next position to resume from.
  for (OffsetRange range : ((HasOffsetRanges) javaRDD.rdd()).offsetRanges()) {
    newOffsets.put(new Pair<>(range.topic(), range.partition()), range.untilOffset());
  }
  log.info("Updating offsets: {}", newOffsets);
  KafkaUtils.setOffsets(inputTopicLockMaster, group, newOffsets);
}
/**
 * Creates the topic if it does not already exist, using default topic configuration.
 *
 * @param zkServers Zookeeper server string: host1:port1[,host2:port2,...]
 * @param topic topic to create (if not already existing)
 * @param partitions number of topic partitions
 */
public static void maybeCreateTopic(String zkServers, String topic, int partitions) {
  // Delegate to the full overload with an empty per-topic config.
  Properties defaultConfig = new Properties();
  maybeCreateTopic(zkServers, topic, partitions, defaultConfig);
}
@After
public final void tearDownKafkaZK() {
  // Tear down in reverse start order: topics, then the broker, then Zookeeper.
  if (localKafkaBroker != null) {
    log.info("Deleting topics");
    String zkHostPort = "localhost:" + localZKPort;
    KafkaUtils.deleteTopic(zkHostPort, INPUT_TOPIC);
    KafkaUtils.deleteTopic(zkHostPort, UPDATE_TOPIC);
    log.info("Stopping Kafka");
    IOUtils.closeQuietly(localKafkaBroker);
    localKafkaBroker = null;
  }
  if (localZKServer != null) {
    log.info("Stopping Zookeeper");
    IOUtils.closeQuietly(localZKServer);
    localZKServer = null;
  }
}
/**
 * Convenience overload that creates the topic (if absent) with no extra
 * topic-level configuration properties.
 *
 * @param zkServers Zookeeper server string: host1:port1[,host2:port2,...]
 * @param topic topic to create (if not already existing)
 * @param partitions number of topic partitions
 */
public static void maybeCreateTopic(String zkServers, String topic, int partitions) {
  Properties emptyTopicConfig = new Properties();
  maybeCreateTopic(zkServers, topic, partitions, emptyTopicConfig);
}
// Start from a clean topic, then recreate it with 4 partitions
// (presumably so keys get spread across partitions — TODO confirm against the test).
KafkaUtils.deleteTopic(zkHostPort, TOPIC);
KafkaUtils.maybeCreateTopic(zkHostPort, TOPIC, 4);
keys = consumeTopic.getKeys();
} finally {
// Always delete the topic afterwards, even if the test body threw.
KafkaUtils.deleteTopic(zkHostPort, TOPIC);
// Verify both required topics exist before creating any producer, so a
// misconfigured environment fails fast with an actionable message.
Preconditions.checkArgument(KafkaUtils.topicExists(inputTopicLockMaster, inputTopic),
    "Topic %s does not exist; did you create it?", inputTopic);
Preconditions.checkArgument(KafkaUtils.topicExists(updateTopicLockMaster, updateTopic),
    "Topic %s does not exist; did you create it?", updateTopic);
inputProducer = new TopicProducerImpl<>(inputTopicBroker, inputTopic);
/**
 * Persists the end offsets of the just-processed batch.
 *
 * @param javaRDD RDD whose underlying RDD must be an instance of {@code HasOffsetRanges},
 *  such as {@code KafkaRDD}
 */
@Override
public void call(JavaRDD<T> javaRDD) {
  OffsetRange[] offsetRanges = ((HasOffsetRanges) javaRDD.rdd()).offsetRanges();
  Map<Pair<String,Integer>,Long> latestOffsets = new HashMap<>(offsetRanges.length);
  for (int i = 0; i < offsetRanges.length; i++) {
    OffsetRange r = offsetRanges[i];
    latestOffsets.put(new Pair<>(r.topic(), r.partition()), r.untilOffset());
  }
  log.info("Updating offsets: {}", latestOffsets);
  KafkaUtils.setOffsets(inputTopicLockMaster, group, latestOffsets);
}
// Recreate the topic fresh with a custom max.message.bytes so the test can
// exercise message-size limits.
KafkaUtils.deleteTopic(zkHostPort, TOPIC);
KafkaUtils.maybeCreateTopic(zkHostPort, TOPIC, 1, ConfigUtils.keyValueToProperties(
    "max.message.bytes", maxMessageSize
));
keyMessages = consumeTopic.getKeyMessages();
} finally {
// Clean up the topic regardless of test outcome.
KafkaUtils.deleteTopic(zkHostPort, TOPIC);
protected final JavaInputDStream<ConsumerRecord<K,M>> buildInputDStream( JavaStreamingContext streamingContext) { Preconditions.checkArgument( KafkaUtils.topicExists(inputTopicLockMaster, inputTopic), "Topic %s does not exist; did you create it?", inputTopic); if (updateTopic != null && updateTopicLockMaster != null) { Preconditions.checkArgument( KafkaUtils.topicExists(updateTopicLockMaster, updateTopic), "Topic %s does not exist; did you create it?", updateTopic); } String groupID = getGroupID(); Map<String,Object> kafkaParams = new HashMap<>(); kafkaParams.put("group.id", groupID); // Don't re-consume old messages from input by default kafkaParams.put("auto.offset.reset", "latest"); // Ignored by Kafka 0.10 Spark integration kafkaParams.put("bootstrap.servers", inputBroker); kafkaParams.put("key.deserializer", keyDecoderClass.getName()); kafkaParams.put("value.deserializer", messageDecoderClass.getName()); LocationStrategy locationStrategy = LocationStrategies.PreferConsistent(); ConsumerStrategy<K,M> consumerStrategy = ConsumerStrategies.Subscribe( Collections.singleton(inputTopic), kafkaParams, Collections.emptyMap()); return org.apache.spark.streaming.kafka010.KafkaUtils.createDirectStream( streamingContext, locationStrategy, consumerStrategy); }