/**
 * Returns the configured kafka read parallelism, clamped to a minimum of 1 so a
 * zero or negative configuration value can never disable reading entirely.
 */
public int getReadParallelism() {
    final int configured = getConf().getIntProperty(KAFKA_READ_PARALLELISM, 1);
    return configured < 1 ? 1 : configured;
}
/**
 * Persists the next run's per-partition offsets into the metadata manager.
 * Each kafka partition becomes one entry whose key is the topic-specific
 * metadata prefix followed by the partition id, and whose value is the offset.
 *
 * @param metadataManager metadata store to write the offsets into
 * @param nextRunState    run state whose partition-to-offset map is persisted
 */
@Override
public void saveNextRunState(@NonNull final IMetadataManager<StringValue> metadataManager,
                             final KafkaRunState nextRunState) {
    final String topicName = this.conf.getTopicName();
    final String topicSpecificName = getTopicSpecificMetadataKey(topicName);
    // Map.forEach replaces the entrySet()/getKey()/getValue() boilerplate of the
    // original; behavior is identical (one set() call per partition entry).
    nextRunState.getPartitionOffsets().forEach((partition, offset) ->
        metadataManager.set(topicSpecificName + partition, new StringValue(offset.toString())));
}
// NOTE(review): fragment of a larger method — only this slice is visible in this chunk.
// Reads the per-run message cap, logs it alongside the newly-available message count,
// and records whether more messages exist than this run is allowed to consume.
final long maxMessagesToRead = this.conf.getMaxMessagesToRead();
log.info("topicName:{}:newMessages:{}:maxMessagesToRead:{}", this.conf.getTopicName(), numMessages, maxMessagesToRead);
// true when the backlog exceeds the cap; presumably used by the caller to truncate
// the work units for this run — TODO confirm against the rest of the method.
final boolean hasExtraMessages = numMessages > maxMessagesToRead;
/**
 * Builds the RDD of avro payloads for the work units computed by the calculator.
 *
 * <p>Fixes over the previous revision: {@code readParallelism} was used without a
 * declaration and its if/else branches were swapped (they computed the maximum of
 * the work-unit count and the configured parallelism, which would create more spark
 * partitions than there are work units); and the result of
 * {@code getKafkaPartitionOffsetToOutputSparkPartitionMap(...)} was assigned to
 * nothing (a dangling {@code =} — a syntax error).
 *
 * @param workUnitCalc calculator result; must contain at least one work unit
 */
@Override
public JavaRDD<AvroPayload> getData(@NonNull final KafkaWorkUnitCalculatorResult workUnitCalc) {
    Preconditions.checkState(workUnitCalc.hasWorkUnits(), "no work to do :" + this.conf.getTopicName());
    final List<OffsetRange> workUnits = workUnitCalc.getWorkUnits();
    // Cap the parallelism at the number of work units: min(workUnits, configured).
    final int readParallelism;
    if (workUnits.size() < this.conf.getReadParallelism()) {
        readParallelism = workUnits.size();
    } else {
        readParallelism = this.conf.getReadParallelism();
    }
    // NOTE(review): the helper's declared return type is not visible in this chunk,
    // so the local is typed Object for now — TODO tighten to the helper's real type.
    final Object kafkaPartitionOffsetToSparkPartitionMap =
        getKafkaPartitionOffsetToOutputSparkPartitionMap(this.conf.getTopicName(), workUnits, readParallelism);
    log.info("using partition offset mapping topic={} : mapping={}", this.conf.getTopicName(),
        kafkaPartitionOffsetToSparkPartitionMap);
/**
 * Computes the work units (kafka offset ranges) for the next run.
 * NOTE(review): only the opening of this method is visible in this chunk — the
 * body continues beyond what is shown here.
 */
@Override
public KafkaWorkUnitCalculatorResult computeWorkUnits() {
    final String topicName = this.conf.getTopicName();
    // consumer built from the configured kafka params; presumably used below to
    // discover partitions and current offsets for the topic — TODO confirm.
    final KafkaConsumer kafkaConsumer = getKafkaConsumer(this.conf.getKafkaParams());
/**
 * Builds a {@link KafkaSourceConfiguration} for tests: populates the mandatory
 * broker/topic/cluster/start-date properties on a fresh {@link Configuration}
 * and wraps it.
 *
 * @param topicName     kafka topic name (non-empty)
 * @param brokerAddress kafka broker list (non-empty)
 * @param startDate     start date string (non-empty)
 */
public static KafkaSourceConfiguration getKafkaSourceConfiguration(@NotEmpty final String topicName,
    @NotEmpty final String brokerAddress, @NotEmpty final String startDate) {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(topicName));
    Preconditions.checkArgument(!Strings.isNullOrEmpty(brokerAddress));
    final Configuration testConf = new Configuration();
    final List<String> mandatoryKeys =
        Arrays.asList(KAFKA_BROKER_LIST, KAFKA_TOPIC_NAME, KAFKA_CLUSTER_NAME, KAFKA_START_DATE);
    final List<String> mandatoryValues =
        Arrays.asList(brokerAddress, topicName, TEST_KAFKA_CLUSTER_NAME, startDate);
    KafkaTestHelper.setMandatoryConf(testConf, mandatoryKeys, mandatoryValues);
    return new KafkaSourceConfiguration(testConf);
}
/**
 * Returns the maximum number of messages this source may read in one run,
 * falling back to the project default when the property is unset.
 */
public long getMaxMessagesToRead() {
    final long fallback = DEFAULT_KAFKA_MAX_MESSAGES_TO_READ;
    return getConf().getLongProperty(KAFKA_MAX_MESSAGES_TO_READ, fallback);
}
}
@Override public void initPreviousRunState(@NonNull final IMetadataManager<StringValue> metadataManager) { final String topicName = this.conf.getTopicName(); final Map<Integer, Long> metadata = new HashMap<>(); final String topicSpecificName = getTopicSpecificMetadataKey(topicName); final List<String> toDelete = new LinkedList<>(); metadataManager.getAllKeys().forEach(key -> { if (key.startsWith(topicSpecificName)) { // this is my specific topic metadata.put(Integer.parseInt(key.substring(topicSpecificName.length())), Long.parseLong(metadataManager.get(key).get().getValue())); } else if (key.startsWith(KAFKA_METADATA_WITH_SEPARATOR)) { // this is a specific topic, but not mine. ignore. assert true; } else if (key.startsWith(KAFKA_METADATA_PREFIX)) { // this is unspecified topic metadata.put(Integer.parseInt(key.substring(KAFKA_METADATA_PREFIX.length())), Long.parseLong(metadataManager.get(key).get().getValue())); // delete the old, unspecified metadata toDelete.add(key); } }); toDelete.forEach(metadataManager::remove); this.previousRunState = Optional.of(new KafkaRunState(metadata)); }
public KafkaSourceConfiguration(@NonNull final Configuration conf) { super(conf); this.topicName = getConf().getProperty(KAFKA_TOPIC_NAME).get(); this.clusterName = getConf().getProperty(KAFKA_CLUSTER_NAME).get(); this.startTime = DateTime.parse(getConf().getProperty(KAFKA_START_DATE).get(), DateTimeFormat.forPattern(KafkaSourceConfiguration.KAFKA_START_DATE_FORMAT).withZoneUTC() ).toDate().getTime(); }