/**
 * Adds topic-specific configuration to a single work unit, recursing into
 * {@link MultiWorkUnit} containers. Work units that carry no {@code TOPIC_NAME}
 * property are left untouched; a null {@code topicSpecificStateMap} is valid and
 * means there is no topic-level state to apply.
 */
private void addTopicSpecificPropsToWorkUnit(WorkUnit workUnit, Map<String, State> topicSpecificStateMap) {
  // A MultiWorkUnit is only a container: apply the props to each nested unit.
  if (workUnit instanceof MultiWorkUnit) {
    for (WorkUnit nested : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      addTopicSpecificPropsToWorkUnit(nested, topicSpecificStateMap);
    }
    return;
  }

  // Without a topic name there is nothing topic-specific to apply.
  if (!workUnit.contains(TOPIC_NAME)) {
    return;
  }

  addDatasetUrnOptionally(workUnit);

  if (topicSpecificStateMap == null) {
    return;
  }

  String topic = workUnit.getProp(TOPIC_NAME);
  if (topicSpecificStateMap.containsKey(topic)) {
    workUnit.addAll(topicSpecificStateMap.get(topic));
  }
}
private void createEmptyWorkUnitsForSkippedPartitions(Map<String, List<WorkUnit>> workUnits, Map<String, State> topicSpecificStateMap, SourceState state) { // in case the previous offset not been set getAllPreviousOffsetState(state); // For each partition that has a previous offset, create an empty WorkUnit for it if // it is not in this.partitionsToBeProcessed. for (Map.Entry<KafkaPartition, Long> entry : this.previousOffsets.entrySet()) { KafkaPartition partition = entry.getKey(); if (!this.partitionsToBeProcessed.contains(partition)) { String topicName = partition.getTopicName(); if (!this.isDatasetStateEnabled.get() || this.topicsToProcess.contains(topicName)) { long previousOffset = entry.getValue(); WorkUnit emptyWorkUnit = createEmptyWorkUnit(partition, previousOffset, this.previousOffsetFetchEpochTimes.get(partition), Optional.fromNullable(topicSpecificStateMap.get(partition.getTopicName()))); if (workUnits.containsKey(topicName)) { workUnits.get(topicName).add(emptyWorkUnit); } else { workUnits.put(topicName, Lists.newArrayList(emptyWorkUnit)); } } } } }
boolean previousOffsetNotFound = false; try { previousOffset = getPreviousOffsetForPartition(partition, state); offsets.setPreviousEndOffset(previousOffset); offsets.setPreviousStartOffset(getPreviousLowWatermark(partition, state)); offsets.setPreviousStartFetchEpochTime(getPreviousStartFetchEpochTimeForPartition(partition, state)); offsets.setPreviousStopFetchEpochTime(getPreviousStopFetchEpochTimeForPartition(partition, state)); offsets.setPreviousLatestOffset(getPreviousExpectedHighWatermark(partition, state)); previousOffsetFetchEpochTime = getPreviousOffsetFetchEpochTimeForPartition(partition, state); offsets.setPreviousOffsetFetchEpochTime(previousOffsetFetchEpochTime); } catch (PreviousOffsetNotFoundException e) { .format("Failed to retrieve earliest and/or latest offset for partition %s. This partition will be skipped.", partition)); return previousOffsetNotFound ? null : createEmptyWorkUnit(partition, previousOffset, previousOffsetFetchEpochTime, topicSpecificState); if (shouldMoveToLatestOffset(partition, state)) { offsets.startAtLatestOffset(); } else if (previousOffsetNotFound) { } else { LOG.warn(offsetOutOfRangeMsg + "This partition will be skipped."); return createEmptyWorkUnit(partition, previousOffset, previousOffsetFetchEpochTime, topicSpecificState); return getWorkUnitForTopicPartition(partition, offsets, topicSpecificState);
List<KafkaTopic> topics = getFilteredTopics(state); this.topicsToProcess = topics.stream().map(KafkaTopic::getName).collect(toSet()); createEmptyWorkUnitsForSkippedPartitions(workUnits, topicSpecificStateMap, state); addTopicSpecificPropsToWorkUnits(workUnitList, topicSpecificStateMap); setLimiterReportKeyListToWorkUnits(workUnitList, getLimiterExtractorReportKeys()); return workUnitList; } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
/**
 * Returns the previously recorded stop-fetch epoch time for {@code partition},
 * or 0 when none was recorded. Lazily loads the previous offset state first.
 */
private long getPreviousStopFetchEpochTimeForPartition(KafkaPartition partition, SourceState state) {
  getAllPreviousOffsetState(state);
  // Missing entry defaults to 0, matching "never fetched".
  return this.previousStopFetchEpochTimes.getOrDefault(partition, 0L);
}
Extract extract = this.createExtract(currentTableType, currentExtractNamespace, currentExtractTableName); if (isCurrentFullExtract) { extract.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); addDatasetUrnOptionally(workUnit); workUnit.setProp(PARTITION_ID, partition.getId()); workUnit.setProp(LEADER_ID, partition.getLeader().getId());
/**
 * Applies topic-specific configuration to every work unit in {@code workUnits}.
 */
private void addTopicSpecificPropsToWorkUnits(List<WorkUnit> workUnits, Map<String, State> topicSpecificStateMap) {
  workUnits.forEach(wu -> addTopicSpecificPropsToWorkUnit(wu, topicSpecificStateMap));
}
boolean previousOffsetNotFound = false; try { previousOffset = getPreviousOffsetForPartition(partition, state); offsets.setPreviousEndOffset(previousOffset); offsets.setPreviousStartOffset(getPreviousLowWatermark(partition, state)); offsets.setPreviousStartFetchEpochTime(getPreviousStartFetchEpochTimeForPartition(partition, state)); offsets.setPreviousStopFetchEpochTime(getPreviousStopFetchEpochTimeForPartition(partition, state)); offsets.setPreviousLatestOffset(getPreviousExpectedHighWatermark(partition, state)); previousOffsetFetchEpochTime = getPreviousOffsetFetchEpochTimeForPartition(partition, state); offsets.setPreviousOffsetFetchEpochTime(previousOffsetFetchEpochTime); } catch (PreviousOffsetNotFoundException e) { .format("Failed to retrieve earliest and/or latest offset for partition %s. This partition will be skipped.", partition)); return previousOffsetNotFound ? null : createEmptyWorkUnit(partition, previousOffset, previousOffsetFetchEpochTime, topicSpecificState); if (shouldMoveToLatestOffset(partition, state)) { offsets.startAtLatestOffset(); } else if (previousOffsetNotFound) { } else { LOG.warn(offsetOutOfRangeMsg + "This partition will be skipped."); return createEmptyWorkUnit(partition, previousOffset, previousOffsetFetchEpochTime, topicSpecificState); return getWorkUnitForTopicPartition(partition, offsets, topicSpecificState);
List<KafkaTopic> topics = getFilteredTopics(state); this.topicsToProcess = topics.stream().map(KafkaTopic::getName).collect(toSet()); createEmptyWorkUnitsForSkippedPartitions(workUnits, topicSpecificStateMap, state); addTopicSpecificPropsToWorkUnits(workUnitList, topicSpecificStateMap); setLimiterReportKeyListToWorkUnits(workUnitList, getLimiterExtractorReportKeys()); return workUnitList; } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
/**
 * Returns the previously recorded start-fetch epoch time for {@code partition},
 * or 0 when none was recorded. Lazily loads the previous offset state first.
 */
private long getPreviousStartFetchEpochTimeForPartition(KafkaPartition partition, SourceState state) {
  getAllPreviousOffsetState(state);
  // Missing entry defaults to 0, matching "never fetched".
  return this.previousStartFetchEpochTimes.getOrDefault(partition, 0L);
}
Extract extract = this.createExtract(currentTableType, currentExtractNamespace, currentExtractTableName); if (isCurrentFullExtract) { extract.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); addDatasetUrnOptionally(workUnit); workUnit.setProp(PARTITION_ID, partition.getId()); workUnit.setProp(LEADER_ID, partition.getLeader().getId());
/**
 * Applies topic-specific configuration to every work unit in {@code workUnits}.
 */
private void addTopicSpecificPropsToWorkUnits(List<WorkUnit> workUnits, Map<String, State> topicSpecificStateMap) {
  workUnits.forEach(wu -> addTopicSpecificPropsToWorkUnit(wu, topicSpecificStateMap));
}
/**
 * Returns the previous expected high watermark for {@code partition}.
 *
 * @throws PreviousOffsetNotFoundException when no watermark was recorded for the partition
 */
private long getPreviousExpectedHighWatermark(KafkaPartition partition, SourceState state)
    throws PreviousOffsetNotFoundException {
  getAllPreviousOffsetState(state);
  // Fail fast when no previous watermark exists for this partition.
  if (!this.previousExpectedHighWatermarks.containsKey(partition)) {
    throw new PreviousOffsetNotFoundException(String
        .format("Previous expected high watermark for topic %s, partition %s not found.", partition.getTopicName(),
            partition.getId()));
  }
  return this.previousExpectedHighWatermarks.get(partition);
}
/**
 * Adds topic-specific configuration to a single work unit, recursing into
 * {@link MultiWorkUnit} containers. Work units that carry no {@code TOPIC_NAME}
 * property are left untouched; a null {@code topicSpecificStateMap} is valid and
 * means there is no topic-level state to apply.
 */
private void addTopicSpecificPropsToWorkUnit(WorkUnit workUnit, Map<String, State> topicSpecificStateMap) {
  // A MultiWorkUnit is only a container: apply the props to each nested unit.
  if (workUnit instanceof MultiWorkUnit) {
    for (WorkUnit nested : ((MultiWorkUnit) workUnit).getWorkUnits()) {
      addTopicSpecificPropsToWorkUnit(nested, topicSpecificStateMap);
    }
    return;
  }

  // Without a topic name there is nothing topic-specific to apply.
  if (!workUnit.contains(TOPIC_NAME)) {
    return;
  }

  addDatasetUrnOptionally(workUnit);

  if (topicSpecificStateMap == null) {
    return;
  }

  String topic = workUnit.getProp(TOPIC_NAME);
  if (topicSpecificStateMap.containsKey(topic)) {
    workUnit.addAll(topicSpecificStateMap.get(topic));
  }
}
private void createEmptyWorkUnitsForSkippedPartitions(Map<String, List<WorkUnit>> workUnits, Map<String, State> topicSpecificStateMap, SourceState state) { // in case the previous offset not been set getAllPreviousOffsetState(state); // For each partition that has a previous offset, create an empty WorkUnit for it if // it is not in this.partitionsToBeProcessed. for (Map.Entry<KafkaPartition, Long> entry : this.previousOffsets.entrySet()) { KafkaPartition partition = entry.getKey(); if (!this.partitionsToBeProcessed.contains(partition)) { String topicName = partition.getTopicName(); if (!this.isDatasetStateEnabled.get() || this.topicsToProcess.contains(topicName)) { long previousOffset = entry.getValue(); WorkUnit emptyWorkUnit = createEmptyWorkUnit(partition, previousOffset, this.previousOffsetFetchEpochTimes.get(partition), Optional.fromNullable(topicSpecificStateMap.get(partition.getTopicName()))); if (workUnits.containsKey(topicName)) { workUnits.get(topicName).add(emptyWorkUnit); } else { workUnits.put(topicName, Lists.newArrayList(emptyWorkUnit)); } } } } }
/**
 * Returns the previous offset-fetch epoch time for {@code partition}.
 *
 * @throws PreviousOffsetNotFoundException when no fetch epoch time was recorded for the partition
 */
private long getPreviousOffsetFetchEpochTimeForPartition(KafkaPartition partition, SourceState state)
    throws PreviousOffsetNotFoundException {
  getAllPreviousOffsetState(state);
  // Fail fast when no previous fetch epoch time exists for this partition.
  if (!this.previousOffsetFetchEpochTimes.containsKey(partition)) {
    throw new PreviousOffsetNotFoundException(String
        .format("Previous offset fetch epoch time for topic %s, partition %s not found.", partition.getTopicName(),
            partition.getId()));
  }
  return this.previousOffsetFetchEpochTimes.get(partition);
}
/**
 * Returns the previous low watermark for {@code partition}.
 *
 * @throws PreviousOffsetNotFoundException when no low watermark was recorded for the partition
 */
private long getPreviousLowWatermark(KafkaPartition partition, SourceState state)
    throws PreviousOffsetNotFoundException {
  getAllPreviousOffsetState(state);
  // Fail fast when no previous low watermark exists for this partition.
  if (!this.previousLowWatermarks.containsKey(partition)) {
    throw new PreviousOffsetNotFoundException(String
        .format("Previous low watermark for topic %s, partition %s not found.", partition.getTopicName(),
            partition.getId()));
  }
  return this.previousLowWatermarks.get(partition);
}
/**
 * Returns the previous offset for {@code partition}.
 *
 * @throws PreviousOffsetNotFoundException when no offset was recorded for the partition
 */
private long getPreviousOffsetForPartition(KafkaPartition partition, SourceState state)
    throws PreviousOffsetNotFoundException {
  getAllPreviousOffsetState(state);
  // Fail fast when no previous offset exists for this partition.
  if (!this.previousOffsets.containsKey(partition)) {
    throw new PreviousOffsetNotFoundException(String
        .format("Previous offset for topic %s, partition %s not found.", partition.getTopicName(),
            partition.getId()));
  }
  return this.previousOffsets.get(partition);
}
/**
 * Returns the previously recorded stop-fetch epoch time for {@code partition},
 * or 0 when none was recorded. Lazily loads the previous offset state first.
 */
private long getPreviousStopFetchEpochTimeForPartition(KafkaPartition partition, SourceState state) {
  getAllPreviousOffsetState(state);
  // Missing entry defaults to 0, matching "never fetched".
  return this.previousStopFetchEpochTimes.getOrDefault(partition, 0L);
}
/**
 * Returns the previously recorded start-fetch epoch time for {@code partition},
 * or 0 when none was recorded. Lazily loads the previous offset state first.
 */
private long getPreviousStartFetchEpochTimeForPartition(KafkaPartition partition, SourceState state) {
  getAllPreviousOffsetState(state);
  // Missing entry defaults to 0, matching "never fetched".
  return this.previousStartFetchEpochTimes.getOrDefault(partition, 0L);
}