/**
 * Constructs a Samza stream partition object based upon an existing Samza stream partition.
 * The new instance is built by delegating to the (system, stream, partition) constructor with the
 * values returned by {@code other.getSystem()}, {@code other.getStream()} and {@code other.getPartition()}.
 *
 * @param other Reference to an already existing Samza stream partition; it is dereferenced
 *              immediately, so it must not be {@code null}.
 */
public SystemStreamPartition(SystemStreamPartition other) { this(other.getSystem(), other.getStream(), other.getPartition()); }
/**
 * Expands one (system stream, metadata) entry into a stream of system stream partitions:
 * one {@link SystemStreamPartition} per key of the metadata's partition-metadata map,
 * each paired with the entry's system stream.
 *
 * @param ssMs map entry holding a system stream and its metadata
 * @return a stream with one partition object per partition key found in the metadata
 */
private Stream<SystemStreamPartition> mapSSMToSSP(Map.Entry<SystemStream, SystemStreamMetadata> ssMs) {
  final SystemStreamMetadata metadata = ssMs.getValue();
  final SystemStream systemStream = ssMs.getKey();
  return metadata.getSystemStreamPartitionMetadata()
      .keySet()
      .stream()
      .map(partitionKey -> new SystemStreamPartition(systemStream, partitionKey));
}
/**
 * Determines the partition, under the previous partition count, that corresponds to the given
 * partition under the current partition count.
 *
 * <p>The previous partition id is computed as {@code currentPartitionId % previousPartitionCount};
 * the returned object keeps the system stream of the input.
 *
 * @param currentSystemStreamPartition the partition under the current partition count; must not be
 *        null and must carry a non-null partition
 * @param previousPartitionCount the partition count before the change; must be greater than zero
 * @param currentPartitionCount the partition count after the change; must be a multiple of
 *        {@code previousPartitionCount}
 * @return a new {@link SystemStreamPartition} on the same system stream with the previous partition id
 */
@Override
public SystemStreamPartition getPreviousSSP(SystemStreamPartition currentSystemStreamPartition, int previousPartitionCount, int currentPartitionCount) {
  Preconditions.checkNotNull(currentSystemStreamPartition);
  // Validate the divisor first: a zero count would otherwise surface as a bare
  // ArithmeticException from the modulo operations below instead of a descriptive error.
  Preconditions.checkArgument(previousPartitionCount > 0,
      String.format("Previous partition count: %d should be greater than zero.", previousPartitionCount));
  Preconditions.checkArgument(currentPartitionCount % previousPartitionCount == 0,
      String.format("New partition count: %d should be a multiple of previous partition count: %d.", currentPartitionCount, previousPartitionCount));
  Partition partition = currentSystemStreamPartition.getPartition();
  Preconditions.checkNotNull(partition, String.format("SystemStreamPartition: %s cannot have null partition", currentSystemStreamPartition));
  int currentPartitionId = partition.getPartitionId();
  int previousPartitionId = currentPartitionId % previousPartitionCount;
  return new SystemStreamPartition(currentSystemStreamPartition.getSystemStream(), new Partition(previousPartitionId));
}
}
/**
 * Looks up the descriptor for a system stream partition: the stream name selects an entry in
 * {@code cachedPartitionDescriptorMap}, and that entry is then queried with the partition.
 *
 * @param systemStreamPartition the partition whose descriptor is wanted
 * @return the descriptor returned by the per-stream lookup
 * @throws SamzaException wrapping the {@link ExecutionException} if the lookup fails
 */
private List<String> getPartitionDescriptor(SystemStreamPartition systemStreamPartition) {
  try {
    return cachedPartitionDescriptorMap
        .get(systemStreamPartition.getStream())
        .get(systemStreamPartition.getPartition());
  } catch (ExecutionException e) {
    throw new SamzaException("Failed to obtain descriptor for " + systemStreamPartition, e);
  }
}
/**
 * Returns a partition to the pool of available partitions, keyed by its stream name.
 * The per-stream set is created on first use. Validation fails if the partition was already
 * present in the pool. The resulting pool size for the stream is logged.
 *
 * @param ssp the partition to put back into the free pool
 */
synchronized void free(SystemStreamPartition ssp) {
  final String streamName = ssp.getStream();
  final boolean added = availableSsps.computeIfAbsent(streamName, unused -> new HashSet<>()).add(ssp);
  final String alreadyFreeMessage = String.format("Ssp %s is already in free pool.", ssp);
  Validate.isTrue(added, alreadyFreeMessage);
  LOG.info("Number of unassigned partitions for system-stream {} is {}.",
      ssp.getSystemStream(), availableSsps.get(streamName).size());
}
}
/**
 * Fetch system stream metadata for the given streams.
 *
 * <p>Only entries of {@code bufferedMessages} whose partition belongs to {@code systemName} and
 * whose stream is contained in {@code streamNames} are considered. They are grouped by stream
 * name, and each group is turned into metadata via {@code constructSystemStreamMetadata}.
 *
 * @param systemName system name
 * @param streamNames set of input streams
 *
 * @return a {@link Map} of stream to {@link SystemStreamMetadata}
 */
Map<String, SystemStreamMetadata> getSystemStreamMetadata(String systemName, Set<String> streamNames) {
  // Stage 1: select the matching buffered partitions and bucket them by stream name.
  Map<String, Map<SystemStreamPartition, List<IncomingMessageEnvelope>>> envelopesByStream =
      bufferedMessages.entrySet()
          .stream()
          .filter(buffered -> systemName.equals(buffered.getKey().getSystem()))
          .filter(buffered -> streamNames.contains(buffered.getKey().getStream()))
          .collect(Collectors.groupingBy(
              buffered -> buffered.getKey().getStream(),
              Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));

  // Stage 2: build one metadata object per stream from its bucket.
  return envelopesByStream.entrySet()
      .stream()
      .collect(Collectors.toMap(
          Map.Entry::getKey,
          perStream -> constructSystemStreamMetadata(perStream.getKey(), perStream.getValue())));
}
/**
 * Sends the control message to every partition of the ssp's system stream except the ssp's own
 * partition. Partition ids from 0 up to (but excluding) the count returned by
 * {@code getPartitionCount} are targeted; each envelope is built with a null key.
 *
 * @param message the control message to forward
 * @param ssp the partition the message originates from; its own partition id is skipped
 * @param collector collector used to send the outgoing envelopes
 */
void broadcastToOtherPartitions(ControlMessage message, SystemStreamPartition ssp, MessageCollector collector) {
  final SystemStream targetStream = ssp.getSystemStream();
  final int totalPartitions = getPartitionCount(targetStream);
  final int ownPartitionId = ssp.getPartition().getPartitionId();

  for (int partitionId = 0; partitionId < totalPartitions; partitionId++) {
    if (partitionId == ownPartitionId) {
      continue;
    }
    collector.send(new OutgoingMessageEnvelope(targetStream, partitionId, null, message));
  }
}
/**
 * Registers a partition: logs the call, records the partition's stream name in {@code streams},
 * hands the partition to {@code sspAllocator.free}, and then delegates to the superclass.
 *
 * @param ssp the partition being registered
 * @param offset the requested offset; the log message states that it will be ignored, though it is
 *        still forwarded to {@code super.register}
 */
@Override
public void register(SystemStreamPartition ssp, String offset) {
  LOG.info("Register called with ssp {} and offset {}. Offset will be ignored.", ssp, offset);
  streams.add(ssp.getStream());
  sspAllocator.free(ssp);
  super.register(ssp, offset);
}
/**
 * Registers partitions 1 through 5 of "test-stream" on "test-system" against a single logged
 * store, records the same last-processed offset for each, writes the offset files, and verifies
 * that {@code getFileOffsets()} returns that offset for every partition.
 */
@Test
public void testGetFileOffsets() {
  final String storeName = "test-get-file-offsets-store";
  final String taskName = "test-get-file-offsets-task";
  final String offset = "123";

  Set<SystemStreamPartition> ssps = IntStream.range(1, 6)
      .mapToObj(idx -> new SystemStreamPartition("test-system", "test-stream", new Partition(idx)))
      .collect(Collectors.toSet());

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ssps)
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  ssps.forEach(ssp -> testSideInputStorageManager.updateLastProcessedOffset(ssp, offset));
  testSideInputStorageManager.writeOffsetFiles();

  Map<SystemStreamPartition, String> fileOffsets = testSideInputStorageManager.getFileOffsets();

  ssps.forEach(ssp -> {
    assertTrue("Failed to get offset for ssp: " + ssp + " from file.", fileOffsets.containsKey(ssp));
    // assertEquals takes (message, expected, actual); the known offset is the expected value.
    assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp));
  });
}
/**
 * Gets the starting offsets for the {@link SystemStreamPartition}s belonging to all the side input stores.
 *
 * <p>For every partition in {@code sspsToStores}, the decision is delegated to
 * {@code StorageManagerUtil.getStartingOffset}, which is handed the partition, its system admin,
 * the local file offset and the oldest source offset. As originally documented for this method:
 * the local file offset is used when it is available and greater than the oldest available offset
 * from the source; otherwise the oldest offset in the source is used.
 *
 * @param fileOffsets offsets from the local offset file
 * @param oldestOffsets oldest offsets from the source
 * @return a {@link Map} of {@link SystemStreamPartition} to offset
 */
@VisibleForTesting
Map<SystemStreamPartition, String> getStartingOffsets(
    Map<SystemStreamPartition, String> fileOffsets, Map<SystemStreamPartition, String> oldestOffsets) {
  final Map<SystemStreamPartition, String> offsetsBySsp = new HashMap<>();

  for (SystemStreamPartition ssp : sspsToStores.keySet()) {
    final String localOffset = fileOffsets.get(ssp);
    final String sourceOldestOffset = oldestOffsets.get(ssp);
    final String chosenOffset = StorageManagerUtil.getStartingOffset(
        ssp, systemAdmins.getSystemAdmin(ssp.getSystem()), localOffset, sourceOldestOffset);
    offsetsBySsp.put(ssp, chosenOffset);
  }

  return offsetsBySsp;
}
/** * Constructing the end-of-stream states for a task * @param ssps all the ssps assigned to this task * @param producerTaskCounts mapping from a stream to the number of upstream tasks that produce to it */ EndOfStreamStates(Set<SystemStreamPartition> ssps, Map<SystemStream, Integer> producerTaskCounts) { Map<SystemStreamPartition, EndOfStreamState> states = new HashMap<>(); ssps.forEach(ssp -> { states.put(ssp, new EndOfStreamState(producerTaskCounts.getOrDefault(ssp.getSystemStream(), 0))); }); this.eosStates = Collections.unmodifiableMap(states); }
@Override public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) { Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<TaskName, Set<SystemStreamPartition>>(); for (SystemStreamPartition ssp : ssps) { if (broadcastStreams.contains(ssp)) { continue; } HashSet<SystemStreamPartition> sspSet = new HashSet<SystemStreamPartition>(); sspSet.add(ssp); groupedMap.put(new TaskName(ssp.toString()), sspSet); } // assign the broadcast streams to all the taskNames if (!broadcastStreams.isEmpty()) { for (Set<SystemStreamPartition> value : groupedMap.values()) { value.addAll(broadcastStreams); } } return groupedMap; } }
/**
 * Sets the aggregate gauge of a partition to the given time. The gauge is created on first use
 * with the name {@code <stream>-<partitionId>-aggr-watermark} and an initial value of 0.
 *
 * @param systemStreamPartition the partition whose aggregate gauge is updated
 * @param time the value to store in the gauge
 */
void setAggregateTime(SystemStreamPartition systemStreamPartition, long time) {
  final Gauge<Long> gauge = aggregates.computeIfAbsent(systemStreamPartition, key -> {
    final String gaugeName =
        String.format("%s-%s-aggr-watermark", key.getStream(), key.getPartition().getPartitionId());
    return newGauge(gaugeName, 0L);
  });
  gauge.set(time);
}
}
/**
 * Fetch system stream metadata for the given streams.
 *
 * <p>Only entries of {@code bufferedMessages} whose partition belongs to {@code systemName} and
 * whose stream is contained in {@code streamNames} are considered. They are grouped by stream
 * name, and each group is turned into metadata via {@code constructSystemStreamMetadata}.
 *
 * @param systemName system name
 * @param streamNames set of input streams
 *
 * @return a {@link Map} of stream to {@link SystemStreamMetadata}
 */
Map<String, SystemStreamMetadata> getSystemStreamMetadata(String systemName, Set<String> streamNames) {
  // Stage 1: select the matching buffered partitions and bucket them by stream name.
  Map<String, Map<SystemStreamPartition, List<IncomingMessageEnvelope>>> envelopesByStream =
      bufferedMessages.entrySet()
          .stream()
          .filter(buffered -> systemName.equals(buffered.getKey().getSystem()))
          .filter(buffered -> streamNames.contains(buffered.getKey().getStream()))
          .collect(Collectors.groupingBy(
              buffered -> buffered.getKey().getStream(),
              Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));

  // Stage 2: build one metadata object per stream from its bucket.
  return envelopesByStream.entrySet()
      .stream()
      .collect(Collectors.toMap(
          Map.Entry::getKey,
          perStream -> constructSystemStreamMetadata(perStream.getKey(), perStream.getValue())));
}
/**
 * Sends the control message to every partition of the ssp's system stream except the ssp's own
 * partition. Partition ids from 0 up to (but excluding) the count returned by
 * {@code getPartitionCount} are targeted; each envelope is built with a null key.
 *
 * @param message the control message to forward
 * @param ssp the partition the message originates from; its own partition id is skipped
 * @param collector collector used to send the outgoing envelopes
 */
void broadcastToOtherPartitions(ControlMessage message, SystemStreamPartition ssp, MessageCollector collector) {
  final SystemStream targetStream = ssp.getSystemStream();
  final int totalPartitions = getPartitionCount(targetStream);
  final int ownPartitionId = ssp.getPartition().getPartitionId();

  for (int partitionId = 0; partitionId < totalPartitions; partitionId++) {
    if (partitionId == ownPartitionId) {
      continue;
    }
    collector.send(new OutgoingMessageEnvelope(targetStream, partitionId, null, message));
  }
}
/**
 * Test helper that reads the allocator's {@code availableSsps} field reflectively and reports
 * whether the given partition is present in the set stored under its stream name.
 *
 * @param sspAllocator the allocator whose {@code availableSsps} field is inspected
 * @param ssp the partition to look for
 * @return {@code true} if the stream has an entry and that entry contains the partition
 * @throws NoSuchFieldException if the allocator's class declares no {@code availableSsps} field
 * @throws IllegalAccessException if the field cannot be read
 */
@SuppressWarnings("unchecked")
private boolean isSspAvailable(SSPAllocator sspAllocator, SystemStreamPartition ssp)
    throws NoSuchFieldException, IllegalAccessException {
  Field poolField = sspAllocator.getClass().getDeclaredField("availableSsps");
  poolField.setAccessible(true);
  Map<String, Set<SystemStreamPartition>> pool =
      (Map<String, Set<SystemStreamPartition>>) poolField.get(sspAllocator);

  String streamName = ssp.getStream();
  if (!pool.containsKey(streamName)) {
    return false;
  }
  return pool.get(streamName).contains(ssp);
}
}
/**
 * Sets up two logged stores, each fed by a single partition, records the same last-processed
 * offset for both partitions and writes the offset files. Verifies that the first store's
 * directory exists and that {@code getFileOffsets()} returns the recorded offset for both
 * partitions.
 */
@Test
public void testWriteOffsetFilesForPersistedStore() {
  final String storeName = "test-write-offset-persisted-store";
  final String storeName2 = "test-write-offset-persisted-store-2";
  final String taskName = "test-write-offset-for-persisted-task";
  final String offset = "123";
  final SystemStreamPartition ssp = new SystemStreamPartition("test-system", "test-stream", new Partition(0));
  final SystemStreamPartition ssp2 = new SystemStreamPartition("test-system2", "test-stream2", new Partition(0));

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ImmutableSet.of(ssp))
          .addLoggedStore(storeName2, ImmutableSet.of(ssp2))
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  testSideInputStorageManager.updateLastProcessedOffset(ssp, offset);
  testSideInputStorageManager.updateLastProcessedOffset(ssp2, offset);
  testSideInputStorageManager.writeOffsetFiles();

  File storeDir = testSideInputStorageManager.getStoreLocation(storeName);
  assertTrue("Store directory: " + storeDir.getPath() + " is missing.", storeDir.exists());

  Map<SystemStreamPartition, String> fileOffsets = testSideInputStorageManager.getFileOffsets();

  // assertEquals takes (message, expected, actual); the known offset is the expected value.
  assertTrue("Failed to get offset for ssp: " + ssp + " from file.", fileOffsets.containsKey(ssp));
  assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp));

  assertTrue("Failed to get offset for ssp: " + ssp2 + " from file.", fileOffsets.containsKey(ssp2));
  assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp2));
}
/**
 * Gets the starting offsets for the {@link SystemStreamPartition}s belonging to all the side input stores.
 *
 * <p>For every partition in {@code sspsToStores}, the decision is delegated to
 * {@code StorageManagerUtil.getStartingOffset}, which is handed the partition, its system admin,
 * the local file offset and the oldest source offset. As originally documented for this method:
 * the local file offset is used when it is available and greater than the oldest available offset
 * from the source; otherwise the oldest offset in the source is used.
 *
 * @param fileOffsets offsets from the local offset file
 * @param oldestOffsets oldest offsets from the source
 * @return a {@link Map} of {@link SystemStreamPartition} to offset
 */
@VisibleForTesting
Map<SystemStreamPartition, String> getStartingOffsets(
    Map<SystemStreamPartition, String> fileOffsets, Map<SystemStreamPartition, String> oldestOffsets) {
  final Map<SystemStreamPartition, String> offsetsBySsp = new HashMap<>();

  for (SystemStreamPartition ssp : sspsToStores.keySet()) {
    final String localOffset = fileOffsets.get(ssp);
    final String sourceOldestOffset = oldestOffsets.get(ssp);
    final String chosenOffset = StorageManagerUtil.getStartingOffset(
        ssp, systemAdmins.getSystemAdmin(ssp.getSystem()), localOffset, sourceOldestOffset);
    offsetsBySsp.put(ssp, chosenOffset);
  }

  return offsetsBySsp;
}
/**
 * Delegates to the superclass {@code put}. Any exception it raises is not propagated: it is
 * logged together with the ssp's system stream, and the current thread's interrupt flag is set.
 *
 * @param ssp the partition the envelope belongs to
 * @param envelope the incoming message to hand to the superclass
 */
@Override
protected void put(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
  try {
    super.put(ssp, envelope);
  } catch (Exception e) {
    LOG.error("Exception while putting record. Shutting down SystemStream {}", ssp.getSystemStream(), e);
    // Flag the calling thread as interrupted rather than rethrowing.
    final Thread caller = Thread.currentThread();
    caller.interrupt();
  }
}
@Override public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) { Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<TaskName, Set<SystemStreamPartition>>(); for (SystemStreamPartition ssp : ssps) { if (broadcastStreams.contains(ssp)) { continue; } HashSet<SystemStreamPartition> sspSet = new HashSet<SystemStreamPartition>(); sspSet.add(ssp); groupedMap.put(new TaskName(ssp.toString()), sspSet); } // assign the broadcast streams to all the taskNames if (!broadcastStreams.isEmpty()) { for (Set<SystemStreamPartition> value : groupedMap.values()) { for (SystemStreamPartition ssp : broadcastStreams) { value.add(ssp); } } } return groupedMap; }