/**
 * Reads the persisted working set, returning a fresh empty one when no state has been stored yet.
 */
private ConsumerWorkingSet readState() {
  byte[] serialized = statePersistor.readState();
  if (serialized == null) {
    // nothing persisted yet; start from an empty working set
    return new ConsumerWorkingSet();
  }
  return ConsumerWorkingSet.fromBytes(serialized);
}
}
/**
 * Removes the list of partitions that have failed processing the configured number of times from the working set and
 * returns them.
 */
protected List<PartitionDetail> removeDiscardedPartitions(ConsumerWorkingSet workingSet) {
  List<PartitionDetail> discarded = new ArrayList<>();
  // use an explicit iterator so entries can be removed from the working set while scanning
  for (Iterator<ConsumablePartition> it = workingSet.getPartitions().iterator(); it.hasNext(); ) {
    ConsumablePartition candidate = it.next();
    if (ProcessState.DISCARDED == candidate.getProcessState()) {
      discarded.add(getPartitionedFileSet().getPartition(candidate.getPartitionKey()));
      it.remove();
    }
  }
  return discarded;
}
/**
 * Removes the given partition keys from the working set, as they have been successfully processed.
 */
protected void commit(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) {
  for (PartitionKey partitionKey : partitionKeys) {
    // a partition may only be committed while it is marked IN_PROGRESS
    assertInProgress(workingSet.lookup(partitionKey));
    workingSet.remove(partitionKey);
  }
}
private void testSerDe(ConsumerWorkingSet stateToSerialize) { byte[] bytes = stateToSerialize.toBytes(); // Assert that the serialization format version is 0 Assert.assertEquals(0, bytes[0]); ConsumerWorkingSet deserializedState = ConsumerWorkingSet.fromBytes(bytes); Assert.assertEquals(stateToSerialize, deserializedState); }
@Test public void testByteSerialization() { ConsumerWorkingSet workingSet = new ConsumerWorkingSet(); // test with empty partitions lists testSerDe(workingSet); // test with two elements in AVAILABLE and none in IN_PROGRESS workingSet.getPartitions().add(new DefaultConsumablePartition(generateUniqueKey())); workingSet.getPartitions().add(new DefaultConsumablePartition(generateUniqueKey())); testSerDe(workingSet); // test with three elements in partitions and none in inProgressPartitions workingSet.getPartitions().add(new DefaultConsumablePartition(generateUniqueKey())); testSerDe(workingSet); // mark the first element as IN_PROGRESS workingSet.getPartitions().get(0).take(); workingSet.getPartitions().get(0).setTimestamp(System.currentTimeMillis()); testSerDe(workingSet); }
@Override
public PartitionConsumerResult doConsume(ConsumerWorkingSet workingSet, PartitionAcceptor acceptor) {
  // release/expire any timed-out IN_PROGRESS partitions before topping up the working set
  doExpiry(workingSet);
  workingSet.populate(getPartitionedFileSet(), getConfiguration());
  List<? extends ConsumablePartition> partitions = workingSet.getPartitions();
  if (partitions.size() >= 1) {
    ConsumablePartition firstPartition = partitions.get(0);
    // NOTE(review): method body continues beyond this chunk — remainder not visible here
@Override
public void untakeWithKeys(List<? extends PartitionKey> partitionKeys) {
  // load the persisted working set, release the given keys, then write the state back
  ConsumerWorkingSet state = readState();
  untake(state, partitionKeys);
  statePersistor.persistState(state.toBytes());
}
/**
 * Deserializes a ConsumerWorkingSet from its byte representation.
 *
 * @throws IllegalArgumentException if the serialization format version is not supported
 */
public static ConsumerWorkingSet fromBytes(byte[] bytes) {
  ByteBuffer buffer = ByteBuffer.wrap(bytes);
  // the first byte is the serialization format version
  byte version = buffer.get();
  if (version != VERSION) {
    throw new IllegalArgumentException("Unsupported serialization format: " + version);
  }
  // followed by the partition count, then each partition as a length-prefixed byte array
  int numPartitions = buffer.getInt();
  List<ConsumablePartition> partitions = new ArrayList<>(numPartitions);
  for (int i = 0; i < numPartitions; i++) {
    byte[] partitionBytes = new byte[buffer.getInt()];
    buffer.get(partitionBytes);
    partitions.add(DefaultConsumablePartition.fromBytes(partitionBytes));
  }
  // the remainder is the length-prefixed consumer-state marker
  byte[] markerBytes = new byte[buffer.getInt()];
  buffer.get(markerBytes);
  return new ConsumerWorkingSet(PartitionConsumerState.fromBytes(markerBytes), partitions);
}
@Override
public PartitionConsumerResult doConsume(ConsumerWorkingSet workingSet, PartitionAcceptor acceptor) {
  // expire timed-out partitions first, then fill the working set up to its configured size
  doExpiry(workingSet);
  workingSet.populate(getPartitionedFileSet(), getConfiguration());
  List<PartitionDetail> accepted = selectPartitions(acceptor, workingSet);
  List<PartitionDetail> failed = removeDiscardedPartitions(workingSet);
  return new PartitionConsumerResult(accepted, failed);
}
@Override public void untake(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) { doExpiry(workingSet); for (PartitionKey key : partitionKeys) { ConsumablePartition consumablePartition = workingSet.lookup(key); // don't need to assertInProgress because untake() already does that consumablePartition.untake(); } }
/**
 * Populates the ConsumerWorkingSet by fetching partitions from the given PartitionedFileSet.
 *
 * @param partitionedFileSet the PartitionedFileSet to fetch partitions from
 * @param configuration the ConsumerConfiguration which defines parameters for consuming
 */
public void populate(PartitionedFileSet partitionedFileSet, ConsumerConfiguration configuration) {
  // only fetch as many partitions as the working set has remaining capacity for
  int capacity = configuration.getMaxWorkingSetSize() - partitions.size();
  Predicate<PartitionDetail> predicate = configuration.getPartitionPredicate();
  co.cask.cdap.api.dataset.lib.PartitionConsumerResult result =
    partitionedFileSet.consumePartitions(partitionConsumerState, capacity, predicate);
  // local named 'fetched' (not 'partitions') to avoid shadowing this class's 'partitions' field
  for (PartitionDetail fetched : result.getPartitions()) {
    addPartition(fetched.getPartitionKey());
  }
  // advance the marker so the next populate() continues where this fetch left off
  partitionConsumerState = result.getPartitionConsumerState();
}
@Override
public void onFinishWithKeys(List<? extends PartitionKey> partitionKeys, boolean succeeded) {
  // load persisted state, apply the finish outcome, and persist the result
  ConsumerWorkingSet state = readState();
  doFinish(state, partitionKeys, succeeded);
  statePersistor.persistState(state.toBytes());
}
/** * Resets the process state of the given partition keys, as they were not successfully processed, or discards the * partition if it has already been attempted the configured number of attempts. */ protected void abort(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) { List<PartitionKey> discardedPartitions = new ArrayList<>(); for (PartitionKey key : partitionKeys) { ConsumablePartition consumablePartition = workingSet.lookup(key); assertInProgress(consumablePartition); // either reset its processState, or remove it from the workingSet, depending on how many tries it already has if (consumablePartition.getNumFailures() < getConfiguration().getMaxRetries()) { consumablePartition.retry(); } else { discardedPartitions.add(key); workingSet.lookup(key).discard(); } } if (!discardedPartitions.isEmpty()) { LOG.warn("Discarded keys due to being retried {} times: {}", getConfiguration().getMaxRetries(), discardedPartitions); } }
@Override
public PartitionConsumerResult consumePartitions(PartitionAcceptor acceptor) {
  ConsumerWorkingSet state = readState();
  PartitionConsumerResult result = doConsume(state, acceptor);
  // persist any mutations doConsume made to the working set
  statePersistor.persistState(state.toBytes());
  return result;
}
private List<PartitionDetail> selectPartitions(PartitionAcceptor acceptor, ConsumerWorkingSet workingSet) { long now = System.currentTimeMillis(); List<PartitionDetail> toConsume = new ArrayList<>(); Iterator<ConsumablePartition> iter = workingSet.getPartitions().iterator(); while (iter.hasNext()) { ConsumablePartition consumablePartition = iter.next(); if (ProcessState.AVAILABLE != consumablePartition.getProcessState()) { continue; } PartitionDetail partition = getPartitionedFileSet().getPartition(consumablePartition.getPartitionKey()); if (partition == null) { // no longer exists, so skip it and remove it from the working set iter.remove(); continue; } PartitionAcceptor.Return accept = acceptor.accept(partition); switch (accept) { case ACCEPT: consumablePartition.take(); consumablePartition.setTimestamp(now); toConsume.add(partition); continue; case SKIP: continue; case STOP: return toConsume; } } return toConsume; }
/** * Goes through all partitions. If any IN_PROGRESS partition is older than the configured timeout, reset its state * to AVAILABLE, unless it has already been retried the configured number of times, in which case it is discarded. */ protected void doExpiry(ConsumerWorkingSet workingSet) { long expiryTime = getExpiryBorder(); List<PartitionKey> expiredPartitions = new ArrayList<>(); List<PartitionKey> discardedPartitions = new ArrayList<>(); for (ConsumablePartition partition : workingSet.getPartitions()) { if (partition.getProcessState() == ProcessState.IN_PROGRESS && partition.getTimestamp() < expiryTime) { // either reset its processState, or remove it from the workingSet, depending on how many tries it already has if (partition.getNumFailures() < getConfiguration().getMaxRetries()) { partition.retry(); } else { partition.discard(); } expiredPartitions.add(partition.getPartitionKey()); } } if (!expiredPartitions.isEmpty()) { LOG.warn("Expiring in progress partitions: {}", expiredPartitions); if (!discardedPartitions.isEmpty()) { LOG.warn("Discarded keys due to being retried {} times: {}", getConfiguration().getMaxRetries(), discardedPartitions); } } } }