@Override public void apply() throws Exception { List<PartitionDetail> partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(numPartitions, partitions.size()); partitionConsumer.onFinish(partitions, false); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(numPartitions, partitions.size()); partitionConsumer.onFinish(partitions, false); // after two failure attempts, the partitions are now returned individually partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); } });
/**
 * Forwards to {@code onFinishWithKeys} of the partition consumer returned by
 * {@code getPartitionConsumer(context)}, with the call made from inside
 * {@code Transactionals.execute(transactional, ...)}.
 *
 * @param partitionKeys passed through unchanged to the delegate
 * @param succeeded passed through unchanged to the delegate
 */
@Override
public void onFinishWithKeys(final List<? extends PartitionKey> partitionKeys, boolean succeeded) {
  // NOTE(review): the block-bodied lambda is kept as-is; its shape may matter for which
  // Transactionals.execute overload is selected -- confirm before simplifying
  Transactionals.execute(transactional, context -> {
    getPartitionConsumer(context).onFinishWithKeys(partitionKeys, succeeded);
  });
}
/**
 * Forwards to {@code untake} of the partition consumer returned by {@code getPartitionConsumer(context)},
 * with the call made from inside {@code Transactionals.execute(transactional, ...)}.
 *
 * @param partitions passed through unchanged to the delegate
 */
@Override
public void untake(List<? extends Partition> partitions) {
  // NOTE(review): the block-bodied lambda is kept as-is; its shape may matter for which
  // Transactionals.execute overload is selected -- confirm before simplifying
  Transactionals.execute(transactional, context -> {
    getPartitionConsumer(context).untake(partitions);
  });
}
/**
 * Forwards to {@code consumePartitions(acceptor)} of the partition consumer returned by
 * {@code getPartitionConsumer(context)}, with the call made from inside
 * {@code Transactionals.execute(transactional, ...)}.
 *
 * @param acceptor passed through unchanged to the delegate
 * @return the result produced by the delegate, as handed back by {@code Transactionals.execute}
 */
@Override
public PartitionConsumerResult consumePartitions(PartitionAcceptor acceptor) {
  // NOTE(review): the block-bodied lambda with an explicit return is kept as-is; its shape may matter
  // for which Transactionals.execute overload is selected -- confirm before simplifying
  return Transactionals.execute(transactional, context -> {
    return getPartitionConsumer(context).consumePartitions(acceptor);
  });
}
@Override public void apply() throws Exception { List<PartitionDetail> consumedBy1 = partitionConsumer1.consumePartitions(1).getPartitions(); Assert.assertEquals(1, consumedBy1.size()); List<PartitionDetail> consumedBy2 = partitionConsumer2.consumePartitions(10).getPartitions(); Assert.assertEquals(9, consumedBy2.size()); Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size()); partitionConsumer1.onFinish(consumedBy1, false); consumedBy1.clear(); List<PartitionDetail> consumedBy3 = partitionConsumer3.consumePartitions(2).getPartitions(); Assert.assertEquals(1, consumedBy3.size()); partitionConsumer3.onFinish(consumedBy3, true); partitionConsumer2.onFinishWithKeys(keysConsumedBy2, true); Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size());
@Override public void apply() throws Exception { // consume all the partitions List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(partitionKeys, toKeys(consumedPartitions)); // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); // we configured the max number of retries to be 1. However, we are putting back all the partitions 5 times, // and testing that they are still available for processing, and that there are no failed partitions for (int i = 0; i < 5; i++) { partitionConsumer.untake(consumedPartitions); PartitionConsumerResult result = partitionConsumer.consumePartitions(); consumedPartitions = result.getPartitions(); Assert.assertEquals(partitionKeys, toKeys(consumedPartitions)); Assert.assertEquals(0, result.getFailedPartitions().size()); } // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); // test functionality to put back a partial subset of the retrieved the partitions Partition firstConsumedPartition = consumedPartitions.get(0); // test the untakeWithKeys method partitionConsumer.untakeWithKeys(ImmutableList.of(firstConsumedPartition.getPartitionKey())); consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, consumedPartitions.size()); Assert.assertEquals(firstConsumedPartition, consumedPartitions.get(0)); } });
/**
 * Forwards to {@code untakeWithKeys} of the partition consumer returned by
 * {@code getPartitionConsumer(context)}, with the call made from inside
 * {@code Transactionals.execute(transactional, ...)}.
 *
 * @param partitionKeys passed through unchanged to the delegate
 */
@Override
public void untakeWithKeys(List<? extends PartitionKey> partitionKeys) {
  // NOTE(review): the block-bodied lambda is kept as-is; its shape may matter for which
  // Transactionals.execute overload is selected -- confirm before simplifying
  Transactionals.execute(transactional, context -> {
    getPartitionConsumer(context).untakeWithKeys(partitionKeys);
  });
}
/**
 * Forwards to {@code onFinish} of the partition consumer returned by {@code getPartitionConsumer(context)},
 * with the call made from inside {@code Transactionals.execute(transactional, ...)}.
 *
 * @param partitions passed through unchanged to the delegate
 * @param succeeded passed through unchanged to the delegate
 */
@Override
public void onFinish(final List<? extends Partition> partitions, boolean succeeded) {
  // NOTE(review): the block-bodied lambda is kept as-is; its shape may matter for which
  // Transactionals.execute overload is selected -- confirm before simplifying
  Transactionals.execute(transactional, context -> {
    getPartitionConsumer(context).onFinish(partitions, succeeded);
  });
}
@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override
public void apply() throws Exception {
  // take a single partition, so that there is something to finish
  List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
  Assert.assertEquals(1, partitionDetails.size());

  // aborting the processing of the partition
  partitionConsumer.onFinish(partitionDetails, false);

  // calling abort on the same partition again is expected to throw IllegalStateException, because the
  // partition passed in to abort is no longer IN_PROGRESS
  try {
    partitionConsumer.onFinish(partitionDetails, false);
    Assert.fail("Expected not to be able to abort a partition that is not IN_PROGRESS");
  } catch (IllegalStateException expected) {
    // this exception is the outcome the test is looking for
  }

  // try to process the partition again, this time marking it as complete (by passing in true)
  partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
  Assert.assertEquals(1, partitionDetails.size());
  partitionConsumer.onFinish(partitionDetails, true);

  // attempting to mark it as complete a second time is expected to throw an IllegalArgumentException,
  // because the partition is no longer IN_PROGRESS
  // NOTE(review): the exception type differs from the abort case above -- presumably a completed partition
  // is no longer tracked at all; confirm against the consumer implementation
  try {
    partitionConsumer.onFinish(partitionDetails, true);
    Assert.fail("Expected not to be able to call onFinish on a partition is not IN_PROGRESS");
  } catch (IllegalArgumentException expected) {
    // this exception is the outcome the test is looking for
  }
}
});
@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override public void apply() throws Exception { // consuming and aborting the partition numRetries times plus one (for the first attempt) makes it get removed // from the working set List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); Assert.assertEquals(partitionKey1, partitionDetails.get(0).getPartitionKey()); // aborting the processing of the partition, to put it back in the working set partitionConsumer.onFinish(partitionDetails, false); } });
@Override public void apply() throws Exception { // the consumed partition keys should correspond to partitionKeys2, and not include the dropped, but unconsumed // partitions added before them Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
/**
 * Configures a {@link PartitionedFileSet} as the input of a batch job, restricted to the {@link Partition}s
 * that a partition consumer selects for this run. Intended to be called from the initialize method of the
 * implementing batch job.
 *
 * <p>The dataset is looked up on the given context, a {@link ConcurrentPartitionConsumer} is created over it
 * (its state handled through a {@link DelegatingStatePersistor} wrapping {@code statePersistor}), and the
 * partitions it returns are registered as input partitions in the dataset arguments before the dataset is
 * added as an input to the context. Consuming the partitions reads back the previous consumer state,
 * determines the new partitions to read and persists the resulting state.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer
 *                       state is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter which, when invoked, reports the partitions consumed here as finished,
 *         passing along whether the run succeeded
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext,
                                               String partitionedFileSetName,
                                               DatasetStatePersistor statePersistor,
                                               ConsumerConfiguration consumerConfiguration) {
  PartitionedFileSet inputFileSet = mapreduceContext.getDataset(partitionedFileSetName);
  DelegatingStatePersistor persistor = new DelegatingStatePersistor(mapreduceContext, statePersistor);
  PartitionConsumer consumer = new ConcurrentPartitionConsumer(inputFileSet, persistor, consumerConfiguration);

  // the partitions selected for this run; the returned committer reports on exactly these
  List<PartitionDetail> toProcess = consumer.consumePartitions().getPartitions();

  Map<String, String> inputArguments = new HashMap<>();
  PartitionedFileSetArguments.addInputPartitions(inputArguments, toProcess);
  mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, inputArguments));

  return succeeded -> consumer.onFinish(toProcess, succeeded);
}
@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override public void apply() throws Exception { // consuming and aborting the partition numRetries times plus one (for the first attempt) makes it get removed // from the working set for (int i = 0; i < numRetries + 1; i++) { List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); Assert.assertEquals(partitionKey, partitionDetails.get(0).getPartitionKey()); // aborting the processing of the partition partitionConsumer.onFinish(partitionDetails, false); } // after the 2nd abort, the partition is discarded entirely, and so no partitions are available for consuming PartitionConsumerResult result = partitionConsumer.consumePartitions(1); Assert.assertEquals(0, result.getPartitions().size()); Assert.assertEquals(1, result.getFailedPartitions().size()); Assert.assertEquals(partitionKey, result.getFailedPartitions().get(0).getPartitionKey()); } });
@Override public void apply() throws Exception { // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional // partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional // partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { // Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only // those partitions are added to the dataset at this point List<Partition> consumedPartitions = new ArrayList<>(); // with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1).getPartitions()); Assert.assertEquals(1, consumedPartitions.size()); // ask for 5 more Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions()); Assert.assertEquals(6, consumedPartitions.size()); // ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10). Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions()); Assert.assertEquals(10, consumedPartitions.size()); Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions)); } });
@Override public void apply() throws Exception { // Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only // those partitions are added to the dataset at this point List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions)); } });