@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override public void apply() throws Exception { // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); } });
@Override public void apply() throws Exception { // the consumed partition keys should correspond to partitionKeys2, and not include the dropped, but unconsumed // partitions added before them Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional // partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { // Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only // those partitions are added to the dataset at this point List<Partition> consumedPartitions = new ArrayList<>(); // with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1).getPartitions()); Assert.assertEquals(1, consumedPartitions.size()); // ask for 5 more Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions()); Assert.assertEquals(6, consumedPartitions.size()); // ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10). Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions()); Assert.assertEquals(10, consumedPartitions.size()); Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions)); } });
@Override public void apply() throws Exception { // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional // partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { // Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only // those partitions are added to the dataset at this point List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions)); } });
@Override public void apply() throws Exception { // consume 3 of the 5 initial partitions Assert.assertEquals(partitionKeys1, toKeys(partitionConsumer.consumePartitions(3).getPartitions())); } });
@Override public void apply() throws Exception { // first call to consume will drop the partition from the working set, and return nothing, since it was // the only partition in the working set PartitionConsumerResult result = partitionConsumer.consumePartitions(1); Assert.assertEquals(0, result.getPartitions().size()); Assert.assertEquals(0, result.getFailedPartitions().size()); // following calls to consumePartitions will repopulate the working set and return additional partition(s) result = partitionConsumer.consumePartitions(1); Assert.assertEquals(1, result.getPartitions().size()); Assert.assertEquals(partitionKey2, result.getPartitions().get(0).getPartitionKey()); } });
@Override public void apply() throws Exception { // creating a new PartitionConsumer resets the consumption state. Consuming from it then returns an iterator // with all the partition keys List<? extends Partition> consumedPartitions = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor()).consumePartitions().getPartitions(); Set<PartitionKey> allKeys = new HashSet<>(); allKeys.addAll(partitionKeys1); allKeys.addAll(partitionKeys2); Assert.assertEquals(allKeys, toKeys(consumedPartitions)); } });
@Override public void apply() throws Exception { List<Partition> consumedPartitions = new ArrayList<>(); // specify a PartitionAcceptor that only limits to partitions where 's' field is equal to 'partitionKeys1' // so it will get all the partitions in partitionKeys1 Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(new CustomAcceptor("partitionKeys1")).getPartitions()); // assert that we consumed all the partitions represented by partitionsKeys1 Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions)); consumedPartitions.clear(); // ask for partitions where 's' field is equal to 'partitionKeys2', but stop iterating upon 'i' field == 8 Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(new CustomAcceptor("partitionKeys2", 8)).getPartitions()); // this will give us 8 of partitionKeys2 Assert.assertEquals(8, consumedPartitions.size()); // ask for the remainder of the partitions - i ranging from [8,15). Then, we will have all of 'partitionKeys2' Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions().getPartitions()); Assert.assertEquals(partitionKeys2, toKeys(consumedPartitions)); } });
@Override public void apply() throws Exception { // consuming and aborting the partition numRetries times plus one (for the first attempt) makes it get removed // from the working set List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); Assert.assertEquals(partitionKey1, partitionDetails.get(0).getPartitionKey()); // aborting the processing of the partition, to put it back in the working set partitionConsumer.onFinish(partitionDetails, false); } });
@Override public void apply() throws Exception { List<PartitionDetail> partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(numPartitions, partitions.size()); partitionConsumer.onFinish(partitions, false); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(numPartitions, partitions.size()); partitionConsumer.onFinish(partitions, false); // after two failure attempts, the partitions are now returned individually partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); partitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, partitions.size()); partitionConsumer.onFinish(partitions, true); } });
@Override public void apply() throws Exception { // consume all the partitions List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(partitionKeys, toKeys(consumedPartitions)); // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); // we configured the max number of retries to be 1. However, we are putting back all the partitions 5 times, // and testing that they are still available for processing, and that there are no failed partitions for (int i = 0; i < 5; i++) { partitionConsumer.untake(consumedPartitions); PartitionConsumerResult result = partitionConsumer.consumePartitions(); consumedPartitions = result.getPartitions(); Assert.assertEquals(partitionKeys, toKeys(consumedPartitions)); Assert.assertEquals(0, result.getFailedPartitions().size()); } // consuming the partitions again, without adding any new partitions returns an empty iterator Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty()); // test functionality to put back a partial subset of the retrieved the partitions Partition firstConsumedPartition = consumedPartitions.get(0); // test the untakeWithKeys method partitionConsumer.untakeWithKeys(ImmutableList.of(firstConsumedPartition.getPartitionKey())); consumedPartitions = partitionConsumer.consumePartitions().getPartitions(); Assert.assertEquals(1, consumedPartitions.size()); Assert.assertEquals(firstConsumedPartition, consumedPartitions.get(0)); } });
@Override public void apply() throws Exception { // consuming and aborting the partition numRetries times plus one (for the first attempt) makes it get removed // from the working set for (int i = 0; i < numRetries + 1; i++) { List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); Assert.assertEquals(partitionKey, partitionDetails.get(0).getPartitionKey()); // aborting the processing of the partition partitionConsumer.onFinish(partitionDetails, false); } // after the 2nd abort, the partition is discarded entirely, and so no partitions are available for consuming PartitionConsumerResult result = partitionConsumer.consumePartitions(1); Assert.assertEquals(0, result.getPartitions().size()); Assert.assertEquals(1, result.getFailedPartitions().size()); Assert.assertEquals(partitionKey, result.getFailedPartitions().get(0).getPartitionKey()); } });
/**
 * Configures a {@link PartitionedFileSet} as input of a batch job, restricted to the {@link Partition}s that a
 * {@link PartitionConsumer} hands out for this run. Intended to be called from the initialize method of the
 * implementing batch job.
 *
 * <p>The partitions are obtained from a {@link ConcurrentPartitionConsumer} whose state is managed through the
 * given {@link DatasetStatePersistor} (wrapped in a {@link DelegatingStatePersistor} bound to the MapReduce
 * context). The consumed partitions are then set as input partitions in the dataset arguments, and the dataset
 * is added as input to the MapReduce context.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which the input is
 *                         configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the partition consumer
 *                       state is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter which, given the outcome of the run, reports the partitions consumed here
 *         as finished to the partition consumer
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext,
                                               String partitionedFileSetName,
                                               DatasetStatePersistor statePersistor,
                                               ConsumerConfiguration consumerConfiguration) {
  PartitionedFileSet fileSet = mapreduceContext.getDataset(partitionedFileSetName);

  // consume the partitions to be processed by this run
  DelegatingStatePersistor delegatingPersistor = new DelegatingStatePersistor(mapreduceContext, statePersistor);
  PartitionConsumer consumer = new ConcurrentPartitionConsumer(fileSet, delegatingPersistor, consumerConfiguration);
  List<PartitionDetail> partitionsToProcess = consumer.consumePartitions().getPartitions();

  // restrict the dataset input to exactly those partitions
  Map<String, String> inputArguments = new HashMap<>();
  PartitionedFileSetArguments.addInputPartitions(inputArguments, partitionsToProcess);
  mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, inputArguments));

  // the committer closes over the consumer and the partitions taken above
  return succeeded -> consumer.onFinish(partitionsToProcess, succeeded);
}
@Override public void apply() throws Exception { // creating a new PartitionConsumer resets the consumption state. Consuming from it then returns an iterator // with all the partition keys added after the deletions ConcurrentPartitionConsumer partitionConsumer2 = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor()); Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer2.consumePartitions().getPartitions())); } });
@Override public void apply() throws Exception { List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); // aborting the processing of the partition partitionConsumer.onFinish(partitionDetails, false); // calling abort on the partition again throws IllegalArgumentException, because the partitions passed in to // abort were not found to have IN_PROGRESS state try { partitionConsumer.onFinish(partitionDetails, false); Assert.fail("Expected not to be able to abort a partition that is not IN_PROGRESS"); } catch (IllegalStateException expected) { } // try to process the partition again, this time marking it as complete (by passing in true) partitionDetails = partitionConsumer.consumePartitions(1).getPartitions(); Assert.assertEquals(1, partitionDetails.size()); partitionConsumer.onFinish(partitionDetails, true); // attempting to mark it as complete a second time will an IllegalArgumentException, because the partition // is not found to have an IN_PROGRESS state try { partitionConsumer.onFinish(partitionDetails, true); Assert.fail("Expected not to be able to call onFinish on a partition is not IN_PROGRESS"); } catch (IllegalArgumentException expected) { } } });
// request partitions with a limit argument of 4; afterwards the accumulated list is expected to hold 4 entries
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(4).getPartitions());
Assert.assertEquals(4, consumedPartitions.size());
// request partitions with a limit argument of 3; the accumulated total is expected to be 6, i.e. 2 new entries
// NOTE(review): presumably only 2 unconsumed partitions remain at this point - confirm against the setup
// that precedes this fragment
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(3).getPartitions());
Assert.assertEquals(6, consumedPartitions.size());