/**
 * Sets a single partition as input for a PartitionedFileSet. Note that if both a PartitionFilter
 * and Partition(s) are specified, the PartitionFilter takes precedence and the specified
 * Partition(s) will be ignored.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partition the partition to add as input
 */
public static void addInputPartition(Map<String, String> arguments, Partition partition) {
  // Delegate to the iterator-based overload with a one-element iterator.
  addInputPartitions(arguments, Collections.singletonList(partition).iterator());
}
/**
 * Sets multiple partitions as input for a PartitionedFileSet. Note that if both a PartitionFilter
 * and Partition(s) are specified, the PartitionFilter takes precedence and the specified
 * Partition(s) will be ignored.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partitions an iterable of partitions to add as input
 */
public static void addInputPartitions(Map<String, String> arguments,
                                      Iterable<? extends Partition> partitions) {
  // Unwrap the iterable and delegate to the iterator-based overload.
  Iterator<? extends Partition> partitionIterator = partitions.iterator();
  addInputPartitions(arguments, partitionIterator);
}
/**
 * Used from the initialize method of the implementing batch job to configure as input a
 * PartitionedFileSet that has specified a set of {@link Partition}s of a
 * {@link PartitionedFileSet} to be processed by the run of the batch job.
 * It does this by reading back the previous state, determining the new partitions to read,
 * computing the new state, and persisting this new state. It then configures this dataset as
 * input to the mapreduce context that is passed in.
 *
 * @param mapreduceContext MapReduce context used to access the PartitionedFileSet, and on which
 *                         the input is configured
 * @param partitionedFileSetName the name of the {@link PartitionedFileSet} to consume
 *                               partitions from
 * @param statePersistor a {@link DatasetStatePersistor} responsible for defining how the
 *                       partition consumer state is managed
 * @param consumerConfiguration defines parameters for the partition consumption
 * @return a BatchPartitionCommitter used to persist the state of the partition consumer
 */
public static BatchPartitionCommitter setInput(MapReduceContext mapreduceContext,
                                               String partitionedFileSetName,
                                               DatasetStatePersistor statePersistor,
                                               ConsumerConfiguration consumerConfiguration) {
  PartitionedFileSet pfs = mapreduceContext.getDataset(partitionedFileSetName);
  // The consumer reads back the previously persisted state (via the delegating persistor),
  // then determines and records which partitions this run will process.
  PartitionConsumer consumer =
    new ConcurrentPartitionConsumer(pfs,
                                    new DelegatingStatePersistor(mapreduceContext, statePersistor),
                                    consumerConfiguration);
  List<PartitionDetail> consumed = consumer.consumePartitions().getPartitions();

  // Configure the consumed partitions as input to the MapReduce job.
  Map<String, String> inputArgs = new HashMap<>();
  PartitionedFileSetArguments.addInputPartitions(inputArgs, consumed);
  mapreduceContext.addInput(Input.ofDataset(partitionedFileSetName, inputArgs));

  // The returned committer lets the job report success/failure back to the consumer state.
  return succeeded -> consumer.onFinish(consumed, succeeded);
}
@Test
public void testGetInputPartitionKeys() throws Exception {
  Map<String, String> arguments = new HashMap<>();
  // With no partitions added, there should be no input partition keys.
  Assert.assertEquals(0, PartitionedFileSetArguments.getInputPartitionKeys(arguments).size());

  List<? extends Partition> partitions =
    Lists.newArrayList(new BasicPartition(null, "path/doesn't/matter/1", generateUniqueKey()),
                       new BasicPartition(null, "path/doesn't/matter/2", generateUniqueKey()),
                       new BasicPartition(null, "path/doesn't/matter/3", generateUniqueKey()));

  // Add the partitions one at a time and build the expected key list in the same order.
  List<PartitionKey> expectedKeys = new ArrayList<>();
  for (Partition partition : partitions) {
    PartitionedFileSetArguments.addInputPartition(arguments, partition);
    expectedKeys.add(partition.getPartitionKey());
  }
  Assert.assertEquals(expectedKeys, PartitionedFileSetArguments.getInputPartitionKeys(arguments));

  // Adding via the iterator-based overload must yield the same keys.
  arguments.clear();
  PartitionedFileSetArguments.addInputPartitions(arguments, partitions.iterator());
  Assert.assertEquals(expectedKeys, PartitionedFileSetArguments.getInputPartitionKeys(arguments));
}