/**
 * Partitions the given {@link RowsColumnRangeBatchRequest} into multiple requests, preserving the ordering of
 * rows. Each partitioned {@link RowsColumnRangeBatchRequest} will have exactly {@code partitionSize} rows in
 * total, except possibly for the last one, which may have fewer rows (but not more). No row will be split across
 * partitions.
 *
 * <p>If the batch holds at most {@code partitionSize} rows, the batch itself is returned as the only element.
 *
 * @param batch the request to split
 * @param partitionSize the maximum number of rows per partition
 * @return the partitions, in the same row order as {@code batch}
 * @throws IllegalArgumentException if the batch holds more than {@code partitionSize} rows and
 *         {@code partitionSize} is not positive (raised by {@code Lists.partition})
 */
public static List<RowsColumnRangeBatchRequest> partition(RowsColumnRangeBatchRequest batch, int partitionSize) {
    // Compute the ordered row list once; it feeds both the size check and the split below.
    List<byte[]> allRows = getAllRowsInOrder(batch);
    if (allRows.size() <= partitionSize) {
        return ImmutableList.of(batch);
    }

    // More rows than partitionSize, so there are at least two chunks: the first and last are distinct.
    List<List<byte[]>> partitionedRows = Lists.partition(allRows, partitionSize);
    List<RowsColumnRangeBatchRequest> partitions = new ArrayList<>(partitionedRows.size());

    // NOTE(review): the first/last helpers presumably carry over the batch's partial first/last row data;
    // confirm against getFirstRequestInPartition / getLastRequestInPartition.
    partitions.add(getFirstRequestInPartition(batch, partitionedRows.get(0)));

    // Intermediate partitions consist solely of fully loaded rows plus the shared column range selection.
    for (int partitionNumber = 1; partitionNumber < partitionedRows.size() - 1; partitionNumber++) {
        RowsColumnRangeBatchRequest partition = ImmutableRowsColumnRangeBatchRequest.builder()
                .columnRangeSelection(batch.getColumnRangeSelection())
                .rowsToLoadFully(partitionedRows.get(partitionNumber))
                .build();
        partitions.add(partition);
    }

    partitions.add(getLastRequestInPartition(batch, Iterables.getLast(partitionedRows)));
    return partitions;
}
/**
 * Asserts that concatenating the rows of every partition, in partition order, yields the same list as the rows
 * of the original request.
 *
 * @param original the request that was partitioned
 * @param partitions the partitions produced from {@code original}
 */
private static void assertRowsInPartitionsMatchOriginal(
        RowsColumnRangeBatchRequest original, List<RowsColumnRangeBatchRequest> partitions) {
    // Flatten the per-partition row lists into one list, keeping partition order.
    List<byte[]> rowsAcrossPartitions = partitions.stream()
            .map(RowsColumnRangeBatchRequests::getAllRowsInOrder)
            .flatMap(List::stream)
            .collect(Collectors.toList());

    Assert.assertEquals(RowsColumnRangeBatchRequests.getAllRowsInOrder(original), rowsAcrossPartitions);
}
/**
 * Partitions {@code request} using {@code partitionSize} and runs every partition assertion against the result.
 *
 * @param request the request to partition
 * @param partitionSize the partition size passed to {@code RowsColumnRangeBatchRequests.partition}
 */
private static void testPartition(RowsColumnRangeBatchRequest request, int partitionSize) {
    List<RowsColumnRangeBatchRequest> result = RowsColumnRangeBatchRequests.partition(request, partitionSize);

    // The checks run in a fixed order, so the first violated property is the one reported.
    assertIntermediatePartitionsHaveNoPartialRows(result);
    assertRowsInPartitionsMatchOriginal(request, result);
    assertColumnRangesInPartitionsMatchOriginal(request, result);
    assertPartitionsHaveCorrectSize(result, partitionSize);
}
/**
 * Asserts that every partition except the last holds exactly {@code expectedSize} rows, and that the last
 * partition holds at most {@code expectedSize} rows. An empty list of partitions passes trivially.
 *
 * @param partitions the partitions to check, in order
 * @param expectedSize the row count required of every non-final partition
 */
private static void assertPartitionsHaveCorrectSize(
        List<RowsColumnRangeBatchRequest> partitions, int expectedSize) {
    int lastIndex = partitions.size() - 1;

    // Every partition before the last must be completely full.
    for (int index = 0; index < lastIndex; index++) {
        int rowCount = RowsColumnRangeBatchRequests.getAllRowsInOrder(partitions.get(index)).size();
        Assert.assertEquals(expectedSize, rowCount);
    }

    // The final partition may be short, but never over-full.
    if (lastIndex >= 0) {
        int lastRowCount = RowsColumnRangeBatchRequests.getAllRowsInOrder(partitions.get(lastIndex)).size();
        Assert.assertTrue(lastRowCount <= expectedSize);
    }
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Partitions the given {@link RowsColumnRangeBatchRequest} into multiple requests, preserving the ordering of
 * rows. Each partitioned {@link RowsColumnRangeBatchRequest} will have exactly {@code partitionSize} rows in
 * total, except possibly for the last one, which may have fewer rows (but not more). No row will be split across
 * partitions.
 *
 * <p>If the batch holds at most {@code partitionSize} rows, the batch itself is returned as the only element.
 *
 * @param batch the request to split
 * @param partitionSize the maximum number of rows per partition
 * @return the partitions, in the same row order as {@code batch}
 * @throws IllegalArgumentException if the batch holds more than {@code partitionSize} rows and
 *         {@code partitionSize} is not positive (raised by {@code Lists.partition})
 */
public static List<RowsColumnRangeBatchRequest> partition(RowsColumnRangeBatchRequest batch, int partitionSize) {
    // Compute the ordered row list once; it feeds both the size check and the split below.
    List<byte[]> allRows = getAllRowsInOrder(batch);
    if (allRows.size() <= partitionSize) {
        return ImmutableList.of(batch);
    }

    // More rows than partitionSize, so there are at least two chunks: the first and last are distinct.
    List<List<byte[]>> partitionedRows = Lists.partition(allRows, partitionSize);
    List<RowsColumnRangeBatchRequest> partitions = new ArrayList<>(partitionedRows.size());

    // NOTE(review): the first/last helpers presumably carry over the batch's partial first/last row data;
    // confirm against getFirstRequestInPartition / getLastRequestInPartition.
    partitions.add(getFirstRequestInPartition(batch, partitionedRows.get(0)));

    // Intermediate partitions consist solely of fully loaded rows plus the shared column range selection.
    for (int partitionNumber = 1; partitionNumber < partitionedRows.size() - 1; partitionNumber++) {
        RowsColumnRangeBatchRequest partition = ImmutableRowsColumnRangeBatchRequest.builder()
                .columnRangeSelection(batch.getColumnRangeSelection())
                .rowsToLoadFully(partitionedRows.get(partitionNumber))
                .build();
        partitions.add(partition);
    }

    partitions.add(getLastRequestInPartition(batch, Iterables.getLast(partitionedRows)));
    return partitions;
}
/**
 * Loads a page of row/column-range results for the given batch request at timestamp {@code ts}.
 *
 * <p>The work is handed to {@code batchingQueryRunner.runTask} together with
 * {@code RowsColumnRangeBatchRequests::partition} and {@code AccumulatorStrategies.forMap()}; presumably the
 * runner splits the batch with the former and merges per-batch maps with the latter (confirm against the runner).
 * Each batch is read inside {@code runRead} by passing a loader that calls {@code getRowsColumnRange} on the
 * table, along with the batch's ordered rows, to {@code extractRowColumnRangePageInternal}.
 *
 * @param tableRef the table to read from
 * @param rowsColumnRangeBatch the rows and column range to load
 * @param ts the timestamp passed to {@code getRowsColumnRange}
 * @return the entries found, keyed by row
 */
private Map<byte[], List<Entry<Cell, Value>>> extractRowColumnRangePage(
        TableReference tableRef, RowsColumnRangeBatchRequest rowsColumnRangeBatch, long ts) {
    return batchingQueryRunner.runTask(
            rowsColumnRangeBatch,
            RowsColumnRangeBatchRequests::partition,
            AccumulatorStrategies.forMap(),
            subBatch -> {
                return runRead(tableRef, readTable -> {
                    return extractRowColumnRangePageInternal(
                            readTable,
                            tableRef,
                            () -> readTable.getRowsColumnRange(subBatch, ts),
                            RowsColumnRangeBatchRequests.getAllRowsInOrder(subBatch));
                });
            });
}
/**
 * Loads a page of row/column-range results for the given batch request at timestamp {@code ts}.
 *
 * <p>The work is handed to {@code batchingQueryRunner.runTask} together with
 * {@code RowsColumnRangeBatchRequests::partition} and {@code AccumulatorStrategies.forMap()}; presumably the
 * runner splits the batch with the former and merges per-batch maps with the latter (confirm against the runner).
 * Each batch is read inside {@code runRead} by passing a loader that calls {@code getRowsColumnRange} on the
 * table, along with the batch's ordered rows, to {@code extractRowColumnRangePageInternal}.
 *
 * @param tableRef the table to read from
 * @param rowsColumnRangeBatch the rows and column range to load
 * @param ts the timestamp passed to {@code getRowsColumnRange}
 * @return the entries found, keyed by row
 */
private Map<byte[], List<Entry<Cell, Value>>> extractRowColumnRangePage(
        TableReference tableRef, RowsColumnRangeBatchRequest rowsColumnRangeBatch, long ts) {
    return batchingQueryRunner.runTask(
            rowsColumnRangeBatch,
            RowsColumnRangeBatchRequests::partition,
            AccumulatorStrategies.forMap(),
            subBatch -> {
                return runRead(tableRef, readTable -> {
                    return extractRowColumnRangePageInternal(
                            readTable,
                            tableRef,
                            () -> readTable.getRowsColumnRange(subBatch, ts),
                            RowsColumnRangeBatchRequests.getAllRowsInOrder(subBatch));
                });
            });
}