/** * The complexity in this method is due to a desire to avoid scanning over large numbers of tombstoned rows. * <p> * Details: An unbounded CQL query will scan over an unbounded number of tombstoned rows in an attempt to find * either the end of the table, or the desired number of results. This can lead to timeouts if there are large * segments of tombstoned rows (which can happen with THOROUGH sweep). Thrift, on the other hand, will actually * return the tombstoned rows (they will just be empty KeySlices), which allows us to page over them. So, we first * execute a thrift range scan to determine an upper bound on the CQL range scan, and page over the data until we * find some live cells. * <p> * This is a hack (and a perf hit) until we have a better solution to avoid scanning massive numbers of * tombstones (ideally this will involve catching some exception and reducing the range appropriately). */ private void fetchBatchOfTimestampsBeginningAtStartRow() { byte[] rangeStart = startRowInclusive; Integer executorThreads = config.sweepReadThreads(); ExecutorService executor = PTExecutors.newFixedThreadPool(executorThreads); while (timestamps.isEmpty()) { List<byte[]> rows = getRows(rangeStart); if (rows.isEmpty()) { return; } // Note that both ends of this range are *inclusive* List<CellWithTimestamp> batch = cqlExecutor.getTimestamps(tableRef, rows, batchHint, executor, executorThreads); timestamps.addAll(batch); rangeStart = RangeRequests.nextLexicographicName(Iterables.getLast(rows)); } executor.shutdown(); }
/**
 * Verifies that fetching timestamps for two rows issues one per-row prepared SELECT
 * (prepared once, executed once per row key).
 */
@Test
public void getTimestampsForGivenRows() {
    String expected = "SELECT key, column1, column2 FROM \"foo__bar\""
            + " WHERE key = ? LIMIT 100;";
    int executorThreads = AtlasDbConstants.DEFAULT_SWEEP_CASSANDRA_READ_THREADS;
    // Fully qualified to avoid depending on this test file's import list; previously the pool was
    // created inline and never shut down, leaking executorThreads live threads per test run.
    java.util.concurrent.ExecutorService threadPool = PTExecutors.newFixedThreadPool(executorThreads);
    try {
        executor.getTimestamps(TABLE_REF, ImmutableList.of(ROW, END_ROW), LIMIT, threadPool, executorThreads);
    } finally {
        threadPool.shutdown();
    }
    verify(queryExecutor).prepare(argThat(byteBufferMatcher(expected)), eq(ROW), any());
    verify(queryExecutor).executePrepared(eq(1), eq(ImmutableList.of(ByteBuffer.wrap(ROW))));
    verify(queryExecutor).executePrepared(eq(1), eq(ImmutableList.of(ByteBuffer.wrap(END_ROW))));
}
/** * The complexity in this method is due to a desire to avoid scanning over large numbers of tombstoned rows. * <p> * Details: An unbounded CQL query will scan over an unbounded number of tombstoned rows in an attempt to find * either the end of the table, or the desired number of results. This can lead to timeouts if there are large * segments of tombstoned rows (which can happen with THOROUGH sweep). Thrift, on the other hand, will actually * return the tombstoned rows (they will just be empty KeySlices), which allows us to page over them. So, we first * execute a thrift range scan to determine an upper bound on the CQL range scan, and page over the data until we * find some live cells. * <p> * This is a hack (and a perf hit) until we have a better solution to avoid scanning massive numbers of * tombstones (ideally this will involve catching some exception and reducing the range appropriately). */ private void fetchBatchOfTimestampsBeginningAtStartRow() { byte[] rangeStart = startRowInclusive; Integer executorThreads = config.sweepReadThreads(); ExecutorService executor = PTExecutors.newFixedThreadPool(executorThreads); while (timestamps.isEmpty()) { List<byte[]> rows = getRows(rangeStart); if (rows.isEmpty()) { return; } // Note that both ends of this range are *inclusive* List<CellWithTimestamp> batch = cqlExecutor.getTimestamps(tableRef, rows, batchHint, executor, executorThreads); timestamps.addAll(batch); rangeStart = RangeRequests.nextLexicographicName(Iterables.getLast(rows)); } executor.shutdown(); }