/**
 * Splits this query into multiple disjoint sub-queries that, taken together, cover exactly the
 * same rows as this query. Intended for map-reduce style frameworks (e.g. Beam) that fan a single
 * query out across many workers.
 *
 * <p>Expected Usage:
 *
 * <pre>{@code
 * List<ByteString> splitPoints = ...;
 * List<Query> queryShards = myQuery.shard(splitPoints);
 * List<ApiFuture<List<Row>>> futures = new ArrayList<>();
 * for (Query subQuery : queryShards) {
 *   futures.add(dataClient.readRowsCallable().all().futureCall(subQuery));
 * }
 * List<List<Row>> results = ApiFutures.allAsList(futures).get();
 * }</pre>
 *
 * @param splitPoints the sorted row keys at which to cut this query's row set
 * @return one {@link Query} per resulting row-set segment
 * @throws IllegalStateException if this query carries a row limit (a limit cannot be divided
 *     meaningfully across shards)
 */
public List<Query> shard(SortedSet<ByteString> splitPoints) {
  // A rows limit applies to the whole query; it cannot be distributed across shards.
  Preconditions.checkState(builder.getRowsLimit() == 0, "Can't shard a query with a row limit");

  List<RowSet> segments = RowSetUtil.shard(builder.getRows(), splitPoints);

  List<Query> results = Lists.newArrayListWithCapacity(segments.size());
  for (RowSet segment : segments) {
    // Clone this query's full request, then narrow it to the segment's row set.
    Query subQuery = new Query(tableId);
    subQuery.builder.mergeFrom(this.builder.build()).setRows(segment);
    results.add(subQuery);
  }
  return results;
}
/**
 * Splits this query into multiple sub-queries that can be evenly distributed across Bigtable
 * nodes and run in parallel. The split points come from {@link
 * com.google.cloud.bigtable.data.v2.BigtableDataClient#sampleRowKeysAsync(String)}; the resulting
 * queries are disjoint and logically combine back into this query.
 *
 * <p>Expected Usage:
 *
 * <pre>{@code
 * List<KeyOffset> keyOffsets = dataClient.sampleRowKeysAsync("my-table").get();
 * List<Query> queryShards = myQuery.shard(keyOffsets);
 * List<ApiFuture<List<Row>>> futures = new ArrayList<>();
 * for (Query subQuery : queryShards) {
 *   futures.add(dataClient.readRowsCallable().all().futureCall(subQuery));
 * }
 * List<List<Row>> results = ApiFutures.allAsList(futures).get();
 * }</pre>
 *
 * @param sampledRowKeys sampled key offsets whose (non-empty) keys become the split points
 * @return one {@link Query} per resulting row-set segment
 * @throws IllegalStateException if this query carries a row limit
 */
public List<Query> shard(List<KeyOffset> sampledRowKeys) {
  // A rows limit applies to the whole query; it cannot be distributed across shards.
  Preconditions.checkState(builder.getRowsLimit() == 0, "Can't shard query with row limits");

  // Collect the sampled keys into a sorted set of split points, skipping the empty key
  // (which marks the end of the table and is not a usable split point).
  ImmutableSortedSet.Builder<ByteString> splitPointsBuilder =
      ImmutableSortedSet.orderedBy(ByteStringComparator.INSTANCE);
  for (KeyOffset sample : sampledRowKeys) {
    ByteString key = sample.getKey();
    if (!key.isEmpty()) {
      splitPointsBuilder.add(key);
    }
  }
  // Delegate to the split-point based overload.
  return shard(splitPointsBuilder.build());
}
/**
 * Splits this query into multiple disjoint sub-queries whose union is exactly this query. This is
 * intended for map-reduce style frameworks such as Beam that need to spread one query over many
 * workers.
 *
 * <p>Expected Usage:
 *
 * <pre>{@code
 * List<ByteString> splitPoints = ...;
 * List<Query> queryShards = myQuery.shard(splitPoints);
 * List<ApiFuture<List<Row>>> futures = new ArrayList<>();
 * for (Query subQuery : queryShards) {
 *   futures.add(dataClient.readRowsCallable().all().futureCall(subQuery));
 * }
 * List<List<Row>> results = ApiFutures.allAsList(futures).get();
 * }</pre>
 *
 * @param splitPoints the sorted row keys at which this query's row set is cut
 * @return one {@link Query} per resulting row-set segment
 * @throws IllegalStateException if this query carries a row limit, which cannot be divided
 *     meaningfully across shards
 */
public List<Query> shard(SortedSet<ByteString> splitPoints) {
  // Sharding a limited query is undefined: the limit belongs to the query as a whole.
  Preconditions.checkState(builder.getRowsLimit() == 0, "Can't shard a query with a row limit");

  List<RowSet> pieces = RowSetUtil.shard(builder.getRows(), splitPoints);
  List<Query> shardList = Lists.newArrayListWithCapacity(pieces.size());

  for (RowSet piece : pieces) {
    // Copy every setting from this query, then restrict the copy to this piece's rows.
    Query copy = new Query(tableId);
    copy.builder.mergeFrom(this.builder.build());
    copy.builder.setRows(piece);
    shardList.add(copy);
  }

  return shardList;
}
/**
 * Splits this query into multiple sub-queries that distribute evenly across Bigtable nodes and
 * may be executed in parallel. Takes the output of {@link
 * com.google.cloud.bigtable.data.v2.BigtableDataClient#sampleRowKeysAsync(String)} and produces a
 * set of disjoint queries that logically combine back into this query.
 *
 * <p>Expected Usage:
 *
 * <pre>{@code
 * List<KeyOffset> keyOffsets = dataClient.sampleRowKeysAsync("my-table").get();
 * List<Query> queryShards = myQuery.shard(keyOffsets);
 * List<ApiFuture<List<Row>>> futures = new ArrayList<>();
 * for (Query subQuery : queryShards) {
 *   futures.add(dataClient.readRowsCallable().all().futureCall(subQuery));
 * }
 * List<List<Row>> results = ApiFutures.allAsList(futures).get();
 * }</pre>
 *
 * @param sampledRowKeys sampled key offsets; each non-empty key becomes a split point
 * @return one {@link Query} per resulting row-set segment
 * @throws IllegalStateException if this query carries a row limit
 */
public List<Query> shard(List<KeyOffset> sampledRowKeys) {
  // Sharding a limited query is undefined: the limit belongs to the query as a whole.
  Preconditions.checkState(builder.getRowsLimit() == 0, "Can't shard query with row limits");

  // Build the sorted split-point set, dropping the empty key that marks end-of-table.
  ImmutableSortedSet.Builder<ByteString> points =
      ImmutableSortedSet.orderedBy(ByteStringComparator.INSTANCE);
  for (KeyOffset offset : sampledRowKeys) {
    if (!offset.getKey().isEmpty()) {
      points.add(offset.getKey());
    }
  }

  // Reuse the split-point overload for the actual sharding work.
  return shard(points.build());
}