/**
 * Derives the hash and range partition layout of a Kudu table.
 *
 * <p>The partition schema identifies columns by column ID. An ID need not coincide with the
 * column's position in the schema, so every ID is translated with
 * {@code Schema.getColumnIndex} before the column is looked up by position.
 *
 * @param table the table whose schema and partition schema are read
 * @return a design holding one hash definition per hash bucket schema and, when the table
 *         has range columns, a range definition naming them
 */
public static PartitionDesign getPartitionDesign(KuduTable table) {
    Schema schema = table.getSchema();
    PartitionDesign partitionDesign = new PartitionDesign();
    PartitionSchema partitionSchema = table.getPartitionSchema();

    List<HashPartitionDefinition> hashPartitions = partitionSchema.getHashBucketSchemas().stream()
            .map(hashBucketSchema -> {
                HashPartitionDefinition hash = new HashPartitionDefinition();
                // Resolve column ID -> column index -> column name.
                List<String> cols = hashBucketSchema.getColumnIds().stream()
                        .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                        .collect(toImmutableList());
                hash.setColumns(cols);
                hash.setBuckets(hashBucketSchema.getNumBuckets());
                return hash;
            })
            .collect(toImmutableList());
    partitionDesign.setHash(hashPartitions);

    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    if (!rangeColumns.isEmpty()) {
        RangePartitionDefinition definition = new RangePartitionDefinition();
        // Range columns are column IDs as well; resolve them the same way as hash columns.
        definition.setColumns(rangeColumns.stream()
                .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                .collect(toImmutableList()));
        partitionDesign.setRange(definition);
    }

    return partitionDesign;
}
Schema schema = scanner.table.getSchema(); PartitionSchema partitionSchema = scanner.table.getPartitionSchema(); PartitionSchema.RangeSchema rangeSchema = partitionSchema.getRangeSchema(); Map<String, KuduPredicate> predicates = scanner.predicates; if (partitionSchema.isSimpleRangePartitioning()) { if (Bytes.memcmp(rangeLowerBound, scanner.lowerBoundPrimaryKey) < 0) { rangeLowerBound = scanner.lowerBoundPrimaryKey; List<BitSet> hashComponents = new ArrayList<>(partitionSchema.getHashBucketSchemas().size()); for (PartitionSchema.HashBucketSchema hashSchema : partitionSchema.getHashBucketSchemas()) { hashComponents.add(pruneHashComponent(schema, hashSchema, predicates)); constrainedIndex = partitionSchema.getHashBucketSchemas().size(); } else { int numBuckets = partitionSchema.getHashBucketSchemas().get(i - 1).getNumBuckets(); BitSet hashBuckets = hashComponents.get(i - 1); if (hashBuckets.nextClearBit(0) < numBuckets) {
/**
 * Decodes an encoded range key into one value per range column of the table.
 *
 * @param table the table supplying the schema and partition schema used for decoding
 * @param rangeKey the encoded range partition key
 * @return {@code null} when {@code rangeKey} is empty (presumably an unbounded side; confirm
 *         against callers), otherwise the decoded values in range-column order
 */
private static RangeBoundValue buildRangePartitionBound(KuduTable table, byte[] rangeKey) {
    // Nothing to decode for an empty key.
    if (rangeKey.length == 0) {
        return null;
    }

    Schema schema = table.getSchema();
    PartitionSchema partitionSchema = table.getPartitionSchema();
    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    PartialRow bound = KeyEncoderAccessor.decodeRangePartitionKey(schema, partitionSchema, rangeKey);

    ArrayList<Object> values = new ArrayList<>();
    for (Integer rangeColumn : rangeColumns) {
        values.add(toValue(schema, bound, rangeColumn));
    }
    return new RangeBoundValue(values);
}
/**
 * Splits an encoded partition key into its hash buckets and its range key.
 *
 * <p>One big-endian int is read per hash bucket schema. Once the key has no bytes left,
 * the remaining buckets are reported as 0. Whatever is left in the buffer is decoded as
 * the range key.
 *
 * @param schema the schema of the table
 * @param partitionSchema the partition schema of the table
 * @param key the encoded partition key
 * @return the decoded buckets paired with the decoded range key
 */
public static Pair<List<Integer>, PartialRow> decodePartitionKey(Schema schema,
                                                                 PartitionSchema partitionSchema,
                                                                 byte[] key) {
  ByteBuffer remaining = ByteBuffer.wrap(key).order(ByteOrder.BIG_ENDIAN);

  int hashDimensions = partitionSchema.getHashBucketSchemas().size();
  List<Integer> bucketIds = new ArrayList<>();
  for (int dimension = 0; dimension < hashDimensions; dimension++) {
    bucketIds.add(remaining.hasRemaining() ? remaining.getInt() : 0);
  }

  PartialRow rangeKey = decodeRangePartitionKey(schema, partitionSchema, remaining);
  return new Pair<>(bucketIds, rangeKey);
}
/** * Builds the default partition schema for a schema. * @param schema the schema * @return a default partition schema */ private PartitionSchema defaultPartitionSchema(Schema schema) { List<Integer> columnIds = new ArrayList<>(); for (int i = 0; i < schema.getPrimaryKeyColumnCount(); i++) { // Schema does not provide a way to lookup a column ID by column index, // so instead we assume that the IDs for the primary key columns match // their respective index, which holds up when the schema is created // with buildSchema. columnIds.add(i); } return new PartitionSchema( new PartitionSchema.RangeSchema(columnIds), ImmutableList.<PartitionSchema.HashBucketSchema>of(), schema); }
/** * Test creating and deleting a table through a KuduClient. */ @Test(timeout = 100000) public void testCreateDeleteTable() throws Exception { // Check that we can create a table. client.createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions()); assertFalse(client.getTablesList().getTablesList().isEmpty()); assertTrue(client.getTablesList().getTablesList().contains(TABLE_NAME)); // Check that we can delete it. client.deleteTable(TABLE_NAME); assertFalse(client.getTablesList().getTablesList().contains(TABLE_NAME)); // Check that we can re-recreate it, with a different schema. List<ColumnSchema> columns = new ArrayList<>(basicSchema.getColumns()); columns.add(new ColumnSchema.ColumnSchemaBuilder("one more", Type.STRING).build()); Schema newSchema = new Schema(columns); client.createTable(TABLE_NAME, newSchema, getBasicCreateTableOptions()); // Check that we can open a table and see that it has the new schema. KuduTable table = client.openTable(TABLE_NAME); assertEquals(newSchema.getColumnCount(), table.getSchema().getColumnCount()); assertTrue(table.getPartitionSchema().isSimpleRangePartitioning()); // Check that the block size parameter we specified in the schema is respected. assertEquals(4096, newSchema.getColumn("column3_s").getDesiredBlockSize()); assertEquals(ColumnSchema.Encoding.DICT_ENCODING, newSchema.getColumn("column3_s").getEncoding()); assertEquals(ColumnSchema.CompressionAlgorithm.LZ4, newSchema.getColumn("column3_s").getCompressionAlgorithm()); }
/**
 * Encodes {@code row} into a partition key using the partition schema of this object's table.
 *
 * @return the encoded partition key
 */
@Override
public byte[] partitionKey() {
  return getTable()
      .getPartitionSchema()
      .encodePartitionKey(row);
}
/**
 * Collects the range partitions of a table from its tablet locations.
 *
 * <p>Only tablets whose hash buckets are all 0 contribute an entry, presumably so that each
 * range is reported once rather than once per hash-bucket combination (confirm against the
 * table layout).
 *
 * @param table the table to inspect
 * @param deadline the deadline passed to {@code getTabletsLocations}
 * @return the range partitions found; empty when the table has no range columns
 * @throws RuntimeException wrapping any exception raised while locating tablets or building
 *         a range partition
 */
private static List<RangePartition> getRangePartitionList(KuduTable table, long deadline) {
    List<RangePartition> rangePartitions = new ArrayList<>();
    if (table.getPartitionSchema().getRangeSchema().getColumns().isEmpty()) {
        return rangePartitions;
    }

    try {
        // Typed for-each replaces the raw Iterator and the unchecked cast.
        for (LocatedTablet tablet : table.getTabletsLocations(deadline)) {
            Partition partition = tablet.getPartition();
            if (Iterators.all(partition.getHashBuckets().iterator(), Predicates.equalTo(0))) {
                rangePartitions.add(buildRangePartition(table, partition));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return rangePartitions;
}
/**
 * Builds a {@code PartitionSchema} from its protobuf representation.
 *
 * @param pb the partition schema protobuf message
 * @param schema the schema handed to the resulting partition schema
 * @return the partition schema described by {@code pb}
 */
static PartitionSchema pbToPartitionSchema(Common.PartitionSchemaPB pb, Schema schema) {
  PartitionSchema.RangeSchema rangeSchema =
      new PartitionSchema.RangeSchema(pbToIds(pb.getRangeSchema().getColumnsList()));

  ImmutableList.Builder<PartitionSchema.HashBucketSchema> hashSchemas = ImmutableList.builder();
  for (Common.PartitionSchemaPB.HashBucketSchemaPB hashPb : pb.getHashBucketSchemasList()) {
    // Each hash bucket schema carries its column IDs, bucket count and seed.
    hashSchemas.add(new PartitionSchema.HashBucketSchema(
        pbToIds(hashPb.getColumnsList()),
        hashPb.getNumBuckets(),
        hashPb.getSeed()));
  }

  return new PartitionSchema(rangeSchema, hashSchemas.build(), schema);
}
/**
 * Encodes a row into a partition key: one hash bucket per hash bucket schema, in order,
 * followed by the row's range columns.
 *
 * @param row the row to encode
 * @param partitionSchema the partition schema describing the table's partitioning
 * @return an encoded partition key
 */
public static byte[] encodePartitionKey(PartialRow row, PartitionSchema partitionSchema) {
  ByteVec encoded = ByteVec.create();

  // With no hash bucket schemas the loop body never runs, so no explicit emptiness check.
  for (final HashBucketSchema hashSchema : partitionSchema.getHashBucketSchemas()) {
    encodeHashBucket(getHashBucket(row, hashSchema), encoded);
  }

  encodeColumns(row, partitionSchema.getRangeSchema().getColumns(), encoded);
  return encoded.toArray();
}
/**
 * Decodes an encoded range partition key into a partial row.
 *
 * <p>Range columns are decoded in order while bytes remain. Any range column left over once
 * the buffer is exhausted is set to its minimum value.
 *
 * @param schema the schema of the table
 * @param partitionSchema the partition schema of the table
 * @param buf the encoded range partition key
 * @return the decoded range key
 * @throws IllegalArgumentException if bytes remain after all range columns are processed
 */
private static PartialRow decodeRangePartitionKey(Schema schema,
                                                  PartitionSchema partitionSchema,
                                                  ByteBuffer buf) {
  PartialRow row = schema.newPartialRow();
  List<Integer> rangeColumnIds = partitionSchema.getRangeSchema().getColumns();
  final int lastPosition = rangeColumnIds.size() - 1;

  for (int position = 0; position <= lastPosition; position++) {
    int columnIndex = schema.getColumnIndex(rangeColumnIds.get(position));
    if (!buf.hasRemaining()) {
      row.setMin(columnIndex);
      continue;
    }
    // The final argument tells decodeColumn whether this is the last range column.
    decodeColumn(buf, row, columnIndex, position == lastPosition);
  }

  if (buf.hasRemaining()) {
    throw new IllegalArgumentException("Unable to decode all partition key bytes");
  }
  return row;
}
new PartitionSchema(new RangeSchema(ImmutableList.of(0, 1, 2)), ImmutableList.of( new HashBucketSchema(ImmutableList.of(0, 1), 32, 0),
byte[] lowerBound, byte[] upperBound) { if (partitionSchema.getRangeSchema().getColumns().isEmpty() && partitionSchema.getHashBucketSchemas().isEmpty()) { assert lowerBound.length == 0 && upperBound.length == 0; return "<no-partitioning>"; if (partitionSchema.getRangeSchema().getColumns().size() > 0) { if (!hashBuckets.isEmpty()) { sb.append(", "); for (int id : partitionSchema.getRangeSchema().getColumns()) { idxs.add(schema.getColumnIndex(id));
/**
 * Collects the range partitions of a table from its tablet locations.
 *
 * <p>Only tablets whose hash buckets are all 0 contribute an entry, presumably so that each
 * range is reported once rather than once per hash-bucket combination (confirm against the
 * table layout).
 *
 * @param table the table to inspect
 * @param deadline the deadline passed to {@code getTabletsLocations}
 * @return the range partitions found; empty when the table has no range columns
 * @throws RuntimeException wrapping any exception raised while locating tablets or building
 *         a range partition
 */
private static List<RangePartition> getRangePartitionList(KuduTable table, long deadline) {
    List<RangePartition> rangePartitions = new ArrayList<>();
    if (table.getPartitionSchema().getRangeSchema().getColumns().isEmpty()) {
        return rangePartitions;
    }

    try {
        // Typed for-each replaces the raw Iterator and the unchecked cast.
        for (LocatedTablet tablet : table.getTabletsLocations(deadline)) {
            Partition partition = tablet.getPartition();
            if (Iterators.all(partition.getHashBuckets().iterator(), Predicates.equalTo(0))) {
                rangePartitions.add(buildRangePartition(table, partition));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return rangePartitions;
}
/**
 * Derives the hash and range partition layout of a Kudu table.
 *
 * <p>The partition schema identifies columns by column ID. An ID need not coincide with the
 * column's position in the schema, so every ID is translated with
 * {@code Schema.getColumnIndex} before the column is looked up by position.
 *
 * @param table the table whose schema and partition schema are read
 * @return a design holding one hash definition per hash bucket schema and, when the table
 *         has range columns, a range definition naming them
 */
public static PartitionDesign getPartitionDesign(KuduTable table) {
    Schema schema = table.getSchema();
    PartitionDesign partitionDesign = new PartitionDesign();
    PartitionSchema partitionSchema = table.getPartitionSchema();

    List<HashPartitionDefinition> hashPartitions = partitionSchema.getHashBucketSchemas().stream()
            .map(hashBucketSchema -> {
                HashPartitionDefinition hash = new HashPartitionDefinition();
                // Resolve column ID -> column index -> column name.
                List<String> cols = hashBucketSchema.getColumnIds().stream()
                        .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                        .collect(toImmutableList());
                hash.setColumns(cols);
                hash.setBuckets(hashBucketSchema.getNumBuckets());
                return hash;
            })
            .collect(toImmutableList());
    partitionDesign.setHash(hashPartitions);

    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    if (!rangeColumns.isEmpty()) {
        RangePartitionDefinition definition = new RangePartitionDefinition();
        // Range columns are column IDs as well; resolve them the same way as hash columns.
        definition.setColumns(rangeColumns.stream()
                .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                .collect(toImmutableList()));
        partitionDesign.setRange(definition);
    }

    return partitionDesign;
}
/**
 * Collects the range partitions of a table from its tablet locations.
 *
 * <p>Only tablets whose hash buckets are all 0 contribute an entry, presumably so that each
 * range is reported once rather than once per hash-bucket combination (confirm against the
 * table layout).
 *
 * @param table the table to inspect
 * @param deadline the deadline passed to {@code getTabletsLocations}
 * @return the range partitions found; empty when the table has no range columns
 * @throws RuntimeException wrapping any exception raised while locating tablets or building
 *         a range partition
 */
private static List<RangePartition> getRangePartitionList(KuduTable table, long deadline) {
    // Diamond operator instead of the raw ArrayList.
    List<RangePartition> rangePartitions = new ArrayList<>();
    if (table.getPartitionSchema().getRangeSchema().getColumns().isEmpty()) {
        return rangePartitions;
    }

    try {
        // Typed for-each replaces the raw Iterator and the unchecked cast.
        for (LocatedTablet tablet : table.getTabletsLocations(deadline)) {
            Partition partition = tablet.getPartition();
            if (Iterators.all(partition.getHashBuckets().iterator(), Predicates.equalTo(0))) {
                rangePartitions.add(buildRangePartition(table, partition));
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return rangePartitions;
}
/**
 * Derives the hash and range partition layout of a Kudu table.
 *
 * <p>The partition schema identifies columns by column ID. An ID need not coincide with the
 * column's position in the schema, so every ID is translated with
 * {@code Schema.getColumnIndex} before the column is looked up by position.
 *
 * @param table the table whose schema and partition schema are read
 * @return a design holding one hash definition per hash bucket schema and, when the table
 *         has range columns, a range definition naming them
 */
public static PartitionDesign getPartitionDesign(KuduTable table) {
    Schema schema = table.getSchema();
    PartitionDesign partitionDesign = new PartitionDesign();
    PartitionSchema partitionSchema = table.getPartitionSchema();

    List<HashPartitionDefinition> hashPartitions = partitionSchema.getHashBucketSchemas().stream()
            .map(hashBucketSchema -> {
                HashPartitionDefinition hash = new HashPartitionDefinition();
                // Resolve column ID -> column index -> column name.
                List<String> cols = hashBucketSchema.getColumnIds().stream()
                        .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                        .collect(toImmutableList());
                hash.setColumns(cols);
                hash.setBuckets(hashBucketSchema.getNumBuckets());
                return hash;
            })
            .collect(toImmutableList());
    partitionDesign.setHash(hashPartitions);

    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    if (!rangeColumns.isEmpty()) {
        RangePartitionDefinition definition = new RangePartitionDefinition();
        // Range columns are column IDs as well; resolve them the same way as hash columns.
        definition.setColumns(rangeColumns.stream()
                .map(id -> schema.getColumnByIndex(schema.getColumnIndex(id)).getName())
                .collect(toImmutableList()));
        partitionDesign.setRange(definition);
    }

    return partitionDesign;
}
/**
 * Decodes an encoded range key into one value per range column of the table.
 *
 * @param table the table supplying the schema and partition schema used for decoding
 * @param rangeKey the encoded range partition key
 * @return {@code null} when {@code rangeKey} is empty (presumably an unbounded side; confirm
 *         against callers), otherwise the decoded values in range-column order
 */
private static RangeBoundValue buildRangePartitionBound(KuduTable table, byte[] rangeKey) {
    // Nothing to decode for an empty key.
    if (rangeKey.length == 0) {
        return null;
    }

    Schema schema = table.getSchema();
    PartitionSchema partitionSchema = table.getPartitionSchema();
    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    PartialRow bound = KeyEncoderAccessor.decodeRangePartitionKey(schema, partitionSchema, rangeKey);

    ArrayList<Object> values = new ArrayList<>();
    for (Integer rangeColumn : rangeColumns) {
        values.add(toValue(schema, bound, rangeColumn));
    }
    return new RangeBoundValue(values);
}
/**
 * Decodes an encoded range key into one value per range column of the table.
 *
 * @param table the table supplying the schema and partition schema used for decoding
 * @param rangeKey the encoded range partition key
 * @return {@code null} when {@code rangeKey} is empty (presumably an unbounded side; confirm
 *         against callers), otherwise a bound holding the decoded values in range-column order
 * @throws Exception declared by the signature; propagated from the decoding helpers
 */
private static RangeBoundValue buildRangePartitionBound(KuduTable table, byte[] rangeKey) throws Exception {
    // Nothing to decode for an empty key.
    if (rangeKey.length == 0) {
        return null;
    }

    Schema schema = table.getSchema();
    PartitionSchema partitionSchema = table.getPartitionSchema();
    List<Integer> rangeColumns = partitionSchema.getRangeSchema().getColumns();
    PartialRow bound = KeyEncoderAccessor.decodeRangePartitionKey(schema, partitionSchema, rangeKey);

    RangeBoundValue result = new RangeBoundValue();
    ArrayList<Object> values = new ArrayList<>();
    for (Integer rangeColumn : rangeColumns) {
        values.add(toValue(schema, bound, rangeColumn));
    }
    result.setValues(values);
    return result;
}
Schema schema = table.getSchema(); PartitionSchema partitionSchema = table.getPartitionSchema(); PartitionSchema.RangeSchema rangeSchema = partitionSchema.getRangeSchema(); for (int id : partitionSchema.getRangeSchema().getColumns()) { idxs.add(schema.getColumnIndex(id));