/**
 * Builds a spec that partitions by identity transform on each of the given column names.
 *
 * @param schema schema the partition columns belong to
 * @param partitionNames column names to partition by, in order
 * @return an identity partition spec, or null when no partition columns are given
 */
private static PartitionSpec identitySpec(Schema schema, List<String> partitionNames) {
  if (partitionNames == null || partitionNames.isEmpty()) {
    // no partition columns requested -> unpartitioned, signalled by null
    return null;
  }

  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema);
  partitionNames.forEach(specBuilder::identity);
  return specBuilder.build();
}
/**
 * Parses a {@link PartitionSpec} from its JSON object representation.
 *
 * @param schema schema to bind the parsed partition fields to
 * @param json a JSON object with a spec id and a fields array
 * @return the parsed partition spec
 * @throws IllegalArgumentException if {@code json} is not a JSON object
 */
public static PartitionSpec fromJson(Schema schema, JsonNode json) {
  Preconditions.checkArgument(json.isObject(), "Cannot parse spec from non-object: %s", json);

  int parsedSpecId = JsonUtil.getInt(SPEC_ID, json);
  JsonNode fieldsNode = json.get(FIELDS);

  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema).withSpecId(parsedSpecId);
  buildFromJsonFields(specBuilder, fieldsNode);
  return specBuilder.build();
}
/**
 * Builds a {@link PartitionSpec} with the given id from a JSON array of field definitions.
 *
 * @param schema schema to bind the parsed partition fields to
 * @param specId spec id to assign to the resulting spec
 * @param json JSON node holding the partition field definitions
 * @return the parsed partition spec
 */
static PartitionSpec fromJsonFields(Schema schema, int specId, JsonNode json) {
  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema).withSpecId(specId);
  buildFromJsonFields(specBuilder, json);
  return specBuilder.build();
}
/**
 * Creates metadata for a brand-new table: reassigns all schema column ids from 1
 * upward and rebuilds the partition spec against the reassigned ids, so the
 * resulting metadata is self-consistent regardless of the ids in the input schema.
 *
 * @param ops table operations used by the resulting metadata
 * @param schema table schema (its column ids are NOT reused; fresh ids are assigned)
 * @param spec partition spec defined against {@code schema}
 * @param location base location for the table
 * @param properties table properties (copied; the caller's map is not retained)
 * @return metadata for a new, empty table with freshly assigned column ids
 */
public static TableMetadata newTableMetadata(TableOperations ops, Schema schema, PartitionSpec spec, String location, Map<String, String> properties) {
  // reassign all column ids to ensure consistency
  AtomicInteger lastColumnId = new AtomicInteger(0);
  Schema freshSchema = TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet);

  // rebuild the partition spec using the new column ids
  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(freshSchema)
      .withSpecId(INITIAL_SPEC_ID);
  for (PartitionField field : spec.fields()) {
    // look up the name of the source field in the old schema to get the new schema's id
    String sourceName = schema.findColumnName(field.sourceId());
    specBuilder.add(
        freshSchema.findField(sourceName).fieldId(),
        field.name(),
        // transforms are carried over by their string representation
        field.transform().toString());
  }
  PartitionSpec freshSpec = specBuilder.build();

  // NOTE(review): the null argument presumably stands for "no previous metadata
  // file" and -1 for "no current snapshot"; confirm against the TableMetadata ctor
  return new TableMetadata(ops, null, location,
      System.currentTimeMillis(),
      lastColumnId.get(), freshSchema, INITIAL_SPEC_ID, ImmutableList.of(freshSpec),
      ImmutableMap.copyOf(properties), -1, ImmutableList.of(), ImmutableList.of());
}
private static PartitionSpec freshSpec(int specId, Schema schema, PartitionSpec partitionSpec) { PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema) .withSpecId(specId); for (PartitionField field : partitionSpec.fields()) { // look up the name of the source field in the old schema to get the new schema's id String sourceName = partitionSpec.schema().findColumnName(field.sourceId()); specBuilder.add( schema.findField(sourceName).fieldId(), field.name(), field.transform().toString()); } return specBuilder.build(); }
@Test
public void testFilterFilesPartitionedTable() {
  // partition by a 16-way bucket on "data" and run the shared filter-files scenario
  PartitionSpec bucketSpec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  Table partitioned = TestTables.create(tableDir, "test", schema, bucketSpec);
  testFilterFiles(partitioned);
}
@Test
public void testEscapedStrings() {
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
      .identity("data")
      .truncate("data", 10)
      .build();

  // both partition values contain '/', which must be percent-encoded in the path
  Row slashRow = Row.of("a/b/c/d", "a/b/c/d");
  Assert.assertEquals("Should escape / as %2F",
      "data=a%2Fb%2Fc%2Fd/data_trunc=a%2Fb%2Fc%2Fd",
      spec.partitionToPath(slashRow));
}
}
/**
 * Verifies that creating a table reassigns ids consistently, produces no scan tasks,
 * and writes exactly the v1 metadata file plus the version hint on disk.
 */
@Test
public void testCreateTable() throws Exception {
  PartitionSpec expectedSpec = PartitionSpec.builderFor(TABLE_SCHEMA)
      .bucket("data", 16)
      .build();

  // schema and spec comparisons use the reassigned ids, not the declared ones
  Assert.assertEquals("Table schema should match schema with reassigned ids",
      TABLE_SCHEMA.asStruct(), table.schema().asStruct());
  Assert.assertEquals("Table partition spec should match with reassigned ids",
      expectedSpec, table.spec());

  // a freshly created table has no data files, so planning yields no tasks
  List<FileScanTask> tasks = Lists.newArrayList(table.newScan().planFiles());
  Assert.assertEquals("Should not create any scan tasks", 0, tasks.size());

  // on-disk layout: table dir, metadata dir, exactly one metadata version, and a hint file
  Assert.assertTrue("Table location should exist", tableDir.exists());
  Assert.assertTrue("Should create metadata folder",
      metadataDir.exists() && metadataDir.isDirectory());
  Assert.assertTrue("Should create v1 metadata",
      version(1).exists() && version(1).isFile());
  // fixed message typo: "verions" -> "versions"
  Assert.assertFalse("Should not create v2 or newer versions", version(2).exists());
  Assert.assertTrue("Should create version hint file", versionHintFile.exists());
  Assert.assertEquals("Should write the current version to the hint file",
      1, readVersionHint());

  List<File> manifests = listManifestFiles();
  Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
}
); PartitionSpec spec = PartitionSpec.builderFor(schema).withSpecId(5).build();
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build(); Table table = tables.create(SCHEMA, spec, location.toString());
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build(); Table table = tables.create(SCHEMA, spec, location.toString()); table.updateProperties()
PartitionSpec newSpec = PartitionSpec.builderFor(base.schema()) .bucket("data", 16) .bucket("id", 4)
); PartitionSpec spec = PartitionSpec.builderFor(schema).withSpecId(5).build();
PartitionSpec newSpec = PartitionSpec.builderFor(base.schema()) .bucket("data", 16) .bucket("id", 4)
@Test
@SuppressWarnings("unchecked")
public void testPartitionPath() {
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
      .hour("ts")
      .bucket("id", 10)
      .build();

  // transforms for the "ts" (source id 3) and "id" (source id 1) partition fields
  Transform hourTransform = spec.getFieldBySourceId(3).transform();
  Transform bucketTransform = spec.getFieldBySourceId(1).transform();

  Literal<Long> ts = Literal.of("2017-12-01T10:12:55.038194")
      .to(Types.TimestampType.withoutZone());
  Object hourValue = hourTransform.apply(ts.value());
  Object bucketValue = bucketTransform.apply(1);

  Row partition = Row.of(hourValue, bucketValue);
  Assert.assertEquals("Should produce expected partition key",
      "ts_hour=2017-12-01-10/id_bucket=" + bucketValue,
      spec.partitionToPath(partition));
}
); PartitionSpec spec = PartitionSpec.builderFor(SCHEMA) .identity("id") .build();
); PartitionSpec spec = PartitionSpec.builderFor(SCHEMA) .identity("id") .build();
); PartitionSpec spec = PartitionSpec.builderFor(schema) .identity("dateint") .build();
required(4, "dateint", Types.IntegerType.get())); PartitionSpec spec = PartitionSpec.builderFor(schema) .identity("dateint") .build();
PartitionSpec spec = PartitionSpec.builderFor(schema) .identity("i") .identity("l")