/**
 * Collects the source IDs of the fields in a partition spec whose transform prints as
 * {@code "identity"}.
 *
 * @param spec the partition spec whose fields are inspected
 * @return a new set containing the source ID of every matching field
 */
private static Set<Integer> identitySourceIds(PartitionSpec spec) {
  Set<Integer> identityIds = Sets.newHashSet();
  for (PartitionField partitionField : spec.fields()) {
    // transforms are matched by their string form, not by type
    boolean isIdentity = "identity".equals(partitionField.transform().toString());
    if (isIdentity) {
      identityIds.add(partitionField.sourceId());
    }
  }
  return identityIds;
}
/**
 * Creates a table by delegating to the three-argument overload, passing
 * {@code PartitionSpec.unpartitioned()} as the partition spec.
 *
 * @param schema the table schema
 * @param tableIdentifier the identifier forwarded unchanged to the overload
 * @return whatever the three-argument {@code create} returns
 */
@Override
public Table create(Schema schema, String tableIdentifier) {
  PartitionSpec noPartitioning = PartitionSpec.unpartitioned();
  return create(schema, noPartitioning, tableIdentifier);
}
/**
 * Builds a partition spec that applies an identity partition to each named column.
 *
 * @param schema the schema the spec builder is created for
 * @param partitionNames names passed, in order, to the builder's {@code identity} method
 * @return the built spec, or {@code null} when {@code partitionNames} is null or empty
 */
private static PartitionSpec identitySpec(Schema schema, List<String> partitionNames) {
  boolean hasPartitionColumns = partitionNames != null && !partitionNames.isEmpty();
  if (!hasPartitionColumns) {
    // callers receive null, not an empty spec, when there is nothing to partition by
    return null;
  }

  PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema);
  for (String columnName : partitionNames) {
    specBuilder.identity(columnName);
  }

  return specBuilder.build();
}
/**
 * Constructs a partition spec from this builder's schema, spec ID, and fields, and runs
 * {@code checkCompatibility} on it against the schema before returning it.
 *
 * @return the newly constructed spec
 */
public PartitionSpec build() {
  PartitionSpec built = new PartitionSpec(schema, specId, fields);
  // validate before handing the spec to the caller
  checkCompatibility(built, schema);
  return built;
}
// closes the enclosing type, whose header lies outside this excerpt
}
/**
 * Renders partition data as a path of {@code name=value} segments joined by {@code "/"}.
 *
 * <p>For each partition field, the value read from {@code data} at the field's position is
 * converted with the field transform's {@code toHumanString} and then passed through
 * {@code escape} before being appended.
 *
 * @param data the partition tuple to render
 * @return the path string; empty when there are no partition fields
 */
public String partitionToPath(StructLike data) {
  Class<?>[] valueClasses = javaClasses();
  StringBuilder path = new StringBuilder();

  for (int pos = 0; pos < valueClasses.length; pos += 1) {
    // separator goes between segments only, never before the first one
    if (pos > 0) {
      path.append("/");
    }

    PartitionField partitionField = fields[pos];
    String humanValue = partitionField.transform().toHumanString(get(data, pos, valueClasses[pos]));

    path.append(partitionField.name());
    path.append("=");
    path.append(escape(humanValue));
  }

  return path.toString();
}
public TableMetadata updatePartitionSpec(PartitionSpec partitionSpec) { PartitionSpec.checkCompatibility(partitionSpec, schema); // if the spec already exists, use the same ID. otherwise, use 1 more than the highest ID. int newDefaultSpecId = INITIAL_SPEC_ID; for (PartitionSpec spec : specs) { if (partitionSpec.compatibleWith(spec)) { newDefaultSpecId = spec.specId(); break; } else if (newDefaultSpecId <= spec.specId()) { newDefaultSpecId = spec.specId() + 1; } } Preconditions.checkArgument(defaultSpecId != newDefaultSpecId, "Cannot set default partition spec to the current default"); ImmutableList.Builder<PartitionSpec> builder = ImmutableList.<PartitionSpec>builder() .addAll(specs); if (!specsById.containsKey(newDefaultSpecId)) { // get a fresh spec to ensure the spec ID is set to the new default builder.add(freshSpec(newDefaultSpecId, schema, partitionSpec)); } return new TableMetadata(ops, null, location, System.currentTimeMillis(), lastColumnId, schema, newDefaultSpecId, builder.build(), properties, currentSnapshotId, snapshots, snapshotLog); }
/**
 * Replaces a table using an unpartitioned spec and checks the resulting version, snapshot,
 * schema, and partition spec.
 */
@Test
public void testReplaceWithNewPartitionSpec() {
  PartitionSpec unpartitioned = PartitionSpec.unpartitioned();

  // capture state before the append so it can be compared afterwards
  Snapshot initialSnapshot = table.currentSnapshot();
  Schema originalSchema = table.schema();

  table.newAppend()
      .appendFile(FILE_A)
      .commit();

  Assert.assertEquals("Version should be 1", 1L, (long) version());
  validateSnapshot(initialSnapshot, table.currentSnapshot(), FILE_A);

  // run the replace transaction with the same schema but the new spec
  Transaction replaceTxn = TestTables.beginReplace(tableDir, "test", table.schema(), unpartitioned);
  replaceTxn.commitTransaction();

  table.refresh();

  Assert.assertEquals("Version should be 2", 2L, (long) version());
  Assert.assertNull("Table should not have a current snapshot", table.currentSnapshot());
  Assert.assertEquals("Schema should use new schema, not compatible with previous",
      originalSchema.asStruct(), table.schema().asStruct());
  Assert.assertEquals("Table should have new unpartitioned spec",
      0, table.spec().fields().size());
}
private static PartitionSpec freshSpec(int specId, Schema schema, PartitionSpec partitionSpec) { PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema) .withSpecId(specId); for (PartitionField field : partitionSpec.fields()) { // look up the name of the source field in the old schema to get the new schema's id String sourceName = partitionSpec.schema().findColumnName(field.sourceId()); specBuilder.add( schema.findField(sourceName).fieldId(), field.name(), field.transform().toString()); } return specBuilder.build(); }
/**
 * Copies every partition value from {@code partitionData} into a {@code PartitionData}.
 *
 * @param spec the partition spec supplying the field list and the Java class of each value
 * @param partitionData the source tuple read position by position
 * @param reuse a container to write into, or {@code null} to allocate one via
 *     {@code newPartitionData(spec)}
 * @return the container that was written to ({@code reuse} when it was non-null)
 */
private static PartitionData copyPartitionData(PartitionSpec spec, StructLike partitionData, PartitionData reuse) {
  PartitionData target = reuse != null ? reuse : newPartitionData(spec);

  Class<?>[] valueClasses = spec.javaClasses();
  List<PartitionField> partitionFields = spec.fields();
  for (int pos = 0; pos < partitionFields.size(); pos += 1) {
    target.set(pos, partitionData.get(pos, valueClasses[pos]));
  }

  return target;
}
); PartitionSpec spec = PartitionSpec.builderFor(SCHEMA) .identity("id") .build(); BoundPredicate<?> bound = assertAndUnwrap(predicate.bind(spec.schema().asStruct()));
public static TableMetadata newTableMetadata(TableOperations ops, Schema schema, PartitionSpec spec, String location, Map<String, String> properties) { // reassign all column ids to ensure consistency AtomicInteger lastColumnId = new AtomicInteger(0); Schema freshSchema = TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet); // rebuild the partition spec using the new column ids PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(freshSchema) .withSpecId(INITIAL_SPEC_ID); for (PartitionField field : spec.fields()) { // look up the name of the source field in the old schema to get the new schema's id String sourceName = schema.findColumnName(field.sourceId()); specBuilder.add( freshSchema.findField(sourceName).fieldId(), field.name(), field.transform().toString()); } PartitionSpec freshSpec = specBuilder.build(); return new TableMetadata(ops, null, location, System.currentTimeMillis(), lastColumnId.get(), freshSchema, INITIAL_SPEC_ID, ImmutableList.of(freshSpec), ImmutableMap.copyOf(properties), -1, ImmutableList.of(), ImmutableList.of()); }
/**
 * Creates a partition key for the given spec, resolving one accessor and one transform per
 * partition field.
 *
 * @param spec the partition spec whose fields define the key's positions
 * @throws RuntimeException if no accessor was built for a field's source ID
 */
@SuppressWarnings("unchecked")
PartitionKey(PartitionSpec spec) {
  this.spec = spec;

  List<PartitionField> partitionFields = spec.fields();
  this.size = partitionFields.size();
  this.partitionTuple = new Object[size];
  this.transforms = new Transform[size];
  // generic arrays cannot be created directly, hence the reflective allocation and cast
  this.accessors = (Accessor<InternalRow>[]) Array.newInstance(Accessor.class, size);

  Schema sourceSchema = spec.schema();
  Map<Integer, Accessor<InternalRow>> accessorsBySourceId = buildAccessors(sourceSchema);

  for (int pos = 0; pos < size; pos += 1) {
    PartitionField partitionField = partitionFields.get(pos);
    Accessor<InternalRow> fieldAccessor = accessorsBySourceId.get(partitionField.sourceId());
    if (fieldAccessor == null) {
      throw new RuntimeException(
          "Cannot build accessor for field: " + sourceSchema.findField(partitionField.sourceId()));
    }
    this.accessors[pos] = fieldAccessor;
    this.transforms[pos] = partitionField.transform();
  }
}
/**
 * Checks that a spec with an hour partition on {@code ts} and a 10-bucket partition on
 * {@code id} renders the expected partition path.
 */
@Test
@SuppressWarnings("unchecked")
public void testPartitionPath() {
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
      .hour("ts")
      .bucket("id", 10)
      .build();

  // transforms are fetched by source ID (3 and 1) and used as raw types on purpose
  Transform hourTransform = spec.getFieldBySourceId(3).transform();
  Transform bucketTransform = spec.getFieldBySourceId(1).transform();

  Literal<Long> timestamp = Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.withoutZone());
  Object hourValue = hourTransform.apply(timestamp.value());
  Object bucketValue = bucketTransform.apply(1);

  Row partitionRow = Row.of(hourValue, bucketValue);

  Assert.assertEquals("Should produce expected partition key",
      "ts_hour=2017-12-01-10/id_bucket=" + bucketValue,
      spec.partitionToPath(partitionRow));
}
); PartitionSpec spec = PartitionSpec.builderFor(schema).identity("x").withSpecId(6).build(); new GenericManifestFile(localInput("file:/tmp/manfiest.1.avro"), spec.specId()))); long currentSnapshotId = System.currentTimeMillis(); Snapshot currentSnapshot = new BaseSnapshot( ops, currentSnapshotId, previousSnapshotId, currentSnapshotId, ImmutableList.of( new GenericManifestFile(localInput("file:/tmp/manfiest.2.avro"), spec.specId()))); expected.schema().asStruct(), metadata.schema().asStruct()); Assert.assertEquals("Partition spec should be the default", expected.spec().toString(), metadata.spec().toString()); Assert.assertEquals("Default spec ID should default to TableMetadata.INITIAL_SPEC_ID", TableMetadata.INITIAL_SPEC_ID, metadata.defaultSpecId()); 1, metadata.specs().size()); Assert.assertTrue("PartitionSpec should contain the spec", metadata.specs().get(0).compatibleWith(spec)); Assert.assertEquals("PartitionSpec should have ID TableMetadata.INITIAL_SPEC_ID", TableMetadata.INITIAL_SPEC_ID, metadata.specs().get(0).specId()); Assert.assertEquals("Properties should match", expected.properties(), metadata.properties());
/**
 * Creates a reader over the given entries with no backing file and empty metadata.
 *
 * @param spec the partition spec; also the source of this reader's schema
 * @param entries the manifest entries this reader holds
 */
private ManifestReader(PartitionSpec spec, Iterable<ManifestEntry> entries) {
  // no file or file-level metadata backs this reader
  this.file = null;
  this.metadata = ImmutableMap.of();

  // the schema comes from the spec, not from a manifest file
  this.spec = spec;
  this.schema = spec.schema();

  this.entries = entries;
}
/**
 * Creates a file appender for writing manifest entries in the given format.
 *
 * <p>Only {@code AVRO} is handled. The Avro writer is named {@code manifest_entry} and is given
 * the spec's schema as JSON, the spec's fields as JSON, and the spec ID as metadata.
 *
 * @param format the file format to write
 * @param spec the partition spec supplying the partition type, schema, fields, and spec ID
 * @param file the output file
 * @return an appender for the manifest entry schema
 * @throws IllegalArgumentException if the format is anything other than {@code AVRO}
 * @throws RuntimeIOException if building the writer throws an {@code IOException}
 */
private static <D> FileAppender<D> newAppender(FileFormat format, PartitionSpec spec, OutputFile file) {
  Schema entrySchema = ManifestEntry.getSchema(spec.partitionType());
  try {
    switch (format) {
      case AVRO:
        return Avro.write(file)
            .schema(entrySchema)
            .named("manifest_entry")
            .meta("schema", SchemaParser.toJson(spec.schema()))
            .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
            .meta("partition-spec-id", String.valueOf(spec.specId()))
            .build();
      default:
        break;
    }
    // reached only for formats without a case above
    throw new IllegalArgumentException("Unsupported format: " + format);
  } catch (IOException ioFailure) {
    throw new RuntimeIOException(ioFailure, "Failed to create manifest writer for path: " + file);
  }
}
// closes the enclosing type, whose header lies outside this excerpt
}
/**
 * Checks that {@code /} characters inside partition values are escaped as {@code %2F} when a
 * partition path is rendered.
 */
@Test
public void testEscapedStrings() {
  PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
      .identity("data")
      .truncate("data", 10)
      .build();

  // both partition values contain slashes that must not be read as path separators
  Row partitionRow = Row.of("a/b/c/d", "a/b/c/d");
  String actualPath = spec.partitionToPath(partitionRow);

  Assert.assertEquals("Should escape / as %2F",
      "data=a%2Fb%2Fc%2Fd/data_trunc=a%2Fb%2Fc%2Fd",
      actualPath);
}
// closes the enclosing type, whose header lies outside this excerpt
}
); PartitionSpec spec = PartitionSpec.builderFor(schema).withSpecId(5).build(); new GenericManifestFile(localInput("file:/tmp/manfiest.1.avro"), spec.specId()))); long currentSnapshotId = System.currentTimeMillis(); Snapshot currentSnapshot = new BaseSnapshot( null, currentSnapshotId, previousSnapshotId, currentSnapshotId, ImmutableList.of( new GenericManifestFile(localInput("file:/tmp/manfiest.2.avro"), spec.specId()))); expected.schema().asStruct(), metadata.schema().asStruct()); Assert.assertEquals("Partition spec should match", expected.spec().toString(), metadata.spec().toString()); Assert.assertEquals("Default spec ID should match", expected.defaultSpecId(), metadata.defaultSpecId());
); PartitionSpec spec = PartitionSpec.builderFor(schema).withSpecId(5).build(); new GenericManifestFile(localInput("file:/tmp/manfiest.1.avro"), spec.specId()))); long currentSnapshotId = System.currentTimeMillis(); Snapshot currentSnapshot = new BaseSnapshot( ops, currentSnapshotId, previousSnapshotId, currentSnapshotId, ImmutableList.of( new GenericManifestFile(localInput("file:/tmp/manfiest.2.avro"), spec.specId())));
/**
 * Creates a summary with one stats holder per partition field of the given spec.
 *
 * @param spec the partition spec supplying the Java classes and partition field types
 */
PartitionSummary(PartitionSpec spec) {
  this.javaClasses = spec.javaClasses();
  this.fields = new PartitionFieldStats[javaClasses.length];

  // each stats holder is created with the type of the partition field at the same position
  List<Types.NestedField> partitionColumns = spec.partitionType().fields();
  for (int pos = 0; pos < fields.length; pos += 1) {
    Types.NestedField column = partitionColumns.get(pos);
    this.fields[pos] = new PartitionFieldStats<>(column.type());
  }
}