/**
 * Creates a new {@link PartitionData} for the partition type of the given spec.
 *
 * @param spec the partition spec whose partition type is passed to the new instance
 * @return a newly constructed {@code PartitionData}
 */
private static PartitionData newPartitionData(PartitionSpec spec) {
  return new PartitionData(
      spec.partitionType());
}
/**
 * Returns the partition filter evaluator, building it the first time it is requested.
 *
 * <p>The evaluator is constructed from the partition type of the reader's spec and
 * {@code partFilter}, then stored in {@code lazyEvaluator} for later calls.
 *
 * @return the stored evaluator
 */
private Evaluator evaluator() {
  if (lazyEvaluator != null) {
    // already built by an earlier call
    return lazyEvaluator;
  }

  this.lazyEvaluator = new Evaluator(reader.spec().partitionType(), partFilter);
  return lazyEvaluator;
}
/**
 * Creates a summary with one {@code PartitionFieldStats} slot per Java class reported by the spec.
 *
 * <p>Slot {@code i} is initialized with the type of the {@code i}-th field of the spec's
 * partition type.
 *
 * @param spec the partition spec that supplies the Java classes and the partition fields
 */
PartitionSummary(PartitionSpec spec) {
  this.javaClasses = spec.javaClasses();

  int slotCount = javaClasses.length;
  this.fields = new PartitionFieldStats[slotCount];

  List<Types.NestedField> partitionColumns = spec.partitionType().fields();
  int pos = 0;
  while (pos < slotCount) {
    this.fields[pos] = new PartitionFieldStats<>(partitionColumns.get(pos).type());
    pos++;
  }
}
public InclusiveManifestEvaluator(PartitionSpec spec, Expression rowFilter) { this.struct = spec.partitionType(); this.expr = Binder.bind(struct, rewriteNot(Projections.inclusive(spec).project(rowFilter))); }
/**
 * Creates a writer that appends manifest entries to the given output file in Avro format.
 *
 * @param spec the partition spec; supplies the spec id, the partition type of the reused entry,
 *     and the input for the partition summary
 * @param file the output file the appender is created for
 * @param snapshotId the snapshot id stored on this writer
 */
ManifestWriter(PartitionSpec spec, OutputFile file, long snapshotId) {
  // values copied straight from the arguments
  this.snapshotId = snapshotId;
  this.file = file;
  this.location = file.location();
  this.specId = spec.specId();

  // collaborators built from the spec and the output file
  this.writer = newAppender(FileFormat.AVRO, spec, file);
  this.reused = new ManifestEntry(spec.partitionType());
  this.stats = new PartitionSummary(spec);
}
CloseableIterable<ManifestEntry> entries(Collection<String> columns) { if (entries != null) { // if this reader is an in-memory list or if the entries have been cached, return the list. return CloseableIterable.withNoopClose(entries); } FileFormat format = FileFormat.fromFileName(file.location()); Preconditions.checkArgument(format != null, "Unable to determine format of manifest: " + file); Schema schema = ManifestEntry.projectSchema(spec.partitionType(), columns); switch (format) { case AVRO: AvroIterable<ManifestEntry> reader = Avro.read(file) .project(schema) .rename("manifest_entry", ManifestEntry.class.getName()) .rename("partition", PartitionData.class.getName()) .rename("r102", PartitionData.class.getName()) .rename("data_file", GenericDataFile.class.getName()) .rename("r2", GenericDataFile.class.getName()) .reuseContainers() .build(); addCloseable(reader); return reader; default: throw new UnsupportedOperationException("Invalid format for manifest file: " + format); } }
/**
 * Creates a file appender that writes manifest entries for the given spec to {@code file}.
 *
 * <p>The manifest schema is derived from the spec's partition type. For {@code AVRO}, the
 * writer is named {@code "manifest_entry"} and carries the table schema JSON, the partition
 * spec fields, and the spec id as metadata under the keys {@code "schema"},
 * {@code "partition-spec"} and {@code "partition-spec-id"}.
 *
 * @param format the file format to write; only {@code AVRO} is handled
 * @param spec the partition spec that supplies the schema, partition fields and spec id
 * @param file the output file to write to
 * @param <D> the type of records accepted by the returned appender
 * @return a new appender for {@code file}
 * @throws IllegalArgumentException if {@code format} is not {@code AVRO}
 * @throws RuntimeIOException if building the writer fails with an {@code IOException}
 */
private static <D> FileAppender<D> newAppender(FileFormat format, PartitionSpec spec,
                                               OutputFile file) {
  Schema manifestSchema = ManifestEntry.getSchema(spec.partitionType());
  try {
    switch (format) {
      case AVRO:
        return Avro.write(file)
            .schema(manifestSchema)
            .named("manifest_entry")
            .meta("schema", SchemaParser.toJson(spec.schema()))
            .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
            .meta("partition-spec-id", String.valueOf(spec.specId()))
            .build();
      default:
        throw new IllegalArgumentException("Unsupported format: " + format);
    }
  } catch (IOException e) {
    // Pass the file as a format argument rather than concatenating it into the message, so a
    // '%' in its string form cannot be interpreted as a format specifier.
    // NOTE(review): relies on RuntimeIOException(IOException, String, Object...) formatting the
    // message with the trailing arguments -- confirm against the exception class.
    throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
  }
}
}
@Override @SuppressWarnings("unchecked") public <T> Expression predicate(BoundPredicate<T> pred) { // Get the strict projection of this predicate in partition data, then use it to determine // whether to return the original predicate. The strict projection returns true iff the // original predicate would have returned true, so the predicate can be eliminated if the // strict projection evaluates to true. // // If there is no strict projection or if it evaluates to false, then return the predicate. PartitionField part = spec.getFieldBySourceId(pred.ref().fieldId()); if (part == null) { return pred; // not associated inclusive a partition field, can't be evaluated } UnboundPredicate<?> strictProjection = ((Transform<T, ?>) part.transform()) .projectStrict(part.name(), pred); if (strictProjection != null) { Expression bound = strictProjection.bind(spec.partitionType()); if (bound instanceof BoundPredicate) { // the predicate methods will evaluate and return alwaysTrue or alwaysFalse return super.predicate((BoundPredicate<?>) bound); } return bound; // use the non-predicate residual (e.g. alwaysTrue) } // if the predicate could not be projected, it must be in the residual return pred; }
@Override public List<ManifestFile> apply(TableMetadata base) { if (validateAddedFiles) { PartitionSpec spec = writeSpec(); Expression rowFilter = rowFilter(); Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter); Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr); Expression strictExpr = Projections.strict(spec).project(rowFilter); Evaluator strict = new Evaluator(spec.partitionType(), strictExpr); StrictMetricsEvaluator metrics = new StrictMetricsEvaluator( base.schema(), rowFilter); for (DataFile file : addedFiles()) { // the real test is that the strict or metrics test matches the file, indicating that all // records in the file match the filter. inclusive is used to avoid testing the metrics, // which is more complicated ValidationException.check( inclusive.eval(file.partition()) && (strict.eval(file.partition()) || metrics.eval(file)), "Cannot append file with rows that do not match filter: %s: %s", rowFilter, file.path()); } } return super.apply(base); } }
.inclusive(reader.spec()) .project(deleteExpression); Evaluator inclusive = new Evaluator(reader.spec().partitionType(), inclusiveExpr); Evaluator strict = new Evaluator(reader.spec().partitionType(), strictExpr);