@Override
public String manifestListLocation() {
  // Location of the manifest list file, or null when this snapshot has none.
  if (manifestList == null) {
    return null;
  }
  return manifestList.location();
}
/**
 * Wraps an existing manifest file whose metrics are not yet known.
 *
 * <p>Length, snapshot id, file counts, and partition summaries are left null so they can be
 * lazily loaded from the file when first requested.
 */
GenericManifestFile(InputFile file, int specId) {
  this.avroSchema = AVRO_SCHEMA;
  this.file = file;
  this.manifestPath = file.location();
  this.specId = specId;
  // unknown until the manifest is actually read; loaded lazily
  this.length = null;
  this.snapshotId = null;
  this.addedFilesCount = null;
  this.existingFilesCount = null;
  this.deletedFilesCount = null;
  this.partitions = null;
  this.fromProjectionPos = null;
}
/**
 * Opens a Parquet reader for the given input file.
 *
 * <p>Any IOException is rethrown as an unchecked RuntimeIOException that carries the
 * file location for easier diagnosis.
 */
private static ParquetFileReader newReader(InputFile file, ParquetReadOptions options) {
  try {
    return ParquetFileReader.open(ParquetIO.file(file), options);
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to open Parquet file: %s", file.location());
  }
}
}
/**
 * Populates the builder's path and size from an input file.
 *
 * <p>Hadoop input files already carry a FileStatus with both values, so those are
 * delegated to {@code withStatus} instead of re-reading them.
 */
public Builder withInputFile(InputFile file) {
  if (file instanceof HadoopInputFile) {
    return withStatus(((HadoopInputFile) file).getStat());
  }
  this.filePath = file.location();
  this.fileSizeInBytes = file.getLength();
  return this;
}
/**
 * Reads table metadata from a file, transparently decompressing files whose location
 * ends in "gz".
 *
 * @param ops table operations, passed through to {@code fromJson}
 * @param file the metadata file to read
 * @return the parsed table metadata
 * @throws RuntimeIOException if the file cannot be opened or read
 */
public static TableMetadata read(TableOperations ops, InputFile file) {
  // try-with-resources: the original never closed the stream, leaking it when the gzip
  // wrapper or JSON parsing threw (and relying on the JSON parser to close it otherwise)
  try (InputStream is = file.location().endsWith("gz")
      ? new GzipCompressorInputStream(file.newStream())
      : file.newStream()) {
    return fromJson(ops, file, JsonUtil.mapper().readValue(is, JsonNode.class));
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to read file: %s", file);
  }
}
// Builds an iterator over the ORC file, reading it with an ORC schema converted from the
// required Iceberg schema. The trailing '}' closes the enclosing builder class.
// NOTE(review): when start is non-null, length is passed to options.range() without its own
// null check — presumably the builder sets both together; confirm, otherwise this unboxes a
// null Long and throws NPE.
// NOTE(review): the Reader is not closed here; assumed to be owned/closed via the returned
// OrcIterator — verify.
public OrcIterator build() { Preconditions.checkNotNull(schema, "Schema is required"); try { Path path = new Path(file.location()); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); ColumnIdMap columnIds = new ColumnIdMap(); TypeDescription orcSchema = TypeConversion.toOrc(schema, columnIds); Reader.Options options = reader.options(); if (start != null) { options.range(start, length); } options.schema(orcSchema); return new OrcIterator(path, orcSchema, reader.rows(options)); } catch (IOException e) { throw new RuntimeException("Can't open " + file.location(), e); } } }
/**
 * Creates a DataFile for a Parquet input file with the given partition and metrics.
 *
 * <p>Hadoop input files delegate to {@code fromParquetStat}, which reads path and
 * length from the FileStatus directly.
 */
public static DataFile fromParquetInputFile(InputFile file, PartitionData partition, Metrics metrics) {
  if (file instanceof HadoopInputFile) {
    return fromParquetStat(((HadoopInputFile) file).getStat(), partition, metrics);
  }
  // format is fixed to Parquet here; no need to sniff it from the file name
  return new GenericDataFile(
      file.location(), FileFormat.PARQUET, partition, file.getLength(), DEFAULT_BLOCK_SIZE, metrics);
}
@Override public List<ManifestFile> manifests() { if (manifests == null) { // if manifests isn't set, then the snapshotFile is set and should be read to get the list try (CloseableIterable<ManifestFile> files = Avro.read(manifestList) .rename("manifest_file", GenericManifestFile.class.getName()) .rename("partitions", GenericPartitionFieldSummary.class.getName()) .rename("r508", GenericPartitionFieldSummary.class.getName()) .project(ManifestFile.schema()) .reuseContainers(false) .build()) { this.manifests = Lists.newLinkedList(files); } catch (IOException e) { throw new RuntimeIOException(e, "Cannot read snapshot file: %s", manifestList.location()); } } return manifests; }
/**
 * Creates a DataFile for an input file with the given partition and row count.
 *
 * <p>Hadoop input files delegate to {@code fromStat}; otherwise the format is inferred
 * from the file name.
 */
public static DataFile fromInputFile(InputFile file, PartitionData partition, long rowCount) {
  if (file instanceof HadoopInputFile) {
    return fromStat(((HadoopInputFile) file).getStat(), partition, rowCount);
  }
  String path = file.location();
  return new GenericDataFile(
      path, FileFormat.fromFileName(path), partition, rowCount, file.getLength(), DEFAULT_BLOCK_SIZE);
}
/**
 * Creates a DataFile for an input file with the given partition and metrics.
 *
 * <p>Hadoop input files delegate to {@code fromStat}; otherwise the format is inferred
 * from the file name.
 */
public static DataFile fromInputFile(InputFile file, PartitionData partition, Metrics metrics) {
  if (file instanceof HadoopInputFile) {
    return fromStat(((HadoopInputFile) file).getStat(), partition, metrics);
  }
  String path = file.location();
  return new GenericDataFile(
      path, FileFormat.fromFileName(path), partition, file.getLength(), DEFAULT_BLOCK_SIZE, metrics);
}
/**
 * Creates an unpartitioned DataFile for an input file with the given row count.
 *
 * <p>Hadoop input files delegate to {@code fromStat}; otherwise the format is inferred
 * from the file name.
 */
public static DataFile fromInputFile(InputFile file, long rowCount) {
  if (file instanceof HadoopInputFile) {
    return fromStat(((HadoopInputFile) file).getStat(), rowCount);
  }
  String path = file.location();
  return new GenericDataFile(path, FileFormat.fromFileName(path), rowCount, file.getLength(), DEFAULT_BLOCK_SIZE);
}
// Returns an iterable over this manifest's entries, projected to the requested columns.
// An in-memory reader (or one whose entries were already cached) returns the cached list
// wrapped with a no-op close; otherwise the file format is sniffed from the file name and
// only Avro is supported.
// The "r102"/"r2" renames map positional record names emitted by older writers onto the
// same classes as the named "partition"/"data_file" records.
// NOTE(review): the reader is registered via addCloseable before being returned, so it is
// presumably closed with this object rather than by the caller — confirm ownership.
CloseableIterable<ManifestEntry> entries(Collection<String> columns) { if (entries != null) { // if this reader is an in-memory list or if the entries have been cached, return the list. return CloseableIterable.withNoopClose(entries); } FileFormat format = FileFormat.fromFileName(file.location()); Preconditions.checkArgument(format != null, "Unable to determine format of manifest: " + file); Schema schema = ManifestEntry.projectSchema(spec.partitionType(), columns); switch (format) { case AVRO: AvroIterable<ManifestEntry> reader = Avro.read(file) .project(schema) .rename("manifest_entry", ManifestEntry.class.getName()) .rename("partition", PartitionData.class.getName()) .rename("r102", PartitionData.class.getName()) .rename("data_file", GenericDataFile.class.getName()) .rename("r2", GenericDataFile.class.getName()) .reuseContainers() .build(); addCloseable(reader); return reader; default: throw new UnsupportedOperationException("Invalid format for manifest file: " + format); } }