@Override
public void write(InternalRow record) {
  appender.add(record);
}
@Override
public void addAll(Iterator<ManifestFile> values) {
  writer.addAll(values);
}

@Override
public void close() throws IOException {
  this.closed = true;
  writer.close();
}

@Override
public Metrics metrics() {
  return writer.metrics();
}

@Override
public void close() throws IOException {
  writer.close();
}

@Override
public Metrics metrics() {
  return writer.metrics();
}

@Override
public void add(ManifestFile file) {
  writer.add(file);
}

@Override
public void addAll(Iterable<ManifestFile> values) {
  writer.addAll(values);
}
private void closeCurrent() throws IOException {
  if (currentAppender != null) {
    currentAppender.close();
    // metrics are only valid after the appender is closed
    Metrics metrics = currentAppender.metrics();
    this.currentAppender = null;

    InputFile inFile = HadoopInputFile.fromPath(currentPath, conf);
    DataFile dataFile = DataFiles.builder(spec)
        .withInputFile(inFile)
        .withPartition(currentKey)
        .withMetrics(metrics)
        .build();

    completedPartitions.add(currentKey);
    completedFiles.add(dataFile);
  }
}
@Override
public void abort() throws IOException {
  FileSystem fs = currentPath.getFileSystem(conf);

  // clean up files created by this writer
  Tasks.foreach(completedFiles)
      .throwFailureWhenFinished()
      .noRetry()
      .run(file -> fs.delete(new Path(file.path().toString())), IOException.class);

  if (currentAppender != null) {
    currentAppender.close();
    this.currentAppender = null;
    fs.delete(currentPath);
  }
}
default void addAll(Iterator<D> values) {
  while (values.hasNext()) {
    add(values.next());
  }
}

default void addAll(Iterable<D> values) {
  addAll(values.iterator());
}
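The two addAll defaults above reduce bulk writes to repeated add() calls, so an implementation only needs to provide add() and callers can hand over any Iterable or Iterator. A minimal usage sketch, reusing the Avro builder style shown later in this section; 'schema' and 'records' are assumed to exist, and the output path is illustrative:

// usage sketch only: 'schema' and 'records' are assumed to be defined elsewhere; the path is illustrative
try (FileAppender<Record> appender = Avro.write(Files.localOutput(new File("/tmp/example.avro")))
    .schema(schema)
    .named("example")
    .build()) {
  appender.addAll(records);  // Iterable overload; delegates to the Iterator overload, which calls add() per element
}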
@Override
public void write(InternalRow row) throws IOException {
  key.partition(row);

  if (!key.equals(currentKey)) {
    closeCurrent();

    if (completedPartitions.contains(key)) {
      // if rows are not correctly grouped, detect and fail the write
      PartitionKey existingKey = Iterables.find(completedPartitions, key::equals, null);
      LOG.warn("Duplicate key: {} == {}", existingKey, key);
      throw new IllegalStateException("Already closed file for partition: " + key.toPath());
    }

    this.currentKey = key.copy();
    this.currentPath = outputPathFunc.apply(currentKey);
    OutputFile file = HadoopOutputFile.fromPath(currentPath, conf);
    this.currentAppender = factory.newAppender(file, format);
  }

  currentAppender.add(row);
}
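The write() method above resolves each new partition's file location through outputPathFunc.apply(currentKey), so the directory layout is supplied by the caller. A hedged sketch of such a function; only PartitionKey.toPath() comes from the code above, while the base directory, file name, and extension are purely illustrative assumptions:

// hedged sketch: the base directory, UUID file name, and ".parquet" suffix are assumptions, not from the source
Function<PartitionKey, Path> outputPathFunc = partitionKey ->
    new Path(new Path("/tmp/warehouse/table/data", partitionKey.toPath()),
        UUID.randomUUID() + ".parquet");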
private File writeTestData(Schema schema, int n, int seed) throws IOException {
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile))
      .schema(schema)
      .build()) {
    writer.addAll(RandomData.generate(schema, n, seed));
  }

  return testFile;
}
public void add(ManifestEntry entry) {
  switch (entry.status()) {
    case ADDED:
      addedFiles += 1;
      break;
    case EXISTING:
      existingFiles += 1;
      break;
    case DELETED:
      deletedFiles += 1;
      break;
  }
  stats.update(entry.file().partition());
  writer.add(entry);
}
    .schema(tableSchema)
    .build()) {
  writer.addAll(expected);
protected GenericData.Record writeAndRead(String desc, Schema writeSchema, Schema readSchema,
                                           GenericData.Record record) throws IOException {
  File file = temp.newFile(desc + ".avro");
  file.delete();

  try (FileAppender<GenericData.Record> appender = Avro.write(Files.localOutput(file))
      .schema(writeSchema)
      .build()) {
    appender.add(record);
  }

  Iterable<GenericData.Record> records = Avro.read(Files.localInput(file))
      .project(readSchema)
      .build();

  return Iterables.getOnlyElement(records);
}
private InputFile writeFile(String location, String filename, List<Record> records) throws IOException {
  Path path = new Path(location, filename);
  FileFormat format = FileFormat.fromFileName(filename);
  Preconditions.checkNotNull(format, "Cannot determine format for file: %s", filename);

  switch (format) {
    case AVRO:
      try (FileAppender<Record> appender = Avro.write(fromPath(path, CONF))
          .schema(SCHEMA)
          .createWriterFunc(DataWriter::create)
          .named(format.name())
          .build()) {
        appender.addAll(records);
      }
      return HadoopInputFile.fromPath(path, CONF);

    case PARQUET:
      try (FileAppender<Record> appender = Parquet.write(fromPath(path, CONF))
          .schema(SCHEMA)
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .build()) {
        appender.addAll(records);
      }
      return HadoopInputFile.fromPath(path, CONF);

    default:
      throw new UnsupportedOperationException("Cannot write format: " + format);
  }
}
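The helper above picks the writer from the file extension via FileFormat.fromFileName. A possible call, where the location, file name, and 'records' list are illustrative assumptions:

// illustrative call: location and file name are assumptions; the ".parquet" suffix selects the Parquet branch
InputFile in = writeFile("/tmp/warehouse/test-table", "data-00000.parquet", records);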
protected void writeAndValidate(Schema schema) throws IOException {
  List<Record> expected = RandomAvroData.generate(schema, 100, 0L);

  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile))
      .schema(schema)
      .named("test")
      .build()) {
    for (Record rec : expected) {
      writer.add(rec);
    }
  }

  List<Record> rows;
  try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile))
      .project(schema)
      .build()) {
    rows = Lists.newArrayList(reader);
  }

  for (int i = 0; i < expected.size(); i += 1) {
    AvroTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
  }
}