@Override
public void addAll(Iterator<ManifestFile> values) {
  writer.addAll(values);
}

@Override
public void addAll(Iterable<ManifestFile> values) {
  writer.addAll(values);
}
default void addAll(Iterable<D> values) {
  addAll(values.iterator());
}
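// A minimal, self-contained sketch of the delegation pattern shown above: the
// Iterable overload funnels into the Iterator overload, and a wrapping writer
// simply forwards both addAll calls to its inner appender. The names
// SimpleAppender and DelegatingAppender are illustrative placeholders, not
// Iceberg's actual types.
import java.util.Iterator;

interface SimpleAppender<D> {
  void add(D datum);

  default void addAll(Iterator<D> values) {
    while (values.hasNext()) {
      add(values.next());
    }
  }

  default void addAll(Iterable<D> values) {
    addAll(values.iterator());
  }
}

class DelegatingAppender<D> implements SimpleAppender<D> {
  private final SimpleAppender<D> writer;

  DelegatingAppender(SimpleAppender<D> writer) {
    this.writer = writer;
  }

  @Override
  public void add(D datum) {
    writer.add(datum);
  }

  @Override
  public void addAll(Iterator<D> values) {
    writer.addAll(values);
  }

  @Override
  public void addAll(Iterable<D> values) {
    writer.addAll(values);
  }
}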
// Writes n randomly generated rows for the given schema to a fresh local Parquet file
// and returns the file so the test can read it back.
private File writeTestData(Schema schema, int n, int seed) throws IOException {
  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile))
      .schema(schema)
      .build()) {
    writer.addAll(RandomData.generate(schema, n, seed));
  }

  return testFile;
}
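// Hypothetical usage of writeTestData above: write 1000 random rows for a schema and
// hand the result back to Iceberg as a local InputFile. SCHEMA is assumed to be a
// Schema constant defined elsewhere in the test class.
File dataFile = writeTestData(SCHEMA, 1000, 42);
Assert.assertTrue("Written file should exist", dataFile.exists());
InputFile input = Files.localInput(dataFile);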
    .schema(tableSchema)
    .build()) {
  writer.addAll(expected);
// Writes the given records to a new file at location/filename, choosing the writer
// from the file extension, and returns the written file as a Hadoop InputFile.
private InputFile writeFile(String location, String filename, List<Record> records) throws IOException {
  Path path = new Path(location, filename);
  FileFormat format = FileFormat.fromFileName(filename);
  Preconditions.checkNotNull(format, "Cannot determine format for file: %s", filename);

  switch (format) {
    case AVRO:
      try (FileAppender<Record> appender = Avro.write(fromPath(path, CONF))
          .schema(SCHEMA)
          .createWriterFunc(DataWriter::create)
          .named(format.name())
          .build()) {
        appender.addAll(records);
      }
      return HadoopInputFile.fromPath(path, CONF);

    case PARQUET:
      try (FileAppender<Record> appender = Parquet.write(fromPath(path, CONF))
          .schema(SCHEMA)
          .createWriterFunc(GenericParquetWriter::buildWriter)
          .build()) {
        appender.addAll(records);
      }
      return HadoopInputFile.fromPath(path, CONF);

    default:
      throw new UnsupportedOperationException("Cannot write format: " + format);
  }
}
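// Hypothetical usage of writeFile above: generate a few random generic records
// (RandomGenericData is the same helper the round-trip test below uses) and write
// them as Parquet, with the format inferred from the ".parquet" extension. The
// location string is a placeholder.
List<Record> rows = RandomGenericData.generate(SCHEMA, 10, 0L);
InputFile parquetFile = writeFile("file:/tmp/iceberg-write-test", "data-00000.parquet", rows);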
// Round-trips 100 random generic records through Parquet with the Iceberg generic
// writer and reader, then asserts each row matches the original.
protected void writeAndValidate(Schema schema) throws IOException {
  List<Record> expected = RandomGenericData.generate(schema, 100, 0L);

  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<Record> appender = Parquet.write(Files.localOutput(testFile))
      .schema(schema)
      .createWriterFunc(GenericParquetWriter::buildWriter)
      .build()) {
    appender.addAll(expected);
  }

  List<Record> rows;
  try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile))
      .project(schema)
      .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema))
      .build()) {
    rows = Lists.newArrayList(reader);
  }

  for (int i = 0; i < expected.size(); i += 1) {
    DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
  }
}
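// Hypothetical caller for writeAndValidate above: a flat schema with a required id
// column and an optional data column. The field ids and names are illustrative.
Schema simpleSchema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));
writeAndValidate(simpleSchema);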
// Round-trips 100 random Avro records through Parquet and validates them against the
// Spark InternalRow values produced by SparkParquetReaders. Schemas with non-string
// map keys are skipped because parquet-avro cannot write them.
protected void writeAndValidate(Schema schema) throws IOException {
  Assume.assumeTrue("Parquet Avro cannot write non-string map keys",
      null == TypeUtil.find(schema,
          type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get()));

  List<GenericData.Record> expected = RandomData.generateList(schema, 100, 0L);

  File testFile = temp.newFile();
  Assert.assertTrue("Delete should succeed", testFile.delete());

  try (FileAppender<GenericData.Record> writer = Parquet.write(Files.localOutput(testFile))
      .schema(schema)
      .named("test")
      .build()) {
    writer.addAll(expected);
  }

  try (CloseableIterable<InternalRow> reader = Parquet.read(Files.localInput(testFile))
      .project(schema)
      .createReaderFunc(type -> SparkParquetReaders.buildReader(schema, type))
      .build()) {
    Iterator<InternalRow> rows = reader.iterator();
    for (int i = 0; i < expected.size(); i += 1) {
      Assert.assertTrue("Should have expected number of rows", rows.hasNext());
      assertEqualsUnsafe(schema.asStruct(), expected.get(i), rows.next());
    }
    Assert.assertFalse("Should not have extra rows", rows.hasNext());
  }
}
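// Illustration of the Assume guard above: a schema like this one, whose map keys are
// ints rather than strings, would be skipped because parquet-avro cannot write
// non-string map keys. The field ids and names are illustrative.
Schema nonStringKeyMap = new Schema(
    Types.NestedField.required(1, "counts",
        Types.MapType.ofRequired(2, 3, Types.IntegerType.get(), Types.LongType.get())));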