/**
 * Command-line entry point: generates random instances of the given schema
 * and writes them to a Trevni column file.
 * Expects exactly three arguments: schemaFile, count, outputFile.
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaSource = new File(args.get(0));
  int recordCount = Integer.parseInt(args.get(1));
  File target = new File(args.get(2));

  Schema schema = new Schema.Parser().parse(schemaSource);
  AvroColumnWriter<Object> columnWriter =
      new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object record : new RandomData(schema, recordCount)) {
    columnWriter.write(record);
  }
  columnWriter.writeTo(target);
  return 0;
}
}
/**
 * Writes the wrapped datum as one row; when the writer's estimated buffer
 * size reaches the configured block size, rolls over to a new part file.
 */
public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
  writer.write(wrapper.datum());
  if (writer.sizeEstimate() >= blockSize) // block full
    flush();
}

// NOTE(review): close() continues beyond this excerpt — body not visible here.
public void close(Reporter reporter) throws IOException {
// NOTE(review): garbled excerpt of a recursive column-writing routine — the
// enclosing switch statement and several branches appear to be missing from
// this view, so braces do not balance.  Kept byte-for-byte; consult the full
// source before editing.  Returns the next column index after writing `o`.
private int write(Object o, Schema s, int column) throws IOException {
  if (isSimple(s)) {
    writeValue(o, s, column);
    return column+1;
    writer.writeValue(null, column);
    writer.writeValue(e.getKey(), column+1);
    int c = write(e.getValue(), s.getValueType(), column+2);
    assert(c == column+arrayWidths[column]);
  case RECORD:
    // records: write each field into consecutive columns
    for (Field f : s.getFields())
      column = write(model.getField(o,f.name(),f.pos()), f.schema(), column);
    return column;
  case ARRAY:
    if (isSimple(s.getElementType())) { // optimize simple arrays
      for (Object element : elements)
        writeValue(element, s.getElementType(), column);
      return column+1;
      int c = write(element, s.getElementType(), column+1);
      assert(c == column+arrayWidths[column]);
      writer.writeLength(1, column);
      if (isSimple(branch)) {
        writeValue(o, branch, column++);
      } else {
        writer.writeValue(null, column);
        column = write(o, branch, column+1);
/** Closes out the buffered rows to a new part file and starts a fresh writer. */
private void flush() throws IOException {
  Path partFile = new Path(dir, "part-" + (part++) + EXT);
  OutputStream stream = fs.create(partFile);
  try {
    writer.writeTo(stream);
  } finally {
    stream.close();
  }
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());
}
/**
 * Constructor.
 * @param context the TaskAttemptContext supplying the writer with information
 *                from the job configuration
 * @throws IOException if the output directory cannot be created
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());

  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  // Use Path's parent/child constructor instead of string concatenation with
  // "/" — it normalizes separators correctly.
  dirPath = new Path(outputPath, dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);
  // Part files are rolled when the writer's size estimate reaches this size.
  blockSize = fs.getDefaultBlockSize();
}
/** Add a row to the file. */
public void write(D value) throws IOException {
  writer.startRow();
  final int columnsWritten = write(value, schema, 0);
  assert columnsWritten == writer.getColumnCount();
  writer.endRow();
}
/**
 * Rolls the output: writes the buffered rows to a newly created part file,
 * then replaces the column writer with a fresh one.
 * @throws IOException if the part file cannot be created or written
 */
public void flush() throws IOException {
  Path target = new Path(dirPath, "part-" + (part++) + EXT);
  OutputStream partStream = fs.create(target);
  try {
    writer.writeTo(partStream);
  } finally {
    partStream.close();
  }
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());
}
// NOTE(review): fragment — presumably the right-hand side of a `writer = ...`
// assignment re-initializing the column writer; confirm against the full file.
new AvroColumnWriter<>(schema, meta, ReflectData.get());
/** Add a row to the file. */
public void write(D value) throws IOException {
  writer.startRow();
  int written = write(value, schema, 0);
  // Every leaf column must have been written exactly once.
  assert written == writer.getColumnCount();
  writer.endRow();
}
private void checkWrite(Schema schema) throws IOException { AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); int count = 0; for (Object datum : new RandomData(schema, COUNT, SEED)) { //System.out.println("datum="+datum); writer.write(datum); } writer.writeTo(FILE); }
/** Writes buffered rows to a new part file and resets the column writer. */
private void flush() throws IOException {
  OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
  try {
    writer.writeTo(out);
  } finally {
    out.close();
  }
  // Diamond operator for consistency with the other writer constructions in
  // this codebase (explicit <T> was redundant).
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());
}
/** {@inheritDoc} */ @Override public void write(AvroKey<T> key, NullWritable value) throws IOException, InterruptedException { writer.write(key.datum()); if (writer.sizeEstimate() >= blockSize) // block full flush(); }
// NOTE(review): garbled excerpt (duplicate of an identical fragment elsewhere
// in this view) — the enclosing switch and several branches are missing, so
// braces do not balance.  Kept byte-for-byte; consult the full source before
// editing.  Returns the next column index after writing `o`.
private int write(Object o, Schema s, int column) throws IOException {
  if (isSimple(s)) {
    writeValue(o, s, column);
    return column+1;
    writer.writeValue(null, column);
    writer.writeValue(e.getKey(), column+1);
    int c = write(e.getValue(), s.getValueType(), column+2);
    assert(c == column+arrayWidths[column]);
  case RECORD:
    // records: write each field into consecutive columns
    for (Field f : s.getFields())
      column = write(model.getField(o,f.name(),f.pos()), f.schema(), column);
    return column;
  case ARRAY:
    if (isSimple(s.getElementType())) { // optimize simple arrays
      for (Object element : elements)
        writeValue(element, s.getElementType(), column);
      return column+1;
      int c = write(element, s.getElementType(), column+1);
      assert(c == column+arrayWidths[column]);
      writer.writeLength(1, column);
      if (isSimple(branch)) {
        writeValue(o, branch, column++);
      } else {
        writer.writeValue(null, column);
        column = write(o, branch, column+1);
// NOTE(review): fragment — presumably the right-hand side of a `writer = ...`
// assignment; confirm against the full file before modifying.
new AvroColumnWriter<T>(schema, meta, ReflectData.get());
// NOTE(review): fragment of a command body — copies the records produced by
// `reader` into a Trevni column file written to args.get(1) (or stdout),
// using the codec selected on the command line.
OutputStream outs = Util.fileOrStdout(args.get(1), out);
AvroColumnWriter<Object> writer =
    new AvroColumnWriter<>(reader.getSchema(),
        new ColumnFileMetaData().setCodec(codec.value(opts)));
for (Object datum : reader)
  writer.write(datum);
writer.writeTo(outs);
outs.close();
reader.close();
/** {@inheritDoc} */ @Override public void write(AvroKey<K> key, AvroValue<V> value) throws IOException, InterruptedException { keyValueRecord.setKey(key.datum()); keyValueRecord.setValue(value.datum()); writer.write(keyValueRecord.get()); if (writer.sizeEstimate() >= blockSize) // block full flush(); }
private void runCase(File dir) throws Exception { Schema schema = Schema.parse(new File(dir, "input.avsc")); List<Object> data = fromJson(schema, new File(dir, "input.json")); // write full data AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); for (Object datum : data) writer.write(datum); writer.writeTo(FILE); // test that the full schema reads correctly checkRead(schema, data); // test that sub-schemas read correctly for (File f : dir.listFiles()) if (f.isDirectory() && !f.getName().startsWith(".")) { Schema s = Schema.parse(new File(f, "sub.avsc")); checkRead(s, fromJson(s, new File(f, "sub.json"))); } }
/**
 * Appends one datum as a row; once the writer's size estimate reaches the
 * block size the current part file is flushed and a new writer started.
 */
public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
  writer.write(wrapper.datum());
  if (writer.sizeEstimate() >= blockSize) // block full
    flush();
}

// NOTE(review): close() continues beyond this excerpt — body not visible here.
public void close(Reporter reporter) throws IOException {
/**
 * Round-trips a record through a Trevni file and reads it back with an
 * evolved schema, verifying the evolved view of the data.
 */
@Test
public void testTrevniEvolvedRead() throws IOException {
  AvroColumnWriter<GenericRecord> acw =
      new AvroColumnWriter<>(writer, new ColumnFileMetaData());
  acw.write(writtenRecord);
  File serializedTrevni = File.createTempFile("trevni", null);
  serializedTrevni.deleteOnExit(); // don't leave temp files behind
  acw.writeTo(serializedTrevni);

  AvroColumnReader.Params params = new Params(serializedTrevni);
  params.setSchema(evolved);
  AvroColumnReader<GenericRecord> acr = new AvroColumnReader<>(params);
  try {
    GenericRecord readRecord = acr.next();
    Assert.assertEquals(evolvedRecord, readRecord);
    Assert.assertFalse(acr.hasNext());
  } finally {
    acr.close(); // release the underlying file handle
  }
}
/**
 * Command-line entry point: generates {@code count} random records matching
 * the given schema and writes them to a Trevni column file.
 * Arguments: schemaFile count outputFile.
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  // Schema.parse(File) is deprecated; use Schema.Parser (and the diamond
  // operator) for consistency with the other tool in this codebase.
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer =
      new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count))
    writer.write(datum);
  writer.writeTo(outputFile);
  return 0;
}
}