/**
 * Collects Trevni file-metadata entries from the job configuration.
 * Every configuration key beginning with META_PREFIX is copied into the
 * result with the prefix stripped; values are stored as UTF-8 bytes.
 */
static ColumnFileMetaData filterMetadata(final JobConf job) {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Map.Entry<String, String> entry : job) {
    final String key = entry.getKey();
    if (key.startsWith(META_PREFIX)) {
      meta.put(key.substring(META_PREFIX.length()),
               entry.getValue().getBytes(MetaData.UTF8));
    }
  }
  return meta;
}
/** Reads column-file metadata from the given input buffer. */
static ColumnFileMetaData read(InputBuffer in) throws IOException {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  MetaData.read(in, meta);
  return meta;
}
/**
 * Extracts Trevni file-metadata entries from a Hadoop configuration.
 * Keys starting with META_PREFIX are copied with the prefix removed and
 * their values encoded as UTF-8 bytes.
 */
static ColumnFileMetaData filterMetadata(final Configuration configuration) {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  // Configuration is iterable over its key/value entries.
  for (Entry<String, String> confEntry : configuration) {
    final String key = confEntry.getKey();
    if (key.startsWith(META_PREFIX)) {
      meta.put(key.substring(META_PREFIX.length()),
               confEntry.getValue().getBytes(MetaData.UTF8));
    }
  }
  return meta;
}
}
/**
 * Command-line entry point: generates {@code count} random records for the
 * schema in {@code schemaFile} and writes them as a Trevni column file to
 * {@code outputFile}.
 *
 * @return 0 on success, 1 on a usage error
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));

  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count)) {
    writer.write(datum);
  }
  writer.writeTo(outputFile);
  return 0;
}
}
private void checkWrite(Schema schema) throws IOException { AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); int count = 0; for (Object datum : new RandomData(schema, COUNT, SEED)) { //System.out.println("datum="+datum); writer.write(datum); } writer.writeTo(FILE); }
/** Builds the file metadata carrying the configured codec and checksum. */
private ColumnFileMetaData createFileMeta() {
  final ColumnFileMetaData fileMeta = new ColumnFileMetaData();
  return fileMeta.setCodec(codec).setChecksum(checksum);
}
// Create a Trevni column writer over the input reader's schema, recording
// the codec chosen on the command line (codec.value(opts)) in the new
// file's metadata.
AvroColumnWriter<Object> writer = new AvroColumnWriter<>(reader.getSchema(), new ColumnFileMetaData() .setCodec(codec.value(opts)));
// Copy every record from the reader; the loop body continues past this view.
for (Object datum : reader)
private void runCase(File dir) throws Exception { Schema schema = Schema.parse(new File(dir, "input.avsc")); List<Object> data = fromJson(schema, new File(dir, "input.json")); // write full data AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); for (Object datum : data) writer.write(datum); writer.writeTo(FILE); // test that the full schema reads correctly checkRead(schema, data); // test that sub-schemas read correctly for (File f : dir.listFiles()) if (f.isDirectory() && !f.getName().startsWith(".")) { Schema s = Schema.parse(new File(f, "sub.avsc")); checkRead(s, fromJson(s, new File(f, "sub.json"))); } }
/**
 * Writes a record with the writer schema, re-reads the file with the evolved
 * schema, and checks that exactly the expected evolved record comes back.
 */
@Test
public void testTrevniEvolvedRead() throws IOException {
  AvroColumnWriter<GenericRecord> columnWriter = new AvroColumnWriter<>(writer, new ColumnFileMetaData());
  columnWriter.write(writtenRecord);

  File serializedTrevni = File.createTempFile("trevni", null);
  columnWriter.writeTo(serializedTrevni);

  AvroColumnReader.Params params = new Params(serializedTrevni);
  params.setSchema(evolved);
  AvroColumnReader<GenericRecord> columnReader = new AvroColumnReader<>(params);

  Assert.assertEquals(evolvedRecord, columnReader.next());
  Assert.assertFalse(columnReader.hasNext());
}
/** Deserializes ColumnFileMetaData from the buffer via MetaData.read. */
static ColumnFileMetaData read(InputBuffer in) throws IOException {
  ColumnFileMetaData metaData = new ColumnFileMetaData();
  MetaData.read(in, metaData);
  return metaData;
}
/**
 * Command-line entry point: writes {@code count} random records of the
 * schema in {@code schemaFile} to {@code outputFile} as a Trevni column file.
 *
 * @return 0 on success, 1 on a usage error
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  // Schema.parse(File) is deprecated; use Schema.Parser as the sibling tool does.
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count)) {
    writer.write(datum);
  }
  writer.writeTo(outputFile);
  return 0;
}
}
/** Builds file metadata with both codec and checksum set to "null". */
private ColumnFileMetaData createFileMeta() {
  final String nullImpl = "null";
  final ColumnFileMetaData fileMeta = new ColumnFileMetaData();
  return fileMeta.setCodec(nullImpl).setChecksum(nullImpl);
}
/**
 * Command-line entry point: writes {@code count} random records of the
 * schema in {@code schemaFile} to {@code outputFile} as a Trevni column file.
 *
 * @return 0 on success, 1 on a usage error
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  // Schema.parse(File) is deprecated; use Schema.Parser as the sibling tool does.
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count)) {
    writer.write(datum);
  }
  writer.writeTo(outputFile);
  return 0;
}
}
// NOTE(review): partial view — the leading ':' continues a ternary begun on
// a line outside this view; presumably AvroJob.getOutputSchema(job) is the
// fallback output schema — confirm against the full method.
// Below: scan job-config entries whose keys start with META_PREFIX; the
// if-body that consumes them lies past this view.
: AvroJob.getOutputSchema(job); final ColumnFileMetaData meta = new ColumnFileMetaData(); for (Map.Entry<String,String> e : job) if (e.getKey().startsWith(META_PREFIX))
private void checkWrite(Schema schema) throws IOException { AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(schema, new ColumnFileMetaData()); int count = 0; for (Object datum : new RandomData(schema, COUNT)) { //System.out.println("datum="+datum); writer.write(datum); } writer.writeTo(FILE); }
/** Assembles column-file metadata from the configured codec and checksum. */
private ColumnFileMetaData createFileMeta() {
  final ColumnFileMetaData result = new ColumnFileMetaData();
  return result.setCodec(codec).setChecksum(checksum);
}
// Build a Trevni column writer for the input's schema; the codec option
// value from the command line is recorded in the new file's metadata.
AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(reader.getSchema(), new ColumnFileMetaData() .setCodec(codec.value(opts)));
// Iterate the source records; the loop body continues beyond this view.
for (Object datum : reader)
private void runCase(File dir) throws Exception { Schema schema = Schema.parse(new File(dir, "input.avsc")); List<Object> data = fromJson(schema, new File(dir, "input.json")); // write full data AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(schema, new ColumnFileMetaData()); for (Object datum : data) writer.write(datum); writer.writeTo(FILE); // test that the full schema reads correctly checkRead(schema, data); // test that sub-schemas read correctly for (File f : dir.listFiles()) if (f.isDirectory()) { Schema s = Schema.parse(new File(f, "sub.avsc")); checkRead(s, fromJson(s, new File(f, "sub.json"))); } }