/**
 * Tool entry point: generates random datum instances for an Avro schema and
 * writes them to a Trevni column file.
 *
 * <p>Expects exactly three arguments: the schema file, the number of records
 * to generate, and the output file. Prints a usage message to {@code err} and
 * returns 1 on bad arguments; returns 0 on success.
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaSource = new File(args.get(0));
  int rowCount = Integer.parseInt(args.get(1));
  File target = new File(args.get(2));

  Schema schema = new Schema.Parser().parse(schemaSource);
  AvroColumnWriter<Object> columnWriter = new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, rowCount)) {
    columnWriter.write(datum);
  }
  // Trevni buffers in memory and writes the whole file at the end.
  columnWriter.writeTo(target);
  return 0;
}
}
for (Schema.Field field : schema.getFields()) { Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d+1) : GenericData.get().getDefaultValue(field); record.put(field.name(), value); new GenericData.Array(length<=0?0:length, schema); for (int i = 0; i < length; i++) array.add(generate(schema.getElementType(), random, d+1)); return array; case MAP: Map<Object,Object> map = new HashMap<>(length <= 0 ? 0 : length); for (int i = 0; i < length; i++) { map.put(randomString(random, 40), generate(schema.getValueType(), random, d+1)); return generate(types.get(random.nextInt(types.size())), random, d); case FIXED: byte[] bytes = new byte[schema.getFixedSize()]; random.nextBytes(bytes); return new GenericData.Fixed(schema, bytes); case STRING: return randomString(random, 40); case BYTES: return randomBytes(random, 40); case INT: return random.nextInt(); case LONG: return random.nextLong();
/**
 * Standalone generator: writes random records of the given schema to an Avro
 * data file, optionally compressed with the named codec.
 *
 * <p>Usage: {@code RandomData <schemafile> <outputfile> <count> [codec]} —
 * exits with status -1 when the argument count is wrong.
 */
public static void main(String[] args) throws Exception {
  if (args.length < 3 || args.length > 4) {
    System.out.println("Usage: RandomData <schemafile> <outputfile> <count> [codec]");
    System.exit(-1);
  }
  Schema schema = new Schema.Parser().parse(new File(args[0]));
  String codecName = args.length >= 4 ? args[3] : "null"; // "null" codec == no compression
  DataFileWriter<Object> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>());
  fileWriter.setCodec(CodecFactory.fromString(codecName));
  fileWriter.create(schema, new File(args[1]));
  try {
    for (Object datum : new RandomData(schema, Integer.parseInt(args[2]))) {
      fileWriter.append(datum);
    }
  } finally {
    fileWriter.close();
  }
}
}
/**
 * Runs the tool with the standard output/count/schema/seed arguments plus any
 * extras, then re-reads the produced file and checks every record against a
 * {@link RandomData} stream built from the same seed.
 */
private void check(String... extraArgs) throws Exception {
  ArrayList<String> args = new ArrayList<>();
  args.addAll(Arrays.asList(
      OUT_FILE.toString(),
      "--count", COUNT,
      "--schema-file", SCHEMA_FILE.toString(),
      "--seed", Long.toString(SEED)));
  args.addAll(Arrays.asList(extraArgs));
  run(args);

  // Same seed => the generator reproduces exactly what the tool wrote.
  DataFileReader<Object> reader = new DataFileReader<>(OUT_FILE, new GenericDatumReader<>());
  Iterator<Object> actual = reader.iterator();
  for (Object expected : new RandomData(schemaParser.parse(SCHEMA_FILE), Integer.parseInt(COUNT), SEED)) {
    assertEquals(expected, actual.next());
  }
  reader.close();
}
/**
 * Verifies that passing "-" as the output argument streams the data file to
 * stdout: captures the bytes, then re-reads them and compares each record to
 * a {@link RandomData} stream built from the same seed.
 */
@Test
public void testStdOut() throws Exception {
  TestUtil.resetRandomSeed();
  run(Arrays.asList(
      "-",
      "--count", COUNT,
      "--schema-file", SCHEMA_FILE.toString(),
      "--seed", Long.toString(SEED)));

  byte[] captured = out.toByteArray();
  DataFileStream<Object> reader =
      new DataFileStream<>(new ByteArrayInputStream(captured), new GenericDatumReader<>());
  Iterator<Object> actual = reader.iterator();
  for (Object expected : new RandomData(schemaParser.parse(SCHEMA_FILE), Integer.parseInt(COUNT), SEED)) {
    assertEquals(expected, actual.next());
  }
  reader.close();
}
}
for (Object datum : new RandomData(sch, numObjects)) { dout.write(datum, bufOut); blockCount++;
@BeforeClass public static void generateData() throws IOException { seed = (int)System.currentTimeMillis(); // note some tests (testSkipping) rely on this explicitly String jsonSchema = "{\"type\": \"record\", \"name\": \"Test\", \"fields\": [" +"{\"name\":\"intField\", \"type\":\"int\"}," +"{\"name\":\"bytesField\", \"type\":\"bytes\"}," +"{\"name\":\"booleanField\", \"type\":\"boolean\"}," +"{\"name\":\"stringField\", \"type\":\"string\"}," +"{\"name\":\"floatField\", \"type\":\"float\"}," +"{\"name\":\"doubleField\", \"type\":\"double\"}," +"{\"name\":\"arrayField\", \"type\": " + "{\"type\":\"array\", \"items\":\"boolean\"}}," +"{\"name\":\"longField\", \"type\":\"long\"}]}"; schema = Schema.parse(jsonSchema); GenericDatumWriter<Object> writer = new GenericDatumWriter<>(); writer.setSchema(schema); ByteArrayOutputStream baos = new ByteArrayOutputStream(8192); BinaryEncoder encoder = e_factory.binaryEncoder(baos, null); for (Object datum : new RandomData(schema, count, seed)) { writer.write(datum, encoder); records.add(datum); } encoder.flush(); data = baos.toByteArray(); }
/**
 * Round-trip test: writes random records to an Avro data file, converts it to
 * a Trevni file via {@code run}, then reads the Trevni file back and checks
 * every record against a {@link RandomData} stream with the same seed.
 */
@Test
public void test() throws Exception {
  Schema avroSchema = new Schema.Parser().parse(SCHEMA_FILE);

  // Write the Avro input file.
  DataFileWriter<Object> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>());
  fileWriter.create(avroSchema, Util.createFromFS(AVRO_FILE.toString()));
  for (Object datum : new RandomData(avroSchema, COUNT, SEED)) {
    fileWriter.append(datum);
  }
  fileWriter.close();

  // Convert Avro -> Trevni.
  run(AVRO_FILE.toString(), TREVNI_FILE.toString());

  // Same seed reproduces the expected records in order.
  AvroColumnReader<Object> columnReader =
      new AvroColumnReader<>(new AvroColumnReader.Params(TREVNI_FILE));
  Iterator<Object> actual = columnReader.iterator();
  for (Object expected : new RandomData(avroSchema, COUNT, SEED)) {
    assertEquals(expected, actual.next());
  }
  columnReader.close();
}
.appendTo(file); try { for (Object datum : new RandomData(SCHEMA, COUNT, SEED+1)) { writer.append(datum); Object datum = null; if (VALIDATE) { for (Object expected : new RandomData(SCHEMA, COUNT, SEED+1)) { datum = reader.next(datum); assertEquals(expected, datum);
private void checkWrite(Schema schema) throws IOException { AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); int count = 0; for (Object datum : new RandomData(schema, COUNT, SEED)) { //System.out.println("datum="+datum); writer.write(datum); } writer.writeTo(FILE); }
/**
 * Reads the generated data file with the generic datum reader. When
 * {@code VALIDATE} is set, each record is compared against a
 * {@link RandomData} stream rebuilt from the same seed; otherwise the records
 * are merely decoded to exercise the reader.
 *
 * @throws IOException if reading the data file fails
 */
public void testGenericRead() throws IOException {
  // try-with-resources replaces the manual try/finally close.
  try (DataFileReader<Object> reader = new DataFileReader<>(makeFile(), new GenericDatumReader<>())) {
    Object datum = null; // reused across next() calls to avoid reallocation
    if (VALIDATE) {
      for (Object expected : new RandomData(SCHEMA, COUNT, SEED)) {
        datum = reader.next(datum);
        assertEquals(expected, datum);
      }
    } else {
      for (int i = 0; i < COUNT; i++) {
        datum = reader.next(datum);
      }
    }
  }
}
RandomData rd = seed == null ? new RandomData(schema, countValue) : new RandomData(schema, countValue, seed); for (Object datum : rd) writer.append(datum);
int flushCounter = 0; try { for (Object datum : new RandomData(SCHEMA, COUNT, SEED+1)) { currentCount++; writer.append(datum);
checkProp(schema); Object reuse = null; for (Object datum : new RandomData(schema, COUNT, true)) {
// Verifies the writer surfaces a failure from the underlying stream:
// FailingOutputStream(100000) presumably starts failing after ~100000 bytes
// (TODO confirm against FailingOutputStream) so some append()/flush() below
// must raise IOException before all 100000 records are written.
@Test public void testNoWritingAfterException() throws IOException {
  DataFileWriter<Object> writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  try {
    writer.create(SCHEMA, new FailingOutputStream(100000));
    int recordCnt = 0;
    for (Object datum : new RandomData(SCHEMA, 100000, 42)) {
      writer.append(datum);
      // Periodic flush pushes buffered bytes into the failing stream.
      if (++recordCnt % 17 == 0) {
        writer.flush();
      }
    }
  } catch (IOException e) {
    // Expected path: the stream failure propagated as IOException.
    return;
  } finally {
    // Runs on both paths; NOTE(review): close() may itself throw once the
    // stream is broken, which would replace the `return` above — confirm
    // FailingOutputStream tolerates close().
    writer.close();
  }
  fail("IOException should have been thrown");
}
for (Object datum : new RandomData(SCHEMA, COUNT, SEED + 1)) { currentCount++; writer.append(datum);
/**
 * Reads {@code FILE} back through an {@link AvroColumnReader} with the given
 * read schema and asserts every record matches a {@link RandomData} stream
 * regenerated from the shared {@code SEED}.
 *
 * @param schema the read schema to project the file through
 * @throws IOException if opening or reading the column file fails
 */
private void checkRead(Schema schema) throws IOException {
  AvroColumnReader<Object> reader =
      new AvroColumnReader<>(new AvroColumnReader.Params(FILE).setSchema(schema));
  try {
    for (Object expected : new RandomData(schema, COUNT, SEED)) {
      assertEquals(expected, reader.next());
    }
  } finally {
    reader.close(); // fix: previously leaked the reader when an assertion failed
  }
}
try { int count = 0; for (Object datum : new RandomData(SCHEMA, COUNT, SEED)) { writer.append(datum); if (++count%(COUNT/3) == 0)
for (int k = 0; k < 5; k++) { int syncInterval = 460 +k; RandomData data1 = new RandomData(SCHEMA, COUNT, SEED); RandomData data2 = new RandomData(SCHEMA, COUNT, SEED+1); File file1 = makeFile((codec == null ? "null" : codec.toString()) + "-A"); File file2 = makeFile((codec2 == null ? "null" : codec2.toString()) + "-B");