@Override
protected DatasetWriter<T> createWriter() {
  // This writer only supports the Avro format; reject anything else up front.
  String formatName = getDatasetDefinition().getFormat().getName();
  if (!Formats.AVRO.getName().equals(formatName)) {
    throw new StoreException("Invalid format " + getDatasetDefinition().getFormat()
        + " specified, you must use 'avro' with " + this.getClass().getSimpleName() + ".");
  }
  // Resolve (or create) the backing dataset keyed by the entity class.
  Dataset<T> dataset = DatasetUtils.getOrCreateDataset(
      getDatasetRepositoryFactory(), getDatasetDefinition(), getEntityClass(), getEntityClass());
  return dataset.newWriter();
}
@Override
protected DatasetWriter<GenericRecord> createWriter() {
  // This writer only supports the Parquet format; reject anything else up front.
  String formatName = getDatasetDefinition().getFormat().getName();
  if (!Formats.PARQUET.getName().equals(formatName)) {
    throw new StoreException("Invalid format " + getDatasetDefinition().getFormat()
        + " specified, you must use 'parquet' with " + this.getClass().getSimpleName() + ".");
  }
  // Resolve (or create) the backing dataset and remember its schema for
  // building GenericRecord instances later.
  Dataset<GenericRecord> dataset = DatasetUtils.getOrCreateDataset(
      getDatasetRepositoryFactory(), getDatasetDefinition(), getEntityClass(), GenericRecord.class);
  schema = dataset.getDescriptor().getSchema();
  return dataset.newWriter();
}
/**
 * Seeds the output dataset with a single stats record.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if {@code write()} throws (the original closed it unconditionally after
 * the write, leaking the writer on failure).
 */
private void populateOutputDataset() {
  DatasetWriter<GenericData.Record> writer = outputDataset.newWriter();
  try {
    writer.write(newStatsRecord(4, "date"));
  } finally {
    writer.close();
  }
}
// Open a fresh writer on the staging dataset; presumably the enclosing
// method closes it later — TODO confirm against the surrounding code.
writer = staging.newWriter();
@Override public int run(String[] args) throws Exception { // Create a dataset of users with the Avro schema DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaUri("resource:user.avsc") .build(); Dataset<Record> users = Datasets.create("dataset:hive?dataset=users", descriptor, Record.class); // Get a writer for the dataset and write some users to it DatasetWriter<Record> writer = null; try { writer = users.newWriter(); Random rand = new Random(); GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema()); for (int i = 0; i < 100; i++) { Record record = builder.set("username", "user-" + i) .set("creationDate", System.currentTimeMillis()) .set("favoriteColor", colors[rand.nextInt(colors.length)]).build(); writer.write(record); } } finally { if (writer != null) { writer.close(); } } return 0; }
@Override public int run(String[] args) throws Exception { // Create a dataset of users with the Avro schema DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaUri("resource:user.avsc") .build(); Dataset<Record> users = Datasets.create( "dataset:hdfs:/tmp/data/users", descriptor, Record.class); // Get a writer for the dataset and write some users to it DatasetWriter<Record> writer = null; try { writer = users.newWriter(); Random rand = new Random(); GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema()); for (int i = 0; i < 100; i++) { Record record = builder.set("username", "user-" + i) .set("creationDate", System.currentTimeMillis()) .set("favoriteColor", colors[rand.nextInt(colors.length)]).build(); writer.write(record); } } finally { if (writer != null) { writer.close(); } } return 0; }
@Override public int run(String[] args) throws Exception { DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaUri("resource:user.avsc") .format(Formats.PARQUET) .build(); Dataset<Record> users = Datasets.create( "dataset:hdfs:/tmp/data/users", descriptor, Record.class); // Get a writer for the dataset and write some users to it DatasetWriter<Record> writer = null; try { writer = users.newWriter(); Random rand = new Random(); GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema()); for (int i = 0; i < 100; i++) { Record record = builder.set("username", "user-" + i) .set("creationDate", System.currentTimeMillis()) .set("favoriteColor", colors[rand.nextInt(colors.length)]).build(); writer.write(record); } } finally { if (writer != null) { writer.close(); } } return 0; }
/**
 * Creates a local filesystem repository, writes {@code totalRecords}
 * {@code MyRecord} instances into a fresh "ns/test" dataset, and loads a
 * reader view of it as {@code GenericRecord}.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if a write fails (the original closed it only after a fully successful
 * loop).
 *
 * @throws IOException if the temp directory or dataset cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(), MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  try {
    for (int i = 0; i < totalRecords; i++) {
      writer.write(new MyRecord(String.valueOf(i), i));
    }
  } finally {
    writer.close();
  }
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
/**
 * Creates a local filesystem repository, writes {@code totalRecords}
 * {@code StandardEvent} instances into a fresh "ns/test" dataset, and loads
 * a reader view of it as {@code GenericData.Record}.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if a write fails (the original closed it only after a fully successful
 * loop).
 *
 * @throws IOException if the temp directory or dataset cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<StandardEvent> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(), StandardEvent.class);
  DatasetWriter<StandardEvent> writer = writerDataset.newWriter();
  try {
    for (long i = 0; i < totalRecords; i++) {
      String text = String.valueOf(i);
      writer.write(new StandardEvent(text, text, i, text, text, i));
    }
  } finally {
    writer.close();
  }
  readerDataset = repo.load("ns", "test", GenericData.Record.class);
}
/**
 * Creates a local filesystem repository, writes {@code totalRecords}
 * {@code MyRecord} instances into a fresh "ns/test" dataset, and loads a
 * reader view of it as {@code TestGenericRecord}.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if a write fails (the original closed it only after a fully successful
 * loop).
 *
 * @throws IOException if the temp directory or dataset cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(), MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  try {
    for (int i = 0; i < totalRecords; i++) {
      writer.write(new MyRecord(String.valueOf(i), i));
    }
  } finally {
    writer.close();
  }
  readerDataset = repo.load("ns", "test", TestGenericRecord.class);
}
// Open a writer on the users dataset and prepare a record builder for its
// schema; the writer's lifecycle appears to be managed by the enclosing
// try/finally — TODO confirm from surrounding code.
writer = users.newWriter(); Random rand = new Random(); GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
/**
 * Creates a local filesystem repository, writes {@code totalRecords}
 * {@code StandardEvent} instances into a fresh Parquet-format "ns/test"
 * dataset, and loads a reader view of it as {@code GenericRecord}.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if a write fails (the original closed it only after a fully successful
 * loop).
 *
 * @throws IOException if the temp directory or dataset cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<StandardEvent> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .format(Formats.PARQUET)
          .build(), StandardEvent.class);
  DatasetWriter<StandardEvent> writer = writerDataset.newWriter();
  try {
    for (long i = 0; i < totalRecords; i++) {
      String text = String.valueOf(i);
      writer.write(new StandardEvent(text, text, i, text, text, i));
    }
  } finally {
    writer.close();
  }
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
@Override public int run(String[] args) throws Exception { // Create a dataset of products with the Avro schema DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(Product.class) .build(); Dataset<Product> products = Datasets.create( "dataset:hdfs:/tmp/data/products", descriptor, Product.class); // Get a writer for the dataset and write some products to it DatasetWriter<Product> writer = null; try { writer = products.newWriter(); int i = 0; for (String name : names) { Product product = new Product(); product.setName(name); product.setId(i++); writer.write(product); } } finally { if (writer != null) { writer.close(); } } return 0; }
/**
 * Creates a local filesystem repository, writes {@code totalRecords}
 * generic records (old value schema, Parquet format) into "ns/test", then
 * updates the dataset descriptor to the {@code Value} schema and loads a
 * reader view as {@code GenericRecord} — exercising schema evolution.
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if a write fails (the original closed it only after a fully successful
 * loop).
 *
 * @throws IOException if the temp directory or dataset cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<GenericRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(DatasetTestUtilities.OLD_VALUE_SCHEMA)
          .format(Formats.PARQUET)
          .build(), GenericRecord.class);
  DatasetWriter<GenericRecord> writer = writerDataset.newWriter();
  try {
    // The same record instance is reused; only its "value" field changes
    // per iteration (the writer serializes on each write call).
    GenericRecord record = new GenericData.Record(DatasetTestUtilities.OLD_VALUE_SCHEMA);
    for (long i = 0; i < totalRecords; i++) {
      record.put("value", Long.valueOf(i));
      writer.write(record);
    }
  } finally {
    writer.close();
  }
  repo.update("ns", "test",
      new DatasetDescriptor.Builder(writerDataset.getDescriptor())
          .schema(Value.class).build());
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
// Open a writer on the test dataset and write the September and October
// fixture events; presumably closed by the enclosing try/finally — TODO
// confirm from surrounding code.
writer = testDataset.newWriter(); writer.write(sepEvent); writer.write(octEvent);
// Open a writer on the hellos dataset; presumably closed later by the
// enclosing method — TODO confirm from surrounding code.
writer = hellos.newWriter();
/**
 * Writes three {@code StandardEvent}s to a specific dataset, then reloads
 * it projected as {@code ReflectSmallEvent} and verifies the projected
 * contents match.
 *
 * @throws IOException if writing to the dataset fails
 */
@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(), StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    for (StandardEvent event : new StandardEvent[] {sepEvent, octEvent, novEvent}) {
      writer.write(event);
    }
  } finally {
    // Swallow any close() exception so a write failure is not masked.
    Closeables.close(writer, false);
  }

  // Reload the same dataset with a narrower reflected projection.
  Dataset<ReflectSmallEvent> dataset =
      repo.load("ns", original.getName(), ReflectSmallEvent.class);
  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent),
      new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));
  assertContentEquals(expected, dataset);
}
@Test public void testRefineIdentity() throws Exception { PartitionStrategy strategy = new PartitionStrategy.Builder() .identity("user_id") .build(); DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaUri("resource:standard_event.avsc") .partitionStrategy(strategy) .build(); // Create a separate dataset to avoid conflicts with the above. Dataset<StandardEvent> identityDataset = repo.create( "ns", "test_identity", descriptor); DatasetWriter<StandardEvent> writer = null; try { writer = identityDataset.newWriter(); writer.write(sepEvent); writer.write(octEvent); writer.write(novEvent); } finally { Closeables.close(writer, false); } assertContentEquals(Sets.newHashSet(sepEvent, novEvent), identityDataset.with("user_id", 0L)); } }
/**
 * Seeds the input dataset with six string records (three "apple", two
 * "banana", one "carrot").
 *
 * <p>The writer is closed in a {@code finally} block so it is not leaked
 * if any {@code write()} throws (the original closed it only after all
 * writes succeeded).
 */
private void populateInputDataset() {
  DatasetWriter<GenericData.Record> writer = inputDataset.newWriter();
  try {
    writer.write(newStringRecord("apple"));
    writer.write(newStringRecord("banana"));
    writer.write(newStringRecord("banana"));
    writer.write(newStringRecord("carrot"));
    writer.write(newStringRecord("apple"));
    writer.write(newStringRecord("apple"));
  } finally {
    writer.close();
  }
}
/**
 * Writes three {@code ReflectStandardEvent}s to a dataset, then reloads it
 * as a {@code ReflectSmallEvent} view and verifies the projected contents.
 *
 * @throws IOException if writing to the dataset fails
 */
@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(), ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    for (StandardEvent event : new StandardEvent[] {sepEvent, octEvent, novEvent}) {
      writer.write(new ReflectStandardEvent(event));
    }
  } finally {
    // Swallow any close() exception so a write failure is not masked.
    Closeables.close(writer, false);
  }

  // Reload the same dataset with a narrower reflected projection.
  View<ReflectSmallEvent> dataset =
      repo.load("ns", original.getName(), ReflectSmallEvent.class);
  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent),
      new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));
  assertContentEquals(expected, dataset);
}