/**
 * Builds the metadata path of a dataset: the {@code METADATA_DIRECTORY}
 * child of the path computed by
 * {@code FileSystemDatasetRepository.pathForDataset(root, namespace, name)}.
 *
 * @param root the root Path handed to {@code pathForDataset}
 * @param namespace the namespace of the dataset
 * @param name the name of the dataset
 * @return the metadata Path for the dataset
 */
private static Path pathForMetadata(Path root, String namespace, String name) {
  Path datasetPath =
      FileSystemDatasetRepository.pathForDataset(root, namespace, name);
  return new Path(datasetPath, METADATA_DIRECTORY);
}
@Override @SuppressWarnings("deprecation") public DatasetRepository newRepo(MetadataProvider provider) { // this purposely does not set the Configuration to test that the code // relies on filesystem URIs set in the DatasetDescriptor. return new FileSystemDatasetRepository( conf, testDirectory, provider); }
/**
 * Deletes the named dataset by delegating to {@code deleteWithTrash} with
 * {@code false} as its last argument.
 *
 * @param namespace the namespace of the dataset
 * @param name the name of the dataset
 * @return whatever {@code deleteWithTrash} returns
 */
@Override
public boolean delete(String namespace, String name) {
  boolean deleted = deleteWithTrash(namespace, name, false);
  return deleted;
}
/**
 * Class-level fixture: creates the "ns"/"test" dataset with the
 * {@code MyRecord} schema in a directory obtained from
 * {@code Files.createTempDir()}, writes {@code totalRecords} records to it,
 * and then loads the same dataset typed as {@code GenericRecord} into
 * {@code readerDataset}.
 *
 * @throws IOException declared for the filesystem and writer operations
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(),
      MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  try {
    for (int i = 0; i < totalRecords; i++) {
      writer.write(new MyRecord(String.valueOf(i), i));
    }
  } finally {
    // always release the writer, even when a write fails part-way through
    writer.close();
  }
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
/**
 * Class-level fixture: creates the "ns"/"test" dataset in Parquet format with
 * {@code DatasetTestUtilities.OLD_VALUE_SCHEMA}, writes {@code totalRecords}
 * records whose "value" field runs from 0 upwards, updates the dataset's
 * descriptor to the schema of {@code Value.class}, and finally loads the
 * dataset typed as {@code GenericRecord} into {@code readerDataset}.
 *
 * @throws IOException declared for the filesystem and writer operations
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<GenericRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(DatasetTestUtilities.OLD_VALUE_SCHEMA)
          .format(Formats.PARQUET)
          .build(),
      GenericRecord.class);
  DatasetWriter<GenericRecord> writer = writerDataset.newWriter();
  try {
    // a single record instance is reused; only its "value" field changes
    GenericRecord record =
        new GenericData.Record(DatasetTestUtilities.OLD_VALUE_SCHEMA);
    for (long i = 0; i < totalRecords; i++) {
      record.put("value", Long.valueOf(i));
      writer.write(record);
    }
  } finally {
    // always release the writer, even when a write fails part-way through
    writer.close();
  }
  // replace the stored schema with the one derived from Value.class
  repo.update("ns", "test",
      new DatasetDescriptor.Builder(writerDataset.getDescriptor())
          .schema(Value.class)
          .build());
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
Path suggestedLocation = pathForDataset(namespace, name); .descriptor(newDescriptor) .type(type) .uri(new URIBuilder(getUri(), namespace, name).build()) .partitionKey(newDescriptor.isPartitioned() ? new PartitionKey() : null) .partitionListener(getPartitionListener()) .build();
/**
 * Per-test fixture: builds a repository rooted at a directory obtained from
 * {@code Files.createTempDir()}, creates the "ns"/"users" dataset partitioned
 * by a 2-bucket hash of "username", and opens a partitioned writer over a
 * view of that dataset.
 *
 * @throws IOException declared for the filesystem operations
 */
@Before
public void setUp() throws IOException {
  this.conf = new Configuration();
  this.fileSystem = FileSystem.get(conf);
  this.testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  this.repo = new FileSystemDatasetRepository(conf, testDirectory);

  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();
  DatasetDescriptor usersDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();

  FileSystemDataset<Object> users =
      (FileSystemDataset<Object>) repo.create("ns", "users", usersDescriptor);
  FileSystemView<Object> usersView =
      new FileSystemView<Object>(users, null, null, Object.class);
  writer = PartitionedDatasetWriter.newWriter(usersView);
}
/**
 * Loads the descriptor of the named dataset from the metadata provider and
 * wraps it in a {@link FileSystemDataset} typed as {@code E}.
 *
 * @param namespace the namespace of the dataset; must not be null
 * @param name the name of the dataset; must not be null
 * @param type the entity class handed to the dataset builder
 * @return the dataset built from the loaded descriptor
 */
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Loading dataset: {}", name);

  DatasetDescriptor descriptor = metadataProvider.load(namespace, name);

  // only partitioned datasets get a (fresh, empty) partition key
  PartitionKey rootKey = null;
  if (descriptor.isPartitioned()) {
    rootKey = new PartitionKey();
  }

  FileSystemDataset<E> dataset = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(rootKey)
      .partitionListener(getPartitionListener())
      .build();

  LOG.debug("Loaded dataset:{}", dataset);

  return dataset;
}
/**
 * Checks that an absolute local repository URI produces a repository whose
 * metadata provider is rooted at the matching {@code file:} path and whose
 * URI equals the one requested.
 */
@Test
public void testLocalAbsolute() throws URISyntaxException {
  URI repositoryUri = new URI("repo:file:///tmp/dsr-repo-test");
  DatasetRepository repository = DatasetRepositories.repositoryFor(repositoryUri);

  FileSystemDatasetRepository fsRepository =
      (FileSystemDatasetRepository) repository;
  FileSystemMetadataProvider provider =
      (FileSystemMetadataProvider) fsRepository.getMetadataProvider();

  Path expectedRoot = new Path("file:/tmp/dsr-repo-test");
  Assert.assertEquals("Root directory should be the correct qualified path",
      expectedRoot, provider.getRootDirectory());
  Assert.assertEquals("Repository URI", repositoryUri, repository.getUri());
}
@Override public boolean delete(String namespace, String name) { try { if (isManaged(namespace, name)) { // avoids calling fsRepository.delete, which deletes the data path return getMetadataProvider().delete(namespace, name); } return super.delete(namespace, name); } catch (DatasetNotFoundException e) { return false; } }
// Obtain the FileSystem for dataLocation from fsForPath, passing conf along.
FileSystem dataFS = fsForPath(dataLocation, conf);
/**
 * Class-level fixture: creates the "ns"/"test" dataset with the
 * {@code MyRecord} schema in a directory obtained from
 * {@code Files.createTempDir()}, writes {@code totalRecords} records to it,
 * and then loads the same dataset typed as {@code TestGenericRecord} into
 * {@code readerDataset}.
 *
 * @throws IOException declared for the filesystem and writer operations
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(),
      MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  try {
    for (int i = 0; i < totalRecords; i++) {
      writer.write(new MyRecord(String.valueOf(i), i));
    }
  } finally {
    // always release the writer, even when a write fails part-way through
    writer.close();
  }
  readerDataset = repo.load("ns", "test", TestGenericRecord.class);
}
/**
 * Per-test fixture: builds a repository rooted at a directory obtained from
 * {@code Files.createTempDir()} and backed by an
 * {@code EnusrePartitionPathDoesNotExistMetadataProvider}, creates the
 * "ns"/"users" dataset partitioned by a 2-bucket hash of "username", and
 * stores a view over that dataset in {@code view}.
 *
 * @throws IOException declared for the filesystem operations
 */
@Before
public void setUp() throws IOException {
  this.conf = new Configuration();
  this.fileSystem = FileSystem.get(conf);
  this.testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  this.repo = new FileSystemDatasetRepository(conf, testDirectory,
      new EnusrePartitionPathDoesNotExistMetadataProvider(conf, testDirectory));

  partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();
  DatasetDescriptor usersDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .partitionStrategy(partitionStrategy)
      .build();

  FileSystemDataset<Object> users =
      (FileSystemDataset<Object>) repo.create("ns", "users", usersDescriptor);
  view = new FileSystemView<Object>(users, null, null, Object.class);
}
@Override public <E> Dataset<E> update(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Dataset name cannot be null"); Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); DatasetDescriptor oldDescriptor = metadataProvider.load(namespace, name); // oldDescriptor is valid if load didn't throw NoSuchDatasetException Compatibility.checkUpdate(oldDescriptor, descriptor); DatasetDescriptor updatedDescriptor = metadataProvider.update(namespace, name, descriptor); LOG.debug("Updated dataset: {} schema: {} location: {}", new Object[] { name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation() }); return new FileSystemDataset.Builder<E>() .namespace(namespace) .name(name) .configuration(conf) .descriptor(updatedDescriptor) .type(type) .uri(new URIBuilder(getUri(), namespace, name).build()) .partitionKey(updatedDescriptor.isPartitioned() ? new PartitionKey() : null) .partitionListener(getPartitionListener()) .build(); }
@Test public void testLocalRelative() throws URISyntaxException { URI repositoryUri = new URI("repo:file:target/dsr-repo-test"); DatasetRepository repository = DatasetRepositories.repositoryFor(repositoryUri); // We only do the deeper implementation checks one per combination. Assert.assertNotNull("Received a repository", repository); Assert.assertTrue("Repo is a FileSystem repo", repository instanceof FileSystemDatasetRepository); MetadataProvider provider = ((FileSystemDatasetRepository) repository) .getMetadataProvider(); Assert.assertTrue("Repo is using a FileSystemMetadataProvider", provider instanceof FileSystemMetadataProvider); FileSystemMetadataProvider fsProvider = (FileSystemMetadataProvider) provider; Assert.assertTrue("FileSystem is a LocalFileSystem", fsProvider.getFileSytem() instanceof LocalFileSystem); Path expected = fsProvider.getFileSytem().makeQualified( new Path("target/dsr-repo-test")); Assert.assertEquals("Root directory should be the correct qualified path", expected, fsProvider.getRootDirectory()); Assert.assertEquals("Repository URI scheme", "repo", repository.getUri() .getScheme()); Assert.assertEquals("Repository URI scheme", expected.toUri(), new URI(repository.getUri().getSchemeSpecificPart())); }
@Override public boolean delete(String namespace, String name) { try { if (isManaged(namespace, name)) { // avoids calling fsRepository.delete, which deletes the data path return getMetadataProvider().delete(namespace, name); } return super.delete(namespace, name); } catch (DatasetNotFoundException e) { return false; } }
/**
 * Class-level fixture: creates the "ns"/"test" dataset with the
 * {@code StandardEvent} schema in a directory obtained from
 * {@code Files.createTempDir()}, writes {@code totalRecords} events built
 * from the loop counter, and then loads the same dataset typed as
 * {@code GenericData.Record} into {@code readerDataset}.
 *
 * @throws IOException declared for the filesystem and writer operations
 */
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<StandardEvent> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);
  DatasetWriter<StandardEvent> writer = writerDataset.newWriter();
  try {
    for (long i = 0; i < totalRecords; i++) {
      String text = String.valueOf(i);
      writer.write(new StandardEvent(text, text, i, text, text, i));
    }
  } finally {
    // always release the writer, even when a write fails part-way through
    writer.close();
  }
  readerDataset = repo.load("ns", "test", GenericData.Record.class);
}
/**
 * Computes the dataset path under {@code rootDirectory} with
 * {@code FileSystemDatasetRepository.pathForDataset} and qualifies it against
 * {@code rootFileSystem}.
 *
 * @param namespace the namespace of the dataset
 * @param name the name of the dataset
 * @return the qualified dataset path
 */
private Path expectedPathForDataset(String namespace, String name) {
  Path unqualified =
      FileSystemDatasetRepository.pathForDataset(rootDirectory, namespace, name);
  return rootFileSystem.makeQualified(unqualified);
}
/** * Build an instance of the configured {@link FileSystemDatasetRepository}. * * @since 0.9.0 */ public FileSystemDatasetRepository build() { if (configuration == null) { this.configuration = new Configuration(); } Preconditions.checkState(this.rootDirectory != null, "No root directory defined"); // the rootDirectory can have a scheme/authority that overrides if (fileSystem != null) { // if the FS doesn't match, this will throw IllegalArgumentException this.rootDirectory = fileSystem.makeQualified(rootDirectory); } return new FileSystemDatasetRepository(configuration, rootDirectory); } }
@Test public void testHdfsAbsolute() throws URISyntaxException { URI hdfsUri = getDFS().getUri(); URI repositoryUri = new URI("repo:hdfs://" + hdfsUri.getAuthority() + "/tmp/dsr-repo-test"); DatasetRepository repository = DatasetRepositories.repositoryFor(repositoryUri); // We only do the deeper implementation checks one per combination. Assert.assertNotNull("Received a repository", repository); Assert.assertTrue("Repo is a FileSystem repo", repository instanceof FileSystemDatasetRepository); MetadataProvider provider = ((FileSystemDatasetRepository) repository) .getMetadataProvider(); Assert.assertTrue("Repo is using a FileSystemMetadataProvider", provider instanceof FileSystemMetadataProvider); FileSystemMetadataProvider fsProvider = (FileSystemMetadataProvider) provider; Assert.assertTrue("FileSystem is a DistributedFileSystem", fsProvider.getFileSytem() instanceof DistributedFileSystem); Path expected = fsProvider.getFileSytem().makeQualified( new Path("/tmp/dsr-repo-test")); Assert.assertEquals("Root directory should be the correct qualified path", expected, fsProvider.getRootDirectory()); Assert.assertEquals("Repository URI", repositoryUri, repository.getUri()); }