/**
 * Builds a filter that is the logical negation of
 * {@code PathFilters.notHidden()}.
 *
 * @return a {@code PathFilter} accepting exactly those paths that the
 *         not-hidden filter rejects
 */
protected static PathFilter hidden() {
  final PathFilter visibleFilter = PathFilters.notHidden();
  return new PathFilter() {
    @Override
    public boolean accept(Path candidate) {
      // invert the decision of the wrapped filter
      boolean visible = visibleFilter.accept(candidate);
      return !visible;
    }
  };
}
}
/**
 * Checks whether the given {@code Path} has at least one immediate,
 * non-hidden child directory that {@code isDataset} recognizes as a
 * {@code Dataset}.
 *
 * @param dir a Path to check
 * @return {@code true} if a direct sub-directory is a dataset,
 *         {@code false} otherwise
 * @throws IOException if the underlying file system calls fail
 */
@SuppressWarnings("deprecation")
private boolean isNamespace(Path dir) throws IOException {
  FileStatus[] children =
      rootFileSystem.listStatus(dir, PathFilters.notHidden());
  for (FileStatus child : children) {
    if (!child.isDir()) {
      // plain files can never be datasets
      continue;
    }
    if (isDataset(child.getPath())) {
      return true;
    }
  }
  return false;
}
@SuppressWarnings("deprecation") @Override public Set<String> namespaces() { Set<String> namespaces = Sets.newHashSet(); try { FileStatus[] entries = rootFileSystem.listStatus(rootDirectory, PathFilters.notHidden()); for (FileStatus entry : entries) { if (entry.isDir()) { // may want to add a check: !RESERVED_NAMES.contains(name) if (isNamespace(entry.getPath())) { namespaces.add(entry.getPath().getName()); } else if (isDataset(entry.getPath())) { // add the default namespace for datasets with no namespace namespaces.add(DEFAULT_NAMESPACE); } } } } catch (FileNotFoundException ex) { // the repo hasn't created any files yet return namespaces; } catch (IOException ex) { throw new DatasetIOException("Could not list namespaces", ex); } return namespaces; }
stats = fs.listStatus(root, PathFilters.notHidden()); } catch (IOException ex) { throw new DatasetIOException("Cannot list files in " + root, ex); try { stats = fs.listStatus( new Path(root, key.getPath()), PathFilters.notHidden()); } catch (IOException ex) { throw new DatasetIOException("Cannot list files in " + key.getPath(), ex);
for (FileStatus stat : fs.listStatus(dir, PathFilters.notHidden())) { if (stat.isDir()) {
FileStatus[] directEntries = rootFileSystem.listStatus( rootDirectory, PathFilters.notHidden()); for (FileStatus entry : directEntries) { if (entry.isDir() && isDataset(entry.getPath())) { FileStatus[] entries = rootFileSystem.listStatus( new Path(rootDirectory, namespace), PathFilters.notHidden()); for (FileStatus entry : entries) { if (entry.isDir() && isDataset(entry.getPath())) {
private static <T> T visit(PathVisitor<T> visitor, FileSystem fs, Path path, List<Path> followedLinks) throws IOException { if (fs.isFile(path)) { return visitor.file(fs, path); } else if (IS_SYMLINK != null && IS_SYMLINK.<Boolean>invoke(fs.getFileStatus(path))) { Preconditions.checkArgument(!followedLinks.contains(path), "Encountered recursive path structure at link: " + path); followedLinks.add(path); // no need to remove return visit(visitor, fs, fs.getLinkTarget(path), followedLinks); } List<T> children = Lists.newArrayList(); FileStatus[] statuses = fs.listStatus(path, PathFilters.notHidden()); for (FileStatus stat : statuses) { children.add(visit(visitor, fs, stat.getPath())); } return visitor.directory(fs, path, children); }
/**
 * Initializes the writer and closes it without writing any record, then
 * asserts that the test directory holds neither hidden nor visible entries.
 */
@Test
public void testDiscardEmptyFiles() throws IOException {
  init(fsWriter);
  fsWriter.close();

  // nothing hidden may be left behind
  FileStatus[] hiddenEntries = fs.listStatus(testDirectory, hidden());
  Assert.assertEquals("Should not contain any hidden files",
      0, hiddenEntries.length);

  // and nothing visible may have been produced
  FileStatus[] visibleEntries =
      fs.listStatus(testDirectory, PathFilters.notHidden());
  Assert.assertEquals("Should not contain any visible files",
      0, visibleEntries.length);
}
FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain no visible files", 0, stats.length); stats = fs.listStatus(testDirectory); FileStatus[] stats = fs.listStatus(testDirectory, hidden()); Assert.assertEquals("Should contain no hidden files", 0, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible file", 1, stats.length); FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain no visible files", 0, stats.length); stats = fs.listStatus(testDirectory); FileStatus[] stats = fs.listStatus(testDirectory, hidden()); Assert.assertEquals("Should contain no hidden files", 0, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible file", 1, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible data file", 1, stats.length);
FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain no visible files", 0, stats.length); stats = fs.listStatus(testDirectory); FileStatus[] stats = fs.listStatus(testDirectory, hidden()); Assert.assertEquals("Should contain no hidden files", 0, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible file", 1, stats.length); rolledFilePath = stats[0].getPath(); FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a rolled file", 1, stats.length); 2, stats.length); } else { FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a new file and a rolled file", 2, stats.length); FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain two visible data files", 2, stats.length);
PathFilters.notHidden()); } catch (IOException e) { throw new DatasetIOException("Unable to list partition directory for directory " + directory, e);
@Test public void testDiscardErrorFiles() throws IOException { init(fsWriter); for (long i = 0; i < 10000; i += 1) { fsWriter.write(record(i, "test-" + i)); } // put the writer into an error state, simulating either: // 1. A failed record with an IOException or unknown RuntimeException // 2. A failed flush or sync for IncrementableWriters fsWriter.state = ReaderWriterState.ERROR; fsWriter.close(); FileStatus[] stats = fs.listStatus(testDirectory, hidden()); Assert.assertEquals("Should not contain any hidden files", 0, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should not contain any visible files", 0, stats.length); }
FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain no visible files", 0, stats.length); stats = fs.listStatus(testDirectory); FileStatus[] stats = fs.listStatus(testDirectory, hidden()); Assert.assertEquals("Should contain no hidden files", 0, stats.length); stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible file", 1, stats.length); rolledFilePath = stats[0].getPath(); FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a rolled file", 1, stats.length); 2, stats.length); } else { FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a new file and a rolled file", 2, stats.length); FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain two visible data files", 2, stats.length);
FileStatus[] stats = fs.listStatus(testDirectory, PathFilters.notHidden()); Assert.assertEquals("Should contain a visible data file", 1, stats.length);
/**
 * Creates a writer with {@code TEST_SCHEMA} and a separately built
 * "Message" writer schema, writes 1000 records built against
 * {@code TEST_SCHEMA}, and asserts that the schema read back from the first
 * visible file equals the writer schema.
 */
@Test
public void testWriteWithOldSchema() throws IOException {
  Schema writerSchema = SchemaBuilder.record("Message")
      .fields()
      .requiredLong("id")
      .requiredString("message")
      .endRecord();

  fsWriter = newWriter(testDirectory, TEST_SCHEMA, writerSchema);
  init(fsWriter);

  for (long id = 0; id < 1000; id++) {
    // a fresh builder per record, populated against TEST_SCHEMA
    fsWriter.write(new GenericRecordBuilder(TEST_SCHEMA)
        .set("id", id)
        .set("message", "test-" + id)
        .build());
  }

  fsWriter.close();

  final FileStatus[] visibleFiles =
      fs.listStatus(testDirectory, PathFilters.notHidden());
  Assert.assertEquals("Should match with writer schema",
      writerSchema,
      FileSystemUtil.schema("record", fs, visibleFiles[0].getPath()));
}