private Long generateTestFileId(final FileStatus fs, List<HdfsFileStatusWithId> files, int i) {
  final Long fileId = HdfsUtils.createTestFileId(fs.getPath().toUri().getPath(), fs, false, null);
  files.set(i, new HdfsFileStatusWithId() {
    @Override
    public FileStatus getFileStatus() {
      return fs;
    }

    @Override
    public Long getFileId() {
      return fileId;
    }
  });
  return fileId;
}

public static Object getFileId(
    FileSystem fileSystem, Path path, boolean allowSynthetic) throws IOException {
  if (fileSystem instanceof DistributedFileSystem) {
    return SHIMS.getFileId(fileSystem, path.toUri().getPath());
  }
  if (!allowSynthetic) {
    LOG.warn("Cannot get unique file ID from "
        + fileSystem.getClass().getSimpleName() + "; returning null");
    return null;
  }
  FileStatus fs = fileSystem.getFileStatus(path);
  return new SyntheticFileId(path, fs.getLen(), fs.getModificationTime());
}

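// Hypothetical usage sketch (not one of the original snippets): exercises the getFileId(...)
// helper above on a local file system, where only a synthetic ID can be produced. It assumes
// the helper is exposed on a utility class, here called HdfsUtils, and that the probed path
// already exists.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileIdExample {
  public static void main(String[] args) throws Exception {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path existing = new Path("/tmp/example-file"); // placeholder; must already exist
    // LocalFileSystem is not a DistributedFileSystem, so the synthetic branch is taken:
    Object syntheticId = HdfsUtils.getFileId(localFs, existing, true);  // SyntheticFileId
    Object noId = HdfsUtils.getFileId(localFs, existing, false);        // null, warning logged
    System.out.println(syntheticId + " / " + noId);
  }
}
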
public void run() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path path = new Path(_segmentPath);
  FileStatus[] fileStatusArr = fs.globStatus(path);
  for (FileStatus fileStatus : fileStatusArr) {
    if (fileStatus.isDirectory()) {
      pushDir(fs, fileStatus.getPath());
    } else {
      pushOneTarFile(fs, fileStatus.getPath());
    }
  }
}

@Override
public long getBlockSize() {
  long blockSize = fileStatus.getBlockSize();
  if (blockSize > fileStatus.getLen()) {
    return fileStatus.getLen();
  }
  return blockSize;
}

protected FileStatus swizzleFileStatus(FileStatus orig, boolean isParam) {
  FileStatus ret = new FileStatus(orig.getLen(), orig.isDir(), orig.getReplication(),
      orig.getBlockSize(), orig.getModificationTime(), orig.getAccessTime(),
      orig.getPermission(), orig.getOwner(), orig.getGroup(),
      isParam ? swizzleParamPath(orig.getPath()) : swizzleReturnPath(orig.getPath()));
  return ret;
}

@Override
public UfsFileStatus getFileStatus(String path) throws IOException {
  Path tPath = new Path(path);
  FileSystem hdfs = getFs();
  FileStatus fs = hdfs.getFileStatus(tPath);
  String contentHash =
      UnderFileSystemUtils.approximateContentHash(fs.getLen(), fs.getModificationTime());
  return new UfsFileStatus(path, contentHash, fs.getLen(), fs.getModificationTime(),
      fs.getOwner(), fs.getGroup(), fs.getPermission().toShort());
}

@Test
public void testFromInstrumentedScheme() throws Exception {
  File tmpDir = Files.createTempDir();
  tmpDir.deleteOnExit();
  FileSystem fs = FileSystem.get(
      new URI(InstrumentedLocalFileSystem.SCHEME + ":///"), new Configuration());
  Assert.assertTrue(fs instanceof InstrumentedLocalFileSystem);
  Assert.assertTrue(DecoratorUtils.resolveUnderlyingObject(fs) instanceof LocalFileSystem);
  Assert.assertEquals(fs.getFileStatus(new Path("/tmp")).getPath(),
      new Path("instrumented-file:///tmp"));
  Assert.assertEquals(fs.getUri().getScheme(), "instrumented-file");
  Path basePath = new Path(tmpDir.getAbsolutePath());
  Assert.assertTrue(fs.exists(basePath));
  Path file = new Path(basePath, "file");
  Assert.assertFalse(fs.exists(file));
  fs.create(new Path(basePath, "file"));
  Assert.assertTrue(fs.exists(file));
  Assert.assertEquals(fs.getFileStatus(file).getLen(), 0);
  Assert.assertEquals(fs.listStatus(basePath).length, 1);
  fs.delete(file, false);
  Assert.assertFalse(fs.exists(file));
}

@Test
public void testFromConfigurationOverride() throws Exception {
  Configuration configuration = new Configuration();
  configuration.set("fs.file.impl", InstrumentedLocalFileSystem.class.getName());
  FileSystem fs = FileSystem.newInstance(new URI("file:///"), configuration);
  Assert.assertTrue(fs instanceof InstrumentedLocalFileSystem);
  Assert.assertTrue(DecoratorUtils.resolveUnderlyingObject(fs) instanceof LocalFileSystem);
  Assert.assertEquals(fs.getFileStatus(new Path("/tmp")).getPath(), new Path("file:///tmp"));
  Assert.assertEquals(fs.getUri().getScheme(), "file");
}

public static String[] list(String path) throws IOException {
  FileSystem fs = getFileSystem(path);
  Path fsPath = new Path(path);
  ArrayList<String> paths = new ArrayList<String>();
  FileStatus[] statuses = fs.listStatus(fsPath);
  if (statuses != null) {
    for (FileStatus status : statuses) {
      Path statusPath = status.getPath();
      if (path.startsWith("s3://") || path.startsWith("s3n://") || path.startsWith("s3a://")
          || path.startsWith("swift://") || path.startsWith("gs://")) {
        paths.add(statusPath.toUri().toString());
      } else {
        paths.add(statusPath.toUri().getPath());
      }
    }
  }
  return paths.toArray(new String[] {});
}

@Test
public void testGetAnyNonHiddenFile() throws IOException {
  final String file1 = "test1";
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  Path baseDir = new Path(FILE_UTILS_TEST_DIR, "anyFileDir");
  try {
    if (localFs.exists(baseDir)) {
      localFs.delete(baseDir, true);
    }
    localFs.mkdirs(baseDir);
    Path emptySubDir = new Path(baseDir, "emptySubDir");
    localFs.mkdirs(emptySubDir);
    Path hiddenDir = new Path(baseDir, "_hidden");
    localFs.mkdirs(hiddenDir);
    localFs.create(new Path(hiddenDir, file1));
    Path dataDir = new Path(baseDir, "dataDir");
    localFs.mkdirs(dataDir);
    File dataFile = new File(dataDir.toString(), file1);
    localFs.create(new Path(dataDir, file1));
    FileStatus file = FileListUtils.getAnyNonHiddenFile(localFs, baseDir);
    Assert.assertEquals(file.getPath().toString(), dataFile.toURI().toString());
  } finally {
    localFs.delete(baseDir, true);
  }
}

/**
 * Uploads a local file to HDFS.
 * This method is not thread safe.
 *
 * @param source URI of the local file to upload
 * @param conf Hive configuration used to resolve the destination file system
 * @return URI of the uploaded file in the HDFS session directory
 * @throws IOException if the copy fails
 */
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException {
  Path localFile = new Path(source.getPath());
  Path remoteFile = new Path(SessionState.get().getSparkSession().getHDFSSessionDir(),
      getFileName(source));
  FileSystem fileSystem = FileSystem.get(remoteFile.toUri(), conf);
  // Overwrite if the remote file already exists. Whether the file can be added
  // on the executor is up to Spark, i.e. spark.files.overwrite.
  fileSystem.copyFromLocalFile(false, true, localFile, remoteFile);
  Path fullPath = fileSystem.getFileStatus(remoteFile).getPath();
  return fullPath.toUri();
}

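// Hypothetical caller sketch (not one of the original snippets) for uploadToHDFS(...) above.
// It assumes an initialized Hive SessionState with an open Spark session, because the method
// resolves the remote directory via SessionState.get().getSparkSession(); the enclosing class
// name SparkUtilities is an assumption about where the helper lives, and the jar path is a
// placeholder.
import java.io.File;
import java.net.URI;
import org.apache.hadoop.hive.conf.HiveConf;

public class UploadExample {
  static URI shipJarToCluster(HiveConf conf) throws Exception {
    URI localJar = new File("/tmp/my-udf.jar").toURI();
    URI remoteJar = SparkUtilities.uploadToHDFS(localJar, conf);
    // remoteJar now points at the copy inside the HDFS session directory.
    return remoteJar;
  }
}
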
private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes)
    throws Exception {
  Path bPath = new Path(file);
  Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt");
  FileSystem fs = bPath.getFileSystem(conf);
  FileStatus fileStatus = fs.getFileStatus(bPath);
  int len = addRemoveBytes == Integer.MIN_VALUE ? 0 : (int) fileStatus.getLen() + addRemoveBytes;
  byte[] buffer = new byte[len];
  FSDataInputStream fdis = fs.open(bPath);
  fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length));
  fdis.close();
  FSDataOutputStream fdos = fs.create(cPath, true);
  fdos.write(buffer, 0, buffer.length);
  fdos.close();
  fs.delete(bPath, false);
  fs.rename(cPath, bPath);
}

private static class ThreadLocalHive extends ThreadLocal<Hive> {
  @Override
  protected Hive initialValue() {
    return null;
  }

  @Override
  public synchronized void set(Hive hiveObj) {
    Hive currentHive = this.get();
    if (currentHive != hiveObj) {
      // Remove/close the current thread-local Hive object before overwriting it with the new one.
      remove();
      super.set(hiveObj);
    }
  }

  @Override
  public synchronized void remove() {
    Hive currentHive = this.get();
    if (currentHive != null) {
      // Close the metastore connections before removing the object from the thread-local hiveDB.
      currentHive.close(false);
      super.remove();
    }
  }
}

/**
 * Sets the file-system-based source and destination datasets for this {@link CopyableFile}.
 *
 * @param originFs {@link FileSystem} where this {@link CopyableFile} originates
 * @param targetFs {@link FileSystem} where this {@link CopyableFile} is copied to
 */
public void setFsDatasets(FileSystem originFs, FileSystem targetFs) {
  /*
   * By default, the raw Gobblin dataset for CopyableFile lineage is the file's parent folder,
   * unless the file is itself a folder.
   */
  boolean isDir = origin.isDirectory();
  Path fullSourcePath = Path.getPathWithoutSchemeAndAuthority(origin.getPath());
  String sourceDatasetName = isDir ? fullSourcePath.toString()
      : fullSourcePath.getParent().toString();
  DatasetDescriptor sourceDataset = new DatasetDescriptor(originFs.getScheme(), sourceDatasetName);
  sourceDataset.addMetadata(DatasetConstants.FS_URI, originFs.getUri().toString());
  sourceData = sourceDataset;

  Path fullDestinationPath = Path.getPathWithoutSchemeAndAuthority(destination);
  String destinationDatasetName = isDir ? fullDestinationPath.toString()
      : fullDestinationPath.getParent().toString();
  DatasetDescriptor destinationDataset = new DatasetDescriptor(targetFs.getScheme(),
      destinationDatasetName);
  destinationDataset.addMetadata(DatasetConstants.FS_URI, targetFs.getUri().toString());
  destinationData = destinationDataset;
}

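// Hypothetical usage sketch (not one of the original snippets) for setFsDatasets(...) above.
// It assumes a Gobblin CopyableFile instance built elsewhere, plus concrete origin/target
// file systems; the HDFS URI is a placeholder and the CopyableFile import is omitted.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class LineageExample {
  static void tagLineage(CopyableFile copyableFile) throws Exception {
    FileSystem originFs = FileSystem.getLocal(new Configuration());
    FileSystem targetFs = FileSystem.get(new URI("hdfs://namenode:8020/"), new Configuration());
    // Populates the sourceData / destinationData descriptors used for lineage reporting.
    copyableFile.setFsDatasets(originFs, targetFs);
  }
}
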
@Override
public void commitJob(JobContext context) throws IOException {
  JobConf conf = ShimLoader.getHadoopShims().getJobConf(context);
  Path tmpLocation = new Path(conf.get(TMP_LOCATION)); // this contains base_xxx or delta_xxx_yyy
  Path finalLocation = new Path(conf.get(FINAL_LOCATION));
  FileSystem fs = tmpLocation.getFileSystem(conf);
  LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString());
  FileStatus[] contents = fs.listStatus(tmpLocation); // expect 1 base or delta dir in this list
  // We have MIN_TXN, MAX_TXN and IS_MAJOR in JobConf, so we could figure out exactly which
  // dir name we want to rename; leave it for another day.
  for (int i = 0; i < contents.length; i++) {
    Path newPath = new Path(finalLocation, contents[i].getPath().getName());
    fs.rename(contents[i].getPath(), newPath);
  }
  fs.delete(tmpLocation, true);
}

public void testProjectionNoreducer(String inputDirectory) throws Exception {
  JobConf job = new JobConf();
  long onel = 1;
  Schema readerSchema = Schema.create(Schema.Type.STRING);
  AvroJob.setInputSchema(job, readerSchema);
  Path inputPath = new Path(inputDirectory + "/myavro2-m-00000.avro");
  FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
  FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);
  AvroRecordReader<Utf8> recordReader = new AvroRecordReader<>(job, fileSplit);
  AvroWrapper<Utf8> inputPair = new AvroWrapper<>(null);
  NullWritable ignore = NullWritable.get();
  while (recordReader.next(inputPair, ignore)) {
    long testl = Long.parseLong(
        inputPair.datum().toString().split(":")[2].replace("}", "").trim());
    Assert.assertEquals(onel, testl);
  }
}

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> result = super.listStatus(job);
  Iterator<FileStatus> it = result.iterator();
  while (it.hasNext()) {
    FileStatus stat = it.next();
    if (!stat.isFile()
        || (stat.getLen() == 0 && !stat.getPath().toUri().getScheme().equals("nullscan"))) {
      it.remove();
    }
  }
  return result;
}

private static void split(final Configuration conf, final Path p) throws IOException {
  FileSystem fs = FSUtils.getWALFileSystem(conf);
  if (!fs.exists(p)) {
    throw new FileNotFoundException(p.toString());
  }
  if (!fs.getFileStatus(p).isDirectory()) {
    throw new IOException(p + " is not a directory");
  }
  final Path baseDir = FSUtils.getWALRootDir(conf);
  Path archiveDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
  if (conf.getBoolean(AbstractFSWALProvider.SEPARATE_OLDLOGDIR,
      AbstractFSWALProvider.DEFAULT_SEPARATE_OLDLOGDIR)) {
    archiveDir = new Path(archiveDir, p.getName());
  }
  WALSplitter.split(baseDir, p, archiveDir, fs, conf, WALFactory.getInstance(conf));
}

/**
 * Tests that {@link HdfsUtils#setFullFileStatus} does not throw an exception when setting
 * permissions fails and recursion is disabled.
 */
@Test
public void testSetFullFileStatusFailInheritPerms() throws IOException {
  Configuration conf = new Configuration();
  conf.set("dfs.namenode.acls.enabled", "false");
  HdfsUtils.HadoopFileStatus mockHadoopFileStatus = mock(HdfsUtils.HadoopFileStatus.class);
  FileStatus mockSourceStatus = mock(FileStatus.class);
  FileSystem mockFs = mock(FileSystem.class);
  when(mockSourceStatus.getPermission()).thenReturn(new FsPermission((short) 777));
  when(mockHadoopFileStatus.getFileStatus()).thenReturn(mockSourceStatus);
  doThrow(RuntimeException.class).when(mockFs)
      .setPermission(any(Path.class), any(FsPermission.class));
  HdfsUtils.setFullFileStatus(conf, mockHadoopFileStatus, null, mockFs, new Path("fakePath"),
      false);
  verify(mockFs).setPermission(any(Path.class), any(FsPermission.class));
}

/**
 * Tests that {@link HdfsUtils#setFullFileStatus} does not throw an exception when setting
 * ACLs fails and recursion is disabled.
 */
@Test
public void testSetFullFileStatusFailInheritAcls() throws IOException {
  Configuration conf = new Configuration();
  conf.set("dfs.namenode.acls.enabled", "true");
  HdfsUtils.HadoopFileStatus mockHadoopFileStatus = mock(HdfsUtils.HadoopFileStatus.class);
  FileStatus mockSourceStatus = mock(FileStatus.class);
  AclStatus mockAclStatus = mock(AclStatus.class);
  FileSystem mockFs = mock(FileSystem.class);
  when(mockSourceStatus.getPermission()).thenReturn(new FsPermission((short) 777));
  when(mockAclStatus.toString()).thenReturn("");
  when(mockHadoopFileStatus.getFileStatus()).thenReturn(mockSourceStatus);
  when(mockHadoopFileStatus.getAclEntries()).thenReturn(new ArrayList<>());
  when(mockHadoopFileStatus.getAclStatus()).thenReturn(mockAclStatus);
  doThrow(RuntimeException.class).when(mockFs).setAcl(any(Path.class), any(List.class));
  HdfsUtils.setFullFileStatus(conf, mockHadoopFileStatus, null, mockFs, new Path("fakePath"),
      false);
  verify(mockFs).setAcl(any(Path.class), any(List.class));
}