/**
 * Collects the {@link FileStatus} entries for {@code path}, either by glob expansion or by
 * listing, optionally filtered and optionally sorted.
 *
 * @param path     the path (or glob pattern) to resolve
 * @param pathType {@link PathType#GLOB} expands {@code path} through {@code globStatus};
 *                 any other value goes through {@code listStatus}
 * @param filter   filter applied to the results, or {@code null} for no filtering
 * @param ordering comparator used to sort the results in place, or {@code null} to keep the
 *                 order returned by the file system
 * @param conf     configuration used to obtain the {@link FileSystem} that owns {@code path}
 * @return the matching statuses; may be {@code null} when the underlying glob call yields
 *         {@code null}
 * @throws IOException if the file system cannot be obtained or queried
 */
public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  FileStatus[] statuses;
  if (pathType == PathType.GLOB) {
    statuses = filter == null ? fs.globStatus(path) : fs.globStatus(path, filter);
  } else {
    statuses = filter == null ? listStatus(fs, path) : listStatus(fs, path, filter);
  }
  // globStatus may hand back null (no match for a non-glob path); sorting null would throw
  // a NullPointerException, so only sort an actual array.
  if (statuses != null && ordering != null) {
    Arrays.sort(statuses, ordering);
  }
  return statuses;
}
/**
 * Collects the {@link FileStatus} entries for {@code path}, either by glob expansion or by
 * listing, optionally filtered and optionally sorted.
 *
 * @param path     the path (or glob pattern) to resolve
 * @param pathType {@link PathType#GLOB} expands {@code path} through {@code globStatus};
 *                 any other value goes through {@code listStatus}
 * @param filter   filter applied to the results, or {@code null} for no filtering
 * @param ordering comparator used to sort the results in place, or {@code null} to keep the
 *                 order returned by the file system
 * @param conf     configuration used to obtain the {@link FileSystem} that owns {@code path}
 * @return the matching statuses; may be {@code null} when the underlying glob call yields
 *         {@code null}
 * @throws IOException if the file system cannot be obtained or queried
 */
public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  FileStatus[] statuses;
  if (pathType == PathType.GLOB) {
    statuses = filter == null ? fs.globStatus(path) : fs.globStatus(path, filter);
  } else {
    statuses = filter == null ? listStatus(fs, path) : listStatus(fs, path, filter);
  }
  // globStatus may hand back null (no match for a non-glob path); sorting null would throw
  // a NullPointerException, so only sort an actual array.
  if (statuses != null && ordering != null) {
    Arrays.sort(statuses, ordering);
  }
  return statuses;
}
/**
 * Collects the {@link FileStatus} entries for {@code path}, either by glob expansion or by
 * listing, optionally filtered and optionally sorted.
 *
 * @param path     the path (or glob pattern) to resolve
 * @param pathType {@link PathType#GLOB} expands {@code path} through {@code globStatus};
 *                 any other value goes through {@code listStatus}
 * @param filter   filter applied to the results, or {@code null} for no filtering
 * @param ordering comparator used to sort the results in place, or {@code null} to keep the
 *                 order returned by the file system
 * @param conf     configuration used to obtain the {@link FileSystem} that owns {@code path}
 * @return the matching statuses; may be {@code null} when the underlying glob call yields
 *         {@code null}
 * @throws IOException if the file system cannot be obtained or queried
 */
public static FileStatus[] getFileStatus(Path path, PathType pathType, PathFilter filter, Comparator<FileStatus> ordering, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  FileStatus[] statuses;
  if (pathType == PathType.GLOB) {
    statuses = filter == null ? fs.globStatus(path) : fs.globStatus(path, filter);
  } else {
    statuses = filter == null ? listStatus(fs, path) : listStatus(fs, path, filter);
  }
  // globStatus may hand back null (no match for a non-glob path); sorting null would throw
  // a NullPointerException, so only sort an actual array.
  if (statuses != null && ordering != null) {
    Arrays.sort(statuses, ordering);
  }
  return statuses;
}
/**
 * Removes every entry that {@code HadoopUtil.listStatus} reports for {@code path}, passing
 * {@code true} as the recursive flag to each delete. {@code path} itself is not deleted here.
 *
 * @param conf configuration used to obtain the file system that owns {@code path}
 * @param path the location whose listed entries are deleted
 * @throws Exception if the file system cannot be obtained, listed, or modified
 */
private static void deleteContentsOfPath(Configuration conf, Path path) throws Exception {
  FileSystem fileSystem = path.getFileSystem(conf);
  for (FileStatus child : HadoopUtil.listStatus(fileSystem, path)) {
    Path childPath = child.getPath();
    fileSystem.delete(childPath, true);
  }
}
// For every entry listed under input (filtered by PathFilters.logsCRCFilter()), submit a
// StreamingKMeansThread for that entry's path to the pool and keep the submit() result in
// intermediateCentroidFutures.
// NOTE(review): the file system is taken from FileSystem.get(conf) rather than from input
// itself — confirm input always lives on the configured default file system.
for (FileStatus status : HadoopUtil.listStatus(FileSystem.get(conf), input, PathFilters.logsCRCFilter())) { intermediateCentroidFutures.add(pool.submit(new StreamingKMeansThread(status.getPath(), conf)));
// For every entry listed under input (filtered by PathFilters.logsCRCFilter()), submit a
// StreamingKMeansThread for that entry's path to the pool and keep the submit() result in
// intermediateCentroidFutures.
// NOTE(review): the file system is taken from FileSystem.get(conf) rather than from input
// itself — confirm input always lives on the configured default file system.
for (FileStatus status : HadoopUtil.listStatus(FileSystem.get(conf), input, PathFilters.logsCRCFilter())) { intermediateCentroidFutures.add(pool.submit(new StreamingKMeansThread(status.getPath(), conf)));
// For every entry listed under input (filtered by PathFilters.logsCRCFilter()), submit a
// StreamingKMeansThread for that entry's path to the pool and keep the submit() result in
// intermediateCentroidFutures.
// NOTE(review): the file system is taken from FileSystem.get(conf) rather than from input
// itself — confirm input always lives on the configured default file system.
for (FileStatus status : HadoopUtil.listStatus(FileSystem.get(conf), input, PathFilters.logsCRCFilter())) { intermediateCentroidFutures.add(pool.submit(new StreamingKMeansThread(status.getPath(), conf)));
/**
 * Runs {@code timesSquared} twice on the same matrix: once expecting the output temp path to
 * be empty afterwards, and once with {@code DistributedRowMatrix.KEEP_TEMP_FILES} set to
 * {@code true}, expecting exactly one retained entry that holds one input-vector file and one
 * output-vector file. Both runs must produce the same vector (within {@code EPSILON}).
 */
@Test
public void testTimesSquaredVectorTempDirDeletion() throws Exception {
  Configuration conf = getConfiguration();
  Vector ones = new RandomAccessSparseVector(50);
  ones.assign(1.0);
  DistributedRowMatrix matrix = randomDistributedMatrix(100, 90, 50, 20, 1.0, false);
  matrix.setConf(getConfiguration());

  Path tempRoot = matrix.getOutputTempPath();
  FileSystem fs = tempRoot.getFileSystem(conf);

  // Begin with an empty temp location.
  deleteContentsOfPath(conf, tempRoot);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  // First run: nothing should remain under the temp location afterwards.
  Vector cleanedRunResult = matrix.timesSquared(ones);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  deleteContentsOfPath(conf, tempRoot);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  // Second run with KEEP_TEMP_FILES enabled: exactly one entry should be retained.
  conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
  matrix.setConf(conf);
  Vector keptRunResult = matrix.timesSquared(ones);

  FileStatus[] retainedEntries = fs.listStatus(tempRoot);
  assertEquals(1, retainedEntries.length);

  Path retainedDir = retainedEntries[0].getPath();
  Path inputVectorPath = new Path(retainedDir, TimesSquaredJob.INPUT_VECTOR);
  Path outputVectorPath = new Path(retainedDir, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);

  FileStatus[] inputVectorFiles = fs.listStatus(inputVectorPath, PathFilters.logsCRCFilter());
  assertEquals(1, inputVectorFiles.length);
  FileStatus[] outputVectorFiles = fs.listStatus(outputVectorPath, PathFilters.logsCRCFilter());
  assertEquals(1, outputVectorFiles.length);

  // Keeping temp files must not change the numerical result.
  assertEquals(0.0, cleanedRunResult.getDistanceSquared(keptRunResult), EPSILON);
}
/**
 * Runs {@code times} twice on the same matrix: once expecting the output temp path to be
 * empty afterwards, and once with {@code DistributedRowMatrix.KEEP_TEMP_FILES} set to
 * {@code true}, expecting exactly one retained entry that holds one input-vector file and one
 * output-vector file. Both runs must produce the same vector (within {@code EPSILON}).
 */
@Test
public void testTimesVectorTempDirDeletion() throws Exception {
  Configuration conf = getConfiguration();
  Vector ones = new RandomAccessSparseVector(50);
  ones.assign(1.0);
  DistributedRowMatrix matrix = randomDistributedMatrix(100, 90, 50, 20, 1.0, false);
  matrix.setConf(conf);

  Path tempRoot = matrix.getOutputTempPath();
  FileSystem fs = tempRoot.getFileSystem(conf);

  // Begin with an empty temp location.
  deleteContentsOfPath(conf, tempRoot);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  // First run: nothing should remain under the temp location afterwards.
  Vector cleanedRunResult = matrix.times(ones);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  deleteContentsOfPath(conf, tempRoot);
  assertEquals(0, HadoopUtil.listStatus(fs, tempRoot).length);

  // Second run with KEEP_TEMP_FILES enabled: exactly one entry should be retained.
  conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
  matrix.setConf(conf);
  Vector keptRunResult = matrix.times(ones);

  FileStatus[] retainedEntries = fs.listStatus(tempRoot);
  assertEquals(1, retainedEntries.length);

  Path retainedDir = retainedEntries[0].getPath();
  Path inputVectorPath = new Path(retainedDir, TimesSquaredJob.INPUT_VECTOR);
  Path outputVectorPath = new Path(retainedDir, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);

  FileStatus[] inputVectorFiles = fs.listStatus(inputVectorPath, PathFilters.logsCRCFilter());
  assertEquals(1, inputVectorFiles.length);
  FileStatus[] outputVectorFiles = fs.listStatus(outputVectorPath, PathFilters.logsCRCFilter());
  assertEquals(1, outputVectorFiles.length);

  // Keeping temp files must not change the numerical result.
  assertEquals(0.0, cleanedRunResult.getDistanceSquared(keptRunResult), EPSILON);
}