/**
 * Looks in the persist directory for a previously persisted copy of the input
 * {@link org.apache.gobblin.data.management.copy.CopyEntity}.
 *
 * <p>The entries of the directory named after the entity's guid (see {@code computeGuid}) are listed, and the
 * first entry accepted by {@code filter} is returned.</p>
 *
 * @param state {@link State} containing job information.
 * @param file {@link org.apache.gobblin.data.management.copy.CopyEntity} whose persisted copy is wanted.
 * @param filter {@link com.google.common.base.Predicate} applied to each listed {@link FileStatus}; the first
 *        accepted entry wins.
 * @return the first accepted {@link FileStatus}, or absent when no persist directory is configured, the persist
 *         directory does not exist, the guid directory does not exist, or no entry passes the filter.
 * @throws IOException if an underlying operation fails with an I/O error other than the guid directory being
 *         missing.
 */
public Optional<FileStatus> findPersistedFile(State state, CopyEntity file, Predicate<FileStatus> filter)
    throws IOException {
  // Nothing can be found unless a persist directory is both configured and present on the file system.
  if (!(this.persistDir.isPresent() && this.fs.exists(this.persistDir.get()))) {
    return Optional.absent();
  }

  Path guidDirectory = new Path(this.persistDir.get(), computeGuid(state, file));

  FileStatus[] candidates;
  try {
    candidates = this.fs.listStatus(guidDirectory);
  } catch (FileNotFoundException missingGuidDirectory) {
    // No directory for this guid means nothing was ever persisted for the entity.
    return Optional.absent();
  }

  for (int i = 0; i < candidates.length; i++) {
    FileStatus candidate = candidates[i];
    if (filter.apply(candidate)) {
      return Optional.of(candidate);
    }
  }
  return Optional.absent();
}
/**
 * Moves an already copied path into the persistent location managed by gobblin-distcp. Intended for the case
 * where a copied file could not be published: a later run can pick up the persisted file instead of copying
 * the data again.
 *
 * <p>The file is renamed into a directory named after the guid computed for {@code file}; on a successful
 * rename the modification time of the persisted file is set to the current time.</p>
 *
 * @param state {@link State} containing job information.
 * @param file {@link CopyableFile} from which the input {@link Path} originated.
 * @param path {@link Path} to persist.
 * @return true if the file was renamed into the persist location; false if no persist directory is configured
 *         or the rename did not succeed.
 * @throws IOException if an underlying operation fails with an I/O error.
 */
public boolean persistFile(State state, CopyableFile file, Path path) throws IOException {
  if (!this.persistDir.isPresent()) {
    return false;
  }

  String guid = computeGuid(state, file);
  Path guidPath = new Path(this.persistDir.get(), guid);

  if (!this.fs.exists(guidPath)) {
    // Owner: all, group: read, others: none.
    FsPermission guidDirPermission = new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE);
    this.fs.mkdirs(guidPath, guidDirPermission);
  }

  // NOTE(review): the 250 budget presumably keeps guid + file name under a path-component length limit - confirm.
  String persistedName = shortenPathName(file.getOrigin().getPath(), 250 - guid.length());
  Path targetPath = new Path(guidPath, persistedName);
  log.info(String.format("Persisting file %s with guid %s to location %s.", path, guid, targetPath));

  boolean renamed = this.fs.rename(path, targetPath);
  if (!renamed) {
    return false;
  }

  // Update only the modification time; -1 is passed for the access time.
  this.fs.setTimes(targetPath, System.currentTimeMillis(), -1);
  return true;
}
/**
 * Searches the persist directory for a {@link FileStatus} matching the input
 * {@link org.apache.gobblin.data.management.copy.CopyEntity}.
 *
 * @param state {@link State} containing job information.
 * @param file {@link org.apache.gobblin.data.management.copy.CopyEntity} for which a persisted file should be
 *        found; its guid (see {@code computeGuid}) names the sub-directory that is listed.
 * @param filter {@link com.google.common.base.Predicate} used to select among the listed entries.
 * @return Optionally, the first listed {@link FileStatus} accepted by {@code filter}. Absent if the persist
 *         directory is not configured or does not exist, if the guid sub-directory does not exist, or if the
 *         filter rejects every entry.
 * @throws IOException if an underlying operation fails with an I/O error other than the guid sub-directory
 *         being missing.
 */
public Optional<FileStatus> findPersistedFile(State state, CopyEntity file, Predicate<FileStatus> filter)
    throws IOException {
  if (!this.persistDir.isPresent()) {
    return Optional.absent();
  }
  Path persistRoot = this.persistDir.get();
  if (!this.fs.exists(persistRoot)) {
    return Optional.absent();
  }

  Path guidPath = new Path(persistRoot, computeGuid(state, file));

  FileStatus[] listing;
  try {
    listing = this.fs.listStatus(guidPath);
  } catch (FileNotFoundException notPersisted) {
    // The guid sub-directory is absent, so there is no persisted copy to return.
    return Optional.absent();
  }

  Optional<FileStatus> match = Optional.absent();
  for (FileStatus entry : listing) {
    if (filter.apply(entry)) {
      // First accepted entry wins; stop scanning.
      match = Optional.of(entry);
      break;
    }
  }
  return match;
}
/**
 * Persists a copied path by renaming it into a location managed by gobblin-distcp. This is used when an already
 * copied file cannot be successfully published, so that future runs can use the persisted file rather than
 * re-copying it.
 *
 * @param state {@link State} containing job information.
 * @param file {@link CopyableFile} from which the input {@link Path} originated; its guid names the target
 *        directory and its origin path supplies the (shortened) target file name.
 * @param path {@link Path} to persist.
 * @return true if the rename into the persist location succeeded; false if no persist directory is configured
 *         or the rename reported failure.
 * @throws IOException if an underlying operation fails with an I/O error.
 */
public boolean persistFile(State state, CopyableFile file, Path path) throws IOException {
  boolean persisted = false;

  if (this.persistDir.isPresent()) {
    String guid = computeGuid(state, file);
    Path guidPath = new Path(this.persistDir.get(), guid);

    boolean guidDirExists = this.fs.exists(guidPath);
    if (!guidDirExists) {
      // Create the per-guid directory with permissions user=ALL, group=READ, other=NONE.
      this.fs.mkdirs(guidPath, new FsPermission(FsAction.ALL, FsAction.READ, FsAction.NONE));
    }

    // NOTE(review): 250 looks like a length budget shared between the guid and the file name - confirm.
    int nameBudget = 250 - guid.length();
    Path targetPath = new Path(guidPath, shortenPathName(file.getOrigin().getPath(), nameBudget));
    log.info(String.format("Persisting file %s with guid %s to location %s.", path, guid, targetPath));

    if (this.fs.rename(path, targetPath)) {
      // Stamp the persisted file with the current time as modification time; access time argument is -1.
      long now = System.currentTimeMillis();
      this.fs.setTimes(targetPath, now, -1);
      persisted = true;
    }
  }

  return persisted;
}