/**
 * Common method used for cleaning out parquet files under a partition path during rollback of a
 * set of commits.
 */
protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results,
    String partitionPath, PathFilter filter) throws IOException {
  logger.info("Cleaning path " + partitionPath);
  FileSystem fs = getMetaClient().getFs();
  FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
  for (FileStatus file : toBeDeleted) {
    boolean success = fs.delete(file.getPath(), false);
    results.put(file, success);
    logger.info("Delete file " + file.getPath() + "\t" + success);
  }
  return results;
}
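// Illustrative only: a minimal sketch of how a rollback caller might construct the PathFilter
// passed to deleteCleanedFiles above. The commitsToRollback collection and the
// FSUtils.getCommitTime(fileName) helper are assumptions for this sketch, not taken from this class.
PathFilter rollbackFilter = path -> {
  if (!path.getName().endsWith(".parquet")) {
    return false;
  }
  // Keep only data files written by one of the commits being rolled back (assumed helper).
  return commitsToRollback.contains(FSUtils.getCommitTime(path.getName()));
};
Map<FileStatus, Boolean> deletedFiles =
    deleteCleanedFiles(new HashMap<>(), partitionPath, rollbackFilter);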
/**
 * Performs cleaning of partition paths according to the cleaning policy and returns the clean
 * stats for the partitions cleaned. Handles skew in partitions to clean by making files to clean
 * the unit of task distribution.
 *
 * @throws IllegalArgumentException if an unknown cleaning policy is provided
 */
@Override
public List<HoodieCleanStat> clean(JavaSparkContext jsc) {
  try {
    FileSystem fs = getMetaClient().getFs();
    List<String> partitionsToClean = FSUtils.getAllPartitionPaths(fs, getMetaClient().getBasePath(),
        config.shouldAssumeDatePartitioning());
    logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config.getCleanerPolicy());
    if (partitionsToClean.isEmpty()) {
      logger.info("Nothing to clean here. It is already clean");
      return Collections.emptyList();
    }
    return cleanPartitionPaths(partitionsToClean, jsc);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to clean up after commit", e);
  }
}
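// Illustrative sketch of the skew handling described in the javadoc above: work is distributed
// per file rather than per partition, so a partition with many files does not turn into a single
// straggler task. The helpers filesToCleanUnderPartition and deleteFile are hypothetical stand-ins
// for the cleaner internals, and getCleanerParallelism() is assumed to be the relevant config knob.
private List<Boolean> cleanWithFileLevelParallelism(List<String> partitionsToClean,
    JavaSparkContext jsc) {
  // Flatten (partition -> files) into one (partition, file) pair per file to clean.
  List<Tuple2<String, String>> partitionFilePairs = partitionsToClean.stream()
      .flatMap(partition -> filesToCleanUnderPartition(partition).stream()
          .map(file -> new Tuple2<>(partition, file)))
      .collect(Collectors.toList());
  // Each Spark task now handles a bounded slice of files, independent of partition size.
  return jsc.parallelize(partitionFilePairs, config.getCleanerParallelism())
      .map(pair -> deleteFile(pair._1(), pair._2()))
      .collect();
}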
final FileSystem fs = getMetaClient().getFs();
final Path temporaryFolder = new Path(config.getBasePath(), HoodieTableMetaClient.TEMPFOLDER_NAME);
List<Tuple2<String, Boolean>> results = jsc
    .parallelize(fileStatusesList, config.getFinalizeWriteParallelism())
    .map(fileStatus -> {
      FileSystem fs1 = getMetaClient().getFs();
      boolean success = fs1.delete(fileStatus.getPath(), false);
      logger
    .getAllPartitionPaths(metaClient.getFs(), getMetaClient().getBasePath(),
        config.shouldAssumeDatePartitioning()))
    .map((Function<String, HoodieRollbackStat>) partitionPath -> {
    .map(status -> status.getStat())
    .foreach(writeStat -> {
      final FileSystem fs = getMetaClient().getFs();
      final Path finalPath = new Path(config.getBasePath(), writeStat.getPath());