/**
 * Convenience overload of {@code getAcidState} keyed by a {@link ValidWriteIdList}.
 *
 * <p>Delegates to the five-argument overload, passing {@code false} for both of its
 * trailing boolean arguments.
 *
 * @param directory the directory whose ACID state is computed
 * @param conf the configuration handed through to the full overload
 * @param writeIdList the write-id list handed through to the full overload
 * @return whatever the five-argument overload returns
 * @throws IOException if the delegated call throws it
 */
@VisibleForTesting
public static Directory getAcidState(Path directory, Configuration conf,
    ValidWriteIdList writeIdList) throws IOException {
  // Both optional boolean switches of the full overload are turned off here.
  final Directory state = getAcidState(directory, conf, writeIdList, false, false);
  return state;
}
/**
 * Convenience overload of {@code getAcidState} keyed by a {@link ValidTxnList}.
 *
 * <p>Delegates to the five-argument overload, passing {@code false} for both of its
 * trailing boolean arguments.
 *
 * @param directory the directory whose ACID state is computed
 * @param conf the configuration handed through to the full overload
 * @param txnList the transaction list handed through to the full overload
 * @return whatever the five-argument overload returns
 * @throws IOException if the delegated call throws it
 */
@VisibleForTesting
public static Directory getAcidState(Path directory, Configuration conf,
    ValidTxnList txnList) throws IOException {
  // Both optional boolean switches of the full overload are turned off here.
  final Directory state = getAcidState(directory, conf, txnList, false, false);
  return state;
}
/**
 * Recursively deletes every entry that {@code AcidUtils.getAcidState} reports as obsolete
 * for {@code location}.
 *
 * <p>If nothing is obsolete a warning is logged and the method returns without touching
 * the file system. A delete call that reports failure (returns {@code false}) is logged
 * as a warning and the remaining paths are still processed.
 *
 * @param location the directory to clean, as a path string
 * @param txnList the transaction list used to compute the ACID state
 * @throws IOException if computing the ACID state, resolving the file system, or a
 *     delete call throws it
 */
private void removeFiles(String location, ValidTxnList txnList) throws IOException {
  AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(location), conf, txnList);
  List<FileStatus> obsoleteDirs = dir.getObsolete();
  List<Path> filesToDelete = new ArrayList<>(obsoleteDirs.size());
  for (FileStatus stat : obsoleteDirs) {
    filesToDelete.add(stat.getPath());
  }
  if (filesToDelete.isEmpty()) {
    LOG.warn("Hmm, nothing to delete in the cleaner for directory " + location
        + ", that hardly seems right.");
    return;
  }
  LOG.info("About to remove " + filesToDelete.size() + " obsolete directories from " + location);
  // All obsolete paths come from the same getAcidState call, so the first path is used
  // to resolve the file system for every delete.
  FileSystem fs = filesToDelete.get(0).getFileSystem(conf);
  for (Path dead : filesToDelete) {
    LOG.debug("Going to delete path " + dead.toString());
    // delete() signals failure through its return value; surface it instead of dropping it.
    if (!fs.delete(dead, true)) {
      LOG.warn("Failed to delete obsolete path " + dead.toString() + " under " + location);
    }
  }
}
public static List<Path> getValidDataPaths(Path dataPath, Configuration conf, String validWriteIdStr) throws IOException { List<Path> pathList = new ArrayList<>(); if ((validWriteIdStr == null) || validWriteIdStr.isEmpty()) { // If Non-Acid case, then all files would be in the base data path. So, just return it. pathList.add(dataPath); return pathList; } // If ACID/MM tables, then need to find the valid state wrt to given ValidWriteIdList. ValidWriteIdList validWriteIdList = new ValidReaderWriteIdList(validWriteIdStr); Directory acidInfo = AcidUtils.getAcidState(dataPath, conf, validWriteIdList); for (HdfsFileStatusWithId hfs : acidInfo.getOriginalFiles()) { pathList.add(hfs.getFileStatus().getPath()); } for (ParsedDelta delta : acidInfo.getCurrentDirectories()) { pathList.add(delta.getPath()); } if (acidInfo.getBaseDirectory() != null) { pathList.add(acidInfo.getBaseDirectory()); } return pathList; }
return null; Directory acidInfo = AcidUtils.getAcidState(dir, jc, idList);
/**
 * Asserts that the ACID state of {@code partitionPath} reports no obsolete entries,
 * no original files, and no current delta directories.
 *
 * @param partitionPath the partition directory to inspect
 * @throws Exception if computing the ACID state fails
 */
private void checkNothingWritten(Path partitionPath) throws Exception {
  final AcidUtils.Directory acidState =
      AcidUtils.getAcidState(partitionPath, conf, getTransactionContext(conf));

  final int obsoleteCount = acidState.getObsolete().size();
  Assert.assertEquals(0, obsoleteCount);

  final int originalCount = acidState.getOriginalFiles().size();
  Assert.assertEquals(0, originalCount);

  final List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  Assert.assertEquals(0, currentDeltas.size());
}
/**
 * Asserts that the ACID state of {@code partitionPath} reports no obsolete entries,
 * no original files, and no current delta directories.
 *
 * @param partitionPath the partition directory to inspect
 * @throws Exception if computing the ACID state fails
 */
private void checkNothingWritten(Path partitionPath) throws Exception {
  final AcidUtils.Directory acidState =
      AcidUtils.getAcidState(partitionPath, conf, getTransactionContext(conf));

  final int obsoleteCount = acidState.getObsolete().size();
  Assert.assertEquals(0, obsoleteCount);

  final int originalCount = acidState.getOriginalFiles().size();
  Assert.assertEquals(0, originalCount);

  final List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  Assert.assertEquals(0, currentDeltas.size());
}
/**
 * With deltas 1-1 and 2-5 on disk and a compactor write-id list of
 * {@code "tbl:4:" + Long.MAX_VALUE}, only {@code delta_1_1} is expected among the
 * current directories.
 */
@Test
public void deltasWithOpenTxnsNotInCompact() throws Exception {
  Configuration conf = new Configuration();
  MockFile firstDelta = new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]);
  MockFile secondDelta = new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]);
  MockFileSystem fs = new MockFileSystem(conf, firstDelta, secondDelta);
  Path part = new MockPath(fs, "mock:/tbl/part1");

  String serializedTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString();
  conf.set(ValidTxnList.VALID_TXNS_KEY, serializedTxns);

  ValidCompactorWriteIdList compactorWriteIds =
      new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE);
  AcidUtils.Directory acidState = AcidUtils.getAcidState(part, conf, compactorWriteIds);

  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  assertEquals(1, currentDeltas.size());
  assertEquals("mock:/tbl/part1/delta_1_1", currentDeltas.get(0).getPath().toString());
}
@Test public void deltasWithOpenTxnInRead() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(1).getPath().toString()); }
assert options.getMaxOffset() == Long.MAX_VALUE; AcidUtils.Directory directoryState = AcidUtils.getAcidState(mergerOptions.getRootPath(), conf, validWriteIdList, false, true);
@Test public void testMinorCompactedDeltaMakesInBetweenDelteDeltaObsolete() throws Exception { // This test checks that if we have a minor compacted delta for the txn range [40,60] // then it will make any delete delta in that range as obsolete. Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_40_60/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_50_50/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); List<FileStatus> obsolete = dir.getObsolete(); assertEquals(1, obsolete.size()); assertEquals("mock:/tbl/part1/delete_delta_50_50", obsolete.get(0).getPath().toString()); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(1, delts.size()); assertEquals("mock:/tbl/part1/delta_40_60", delts.get(0).getPath().toString()); }
@Test public void testObsoleteOriginals() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/base_10/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/base_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/000000_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/000001_1", 500, new byte[0])); Path part = new MockPath(fs, "/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:150:" + Long.MAX_VALUE + ":")); // Obsolete list should include the two original bucket files, and the old base dir List<FileStatus> obsolete = dir.getObsolete(); assertEquals(3, obsolete.size()); assertEquals("mock:/tbl/part1/base_5", obsolete.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/base_10", dir.getBaseDirectory().toString()); }
/**
 * Captures the ACID state of the target partition for later assertions.
 *
 * <p>Fetches the valid transactions from the metastore client and stores their serialized
 * form in {@code conf}, resolves the valid write ids for the table, computes the ACID
 * state of the partition location, asserts that it has no obsolete entries and no
 * original files, and records the smallest min write id and largest max write id seen
 * across the current deltas. Each delta path is printed to standard output.
 *
 * @param metaStoreClient client used to fetch valid transactions and write ids
 * @param conf configuration that receives the serialized transaction list
 * @param table the table under inspection
 * @param partition the partition values stored on this instance
 * @throws Exception if any metastore or file-system call fails
 */
StreamingAssert(IMetaStoreClient metaStoreClient, HiveConf conf, Table table,
    List<String> partition) throws Exception {
  this.metaStoreClient = metaStoreClient;
  this.table = table;
  this.partition = partition;

  validTxnList = metaStoreClient.getValidTxns();
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString());

  List<TableValidWriteIds> tableWriteIds = metaStoreClient.getValidWriteIds(
      Collections.singletonList(TableName.getDbTable(table.getDbName(), table.getTableName())),
      validTxnList.writeToString());
  writeIds = TxnCommonUtils.createValidReaderWriteIdList(tableWriteIds.get(0));

  // Must run after table/partition are assigned above.
  partitionLocation = getPartitionLocation();
  dir = AcidUtils.getAcidState(partitionLocation, conf, writeIds);
  assertEquals(0, dir.getObsolete().size());
  assertEquals(0, dir.getOriginalFiles().size());

  currentDeltas = dir.getCurrentDirectories();

  // Seed the range with sentinels so the first delta always replaces them.
  min = Long.MAX_VALUE;
  max = Long.MIN_VALUE;
  System.out.println("Files found: ");
  for (AcidUtils.ParsedDelta delta : currentDeltas) {
    System.out.println(delta.getPath().toString());
    max = Math.max(delta.getMaxWriteId(), max);
    min = Math.min(delta.getMinWriteId(), min);
  }
}
/**
 * With a base at 50 and several overlapping deltas on disk, expects {@code base_50} as
 * the base directory, {@code delta_052_55} and {@code delta_0060_60} as obsolete, and
 * four current deltas (40-60, 61, 62, 63) in that order.
 */
@Test
public void testOverlapingDelta() throws Exception {
  Configuration conf = new Configuration();
  MockFileSystem fs = new MockFileSystem(conf,
      new MockFile("mock:/tbl/part1/delta_0000063_63/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_000062_62/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_00061_61/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_40_60/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_0060_60/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_052_55/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/base_50/bucket_0", 500, new byte[0]));
  Path part = new MockPath(fs, "mock:/tbl/part1");

  String serializedTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString();
  conf.set(ValidTxnList.VALID_TXNS_KEY, serializedTxns);

  ValidReaderWriteIdList readerWriteIds =
      new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":");
  AcidUtils.Directory acidState = AcidUtils.getAcidState(part, conf, readerWriteIds);

  assertEquals("mock:/tbl/part1/base_50", acidState.getBaseDirectory().toString());

  List<FileStatus> obsoleteEntries = acidState.getObsolete();
  assertEquals(2, obsoleteEntries.size());
  assertEquals("mock:/tbl/part1/delta_052_55", obsoleteEntries.get(0).getPath().toString());
  assertEquals("mock:/tbl/part1/delta_0060_60", obsoleteEntries.get(1).getPath().toString());

  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  assertEquals(4, currentDeltas.size());
  assertEquals("mock:/tbl/part1/delta_40_60", currentDeltas.get(0).getPath().toString());
  assertEquals("mock:/tbl/part1/delta_00061_61", currentDeltas.get(1).getPath().toString());
  assertEquals("mock:/tbl/part1/delta_000062_62", currentDeltas.get(2).getPath().toString());
  assertEquals("mock:/tbl/part1/delta_0000063_63", currentDeltas.get(3).getPath().toString());
}
/**
 * With deltas 1-1, 2-5 (plus a side file for 2-5) and 6-10 on disk and a compactor
 * write-id list of {@code "tbl:3:" + Long.MAX_VALUE}, only {@code delta_1_1} is expected
 * among the current directories.
 */
@Test
public void deltasWithOpenTxnsNotInCompact2() throws Exception {
  Configuration conf = new Configuration();
  MockFileSystem fs = new MockFileSystem(conf,
      new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX,
          500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0]));
  Path part = new MockPath(fs, "mock:/tbl/part1");

  String serializedTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString();
  conf.set(ValidTxnList.VALID_TXNS_KEY, serializedTxns);

  ValidCompactorWriteIdList compactorWriteIds =
      new ValidCompactorWriteIdList("tbl:3:" + Long.MAX_VALUE);
  AcidUtils.Directory acidState = AcidUtils.getAcidState(part, conf, compactorWriteIds);

  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  assertEquals(1, currentDeltas.size());
  assertEquals("mock:/tbl/part1/delta_1_1", currentDeltas.get(0).getPath().toString());
}
new MockFile("mock:/tbl/part1/subdir/000000_0", 0, new byte[0])); AcidUtils.Directory dir = AcidUtils.getAcidState(new MockPath(fs, "/tbl/part1"), conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); assertEquals(null, dir.getBaseDirectory());
new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); assertEquals("mock:/tbl/part1/base_49", dir.getBaseDirectory().toString());
/** * @since 1.3.0 * @throws Exception */ @Test public void deltasWithOpenTxnInRead2() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_101_101_1/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(1).getPath().toString()); }
@Test public void deleteDeltasWithOpenTxnInRead() throws Exception { Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_3_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_101_101_1/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(3, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delete_delta_2_5", delts.get(1).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(2).getPath().toString()); // Note that delete_delta_3_3 should not be read, when a minor compacted // [delete_]delta_2_5 is present. }
@Test public void deltasAndDeleteDeltasWithOpenTxnsNotInCompact() throws Exception { // This tests checks that appropriate delta and delete_deltas are included when minor // compactions specifies a valid open txn range. Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_2/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX, 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_7_7/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE + ":")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delete_delta_2_2", delts.get(1).getPath().toString()); }