@Override public FileStatus[] listStatus(Path path) throws IOException { statistics.incrementReadOps(1); System.out.println("STATS: listStatus - " + path); checkAccess(); path = path.makeQualified(this); List<FileStatus> result = new ArrayList<FileStatus>(); String pathname = path.toString(); String pathnameAsDir = pathname + "/"; Set<String> dirs = new TreeSet<String>(); MockFile file = findFile(path); if (file != null) { return new FileStatus[]{createStatus(file)}; } findMatchingFiles(files, pathnameAsDir, dirs, result); findMatchingFiles(globalFiles, pathnameAsDir, dirs, result); // for each directory add it once for(String dir: dirs) { result.add(createDirectory(new MockPath(this, pathnameAsDir + dir))); } return result.toArray(new FileStatus[result.size()]); }
private List<LocatedFileStatus> listLocatedFileStatuses(Path path) throws IOException { statistics.incrementReadOps(1); System.out.println("STATS: listLocatedFileStatuses - " + path); checkAccess(); path = path.makeQualified(this); List<LocatedFileStatus> result = new ArrayList<>(); String pathname = path.toString(); String pathnameAsDir = pathname + "/"; Set<String> dirs = new TreeSet<String>(); MockFile file = findFile(path); if (file != null) { result.add(createLocatedStatus(file)); return result; } findMatchingLocatedFiles(files, pathnameAsDir, dirs, result); findMatchingLocatedFiles(globalFiles, pathnameAsDir, dirs, result); // for each directory add it once for(String dir: dirs) { result.add(createLocatedDirectory(new MockPath(this, pathnameAsDir + dir))); } return result; }
@Test public void deltasWithOpenTxnInRead() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(1).getPath().toString()); }
assertEquals(123, opts.getBucketId()); opts = AcidUtils.parseBaseOrDeltaBucketFilename( new MockPath(fs, dir + "/delta_000005_000006/bucket_00001"), conf); assertEquals(false, opts.getOldStyle()); assertEquals(false, opts.isWritingBase()); assertEquals(1, opts.getBucketId()); opts = AcidUtils.parseBaseOrDeltaBucketFilename( new MockPath(fs, dir + "/delete_delta_000005_000006/bucket_00001"), conf); assertEquals(false, opts.getOldStyle()); assertEquals(false, opts.isWritingBase());
new MockFile("mock:/tbl/part1/delta_058_58/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/base_50/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); assertEquals("mock:/tbl/part1/base_49", dir.getBaseDirectory().toString());
new MockFile("mock:/tbl/part1/delete_delta_052_55/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/base_50/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(new MockPath(fs, "mock:/tbl/part1"), conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); assertEquals("mock:/tbl/part1/base_49", dir.getBaseDirectory().toString());
new MockFile("mock:/tbl/part1/subdir/000000_0", 0, new byte[0])); AcidUtils.Directory dir = AcidUtils.getAcidState(new MockPath(fs, "/tbl/part1"), conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); assertEquals(null, dir.getBaseDirectory());
/**
 * Runs the file generator over a mock directory {@code mock:/a/b} twice and checks the
 * single split strategy produced each time.
 *
 * <p>First pass: files are created with one-byte contents and the strategy is expected to
 * be a {@code BISplitStrategy}. Second pass: the max split size is set to 500, the files
 * are recreated with 1000-byte contents, and an {@code ETLSplitStrategy} is expected.
 */
@Test
public void testFileGenerator() throws Exception {
  // Pass 1: one-byte file contents.
  OrcInputFormat.Context biContext = new OrcInputFormat.Context(conf);
  MockFileSystem biFs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1]));
  OrcInputFormat.FileGenerator biGenerator =
      new OrcInputFormat.FileGenerator(biContext, biFs,
          new MockPath(biFs, "mock:/a/b"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> biStrategies =
      createSplitStrategies(biContext, biGenerator);
  assertEquals(1, biStrategies.size());
  boolean pickedBi = biStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy;
  assertEquals(true, pickedBi);

  // Pass 2: cap the split size and use 1000-byte file contents.
  conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
  OrcInputFormat.Context etlContext = new OrcInputFormat.Context(conf);
  MockFileSystem etlFs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1000]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1000]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
  OrcInputFormat.FileGenerator etlGenerator =
      new OrcInputFormat.FileGenerator(etlContext, etlFs,
          new MockPath(etlFs, "mock:/a/b"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> etlStrategies =
      createSplitStrategies(etlContext, etlGenerator);
  assertEquals(1, etlStrategies.size());
  boolean pickedEtl = etlStrategies.get(0) instanceof OrcInputFormat.ETLSplitStrategy;
  assertEquals(true, pickedEtl);
}
@Test public void deltasAndDeleteDeltasWithOpenTxnsNotInCompact() throws Exception { // This tests checks that appropriate delta and delete_deltas are included when minor // compactions specifies a valid open txn range. Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_2/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX, 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_7_7/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE + ":")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delete_delta_2_2", delts.get(1).getPath().toString()); }
/**
 * Computes the ACID state of a partition holding {@code base_50} and several deltas whose
 * ranges overlap, and checks the resulting base directory, the obsolete list, and the
 * ordered list of current deltas.
 */
@Test
public void testOverlapingDelta() throws Exception {
  Configuration conf = new Configuration();
  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/tbl/part1/delta_0000063_63/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_000062_62/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_00061_61/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_40_60/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_0060_60/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_052_55/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/base_50/bucket_0", 500, new byte[0]));
  Path partitionDir = new MockPath(mockFs, "mock:/tbl/part1");

  ValidReadTxnList validTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE);
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxns.writeToString());

  ValidReaderWriteIdList writeIds =
      new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":");
  AcidUtils.Directory acidState = AcidUtils.getAcidState(partitionDir, conf, writeIds);
  assertEquals("mock:/tbl/part1/base_50", acidState.getBaseDirectory().toString());

  // Obsolete directories, in the order they are reported.
  List<FileStatus> obsoleteDirs = acidState.getObsolete();
  String[] expectedObsolete = {
      "mock:/tbl/part1/delta_052_55",
      "mock:/tbl/part1/delta_0060_60"
  };
  assertEquals(2, obsoleteDirs.size());
  for (int i = 0; i < expectedObsolete.length; i++) {
    assertEquals(expectedObsolete[i], obsoleteDirs.get(i).getPath().toString());
  }

  // Current deltas, in the order they are reported.
  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  String[] expectedDeltas = {
      "mock:/tbl/part1/delta_40_60",
      "mock:/tbl/part1/delta_00061_61",
      "mock:/tbl/part1/delta_000062_62",
      "mock:/tbl/part1/delta_0000063_63"
  };
  assertEquals(4, currentDeltas.size());
  for (int i = 0; i < expectedDeltas.length; i++) {
    assertEquals(expectedDeltas[i], currentDeltas.get(i).getPath().toString());
  }
}
@Test public void deleteDeltasWithOpenTxnInRead() throws Exception { Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_3_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_101_101_1/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(3, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delete_delta_2_5", delts.get(1).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(2).getPath().toString()); // Note that delete_delta_3_3 should not be read, when a minor compacted // [delete_]delta_2_5 is present. }
/** * @since 1.3.0 * @throws Exception */ @Test public void deltasWithOpenTxnInRead2() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_1/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_4_4_3/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delta_101_101_1/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); //hypothetically, txn 50 is open and writing write ID 4 conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[] {50}, new BitSet(), 1000, 55).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:4:4")); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(2, delts.size()); assertEquals("mock:/tbl/part1/delta_1_1", delts.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/delta_2_5", delts.get(1).getPath().toString()); }
/**
 * Marks the table as transactional, builds two delta directories with two bucket files
 * each under {@code mock:/a}, and checks that the first split strategy is an
 * {@code ACIDSplitStrategy} producing four splits, each with an estimated size of 1.
 */
@Test
public void testACIDSplitStrategy() throws Exception {
  conf.set("bucket_count", "2");
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);

  ValidReadTxnList validTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE);
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxns.writeToString());

  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/a/delta_000_001/bucket_000000", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_000_001/bucket_000001", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000000", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000001", 1000, new byte[1],
          new MockBlock("host1")));
  OrcInputFormat.FileGenerator generator =
      new OrcInputFormat.FileGenerator(context, mockFs,
          new MockPath(mockFs, "mock:/a"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> strategies =
      createSplitStrategies(context, generator);

  boolean pickedAcid = strategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy;
  assertEquals(true, pickedAcid);
  OrcInputFormat.ACIDSplitStrategy acidStrategy =
      (OrcInputFormat.ACIDSplitStrategy) strategies.get(0);
  List<OrcSplit> splits = acidStrategy.getSplits();

  // Per-split size estimates are checked before the overall split count.
  ColumnarSplitSizeEstimator sizeEstimator = new ColumnarSplitSizeEstimator();
  for (OrcSplit split : splits) {
    assertEquals(1, sizeEstimator.getEstimatedSize(split));
  }
  assertEquals(4, splits.size());
}
@Test public void testMinorCompactedDeltaMakesInBetweenDelteDeltaObsolete() throws Exception { // This test checks that if we have a minor compacted delta for the txn range [40,60] // then it will make any delete delta in that range as obsolete. Configuration conf = new Configuration(); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/delta_40_60/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/delete_delta_50_50/bucket_0", 500, new byte[0])); Path part = new MockPath(fs, "mock:/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:100:" + Long.MAX_VALUE + ":")); List<FileStatus> obsolete = dir.getObsolete(); assertEquals(1, obsolete.size()); assertEquals("mock:/tbl/part1/delete_delta_50_50", obsolete.get(0).getPath().toString()); List<AcidUtils.ParsedDelta> delts = dir.getCurrentDirectories(); assertEquals(1, delts.size()); assertEquals("mock:/tbl/part1/delta_40_60", delts.get(0).getPath().toString()); }
@Test public void testObsoleteOriginals() throws Exception { Configuration conf = new Configuration(); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/tbl/part1/base_10/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/base_5/bucket_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/000000_0", 500, new byte[0]), new MockFile("mock:/tbl/part1/000001_1", 500, new byte[0])); Path part = new MockPath(fs, "/tbl/part1"); conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); AcidUtils.Directory dir = AcidUtils.getAcidState(part, conf, new ValidReaderWriteIdList("tbl:150:" + Long.MAX_VALUE + ":")); // Obsolete list should include the two original bucket files, and the old base dir List<FileStatus> obsolete = dir.getObsolete(); assertEquals(3, obsolete.size()); assertEquals("mock:/tbl/part1/base_5", obsolete.get(0).getPath().toString()); assertEquals("mock:/tbl/part1/base_10", dir.getBaseDirectory().toString()); }
/**
 * Computes the ACID state with a compactor write-id list built from
 * {@code "tbl:3:" + Long.MAX_VALUE} over a partition that includes a side file for
 * {@code delta_2_5}, and expects {@code delta_1_1} to be the only current delta.
 */
@Test
public void deltasWithOpenTxnsNotInCompact2() throws Exception {
  Configuration conf = new Configuration();
  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_2_5/bucket_0" + AcidUtils.DELTA_SIDE_FILE_SUFFIX,
          500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_6_10/bucket_0", 500, new byte[0]));
  Path partitionDir = new MockPath(mockFs, "mock:/tbl/part1");

  ValidReadTxnList validTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE);
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxns.writeToString());

  ValidCompactorWriteIdList writeIds =
      new ValidCompactorWriteIdList("tbl:3:" + Long.MAX_VALUE);
  AcidUtils.Directory acidState = AcidUtils.getAcidState(partitionDir, conf, writeIds);

  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  assertEquals(1, currentDeltas.size());
  String onlyDelta = currentDeltas.get(0).getPath().toString();
  assertEquals("mock:/tbl/part1/delta_1_1", onlyDelta);
}
/**
 * Computes the ACID state with a compactor write-id list built from
 * {@code "tbl:4:" + Long.MAX_VALUE} over a partition holding {@code delta_1_1} and
 * {@code delta_2_5}, and expects {@code delta_1_1} to be the only current delta.
 */
@Test
public void deltasWithOpenTxnsNotInCompact() throws Exception {
  Configuration conf = new Configuration();
  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/tbl/part1/delta_1_1/bucket_0", 500, new byte[0]),
      new MockFile("mock:/tbl/part1/delta_2_5/bucket_0", 500, new byte[0]));
  Path partitionDir = new MockPath(mockFs, "mock:/tbl/part1");

  ValidReadTxnList validTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE);
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxns.writeToString());

  ValidCompactorWriteIdList writeIds =
      new ValidCompactorWriteIdList("tbl:4:" + Long.MAX_VALUE);
  AcidUtils.Directory acidState = AcidUtils.getAcidState(partitionDir, conf, writeIds);

  List<AcidUtils.ParsedDelta> currentDeltas = acidState.getCurrentDirectories();
  assertEquals(1, currentDeltas.size());
  String onlyDelta = currentDeltas.get(0).getPath().toString();
  assertEquals("mock:/tbl/part1/delta_1_1", onlyDelta);
}
/**
 * Builds a {@code FileGenerator} for {@code path} on the given mock file system and
 * returns the result of invoking it.
 *
 * @param context the input-format context handed to the generator
 * @param fs the mock file system the path belongs to
 * @param path the directory path, wrapped in a {@code MockPath} on {@code fs}
 * @return the {@code AcidDirInfo} produced by the generator's {@code call()}
 * @throws IOException declared for the generator invocation
 */
public OrcInputFormat.AcidDirInfo createAdi(
    OrcInputFormat.Context context, MockFileSystem fs, String path) throws IOException {
  MockPath directory = new MockPath(fs, path);
  OrcInputFormat.FileGenerator generator =
      new OrcInputFormat.FileGenerator(context, fs, directory, false, null);
  return generator.call();
}