for (Path dir : paths) { FileSystem fs = dir.getFileSystem(conf); FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds, ugi); pathFutures.add(ecs.submit(fileGenerator));
for (Path dir : paths) { FileSystem fs = dir.getFileSystem(conf); FileGenerator fileGenerator = new FileGenerator(context, fs, dir, useFileIds, ugi); pathFutures.add(ecs.submit(fileGenerator));
final OrcInputFormat.Context context = new OrcInputFormat.Context( conf, n); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( context, fs, new MockPath(fs, "mock:/a/b"), false, null); List<SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen); final OrcInputFormat.Context context = new OrcInputFormat.Context( conf, n); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( context, fs, new MockPath(fs, "mock:/a/b"), false, null); List<SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
FileGenerator fileGenerator = new FileGenerator(context, fs, dir); pathFutures.add(context.threadPool.submit(fileGenerator));
new MockFile("mock:/a/b/000000_1", 1000, new byte[1], new MockBlock("host1"))); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen); new MockFile("mock:/a/base_0000001/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/base_0000001/bucket_00001", 1000, new byte[1], new MockBlock("host1"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); splitStrategies = createSplitStrategies(context, gen); assertEquals(1, splitStrategies.size()); new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); splitStrategies = createSplitStrategies(context, gen); assertEquals(2, splitStrategies.size()); new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000001_0000001_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); splitStrategies = createSplitStrategies(context, gen); assertEquals(2, splitStrategies.size()); new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00000", 1000, new byte[1], new MockBlock("host1")), new MockFile("mock:/a/delete_delta_0000002_0000002_0000/bucket_00001", 1000, new byte[1], new MockBlock("host1"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); splitStrategies = createSplitStrategies(context, gen); assertEquals(1, splitStrategies.size());
new MockFile("mock:/a/b/part-04", 1000, new byte[1], new MockBlock("host1", "host2"))); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null); List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen); new MockFile("mock:/a/b/part-03", 1000, new byte[1000], new MockBlock("host1", "host2")), new MockFile("mock:/a/b/part-04", 1000, new byte[1000], new MockBlock("host1", "host2"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null); splitStrategies = createSplitStrategies(context, gen); new MockFile("mock:/a/b/part-04", 1000, new byte[1100], new MockBlock("host1", "host2"), new MockBlock("host1", "host2"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null); splitStrategies = createSplitStrategies(context, gen); new MockFile("mock:/a/b/part-04", 1000, new byte[2000], new MockBlock("host1", "host2"), new MockBlock("host1", "host2"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null); splitStrategies = createSplitStrategies(context, gen); new MockFile("mock:/a/b/part-04", 1000, new byte[2200], new MockBlock("host1", "host2"), new MockBlock("host1", "host2"), new MockBlock("host1", "host2"))); gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a/b"), false, null); splitStrategies = createSplitStrategies(context, gen);
OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(context, fs, new MockPath(fs, "mock:/a"), false, null); List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
/**
 * Checks the split strategy chosen for a transactional table directory that
 * contains only delta directories. The first strategy must be an
 * {@code ACIDSplitStrategy}; every split it yields must have an estimated
 * size of 1, and it must yield four splits in total.
 */
@Test
public void testACIDSplitStrategy() throws Exception {
  conf.set("bucket_count", "2");
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);

  // NOTE(review): the context is constructed before VALID_TXNS_KEY is written
  // to conf below; confirm whether Context reads that key at construction time.
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);

  String validTxns =
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString();
  conf.set(ValidTxnList.VALID_TXNS_KEY, validTxns);

  // Two delta directories, each holding two one-byte bucket files.
  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/a/delta_000_001/bucket_000000", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_000_001/bucket_000001", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000000", 1000, new byte[1],
          new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000001", 1000, new byte[1],
          new MockBlock("host1")));

  MockPath tableDir = new MockPath(mockFs, "mock:/a");
  OrcInputFormat.FileGenerator generator =
      new OrcInputFormat.FileGenerator(context, mockFs, tableDir, false, null);
  List<OrcInputFormat.SplitStrategy<?>> strategies = createSplitStrategies(context, generator);

  OrcInputFormat.SplitStrategy<?> firstStrategy = strategies.get(0);
  assertEquals(true, firstStrategy instanceof OrcInputFormat.ACIDSplitStrategy);

  OrcInputFormat.ACIDSplitStrategy acidStrategy =
      (OrcInputFormat.ACIDSplitStrategy) firstStrategy;
  List<OrcSplit> acidSplits = acidStrategy.getSplits();

  ColumnarSplitSizeEstimator estimator = new ColumnarSplitSizeEstimator();
  for (OrcSplit acidSplit : acidSplits) {
    assertEquals(1, estimator.getEstimatedSize(acidSplit));
  }
  assertEquals(4, acidSplits.size());
}
/**
 * Runs the file generator over a mock directory in two configurations and
 * checks which single split strategy is produced in each case:
 * <ol>
 *   <li>one-byte files with the configuration as it stands on entry: a
 *       {@code BISplitStrategy} is expected;</li>
 *   <li>1000-byte files after setting
 *       {@code mapreduce.input.fileinputformat.split.maxsize} to 500: an
 *       {@code ETLSplitStrategy} is expected.</li>
 * </ol>
 * Both file sets include entries whose names start with {@code _} or {@code .}.
 */
@Test
public void testFileGenerator() throws Exception {
  // Scenario 1: one-byte files.
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
  MockFileSystem mockFs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1]));
  OrcInputFormat.FileGenerator generator = new OrcInputFormat.FileGenerator(
      context, mockFs, new MockPath(mockFs, "mock:/a/b"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> strategies = createSplitStrategies(context, generator);

  assertEquals(1, strategies.size());
  OrcInputFormat.SplitStrategy<?> smallFileStrategy = strategies.get(0);
  assertEquals(true, smallFileStrategy instanceof OrcInputFormat.BISplitStrategy);

  // Scenario 2: 1000-byte files with the max split size set to 500.
  conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
  context = new OrcInputFormat.Context(conf);
  mockFs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1000]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1000]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
  generator = new OrcInputFormat.FileGenerator(
      context, mockFs, new MockPath(mockFs, "mock:/a/b"), false, null);
  strategies = createSplitStrategies(context, generator);

  assertEquals(1, strategies.size());
  OrcInputFormat.SplitStrategy<?> largeFileStrategy = strategies.get(0);
  assertEquals(true, largeFileStrategy instanceof OrcInputFormat.ETLSplitStrategy);
}
/**
 * Builds a {@code FileGenerator} for {@code path} on the given mock file
 * system and returns the result of invoking it.
 *
 * @param context the input-format context handed to the generator
 * @param fs the mock file system that {@code path} is resolved against
 * @param path the directory path string wrapped in a {@code MockPath}
 * @return the {@code AcidDirInfo} returned by the generator's {@code call()}
 * @throws IOException if the generator's {@code call()} throws it
 */
public OrcInputFormat.AcidDirInfo createAdi(
    OrcInputFormat.Context context, MockFileSystem fs, String path) throws IOException {
  MockPath directory = new MockPath(fs, path);
  OrcInputFormat.FileGenerator generator =
      new OrcInputFormat.FileGenerator(context, fs, directory, false, null);
  return generator.call();
}
/**
 * Sets the transactional operational properties in {@code conf} to the
 * default value, runs a {@code FileGenerator} over {@code root} on {@code fs},
 * and returns the split strategies determined from the resulting directory
 * info.
 *
 * @return the strategies returned by {@code OrcInputFormat.determineSplitStrategies}
 * @throws Exception if the generator or the strategy determination fails
 */
private List<OrcInputFormat.SplitStrategy<?>> getSplitStrategies() throws Exception {
  int defaultAcidProperties = AcidUtils.AcidOperationalProperties.getDefault().toInt();
  conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, defaultAcidProperties);

  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
  OrcInputFormat.FileGenerator generator =
      new OrcInputFormat.FileGenerator(context, fs, root, false, null);
  OrcInputFormat.AcidDirInfo dirInfo = generator.call();

  List<OrcInputFormat.SplitStrategy<?>> strategies = OrcInputFormat.determineSplitStrategies(
      null, context, dirInfo.fs, dirInfo.splitPath, dirInfo.baseFiles, dirInfo.deleteEvents,
      null, null, true);
  return strategies;
}
}