@Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("getSplits started"); } Configuration conf = job; if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED)) { // Create HiveConf once, since this is expensive. conf = new HiveConf(conf, OrcInputFormat.class); } List<OrcSplit> result = generateSplitsInfo(conf, new Context(conf, numSplits, createExternalCaches())); if (LOG.isDebugEnabled()) { LOG.debug("getSplits finished"); } return result.toArray(new InputSplit[result.size()]); }
@Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { if (isDebugEnabled) { LOG.debug("getSplits started"); } Configuration conf = job; if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED)) { // Create HiveConf once, since this is expensive. conf = new HiveConf(conf, OrcInputFormat.class); } List<OrcSplit> result = generateSplitsInfo(conf, new Context(conf, numSplits, createExternalCaches())); if (isDebugEnabled) { LOG.debug("getSplits finished"); } return result.toArray(new InputSplit[result.size()]); }
// NOTE(review): this span is a corrupted fragment — the same four statements
// appear twice back-to-back and the final FileGenerator constructor call is cut
// off mid-argument-list. It builds a mock FileSystem of (c, s) files and, for
// each requested split count n, constructs a fresh OrcInputFormat.Context and
// FileGenerator. Recover the full method from version control before editing.
final FileSystem fs = generateMockFiles(c, s); for (int n : numSplits) { final OrcInputFormat.Context context = new OrcInputFormat.Context( conf, n); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( final FileSystem fs = generateMockFiles(c, s); for (int n : numSplits) { final OrcInputFormat.Context context = new OrcInputFormat.Context( conf, n); OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
// NOTE(review): truncated — only the opening of the method is visible. It sets
// up a Context plus result/future lists (pathFutures suggests directories are
// resolved asynchronously, but the executor usage is not visible here — confirm
// against the full source before documenting further).
static List<OrcSplit> generateSplitsInfo(Configuration conf) throws IOException { Context context = new Context(conf); List<OrcSplit> splits = Lists.newArrayList(); List<Future<?>> pathFutures = Lists.newArrayList();
// Test setup fragment: installs a ValidReadTxnList (no open txns, high-water
// mark 1000) into the configuration under VALID_TXNS_KEY, then builds the
// Context from that configuration. Truncated — the rest of the method is not
// visible here.
conf.set(ValidTxnList.VALID_TXNS_KEY, new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
// NOTE(review): heavily elided test — assertEquals calls appear with no
// preceding split computation and several constructor calls are unclosed, so
// this line cannot be compiled or safely rewritten as-is. The visible intent:
// force the "BI" split strategy, then exercise files of 1, 1000, 1100, 2000 and
// 2200 bytes against 1000-byte blocks to check split counts at block
// boundaries (5, 5, 10, ... splits). Restore from version control.
@Test public void testBIStrategySplitBlockBoundary() throws Exception { conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI"); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[1], new MockBlock("host1", "host2")), assertEquals(5, numSplits); context = new OrcInputFormat.Context(conf); fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[1000], new MockBlock("host1", "host2")), assertEquals(5, numSplits); context = new OrcInputFormat.Context(conf); fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[1100], new MockBlock("host1", "host2"), assertEquals(10, numSplits); context = new OrcInputFormat.Context(conf); fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[2000], new MockBlock("host1", "host2"), context = new OrcInputFormat.Context(conf); fs = new MockFileSystem(conf, new MockFile("mock:/a/b/part-00", 1000, new byte[2200], new MockBlock("host1", "host2"),
// Fragment: tail of a conf.set(...) writing a ValidReadTxnList (high-water mark
// 1000) into the configuration, followed by Context creation and the opening of
// a MockFileSystem with a single 1-byte file. Truncated at both ends — do not
// edit without the surrounding method.
new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString()); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); MockFileSystem fs = new MockFileSystem(conf, new MockFile("mock:/a/1/part-00", 1000, new byte[1]),
// NOTE(review): truncated/elided fragment. It configures column projection
// (read only column id "0" rather than all columns) and then repeatedly builds
// a fresh Context + SplitGenerator over /a/file; every SplitInfo constructor
// call is cut off mid-argument-list, so the intervening assertions are missing.
// Restore the full method before editing.
conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, context = new OrcInputFormat.Context(conf); splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true, context = new OrcInputFormat.Context(conf); splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true,
// Test setup fragment: points the default filesystem at the in-memory
// MockFileSystem implementation ("mock:///") so split generation never touches
// real storage, then builds the Context from that configuration. Mid-method —
// the enclosing test is not visible here.
conf.set("fs.defaultFS", "mock:///"); conf.set("fs.mock.impl", MockFileSystem.class.getName()); OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
// NOTE(review): truncated fragment. Configures max/min split sizes (300/200
// bytes) and builds Context + SplitGenerator instances over /a/file; both
// SplitInfo constructor calls are cut off mid-argument-list and the assertions
// between them are missing. Restore the full method before editing.
HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 300); HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 200); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, context = new OrcInputFormat.Context(conf); splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs, fs.getFileStatus(new Path("/a/file")), null, null, true,
// Fragment from inside a try/finally: generates splits with an unspecified
// split-count hint (-1) and no external caches, asserts exactly one split is
// produced, then falls into a finally block whose body is not visible here.
List<OrcSplit> splits = OrcInputFormat.generateSplitsInfo(conf, new Context(conf, -1, null)); assertEquals(1, splits.size()); } finally {
// Fragment: tail of a MockFileSystem construction (last two mock blocks, each
// replicated on three hosts), then Context creation and the opening of a
// SplitGenerator whose SplitInfo constructor call is cut off mid-argument-list.
// Truncated at both ends — do not edit without the surrounding method.
new MockBlock("host4-1", "host4-2", "host4-3"), new MockBlock("host5-1", "host5-2", "host5-3"))); OrcInputFormat.Context context = new OrcInputFormat.Context(conf); OrcInputFormat.SplitGenerator splitter = new OrcInputFormat.SplitGenerator(new OrcInputFormat.SplitInfo(context, fs,
/**
 * Verifies which split strategy the FileGenerator selects: tiny files yield the
 * BI strategy, while larger files combined with a small max split size yield
 * the ETL strategy.
 */
@Test
public void testFileGenerator() throws Exception {
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
  // Five 1000-length entries whose data arrays hold a single byte; the '_' and
  // '.'-prefixed names are presumably treated as hidden files — confirm against
  // FileGenerator's filtering logic.
  MockFileSystem fs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1]));
  OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
      context, fs, new MockPath(fs, "mock:/a/b"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
  assertEquals(1, splitStrategies.size());
  assertEquals(true, splitStrategies.get(0) instanceof OrcInputFormat.BISplitStrategy);

  // Same layout but 1000-byte payloads and a 500-byte max split size: the
  // generator now picks the ETL strategy instead.
  conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
  context = new OrcInputFormat.Context(conf);
  fs = new MockFileSystem(conf,
      new MockFile("mock:/a/b/part-00", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-01", 1000, new byte[1000]),
      new MockFile("mock:/a/b/_part-02", 1000, new byte[1000]),
      new MockFile("mock:/a/b/.part-03", 1000, new byte[1000]),
      new MockFile("mock:/a/b/part-04", 1000, new byte[1000]));
  gen = new OrcInputFormat.FileGenerator(
      context, fs, new MockPath(fs, "mock:/a/b"), false, null);
  splitStrategies = createSplitStrategies(context, gen);
  assertEquals(1, splitStrategies.size());
  assertEquals(true, splitStrategies.get(0) instanceof OrcInputFormat.ETLSplitStrategy);
}
/**
 * Verifies that a transactional table laid out as delta directories selects the
 * ACID split strategy, and that one split per bucket file is produced with the
 * expected estimated size.
 */
@Test
public void testACIDSplitStrategy() throws Exception {
  conf.set("bucket_count", "2");
  conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
  // Valid-transaction list: nothing open, high-water mark 1000.
  conf.set(ValidTxnList.VALID_TXNS_KEY,
      new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());
  // Two delta directories, two buckets each — four 1-byte files in total.
  MockFileSystem fs = new MockFileSystem(conf,
      new MockFile("mock:/a/delta_000_001/bucket_000000", 1000, new byte[1], new MockBlock("host1")),
      new MockFile("mock:/a/delta_000_001/bucket_000001", 1000, new byte[1], new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000000", 1000, new byte[1], new MockBlock("host1")),
      new MockFile("mock:/a/delta_001_002/bucket_000001", 1000, new byte[1], new MockBlock("host1")));
  OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
      context, fs, new MockPath(fs, "mock:/a"), false, null);
  List<OrcInputFormat.SplitStrategy<?>> splitStrategies = createSplitStrategies(context, gen);
  assertEquals(true, splitStrategies.get(0) instanceof OrcInputFormat.ACIDSplitStrategy);
  List<OrcSplit> splits =
      ((OrcInputFormat.ACIDSplitStrategy) splitStrategies.get(0)).getSplits();
  // Each 1-byte file should be estimated at size 1.
  ColumnarSplitSizeEstimator splitSizeEstimator = new ColumnarSplitSizeEstimator();
  for (OrcSplit split : splits) {
    assertEquals(1, splitSizeEstimator.getEstimatedSize(split));
  }
  assertEquals(4, splits.size());
}
/**
 * Factory hook: builds a split-computation {@link Context} for the given
 * configuration and requested split count.
 *
 * @throws IOException declared for implementations that load state on creation
 */
public Context create(Configuration conf, int numSplits) throws IOException {
  return new Context(conf, numSplits);
}
}
private Context createContext(Configuration conf, int numSplits) throws IOException { // Use threads to resolve directories into splits. if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED)) { // Create HiveConf once, since this is expensive. conf = new HiveConf(conf, OrcInputFormat.class); } return new Context(conf, numSplits, null); } }
/**
 * Creates a new split-computation {@link Context} from the supplied
 * configuration and split-count hint.
 *
 * @param conf      configuration the context reads its settings from
 * @param numSplits requested number of splits
 */
public Context create(Configuration conf, int numSplits) throws IOException {
  return new Context(conf, numSplits);
}
}
/**
 * Test helper: runs the FileGenerator over {@code root} with default ACID
 * operational properties and returns the split strategies the input format
 * would choose for the resulting directory info.
 */
private List<OrcInputFormat.SplitStrategy<?>> getSplitStrategies() throws Exception {
  conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname,
      AcidUtils.AcidOperationalProperties.getDefault().toInt());
  OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
  OrcInputFormat.FileGenerator gen =
      new OrcInputFormat.FileGenerator(context, fs, root, false, null);
  OrcInputFormat.AcidDirInfo adi = gen.call();
  return OrcInputFormat.determineSplitStrategies(
      null, context, adi.fs, adi.splitPath, adi.baseFiles, adi.deleteEvents,
      null, null, true);
}
}
// Anonymous PrivilegedExceptionAction (the enclosing doAs(...) call starts
// before this line): generates splits with an unspecified split-count hint (-1)
// and no external caches, discarding the result — presumably run under a
// different user to exercise permission handling; confirm against the caller.
@Override public Void run() throws Exception { OrcInputFormat.generateSplitsInfo(conf, new Context(conf, -1, null)); return null; } });
private Context createContext(Configuration conf, int numSplits) throws IOException { // Use threads to resolve directories into splits. if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_MS_FOOTER_CACHE_ENABLED)) { // Create HiveConf once, since this is expensive. conf = new HiveConf(conf, OrcInputFormat.class); } return new Context(conf, numSplits, null); } }