/**
 * Sets the {@code MAPREDMINSPLITSIZE} Hive variable on the job's configuration to
 * {@link SequenceFile#SYNC_INTERVAL} and then returns the splits computed by the superclass.
 *
 * @param job the job context whose configuration is updated before splitting
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final long minSplitSize = SequenceFile.SYNC_INTERVAL;
  HiveConf.setLongVar(job.getConfiguration(), HiveConf.ConfVars.MAPREDMINSPLITSIZE, minSplitSize);
  final List<InputSplit> splits = super.getSplits(job);
  return splits;
}
} // closes the enclosing type, whose header lies outside this excerpt
List<InputSplit> fileSplits = fileInputFormat.getSplits(job); if (fileSplits == null || fileSplits.isEmpty()) { return ImmutableList.of();
/**
 * With {@code INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS} enabled, expects exactly one split,
 * for {@code test:/a1/file1}.
 */
@Test
public void testNumInputFilesIgnoreDirs() throws Exception {
  Configuration conf = getConfiguration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.setBoolean(FileInputFormat.INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS, true);

  Job job = Job.getInstance(conf);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  List<InputSplit> actualSplits = inputFormat.getSplits(job);

  Assert.assertEquals("Input splits are not correct", 1, actualSplits.size());
  List<String> expectedPaths = Lists.newArrayList("test:/a1/file1");
  verifySplits(expectedPaths, actualSplits);
}
/**
 * Using the configuration from {@code getConfiguration()} plus the listing thread count,
 * expects two splits: {@code test:/a1/a2} and {@code test:/a1/file1}.
 */
@Test
public void testNumInputFilesWithoutRecursively() throws Exception {
  Configuration conf = getConfiguration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);

  Job job = Job.getInstance(conf);
  FileInputFormat<?, ?> inputFormat = new TextInputFormat();
  List<InputSplit> actualSplits = inputFormat.getSplits(job);

  Assert.assertEquals("Input splits are not correct", 2, actualSplits.size());
  List<String> expectedPaths = Lists.newArrayList("test:/a1/a2", "test:/a1/file1");
  verifySplits(expectedPaths, actualSplits);
}
@Test public void testNumInputFilesRecursively() throws Exception { Configuration conf = getConfiguration(); conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, "true"); conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads); Job job = Job.getInstance(conf); FileInputFormat<?, ?> fileInputFormat = new TextInputFormat(); List<InputSplit> splits = fileInputFormat.getSplits(job); Assert.assertEquals("Input splits are not correct", 3, splits.size()); verifySplits(Lists.newArrayList("test:/a1/a2/file2", "test:/a1/a2/file3", "test:/a1/file1"), splits); // Using the deprecated configuration conf = getConfiguration(); conf.set("mapred.input.dir.recursive", "true"); job = Job.getInstance(conf); splits = fileInputFormat.getSplits(job); verifySplits(Lists.newArrayList("test:/a1/a2/file2", "test:/a1/a2/file3", "test:/a1/file1"), splits); }
/**
 * Computes splits with a {@link TextInputFormat}, writes the split files to
 * {@code submitDir}, and then validates the resulting split meta info.
 */
@Test
public void testMaxBlockLocationsNewSplitsWithErasureCoding() throws Exception {
  final Job splitJob = Job.getInstance(conf);
  final FileInputFormat<?, ?> inputFormat = new TextInputFormat();

  final List<InputSplit> computedSplits = inputFormat.getSplits(splitJob);
  JobSplitWriter.createSplitFiles(submitDir, conf, fs, computedSplits);

  validateSplitMetaInfo();
}
/**
 * Checks that computing splits for {@code test:///a1/a2} yields two splits and increments
 * the mock file system's {@code numListLocatedStatusCalls} counter exactly once.
 *
 * <p>The file system cache is explicitly enabled for the {@code test} scheme (presumably so
 * the instance inspected here is the same one the input format uses -- confirm against
 * {@code MockFileSystem}). Because cached instances outlive this method, they are closed in
 * a {@code finally} block so a failing assertion cannot leak them into later tests.
 */
@Test
public void testListLocatedStatus() throws Exception {
  Configuration conf = getConfiguration();
  conf.setInt(FileInputFormat.LIST_STATUS_NUM_THREADS, numThreads);
  conf.setBoolean("fs.test.impl.disable.cache", false);
  conf.set(FileInputFormat.INPUT_DIR, "test:///a1/a2");
  try {
    MockFileSystem mockFs = (MockFileSystem) new Path("test:///").getFileSystem(conf);
    Assert.assertEquals("listLocatedStatus already called", 0,
        mockFs.numListLocatedStatusCalls);

    Job job = Job.getInstance(conf);
    FileInputFormat<?, ?> fileInputFormat = new TextInputFormat();
    List<InputSplit> splits = fileInputFormat.getSplits(job);

    Assert.assertEquals("Input splits are not correct", 2, splits.size());
    Assert.assertEquals("listLocatedStatuss calls", 1, mockFs.numListLocatedStatusCalls);
  } finally {
    // Always release cached file systems, even when an assertion above fails.
    FileSystem.closeAll();
  }
}
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { List<InputSplit> splits = super.getSplits(job); // TODO combine splits that reside on the same machine return splits; }
/**
 * Delegates split computation to the superclass without any changes.
 *
 * @param job the job context passed through to the superclass
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> inheritedSplits = super.getSplits(job);
  return inheritedSplits;
}
/**
 * Returns exactly what the superclass computes for the given job.
 *
 * @param job the job context handed to the superclass
 * @return the superclass's list of input splits
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> result = super.getSplits(job);
  return result;
}
/**
 * Pass-through override: the splits come straight from the superclass.
 *
 * @param job the job context forwarded to the superclass
 * @return the input splits computed by the superclass
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final List<InputSplit> superSplits = super.getSplits(job);
  return superSplits;
}
/**
 * Writes {@link SequenceFile#SYNC_INTERVAL} into the job configuration as the
 * {@code MAPREDMINSPLITSIZE} Hive variable, then defers to the superclass for the splits.
 *
 * @param job the job context whose configuration is updated before splitting
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final long syncInterval = SequenceFile.SYNC_INTERVAL;
  HiveConf.setLongVar(job.getConfiguration(), HiveConf.ConfVars.MAPREDMINSPLITSIZE, syncInterval);
  return super.getSplits(job);
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Computes the superclass's splits and passes them, together with the job configuration,
 * to the two-argument {@code getSplits} overload.
 *
 * @param job the job context supplying both the splits and the configuration
 * @return whatever the two-argument {@code getSplits} overload returns
 * @throws IOException if split computation fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Evaluate the superclass splits first, matching the original argument order.
  List<InputSplit> baseSplits = super.getSplits(job);
  return getSplits(baseSplits, job.getConfiguration());
}
/**
 * Obtains the splits from the superclass and hands them, along with the job's
 * configuration, to the two-argument {@code getSplits} overload.
 *
 * @param job the job context supplying both the splits and the configuration
 * @return the result of the two-argument {@code getSplits} overload
 * @throws IOException if split computation fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // The superclass call stays first so evaluation order is unchanged.
  final List<InputSplit> rawSplits = super.getSplits(job);
  return getSplits(rawSplits, job.getConfiguration());
}
/**
 * Stores {@link SequenceFile#SYNC_INTERVAL} in the job configuration under the key that the
 * Hadoop shims map to {@code "MAPREDMINSPLITSIZE"}, then returns the superclass's splits.
 *
 * @param job the job context whose configuration is updated before splitting
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final long minSplitSize = SequenceFile.SYNC_INTERVAL;
  job.getConfiguration()
      .setLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), minSplitSize);
  return super.getSplits(job);
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Looks up the configuration key registered for {@code "MAPREDMINSPLITSIZE"} in the Hadoop
 * shims, sets it to {@link SequenceFile#SYNC_INTERVAL} on the job configuration, and then
 * delegates to the superclass.
 *
 * @param job the job context whose configuration is updated before splitting
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final long syncInterval = SequenceFile.SYNC_INTERVAL;
  job.getConfiguration()
      .setLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), syncInterval);
  final List<InputSplit> splits = super.getSplits(job);
  return splits;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Overrides the setting named by the shim entry {@code "MAPREDMINSPLITSIZE"} with
 * {@link SequenceFile#SYNC_INTERVAL} before asking the superclass for the splits.
 *
 * @param job the job context whose configuration is updated before splitting
 * @return the splits produced by the superclass implementation
 * @throws IOException if the superclass fails to compute the splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final long interval = SequenceFile.SYNC_INTERVAL;
  job.getConfiguration()
      .setLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), interval);
  final List<InputSplit> superSplits = super.getSplits(job);
  return superSplits;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Builds a job from the given serializable configuration, pins both the minimum and the
 * maximum input split size to {@code desiredBundleSizeBytes}, and returns the splits that
 * the format created for that job reports.
 *
 * @param desiredBundleSizeBytes value used for both the minimum and maximum split size
 * @param serializableConfiguration configuration from which the job is created
 * @return the splits computed by the created format
 */
private List<InputSplit> computeSplits(long desiredBundleSizeBytes,
    SerializableConfiguration serializableConfiguration)
    throws IOException, IllegalAccessException, InstantiationException {
  final Job splitJob = SerializableConfiguration.newJob(serializableConfiguration);

  // Min and max are set to the same value.
  FileInputFormat.setMinInputSplitSize(splitJob, desiredBundleSizeBytes);
  FileInputFormat.setMaxInputSplitSize(splitJob, desiredBundleSizeBytes);

  final List<InputSplit> splits = createFormat(splitJob).getSplits(splitJob);
  return splits;
}
/**
 * Obtains a job from {@code jobInstance()}, sets both the minimum and the maximum input
 * split size to {@code desiredBundleSizeBytes}, and returns the splits computed by the
 * format created for that job.
 *
 * @param desiredBundleSizeBytes value used for both the minimum and maximum split size
 * @return the splits computed by the created format
 */
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes)
    throws IOException, IllegalAccessException, InstantiationException {
  final Job splitJob = jobInstance();

  // Min and max are set to the same value.
  FileInputFormat.setMinInputSplitSize(splitJob, desiredBundleSizeBytes);
  FileInputFormat.setMaxInputSplitSize(splitJob, desiredBundleSizeBytes);

  final List<InputSplit> splits = createFormat(splitJob).getSplits(splitJob);
  return splits;
}
/**
 * Computes input splits for a fresh {@code jobInstance()} after fixing the minimum and
 * maximum split size to the same {@code desiredBundleSizeBytes} value.
 *
 * @param desiredBundleSizeBytes value applied as both the minimum and maximum split size
 * @return the splits reported by the format created for the job
 */
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes)
    throws IOException, IllegalAccessException, InstantiationException {
  final Job configuredJob = jobInstance();

  // Identical lower and upper bounds on the split size.
  FileInputFormat.setMinInputSplitSize(configuredJob, desiredBundleSizeBytes);
  FileInputFormat.setMaxInputSplitSize(configuredJob, desiredBundleSizeBytes);

  final List<InputSplit> bundleSplits = createFormat(configuredJob).getSplits(configuredJob);
  return bundleSplits;
}