/**
 * Computes the input splits for this job.
 *
 * <p>A separate {@link Job} is built from the caller's configuration; the split-size
 * limits are applied to this input format via {@code setSplitSize}, recursive input
 * directory traversal is switched on for that job, and the splits produced by the
 * superclass are handed to {@code cleanSplits} before being returned.
 *
 * @param cx the context whose configuration drives split generation
 * @return the splits returned by {@code cleanSplits}
 * @throws IOException if creating the job or computing the splits fails
 */
@Override
public List<InputSplit> getSplits(JobContext cx) throws IOException {
  Job jobForSplitting = Job.getInstance(cx.getConfiguration());
  setSplitSize(jobForSplitting);
  FileInputFormat.setInputDirRecursive(jobForSplitting, true);

  List<InputSplit> rawSplits = super.getSplits(jobForSplitting);
  return cleanSplits(rawSplits);
}
/**
 * Applies the configured split-size limits to this input format.
 *
 * <p>The maximum split size is read from {@code COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE} and
 * the per-node minimum from {@code COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE}; each falls back
 * to its {@code DEFAULT_*} constant when the key is absent.
 *
 * @param cx the context whose configuration supplies the limits
 */
private void setSplitSize(JobContext cx) {
  long maxSplitSize =
      cx.getConfiguration()
          .getLong(
              COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE,
              DEFAULT_COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE);
  super.setMaxSplitSize(maxSplitSize);

  long minSplitSizePerNode =
      cx.getConfiguration()
          .getLong(
              COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE,
              DEFAULT_COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE);
  super.setMinSplitSizeNode(minSplitSizePerNode);
}
List<FileStatus> stats = listStatus(job); List<InputSplit> splits = new ArrayList<InputSplit>(); if (stats.size() == 0) { getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits); getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);
addCreatedSplit(splits, nodes, validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, nodes, validBlocks); } else { for (OneBlockInfo oneblock : validBlocks) { addCreatedSplit(splits, getHosts(racks), validBlocks); createdSplit = true; break; addCreatedSplit(splits, getHosts(racks), validBlocks); } else { addCreatedSplit(splits, getHosts(racks), validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, getHosts(racks), validBlocks);
files[i] = new OneFileInfo(paths[i], conf, isSplitable(job, paths[i]), rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes, maxSize); addCreatedSplit(splits, nodes, validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, nodes, validBlocks); } else { for (OneBlockInfo oneblock : validBlocks) { addCreatedSplit(splits, getHosts(racks), validBlocks); createdSplit = true; break; addCreatedSplit(splits, getHosts(racks), validBlocks); } else { addCreatedSplit(splits, getHosts(racks), validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, getHosts(racks), validBlocks);
/**
 * Lists the input directories for a job, returned as an array.
 *
 * <p>Subclasses may override this to, e.g., select only files matching a regular
 * expression. The listing itself is delegated to the superclass {@code listStatus},
 * invoked with a {@link Job} created from {@code job}.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects, in the order the superclass returned them
 * @throws IOException propagated from job creation or from the superclass listing
 *     (the original contract documents this as "if zero items")
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
  List<FileStatus> statuses = super.listStatus(Job.getInstance(job));
  FileStatus[] statusArray = new FileStatus[statuses.size()];
  return statuses.toArray(statusArray);
}
int i = 0; for (FileStatus stat : stats) { files[i] = new OneFileInfo(stat, conf, isSplitable(job, stat.getPath()), rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes, maxSize); totLength += files[i].getLength(); createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSize, minSizeNode, minSizeRack, splits);
List<FileStatus> stats = listStatus(job); List<InputSplit> splits = new ArrayList<InputSplit>(); if (stats.size() == 0) { getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits); getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);
addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; curSplitSize = 0; addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; splitsPerNode.add(node); addCreatedSplit(splits, getHosts(racks), validBlocks); createdSplit = true; break; addCreatedSplit(splits, getHosts(racks), validBlocks); } else { addCreatedSplit(splits, getHosts(racks), validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, getHosts(racks), validBlocks);
/**
 * Lists the input directories for a job, returned as an array.
 *
 * <p>Subclasses may override this to, e.g., select only files matching a regular
 * expression. The listing itself is delegated to the superclass {@code listStatus},
 * invoked with a {@link Job} created from {@code job}.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects, in the order the superclass returned them
 * @throws IOException propagated from job creation or from the superclass listing
 *     (the original contract documents this as "if zero items")
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
  List<FileStatus> statuses = super.listStatus(Job.getInstance(job));
  FileStatus[] statusArray = new FileStatus[statuses.size()];
  return statuses.toArray(statusArray);
}
int i = 0; for (FileStatus stat : stats) { files[i] = new OneFileInfo(stat, conf, isSplitable(job, stat.getPath()), rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes, maxSize); totLength += files[i].getLength(); createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSize, minSizeNode, minSizeRack, splits);
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { // get input paths and assign a split for every single path String path = job.getConfiguration().get("mapred.input.dir"); String[] paths = path.split(","); List<InputSplit> splits = new ArrayList<>(); for (int i = 0; i < paths.length; ++i) { job.getConfiguration().set("mapred.input.dir", paths[i]); splits.add(super.getSplits(job).get(0)); } return splits; } }
List<FileStatus> stats = listStatus(job); List<InputSplit> splits = new ArrayList<InputSplit>(); if (stats.size() == 0) { getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits); getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);
addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; curSplitSize = 0; addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; splitsPerNode.add(node); addCreatedSplit(splits, getHosts(racks), validBlocks); createdSplit = true; break; addCreatedSplit(splits, getHosts(racks), validBlocks); } else { addCreatedSplit(splits, getHosts(racks), validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, getHosts(racks), validBlocks);
/**
 * Lists the input directories for a job, returned as an array.
 *
 * <p>Subclasses may override this to, e.g., select only files matching a regular
 * expression. The listing itself is delegated to the superclass {@code listStatus},
 * invoked with a {@link Job} created from {@code job}.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects, in the order the superclass returned them
 * @throws IOException propagated from job creation or from the superclass listing
 *     (the original contract documents this as "if zero items")
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
  List<FileStatus> statuses = super.listStatus(Job.getInstance(job));
  FileStatus[] statusArray = new FileStatus[statuses.size()];
  return statuses.toArray(statusArray);
}
int i = 0; for (FileStatus stat : stats) { files[i] = new OneFileInfo(stat, conf, isSplitable(job, stat.getPath()), rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes, maxSize); totLength += files[i].getLength(); createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSize, minSizeNode, minSizeRack, splits);
/**
 * Applies the configured split-size limits to this input format.
 *
 * <p>The maximum split size is read from {@code COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE} and
 * the per-node minimum from {@code COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE}; each falls back
 * to its {@code DEFAULT_*} constant when the key is absent.
 *
 * @param cx the context whose configuration supplies the limits
 */
private void setSplitSize(JobContext cx) {
  long maxSplitSize =
      cx.getConfiguration()
          .getLong(
              COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE,
              DEFAULT_COMPACTION_JOB_MAPRED_MAX_SPLIT_SIZE);
  super.setMaxSplitSize(maxSplitSize);

  long minSplitSizePerNode =
      cx.getConfiguration()
          .getLong(
              COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE,
              DEFAULT_COMPACTION_JOB_MAPRED_MIN_SPLIT_SIZE);
  super.setMinSplitSizeNode(minSplitSizePerNode);
}
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { // get input paths and assign a split for every single path String path = job.getConfiguration().get("mapred.input.dir"); String[] paths = path.split(","); List<InputSplit> splits = new ArrayList<>(); for (int i = 0; i < paths.length; ++i) { job.getConfiguration().set("mapred.input.dir", paths[i]); splits.add(super.getSplits(job).get(0)); } return splits; } }
List<FileStatus> stats = listStatus(job); List<InputSplit> splits = new ArrayList<InputSplit>(); if (stats.size() == 0) { getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits); getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);
addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; curSplitSize = 0; addCreatedSplit(splits, Collections.singleton(node), validBlocks); totalLength -= curSplitSize; splitsPerNode.add(node); addCreatedSplit(splits, getHosts(racks), validBlocks); createdSplit = true; break; addCreatedSplit(splits, getHosts(racks), validBlocks); } else { addCreatedSplit(splits, getHosts(racks), validBlocks); curSplitSize = 0; validBlocks.clear(); addCreatedSplit(splits, getHosts(racks), validBlocks);