/**
 * Builds the list of Guagua input splits for the given job.
 *
 * <p>
 * The splits returned by the parent input format are filtered with
 * {@code isNotPigOrHadoopMetaFile}; each surviving file split is wrapped in its own
 * {@link GuaguaInputSplit} created with flag {@code false}. The whole wrapped set is repeated
 * {@code NNConstants.NN_TEST_SCALE} times (default 1; a value of 0 or less yields no such splits).
 * One extra split created with flag {@code true} and no file split is appended last; the log
 * message below refers to it as the master.
 *
 * <p>
 * As a side effect, {@code GuaguaConstants.GUAGUA_WORKER_NUMBER} is written to the job configuration
 * as the resulting list size minus one.
 *
 * @param job
 *            the job context whose configuration is read and updated
 * @return the wrapped splits followed by the single flag-{@code true} split
 * @throws IOException
 *             if the parent input format fails to compute its splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);

    // The scale factor does not change while splits are built, so read it once instead of
    // looking it up in the configuration on every loop iteration.
    int testScale = job.getConfiguration().getInt(NNConstants.NN_TEST_SCALE, 1);

    List<InputSplit> newSplits = new ArrayList<InputSplit>();
    for(int i = 0; i < testScale; i++) {
        for(InputSplit inputSplit: splits) {
            // Cast once and reuse for both the path check and the wrapper.
            FileSplit fileSplit = (FileSplit) inputSplit;
            if(isNotPigOrHadoopMetaFile(fileSplit.getPath())) {
                // A fresh wrapper per repetition: the same file split may appear testScale times.
                newSplits.add(new GuaguaInputSplit(false, new FileSplit[] { fileSplit }));
            }
        }
    }

    // Exactly one flag-true split with no file attached is appended after all others.
    newSplits.add(new GuaguaInputSplit(true, (FileSplit) null));

    int mapperSize = newSplits.size();
    LOG.info("inputs size including master: {}", mapperSize);
    LOG.debug("input splits inclduing: {}", newSplits);

    // Everything except the one split appended above counts towards the worker number.
    job.getConfiguration().set(GuaguaConstants.GUAGUA_WORKER_NUMBER, String.valueOf(mapperSize - 1));
    return newSplits;
}
/**
 * Groups the given splits with {@code getCombineGuaguaSplits} and wraps every group into a single
 * {@link GuaguaInputSplit} created with flag {@code false}.
 *
 * <p>
 * Adapted from the Pig implementation; the logic still needs to be reviewed.
 *
 * @param newSplits
 *            the splits to combine; every element is cast to {@link FileSplit}
 * @param combineSize
 *            the size passed through to {@code getCombineGuaguaSplits}
 * @return one {@link GuaguaInputSplit} per group, in the order the groups were returned
 * @throws IOException
 *             declared by the signature
 * @throws GuaguaRuntimeException
 *             if combining is interrupted; the thread's interrupt flag is set again first
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> groups;
    try {
        groups = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers further up the stack before wrapping it.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }

    List<InputSplit> combined = new ArrayList<InputSplit>();
    for(List<InputSplit> group: groups) {
        FileSplit[] members = new FileSplit[group.size()];
        int next = 0;
        for(InputSplit member: group) {
            members[next++] = (FileSplit) member;
        }
        combined.add(new GuaguaInputSplit(false, members));
    }
    return combined;
}
/**
 * Groups the given splits with {@code getCombineGuaguaSplits} and wraps every group into a single
 * {@link GuaguaInputSplit} created with flag {@code false}.
 *
 * <p>
 * Adapted from the Pig implementation; the logic still needs to be reviewed.
 *
 * @param newSplits
 *            the splits to combine; every element is cast to {@link FileSplit}
 * @param combineSize
 *            the size passed through to {@code getCombineGuaguaSplits}
 * @return one {@link GuaguaInputSplit} per group, in the order the groups were returned
 * @throws IOException
 *             declared by the signature
 * @throws GuaguaRuntimeException
 *             if combining is interrupted; the thread's interrupt flag is set again first
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> groups;
    try {
        groups = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers further up the stack before wrapping it.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }

    List<InputSplit> combined = new ArrayList<InputSplit>();
    for(List<InputSplit> group: groups) {
        FileSplit[] members = new FileSplit[group.size()];
        int next = 0;
        for(InputSplit member: group) {
            members[next++] = (FileSplit) member;
        }
        combined.add(new GuaguaInputSplit(false, members));
    }
    return combined;
}
/**
 * Groups the given splits with {@code getCombineGuaguaSplits} and wraps every group into a single
 * {@link GuaguaInputSplit} created with flag {@code false}.
 *
 * <p>
 * Copied from the Pig implementation; the logic still needs to be reviewed.
 *
 * @param newSplits
 *            the splits to combine; every element is cast to {@link FileSplit}
 * @param combineSize
 *            the size passed through to {@code getCombineGuaguaSplits}
 * @return one {@link GuaguaInputSplit} per group, in the order the groups were returned
 * @throws IOException
 *             declared by the signature
 * @throws GuaguaRuntimeException
 *             if combining is interrupted; the thread's interrupt flag is restored before throwing
 */
public static List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> combinePigSplits;
    try {
        combinePigSplits = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt status; restore it so code further up
        // the stack can still observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }

    List<InputSplit> combinedSplits = new ArrayList<InputSplit>(combinePigSplits.size());
    for(List<InputSplit> inputSplits: combinePigSplits) {
        FileSplit[] fss = new FileSplit[inputSplits.size()];
        for(int i = 0; i < fss.length; i++) {
            fss[i] = (FileSplit) inputSplits.get(i);
        }
        combinedSplits.add(new GuaguaInputSplit(false, fss));
    }
    return combinedSplits;
}
/**
 * Groups the given splits with {@code getCombineGuaguaSplits} and wraps every group into a single
 * {@link GuaguaInputSplit} created with flag {@code false}.
 *
 * <p>
 * Copied from the Pig implementation; the logic still needs to be reviewed.
 *
 * @param newSplits
 *            the splits to combine; every element is cast to {@link FileSplit}
 * @param combineSize
 *            the size passed through to {@code getCombineGuaguaSplits}
 * @return one {@link GuaguaInputSplit} per group, in the order the groups were returned
 * @throws IOException
 *             declared by the signature
 * @throws GuaguaRuntimeException
 *             if combining is interrupted; the thread's interrupt flag is restored before throwing
 */
public static List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> combinePigSplits;
    try {
        combinePigSplits = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt status; restore it so code further up
        // the stack can still observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }

    List<InputSplit> combinedSplits = new ArrayList<InputSplit>(combinePigSplits.size());
    for(List<InputSplit> inputSplits: combinePigSplits) {
        FileSplit[] fss = new FileSplit[inputSplits.size()];
        for(int i = 0; i < fss.length; i++) {
            fss[i] = (FileSplit) inputSplits.get(i);
        }
        combinedSplits.add(new GuaguaInputSplit(false, fss));
    }
    return combinedSplits;
}
GuaguaConstants.DEFAULT_MASTER_NUMBER); for(int i = 0; i < masters; i++) { modeifiedSplits.add(new GuaguaInputSplit(true, (FileSplit) null));
GuaguaConstants.DEFAULT_MASTER_NUMBER); for(int i = 0; i < masters; i++) { modeifiedSplits.add(new GuaguaInputSplit(true, (FileSplit) null));
private List<InputSplit> createNewSplits() throws IOException { List<InputSplit> newSplits = null; boolean combinable = getConf().getBoolean(GuaguaConstants.GUAGUA_SPLIT_COMBINABLE, false); long blockSize = FileSystem.get(getConf()).getDefaultBlockSize(null); long combineSize = getConf().getLong(GuaguaConstants.GUAGUA_SPLIT_MAX_COMBINED_SPLIT_SIZE, blockSize); if(combineSize == 0) { combineSize = blockSize; } if(combinable) { List<InputSplit> splits = InputSplitUtils.getFileSplits(getConf(), combineSize); LOG.info("combine size:{}, splits:{}", combineSize, splits); newSplits = InputSplitUtils.getFinalCombineGuaguaSplits(splits, combineSize); } else { newSplits = new ArrayList<InputSplit>(); for(InputSplit inputSplit: InputSplitUtils.getFileSplits(getConf(), combineSize)) { FileSplit fs = (FileSplit) inputSplit; newSplits.add(new GuaguaInputSplit(false, new FileSplit[] { fs })); } } // add master int masters = getConf().getInt(GuaguaConstants.GUAGUA_MASTER_NUMBER, GuaguaConstants.DEFAULT_MASTER_NUMBER); for(int i = 0; i < masters; i++) { newSplits.add(new GuaguaInputSplit(true, (FileSplit) null)); } int mapperSize = newSplits.size(); LOG.info("inputs size including master: {}", mapperSize); LOG.debug("input splits: {}", newSplits); getConf().set(GuaguaConstants.GUAGUA_WORKER_NUMBER, (mapperSize - masters) + ""); return newSplits; }
private List<InputSplit> createNewSplits() throws IOException { List<InputSplit> newSplits = null; boolean combinable = getConf().getBoolean(GuaguaConstants.GUAGUA_SPLIT_COMBINABLE, false); long blockSize = FileSystem.get(getConf()).getDefaultBlockSize(null); long combineSize = getConf().getLong(GuaguaConstants.GUAGUA_SPLIT_MAX_COMBINED_SPLIT_SIZE, blockSize); if(combineSize == 0) { combineSize = blockSize; } if(combinable) { List<InputSplit> splits = InputSplitUtils.getFileSplits(getConf(), combineSize); LOG.info("combine size:{}, splits:{}", combineSize, splits); newSplits = InputSplitUtils.getFinalCombineGuaguaSplits(splits, combineSize); } else { newSplits = new ArrayList<InputSplit>(); for(InputSplit inputSplit: InputSplitUtils.getFileSplits(getConf(), combineSize)) { FileSplit fs = (FileSplit) inputSplit; newSplits.add(new GuaguaInputSplit(false, new FileSplit[] { fs })); } } // add master int masters = getConf().getInt(GuaguaConstants.GUAGUA_MASTER_NUMBER, GuaguaConstants.DEFAULT_MASTER_NUMBER); for(int i = 0; i < masters; i++) { newSplits.add(new GuaguaInputSplit(true, (FileSplit) null)); } int mapperSize = newSplits.size(); LOG.info("inputs size including master: {}", mapperSize); LOG.debug("input splits: {}", newSplits); getConf().set(GuaguaConstants.GUAGUA_WORKER_NUMBER, (mapperSize - masters) + ""); return newSplits; }
while(((double) bytesRemaining) / splitSize > GuaguaYarnConstants.SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()) })); bytesRemaining -= splitSize; splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()) })); splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, 0, length, blkLocations[0] .getHosts()) })); } else { splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, 0, length, new String[0]) }));
while(((double) bytesRemaining) / splitSize > GuaguaYarnConstants.SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()) })); bytesRemaining -= splitSize; splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()) })); splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, 0, length, blkLocations[0] .getHosts()) })); } else { splits.add(new GuaguaInputSplit(false, new FileSplit[] { new FileSplit(path, 0, length, new String[0]) }));
while(((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()))); bytesRemaining -= splitSize; splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()))); splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, blkLocations[0].getHosts()))); } else { splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, new String[0])));
while(((double) bytesRemaining) / splitSize > GuaguaMapReduceConstants.SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts()))); bytesRemaining -= splitSize; splits.add(new GuaguaInputSplit(false, new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts()))); splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, blkLocations[0].getHosts()))); } else { splits.add(new GuaguaInputSplit(false, new FileSplit(path, 0, length, new String[0])));