/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is
 * larger than SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  // Note: this is a lexicographic string compare, so versions like "2.10.0" sort
  // before "2.3.0" and would be (harmlessly) cleaned even though they carry the fix.
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // The issue was fixed in Hadoop 2.3.0; on newer versions there is no need to clean up splits.
    return splits;
  }

  List<InputSplit> cleanedSplits = Lists.newArrayList();
  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");
    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
        oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
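A minimal, self-contained sketch of the truncation step above. SPLIT_MAX_NUM_LOCATIONS is a project-defined constant, not something Hadoop itself exports; the value 3 here is assumed purely for illustration.

import java.util.Arrays;

public class LocationTruncationSketch {
  private static final int SPLIT_MAX_NUM_LOCATIONS = 3; // assumed cap, for illustration only

  public static String[] truncate(String[] locations) {
    // Arrays.copyOf keeps the first N entries and drops the rest, which is all
    // the fix needs: the location list is only a locality hint for the scheduler.
    return locations.length > SPLIT_MAX_NUM_LOCATIONS
        ? Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS)
        : locations;
  }

  public static void main(String[] args) {
    String[] hosts = {"h1", "h2", "h3", "h4", "h5"};
    System.out.println(Arrays.toString(truncate(hosts))); // prints [h1, h2, h3]
  }
}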
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
  this.context = context;
  this.paths = split.getPaths();
  this.sizes = split.getLengths();
  this.offsets = split.getStartOffsets();
  this.size = split.getLength();
  Configuration conf = context.getConfiguration();
  this.skipInvalid = conf.getBoolean(SKIP_INVALID_PROPERTY, false);
  this.verifyDataTypeValues = conf.getBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, false);
  this.overrideRdfContext = conf.getBoolean(OVERRIDE_CONTEXT_PROPERTY, false);
  this.defaultRdfContextPattern = conf.get(DEFAULT_CONTEXT_PROPERTY);
  this.maxSize = MAX_SINGLE_FILE_MULTIPLIER
      * conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
}
/** Returns an array containing the start offsets of the files in the split. */
public long[] getStartOffsets() {
  return isMapRedSet() ? mapredSplit.getStartOffsets() : mapreduceSplit.getStartOffsets();
}
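The accessor above implies a wrapper that holds either an old-API (mapred) or new-API (mapreduce) split and dispatches per call. A sketch of that pattern follows; the class and field names are assumptions, not taken from the original source.

public class CombineSplitAdapter {
  // Exactly one of these is non-null; fully qualified names avoid the
  // collision between the two CombineFileSplit classes.
  private final org.apache.hadoop.mapred.lib.CombineFileSplit mapredSplit;
  private final org.apache.hadoop.mapreduce.lib.input.CombineFileSplit mapreduceSplit;

  public CombineSplitAdapter(org.apache.hadoop.mapred.lib.CombineFileSplit split) {
    this.mapredSplit = split;
    this.mapreduceSplit = null;
  }

  public CombineSplitAdapter(org.apache.hadoop.mapreduce.lib.input.CombineFileSplit split) {
    this.mapredSplit = null;
    this.mapreduceSplit = split;
  }

  private boolean isMapRedSet() {
    return mapredSplit != null;
  }

  /** Dispatches to whichever API variant is present. */
  public long[] getStartOffsets() {
    return isMapRedSet() ? mapredSplit.getStartOffsets() : mapreduceSplit.getStartOffsets();
  }
}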
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Delegate split computation to the new-API CombineFileInputFormat.
  // Note: new Job(Configuration) is deprecated in favor of Job.getInstance
  // (see the variant below).
  List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits = super.getSplits(new Job(job));
  InputSplit[] ret = new InputSplit[newStyleSplits.size()];
  for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
    org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit =
        (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
    ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(),
        newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(),
        newStyleSplit.getLocations());
  }
  return ret;
}
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits =
      super.getSplits(Job.getInstance(job));
  InputSplit[] ret = new InputSplit[newStyleSplits.size()];
  for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
    org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit =
        (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
    ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(),
        newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(),
        newStyleSplit.getLocations());
  }
  return ret;
}
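A hedged usage sketch for the old-to-new-API bridge above. OldApiCombineFormat is hypothetical; it stands in for whichever concrete format declares the getSplits(JobConf, int) shown here.

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.CombineFileSplit;

public class SplitDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(job, args[0]);

    OldApiCombineFormat format = new OldApiCombineFormat(); // hypothetical subclass
    InputSplit[] splits = format.getSplits(job, 1); // the numSplits hint is ignored above

    for (InputSplit split : splits) {
      CombineFileSplit cfs = (CombineFileSplit) split;
      // Paths, start offsets, and lengths are parallel arrays of equal length.
      for (int i = 0; i < cfs.getNumPaths(); i++) {
        System.out.printf("%s @ %d (+%d)%n",
            cfs.getPath(i), cfs.getStartOffsets()[i], cfs.getLengths()[i]);
      }
    }
  }
}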
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);
  instantiateGfxdLoner(conf);
}
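HDFSSplitIterator is product-specific, but what such an iterator does over a CombineFileSplit's member files can be sketched with stock HDFS APIs, under the assumption that it opens each path, seeks to its start offset, and reads only that slice.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SplitSliceReader {
  public static void readSlices(FileSystem fs, Path[] paths, long[] starts, long[] lens)
      throws IOException {
    for (int i = 0; i < paths.length; i++) {
      try (FSDataInputStream in = fs.open(paths[i])) {
        in.seek(starts[i]); // jump to this file's portion of the combined split
        byte[] buf = new byte[(int) Math.min(lens[i], 8192)];
        int read = in.read(buf); // a real reader would loop until lens[i] bytes are consumed
        System.out.printf("%s: read %d of %d bytes%n", paths[i], read, lens[i]);
      }
    }
  }
}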
/** Copy constructor. */
public CombineFileSplit(CombineFileSplit old) throws IOException {
  this(old.getPaths(), old.getStartOffsets(), old.getLengths(), old.getLocations());
}
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf) throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
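FileQueue consumes the split's parallel arrays (paths, start offsets, lengths) in lockstep. A small sketch of building such a split with the stock new-API constructor, using illustrative values:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class SplitBuilderSketch {
  public static CombineFileSplit exampleSplit() {
    // Each index i describes one slice: paths[i], starting at starts[i],
    // running lengths[i] bytes; hosts is only a locality hint.
    Path[] paths = { new Path("/data/a.txt"), new Path("/data/b.txt") };
    long[] starts = { 0L, 128L };
    long[] lengths = { 1024L, 512L };
    String[] hosts = { "host1", "host2" };
    return new CombineFileSplit(paths, starts, lengths, hosts);
  }
}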