/**
 * Set the number of locations in the split to SPLIT_MAX_NUM_LOCATIONS if it is
 * larger than SPLIT_MAX_NUM_LOCATIONS (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  // Note: this is a lexicographic string compare, so versions like "2.10.0" sort
  // before "2.3.0" and would be (harmlessly) cleaned even though they carry the fix.
  if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
    // The issue was fixed in Hadoop 2.3.0; on newer versions there is no need to clean up splits.
    return splits;
  }

  List<InputSplit> cleanedSplits = Lists.newArrayList();
  for (int i = 0; i < splits.size(); i++) {
    CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
    String[] locations = oldSplit.getLocations();

    Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");
    if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
      locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
    }

    cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
        oldSplit.getLengths(), locations));
  }
  return cleanedSplits;
}
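A minimal, self-contained sketch of the truncation step above. SPLIT_MAX_NUM_LOCATIONS is a project-defined constant, not something Hadoop itself exports; the value 3 here is assumed purely for illustration.

import java.util.Arrays;

public class LocationTruncationSketch {
  private static final int SPLIT_MAX_NUM_LOCATIONS = 3; // assumed cap, for illustration only

  public static String[] truncate(String[] locations) {
    // Arrays.copyOf keeps the first N entries and drops the rest, which is all
    // the fix needs: the location list is only a locality hint for the scheduler.
    return locations.length > SPLIT_MAX_NUM_LOCATIONS
        ? Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS)
        : locations;
  }

  public static void main(String[] args) {
    String[] hosts = {"h1", "h2", "h3", "h4", "h5"};
    System.out.println(Arrays.toString(truncate(hosts))); // prints [h1, h2, h3]
  }
}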
public ParserPump(CombineFileSplit split, TaskAttemptContext context) {
  this.context = context;
  this.paths = split.getPaths();
  this.sizes = split.getLengths();
  this.offsets = split.getStartOffsets();
  this.size = split.getLength();
  Configuration conf = context.getConfiguration();
  this.skipInvalid = conf.getBoolean(SKIP_INVALID_PROPERTY, false);
  this.verifyDataTypeValues = conf.getBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, false);
  this.overrideRdfContext = conf.getBoolean(OVERRIDE_CONTEXT_PROPERTY, false);
  this.defaultRdfContextPattern = conf.get(DEFAULT_CONTEXT_PROPERTY);
  this.maxSize = MAX_SINGLE_FILE_MULTIPLIER
      * conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);
}
/** Returns an array containing the start offsets of the files in the split. */
public long[] getStartOffsets() {
  return isMapRedSet() ? mapredSplit.getStartOffsets() : mapreduceSplit.getStartOffsets();
}
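The accessor above implies a wrapper that holds either an old-API (mapred) or new-API (mapreduce) split and dispatches per call. A sketch of that pattern follows; the class and field names are assumptions, not taken from the original source.

public class CombineSplitAdapter {
  // Exactly one of these is non-null; fully qualified names avoid the
  // collision between the two CombineFileSplit classes.
  private final org.apache.hadoop.mapred.lib.CombineFileSplit mapredSplit;
  private final org.apache.hadoop.mapreduce.lib.input.CombineFileSplit mapreduceSplit;

  public CombineSplitAdapter(org.apache.hadoop.mapred.lib.CombineFileSplit split) {
    this.mapredSplit = split;
    this.mapreduceSplit = null;
  }

  public CombineSplitAdapter(org.apache.hadoop.mapreduce.lib.input.CombineFileSplit split) {
    this.mapredSplit = null;
    this.mapreduceSplit = split;
  }

  private boolean isMapRedSet() {
    return mapredSplit != null;
  }

  /** Dispatches to whichever API variant is present. */
  public long[] getStartOffsets() {
    return isMapRedSet() ? mapredSplit.getStartOffsets() : mapreduceSplit.getStartOffsets();
  }
}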
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Delegate split computation to the new-API CombineFileInputFormat.
  // Note: new Job(Configuration) is deprecated in favor of Job.getInstance
  // (see the variant below).
  List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits = super.getSplits(new Job(job));
  InputSplit[] ret = new InputSplit[newStyleSplits.size()];
  for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
    org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit =
        (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
    ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(),
        newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(),
        newStyleSplit.getLocations());
  }
  return ret;
}
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits =
      super.getSplits(Job.getInstance(job));
  InputSplit[] ret = new InputSplit[newStyleSplits.size()];
  for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
    org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit =
        (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
    ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(),
        newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(),
        newStyleSplit.getLocations());
  }
  return ret;
}
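A hedged usage sketch for the old-to-new-API bridge above. OldApiCombineFormat is hypothetical; it stands in for whichever concrete format declares the getSplits(JobConf, int) shown here.

import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.CombineFileSplit;

public class SplitDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(job, args[0]);

    OldApiCombineFormat format = new OldApiCombineFormat(); // hypothetical subclass
    InputSplit[] splits = format.getSplits(job, 1); // the numSplits hint is ignored above

    for (InputSplit split : splits) {
      CombineFileSplit cfs = (CombineFileSplit) split;
      // Paths, start offsets, and lengths are parallel arrays of equal length.
      for (int i = 0; i < cfs.getNumPaths(); i++) {
        System.out.printf("%s @ %d (+%d)%n",
            cfs.getPath(i), cfs.getStartOffsets()[i], cfs.getLengths()[i]);
      }
    }
  }
}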
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  Configuration conf = context.getConfiguration();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, 0L, 0L);
}
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  CombineFileSplit cSplit = (CombineFileSplit) split;
  Path[] path = cSplit.getPaths();
  long[] start = cSplit.getStartOffsets();
  long[] len = cSplit.getLengths();
  FileSystem fs = cSplit.getPath(0).getFileSystem(conf);
  long startTS = conf.getLong(RowInputFormat.START_TIME_MILLIS, 0L);
  long endTS = conf.getLong(RowInputFormat.END_TIME_MILLIS, 0L);
  this.splitIterator = HDFSSplitIterator.newInstance(fs, path, start, len, startTS, endTS);
  instantiateGfxdLoner(conf);
}
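HDFSSplitIterator is product-specific, but what such an iterator does over a CombineFileSplit's member files can be sketched with stock HDFS APIs, under the assumption that it opens each path, seeks to its start offset, and reads only that slice.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SplitSliceReader {
  public static void readSlices(FileSystem fs, Path[] paths, long[] starts, long[] lens)
      throws IOException {
    for (int i = 0; i < paths.length; i++) {
      try (FSDataInputStream in = fs.open(paths[i])) {
        in.seek(starts[i]); // jump to this file's portion of the combined split
        byte[] buf = new byte[(int) Math.min(lens[i], 8192)];
        int read = in.read(buf); // a real reader would loop until lens[i] bytes are consumed
        System.out.printf("%s: read %d of %d bytes%n", paths[i], read, lens[i]);
      }
    }
  }
}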
/** Copy constructor. */
public CombineFileSplit(CombineFileSplit old) throws IOException {
  this(old.getPaths(), old.getStartOffsets(), old.getLengths(), old.getLocations());
}
/**
 * @param split Description of input sources.
 * @param conf Used to resolve FileSystem instances.
 */
public FileQueue(CombineFileSplit split, Configuration conf) throws IOException {
  this.conf = conf;
  paths = split.getPaths();
  startoffset = split.getStartOffsets();
  lengths = split.getLengths();
  nextSource();
}
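FileQueue consumes the split's parallel arrays (paths, start offsets, lengths) in lockstep. A small sketch of building such a split with the stock new-API constructor, using illustrative values:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class SplitBuilderSketch {
  public static CombineFileSplit exampleSplit() {
    // Each index i describes one slice: paths[i], starting at starts[i],
    // running lengths[i] bytes; hosts is only a locality hint.
    Path[] paths = { new Path("/data/a.txt"), new Path("/data/b.txt") };
    long[] starts = { 0L, 128L };
    long[] lengths = { 1024L, 512L };
    String[] hosts = { "host1", "host2" };
    return new CombineFileSplit(paths, starts, lengths, hosts);
  }
}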