@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;

    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();

    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    if (fSplit.getStart() > in.getPosition()) {
        in.sync(fSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;

    key = new LongWritable();
    value = new BytesRefArrayWritable();
}
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in, @Nullable String[] hosts)
    throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
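A minimal round-trip sketch of how readFileBlock above might be exercised, assuming the standard Writable serialization of org.apache.hadoop.mapreduce.lib.input.FileSplit; the path, length, and host values are hypothetical:

// Illustrative only: serialize a FileSplit and rebuild it as a HadoopFileBlock.
// Path, length, and host values are hypothetical.
FileSplit original = new FileSplit(new Path("hdfs:///data/part-00000"), 0L, 1024L, new String[] {"host1"});
ByteArrayOutputStream buf = new ByteArrayOutputStream();
original.write(new DataOutputStream(buf));

HadoopFileBlock block = readFileBlock(FileSplit.class.getName(),
    new DataInputStream(new ByteArrayInputStream(buf.toByteArray())), null);
// block now carries the split's URI, start offset, and length; hosts fall back to EMPTY_HOSTS.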
FileSplit s = (FileSplit) nativeSplit;

res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);
    DatumReader<T> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for. We will read the first block that begins
    // after the input split start boundary. We will read up to but not including the
    // first block that starts after the input split end boundary.

    // Sync to the closest block/record boundary just after the beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using this for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}
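A hedged sketch of how the start/end boundaries computed above are typically consumed when advancing the reader; field names follow the snippet, mCurrentRecord is an assumed reuse field, and pastSync is the Avro DataFileReader call for detecting that the reader has crossed the split's end boundary:

// Illustrative only: the reading side of the boundary contract set up in initialize().
// mCurrentRecord is a hypothetical reuse field; mAvroFileReader and mEndPosition are from above.
public boolean nextKeyValue() throws IOException, InterruptedException {
    // Stop once the reader has synced past the split end; that block belongs to the next split.
    if (!mAvroFileReader.hasNext() || mAvroFileReader.pastSync(mEndPosition)) {
        return false;
    }
    mCurrentRecord = mAvroFileReader.next(mCurrentRecord);
    return true;
}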
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
    return new OrcRecordReader(OrcFile.createReader(path, OrcFile.readerOptions(conf)),
        conf, fileSplit.getStart(), fileSplit.getLength());
}
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(
            ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.gryoReader = GryoReader.build().mapper(GryoMapper.build()
        .addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration)))
        .create()).create();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0)
        this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
/**
 * Initialize ORC file reader and batch record reader.
 * Please note that `initBatch` needs to be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
        fileSplit.getPath(),
        OrcFile.readerOptions(conf)
            .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
            .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options =
        OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
public void initialize(InputSplit rawSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) rawSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    is = new ZipInputStream(fs.open(file));
}
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Path p = ((FileSplit) split).getPath();
    FileSystem fs = p.getFileSystem(context.getConfiguration());
    in = fs.open(p);
    long start = ((FileSplit) split).getStart();
    // find the offset to start at a record boundary
    offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
    in.seek(start + offset);
    length = ((FileSplit) split).getLength();
}
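A small worked example of the record-boundary offset arithmetic above, using a hypothetical RECORD_LENGTH of 100 bytes:

// Illustrative only: worked example of the record-alignment formula; RECORD_LENGTH assumed to be 100.
long recordLength = 100L;
long start = 250L;                                                     // split starts mid-record
long offset = (recordLength - (start % recordLength)) % recordLength;  // = 50
// The reader seeks to 250 + 50 = 300, the first record boundary at or after the split start;
// an already aligned start (e.g. 200) yields offset 0, so no bytes are skipped.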
public XmlRecordReader(FileSplit split, Configuration conf) throws IOException {
    startTag = conf.get(START_TAG_KEY).getBytes("UTF-8");
    endTag = conf.get(END_TAG_KEY).getBytes("UTF-8");

    // open the file and seek to the start of the split
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    fsin.seek(start);
}
private FileSplit find(List<FileSplit> splits, long start) {
    for (FileSplit split : splits) {
        if (split.getStart() == start) {
            return split;
        }
    }
    throw new AssertionError(start);
}
@Override
public float getProgress() throws IOException, InterruptedException {
    if (finished) {
        return 1f;
    }
    if (inRaw != null) {
        return (float) (inRaw.getPos() - fileSplit.getStart()) / fileSplit.getLength();
    }
    return 0f;
}
private void updateJobWithSplit(final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
    if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
            (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
        job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
        job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
        // Despite its name, JobContext.MAP_INPUT_PATH is the key under which MRJobConfig records
        // the map input *length*, so passing the split length here is intentional.
        job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
    }
    LOG.info("Processing mapreduce split: " + inputSplit);
}
/**
 * Builds a {@code ParquetInputSplit} from a mapreduce {@link FileSplit}.
 *
 * @param split a mapreduce FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if the split's locations cannot be read
 */
static ParquetInputSplit from(FileSplit split) throws IOException {
    return new ParquetInputSplit(split.getPath(),
        split.getStart(), split.getStart() + split.getLength(), split.getLength(),
        split.getLocations(), null);
}
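A hedged usage sketch of the conversion above; the path, length, and host names are hypothetical, and the final null argument leaves the row-group offsets unset:

// Illustrative only: converting a plain mapreduce FileSplit via the factory above.
// Path, length, and hosts are hypothetical values.
FileSplit fileSplit = new FileSplit(new Path("/data/example.parquet"), 0L, 134217728L,
    new String[] {"node1", "node2"});
ParquetInputSplit parquetSplit = from(fileSplit);
// Note the argument mapping: the target constructor takes (path, start, end, length, hosts, rowGroupOffsets),
// so end is computed as start + length and rowGroupOffsets is passed as null.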
public static CarbonInputSplit from(String segmentId, String blockletId, FileSplit split,
        ColumnarFormatVersion version, String dataMapWritePath) throws IOException {
    return new CarbonInputSplit(segmentId, blockletId, split.getPath(), split.getStart(),
        split.getLength(), split.getLocations(), version, null, dataMapWritePath);
}