@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();

    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    // if the split does not begin at a sync point, skip ahead to the first
    // sync marker inside the split so reading starts on a record boundary
    if (fSplit.getStart() > in.getPosition()) {
        in.sync(fSplit.getStart());
    }
    this.start = in.getPosition();
    more = start < end;

    key = new LongWritable();
    value = new BytesRefArrayWritable();
}
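// A hedged companion sketch for the initialize() above: how nextKeyValue()
// typically drives RCFile.Reader (modeled on Hive's RCFileRecordReader; the
// `in`, `end`, `more`, `key`, and `value` fields are the ones set up above).
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!more) {
        return false;
    }
    more = in.next(key);                      // advance to the next row
    if (more) {
        if (in.lastSeenSyncPos() >= end) {
            more = false;                     // crossed into the next split; stop here
        } else {
            in.getCurrentRow(value);          // materialize the current row's columns
        }
    }
    return more;
}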
/**
 * Returns a split for each store-file directory, using the block location
 * of each file as a locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);

    Text key = new Text();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
            while ((n = reader.readLine(key)) > 0) {
                String[] hosts = getStoreDirHosts(fs, path);
                splits.add(new FileSplit(path, pos, n, hosts));
                pos += n;
            }
        } finally {
            reader.close();
        }
    }
    return splits;
}
// Convert the native Hadoop split into its file-block representation.
FileSplit s = (FileSplit) nativeSplit;

res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in,
        @Nullable String[] hosts) throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
    // Set up context
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();
    long offset = 0;
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            records.add(reader.getCurrentValue().toString());
        }
        reader.close();
        offset += splitSize;
    }
    return records;
}
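// Hypothetical usage of readRecords(): read the same file with two different
// split sizes and check that records are neither lost nor duplicated at split
// boundaries. The resource name "/testdata/lines.txt" is an assumption for
// illustration.
URL url = getClass().getResource("/testdata/lines.txt");
ArrayList<String> oneByteSplits = readRecords(url, 1);
ArrayList<String> largeSplits = readRecords(url, 16 * 1024 * 1024);
assertEquals(largeSplits, oneByteSplits);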
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    allowEmptyMeta = conf.getBoolean(CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, false);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());

    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStream(inSplit);
}
Configuration conf = new Configuration();
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
expect(context.getConfiguration()).andReturn(conf).anyTimes();
private void qualityConfigTest() throws IOException {
    writeToTempQseq(sangerQseq);
    split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null);

    QseqRecordReader reader = new QseqRecordReader(conf, split);
    assertTrue(reader.next(key, fragment));
    assertEquals("###########################################################################################",
            fragment.getQuality().toString());
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    try {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(inputPath));
        // ... (truncated in the snippet: the input format computes the file
        // splits, and the code below runs once per FileSplit) ...
        WorkUnit workUnit = WorkUnit.create(extract);
        // persist the split in the work unit so the extractor can rebuild it
        workUnit.setProp(FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
        workUnit.setProp(FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
        workUnits.add(workUnit);
        return workUnits;
    } catch (IOException ioe) {
        // hypothetical close for the truncated snippet
        throw new RuntimeException(ioe);
    }
}
@Override
public void initialize(InputSplit is, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) is;
    // use the job configuration rather than a fresh Configuration so job
    // settings (e.g. the default file system) are honored
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);

    key = new LogFileKey();
    value = new LogFileValue();

    fsdis = fs.open(fileSplit.getPath());
    FileStatus status = fs.getFileStatus(fileSplit.getPath());
    length = status.getLen();
}
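// A hedged sketch of the matching nextKeyValue(), assuming LogFileKey and
// LogFileValue are Writable (as in Accumulo) and the file is a plain sequence
// of serialized key/value pairs; using getPos() against the `length` computed
// above for EOF detection is an assumption for illustration.
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (fsdis.getPos() >= length) {
        return false;            // consumed the whole file
    }
    key.readFields(fsdis);       // deserialize the next key
    value.readFields(fsdis);     // ... and its value
    return true;
}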
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
            Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // the snippet dropped the reader setup and nextKeyValue() calls for the
    // first split; restored here so getCurrentValue() holds the second record
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);
    assertTrue(reader.nextKeyValue());   // "1234567890"
    assertTrue(reader.nextKeyValue());   // "12"
    Text value = reader.getCurrentValue();
    assertEquals(2, value.getLength());

    // the second split begins inside the "\r\n" before "345"
    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    assertTrue(reader.nextKeyValue());   // "345"
    value = reader.getCurrentValue();
    assertEquals(3, value.getLength());

    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // ... (remainder of the test truncated in the snippet)
}
// NOTE: the original snippet is truncated mid-signature; the method name and
// leading parameters below are hypothetical reconstructions for illustration.
private static List<FileSplit> getSplitsForFastaFile(FileStatus status, Configuration conf,
        int numberOfLinesPerSplit) throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    FileSystem fs = fileName.getFileSystem(conf);
    LineReader lr = null;
    try {
        FSDataInputStream in = fs.open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        long begin = 0;
        long length = 0;
        long recordLength = 0;
        int num;
        int recordsRead = 0;
        while ((num = lr.readLine(line)) > 0) {
            // a '>' marks the start of a new FASTA-style record: close the
            // split for the previous record before starting the next one
            if (line.toString().indexOf(">") >= 0) {
                recordsRead++;
                if (recordLength > 0) {
                    splits.add(new FileSplit(fileName, begin, recordLength, new String[]{}));
                    begin = length;
                    recordLength = 0;
                }
            }
            length += num;
            recordLength += num;
        }
        // emit the split for the final record
        splits.add(new FileSplit(fileName, begin, recordLength, new String[]{}));
    } finally {
        if (lr != null) {
            lr.close();
        }
    }
    return splits;
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());

    boolean skipFirstLine = false;
    if (codec != null) {
        // compressed input cannot be split: decode the whole stream
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            // not the first split: back up one byte and skip the partial
            // first line, which belongs to the previous split
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        start += in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
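// A hedged sketch of the matching nextKeyValue(), following the classic
// line-record-reader pattern; `pos`, `key`, and `value` are assumed fields.
public boolean nextKeyValue() throws IOException {
    if (key == null) {
        key = new LongWritable();
    }
    key.set(pos);
    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    while (pos < end) {
        newSize = in.readLine(value, maxLineLength,
                (int) Math.min((long) Integer.MAX_VALUE, end - pos));
        if (newSize == 0) {
            break;                       // end of stream
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            break;                       // got a complete line
        }
        // line longer than maxLineLength: keep consuming until it ends
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    }
    return true;
}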
@Override
protected void doSetup(Context context) throws IOException {
    tmpBuf = ByteBuffer.allocate(4096);

    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
    List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();

    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    String colName = fileSplit.getPath().getParent().getName();

    for (int i = 0; i < uhcColumns.size(); i++) {
        if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
            index = i;
            break;
        }
    }
    type = uhcColumns.get(index).getType();

    // for debugging
    logger.info("column name: " + colName);
    logger.info("index: " + index);
    logger.info("type: " + type);
}
public XmlRecordReader(FileSplit split, Configuration conf) throws IOException {
    end_tag = ENDING_TAG.getBytes(Charsets.UTF_8);
    start_tag = STARTING_TAG.getBytes(Charsets.UTF_8);

    // record the byte range covered by this split
    start = split.getStart();
    end = start + split.getLength();

    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FileStatus fStatus = fs.getFileStatus(file);
    blocks = fs.getFileBlockLocations(fStatus, 0, fStatus.getLen());

    // open the file and seek to the start of the split
    fsin = fs.open(split.getPath());
    fsin.seek(start);
}
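// The core of this record-reader style is usually a readUntilMatch() helper
// (as popularized by Mahout's XmlInputFormat): scan the stream byte by byte
// until the given tag is matched. A hedged sketch, assuming a
// DataOutputBuffer field named `buffer` that accumulates record bytes:
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
    int i = 0;
    while (true) {
        int b = fsin.read();
        if (b == -1) {
            return false;                    // end of file: no match
        }
        if (withinBlock) {
            buffer.write(b);                 // keep bytes that belong to the record
        }
        if (b == match[i]) {
            i++;
            if (i >= match.length) {
                return true;                 // matched the whole tag
            }
        } else {
            i = 0;                           // mismatch: restart tag matching
        }
        // outside a record and past the split end: stop scanning
        if (!withinBlock && i == 0 && fsin.getPos() >= end) {
            return false;
        }
    }
}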
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Path filePath = fileSplit.getPath();
    FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
    shpInputStream = fileSys.open(filePath);
    // hand the input stream to the parser and read the file header to initialize it
    parser = new ShpFileParser(shpInputStream);
    parser.parseShapeFileHead();
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    reader = new Reader(fileIn, fs.getFileStatus(file).getLen(), job);
    scanner = reader.createScannerByByteRange(start, split.getLength());
}
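// A hedged sketch of consuming the byte-range scanner created above via the
// TFile Scanner API; the key/value handling is an assumption for illustration.
while (!scanner.atEnd()) {
    Reader.Scanner.Entry entry = scanner.entry();
    BytesWritable k = new BytesWritable();
    BytesWritable v = new BytesWritable();
    entry.get(k, v);     // copy the current key/value pair out of the TFile
    // ... process k and v ...
    scanner.advance();   // move to the next entry in the byte range
}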
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context)
        throws IOException {
    initializeEvent(context.getConfiguration());
    if (genericSplit instanceof FileSplit) {
        final Path p = ((FileSplit) genericSplit).getPath();
        final FileSystem sys = p.getFileSystem(context.getConfiguration());
        rawFileName = p.toString();
        rawFileTimeStamp = sys.getFileStatus(p).getModificationTime();
    }
}
@Override
public void initialize(InputSplit split, TaskAttemptContext task) throws IOException {
    this.fsplit = (FileSplit) split;
    this.ratio = task.getConfiguration().getFloat("ratio", 0.01f);
    this.conf = task.getConfiguration();
    this.seed = conf.getLong("seed", System.currentTimeMillis());
    this.lindex.setup(conf);

    FileSystem fs = fsplit.getPath().getFileSystem(conf);
    this.in = fs.open(fsplit.getPath());
    this.lindexEnd = fsplit.getStart() + fsplit.getLength();
    moveToNextLocalIndex();
}
@Override
public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
    List<InputSplit> res = new ArrayList<>(BLOCK_CNT);

    for (int i = 0; i < BLOCK_CNT; i++) {
        try {
            res.add(new FileSplit(new Path(new URI("someFile")), i, i + 1, new String[] {"localhost"}));
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
    }

    return res;
}