@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;

    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();

    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    if (fSplit.getStart() > in.getPosition()) {
        in.sync(fSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;

    key = new LongWritable();
    value = new BytesRefArrayWritable();
}
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in, @Nullable String[] hosts)
    throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
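A minimal round-trip sketch of how readFileBlock above might be exercised, assuming the standard Writable serialization of org.apache.hadoop.mapreduce.lib.input.FileSplit; the path, length, and host values are hypothetical:

// Illustrative only: serialize a FileSplit and rebuild it as a HadoopFileBlock.
// Path, length, and host values are hypothetical.
FileSplit original = new FileSplit(new Path("hdfs:///data/part-00000"), 0L, 1024L, new String[] {"host1"});
ByteArrayOutputStream buf = new ByteArrayOutputStream();
original.write(new DataOutputStream(buf));

HadoopFileBlock block = readFileBlock(FileSplit.class.getName(),
    new DataInputStream(new ByteArrayInputStream(buf.toByteArray())), null);
// block now carries the split's URI, start offset, and length; hosts fall back to EMPTY_HOSTS.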
FileSplit s = (FileSplit) nativeSplit;

res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);
    DatumReader<T> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for. We will read the first block that begins
    // after the input split start boundary. We will read up to but not including the
    // first block that starts after the input split end boundary.

    // Sync to the closest block/record boundary just after the beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using this for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}
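A hedged sketch of how the start/end boundaries computed above are typically consumed when advancing the reader; field names follow the snippet, mCurrentRecord is an assumed reuse field, and pastSync is the Avro DataFileReader call for detecting that the reader has crossed the split's end boundary:

// Illustrative only: the reading side of the boundary contract set up in initialize().
// mCurrentRecord is a hypothetical reuse field; mAvroFileReader and mEndPosition are from above.
public boolean nextKeyValue() throws IOException, InterruptedException {
    // Stop once the reader has synced past the split end; that block belongs to the next split.
    if (!mAvroFileReader.hasNext() || mAvroFileReader.pastSync(mEndPosition)) {
        return false;
    }
    mCurrentRecord = mAvroFileReader.next(mCurrentRecord);
    return true;
}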
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(InputSplit inputSplit,
        TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
    return new OrcRecordReader(OrcFile.createReader(path, OrcFile.readerOptions(conf)),
        conf, fileSplit.getStart(), fileSplit.getLength());
}
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(
            ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.gryoReader = GryoReader.build().mapper(GryoMapper.build()
        .addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration)))
        .create()).create();
    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }
    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0)
        this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
/**
 * Initialize ORC file reader and batch record reader.
 * Please note that `initBatch` needs to be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
        fileSplit.getPath(),
        OrcFile.readerOptions(conf)
            .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
            .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options =
        OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
public void initialize(InputSplit rawSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) rawSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    is = new ZipInputStream(fs.open(file));
}
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Path p = ((FileSplit) split).getPath();
    FileSystem fs = p.getFileSystem(context.getConfiguration());
    in = fs.open(p);
    long start = ((FileSplit) split).getStart();
    // find the offset to start at a record boundary
    offset = (RECORD_LENGTH - (start % RECORD_LENGTH)) % RECORD_LENGTH;
    in.seek(start + offset);
    length = ((FileSplit) split).getLength();
}
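A small worked example of the record-boundary offset arithmetic above, using a hypothetical RECORD_LENGTH of 100 bytes:

// Illustrative only: worked example of the record-alignment formula; RECORD_LENGTH assumed to be 100.
long recordLength = 100L;
long start = 250L;                                                     // split starts mid-record
long offset = (recordLength - (start % recordLength)) % recordLength;  // = 50
// The reader seeks to 250 + 50 = 300, the first record boundary at or after the split start;
// an already aligned start (e.g. 200) yields offset 0, so no bytes are skipped.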
public XmlRecordReader(FileSplit split, Configuration conf) throws IOException {
    startTag = conf.get(START_TAG_KEY).getBytes("UTF-8");
    endTag = conf.get(END_TAG_KEY).getBytes("UTF-8");

    // open the file and seek to the start of the split
    start = split.getStart();
    end = start + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    fsin = fs.open(split.getPath());
    fsin.seek(start);
}
private FileSplit find(List<FileSplit> splits, long start) {
    for (FileSplit split : splits) {
        if (split.getStart() == start) {
            return split;
        }
    }
    throw new AssertionError(start);
}
@Override
public float getProgress() throws IOException, InterruptedException {
    if (finished) {
        return 1f;
    }
    if (inRaw != null) {
        return (float) (inRaw.getPos() - fileSplit.getStart()) / fileSplit.getLength();
    }
    return 0f;
}
private void updateJobWithSplit(final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
    if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
            (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
        job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
        job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
        // Despite its name, JobContext.MAP_INPUT_PATH is the key under which MRJobConfig records
        // the map input *length*, so passing the split length here is intentional.
        job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
    }
    LOG.info("Processing mapreduce split: " + inputSplit);
}
/**
 * Builds a {@code ParquetInputSplit} from a mapreduce {@link FileSplit}.
 *
 * @param split a mapreduce FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if the split's locations cannot be read
 */
static ParquetInputSplit from(FileSplit split) throws IOException {
    return new ParquetInputSplit(split.getPath(),
        split.getStart(), split.getStart() + split.getLength(), split.getLength(),
        split.getLocations(), null);
}
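A hedged usage sketch of the conversion above; the path, length, and host names are hypothetical, and the final null argument leaves the row-group offsets unset:

// Illustrative only: converting a plain mapreduce FileSplit via the factory above.
// Path, length, and hosts are hypothetical values.
FileSplit fileSplit = new FileSplit(new Path("/data/example.parquet"), 0L, 134217728L,
    new String[] {"node1", "node2"});
ParquetInputSplit parquetSplit = from(fileSplit);
// Note the argument mapping: the target constructor takes (path, start, end, length, hosts, rowGroupOffsets),
// so end is computed as start + length and rowGroupOffsets is passed as null.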
public static CarbonInputSplit from(String segmentId, String blockletId, FileSplit split,
        ColumnarFormatVersion version, String dataMapWritePath) throws IOException {
    return new CarbonInputSplit(segmentId, blockletId, split.getPath(), split.getStart(),
        split.getLength(), split.getLocations(), version, null, dataMapWritePath);
}