@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();

    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    // if the split does not begin at a sync point, skip ahead to the first
    // sync marker inside the split so reading starts on a record boundary
    if (fSplit.getStart() > in.getPosition()) {
        in.sync(fSplit.getStart());
    }
    this.start = in.getPosition();
    more = start < end;

    key = new LongWritable();
    value = new BytesRefArrayWritable();
}
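// A hedged companion sketch for the initialize() above: how nextKeyValue()
// typically drives RCFile.Reader (modeled on Hive's RCFileRecordReader; the
// `in`, `end`, `more`, `key`, and `value` fields are the ones set up above).
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!more) {
        return false;
    }
    more = in.next(key);                      // advance to the next row
    if (more) {
        if (in.lastSeenSyncPos() >= end) {
            more = false;                     // crossed into the next split; stop here
        } else {
            in.getCurrentRow(value);          // materialize the current row's columns
        }
    }
    return more;
}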
/**
 * Returns a split for each store-file directory, using the block location
 * of each file as a locality reference.
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);

    Text key = new Text();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
            while ((n = reader.readLine(key)) > 0) {
                String[] hosts = getStoreDirHosts(fs, path);
                splits.add(new FileSplit(path, pos, n, hosts));
                pos += n;
            }
        } finally {
            reader.close();
        }
    }
    return splits;
}
// Convert the native Hadoop split into its file-block representation.
FileSplit s = (FileSplit) nativeSplit;

res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in,
        @Nullable String[] hosts) throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
    // Set up context
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();
    long offset = 0;
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            records.add(reader.getCurrentValue().toString());
        }
        reader.close();
        offset += splitSize;
    }
    return records;
}
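// Hypothetical usage of readRecords(): read the same file with two different
// split sizes and check that records are neither lost nor duplicated at split
// boundaries. The resource name "/testdata/lines.txt" is an assumption for
// illustration.
URL url = getClass().getResource("/testdata/lines.txt");
ArrayList<String> oneByteSplits = readRecords(url, 1);
ArrayList<String> largeSplits = readRecords(url, 16 * 1024 * 1024);
assertEquals(largeSplits, oneByteSplits);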
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    allowEmptyMeta = conf.getBoolean(CONF_INPUT_ARCHIVE_METADATA_OPTIONAL, false);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());

    FileStatus status = fs.getFileStatus(file);
    if (status.isDirectory()) {
        iterator = new FileIterator((FileSplit) inSplit, context);
        inSplit = iterator.next();
    }
    initStream(inSplit);
}
Configuration conf = new Configuration();
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
expect(context.getConfiguration()).andReturn(conf).anyTimes();
private void qualityConfigTest() throws IOException {
    writeToTempQseq(sangerQseq);
    split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null);

    QseqRecordReader reader = new QseqRecordReader(conf, split);
    assertTrue(reader.next(key, fragment));
    assertEquals("###########################################################################################",
            fragment.getQuality().toString());
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    try {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(inputPath));
        // ... (truncated in the snippet: the input format computes the file
        // splits, and the code below runs once per FileSplit) ...
        WorkUnit workUnit = WorkUnit.create(extract);
        // persist the split in the work unit so the extractor can rebuild it
        workUnit.setProp(FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
        workUnit.setProp(FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
        workUnits.add(workUnit);
        return workUnits;
    } catch (IOException ioe) {
        // hypothetical close for the truncated snippet
        throw new RuntimeException(ioe);
    }
}
@Override
public void initialize(InputSplit is, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) is;
    // use the job configuration rather than a fresh Configuration so job
    // settings (e.g. the default file system) are honored
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);

    key = new LogFileKey();
    value = new LogFileValue();

    fsdis = fs.open(fileSplit.getPath());
    FileStatus status = fs.getFileStatus(fileSplit.getPath());
    length = status.getLen();
}
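// A hedged sketch of the matching nextKeyValue(), assuming LogFileKey and
// LogFileValue are Writable (as in Accumulo) and the file is a plain sequence
// of serialized key/value pairs; using getPos() against the `length` computed
// above for EOF detection is an assumption for illustration.
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (fsdis.getPos() >= length) {
        return false;            // consumed the whole file
    }
    key.readFields(fsdis);       // deserialize the next key
    value.readFields(fsdis);     // ... and its value
    return true;
}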
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
            Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // the snippet dropped the reader setup and nextKeyValue() calls for the
    // first split; restored here so getCurrentValue() holds the second record
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);
    assertTrue(reader.nextKeyValue());   // "1234567890"
    assertTrue(reader.nextKeyValue());   // "12"
    Text value = reader.getCurrentValue();
    assertEquals(2, value.getLength());

    // the second split begins inside the "\r\n" before "345"
    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    assertTrue(reader.nextKeyValue());   // "345"
    value = reader.getCurrentValue();
    assertEquals(3, value.getLength());

    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // ... (remainder of the test truncated in the snippet)
}
// NOTE: the original snippet is truncated mid-signature; the method name and
// leading parameters below are hypothetical reconstructions for illustration.
private static List<FileSplit> getSplitsForFastaFile(FileStatus status, Configuration conf,
        int numberOfLinesPerSplit) throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    FileSystem fs = fileName.getFileSystem(conf);
    LineReader lr = null;
    try {
        FSDataInputStream in = fs.open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        long begin = 0;
        long length = 0;
        long recordLength = 0;
        int num;
        int recordsRead = 0;
        while ((num = lr.readLine(line)) > 0) {
            // a '>' marks the start of a new FASTA-style record: close the
            // split for the previous record before starting the next one
            if (line.toString().indexOf(">") >= 0) {
                recordsRead++;
                if (recordLength > 0) {
                    splits.add(new FileSplit(fileName, begin, recordLength, new String[]{}));
                    begin = length;
                    recordLength = 0;
                }
            }
            length += num;
            recordLength += num;
        }
        // emit the split for the final record
        splits.add(new FileSplit(fileName, begin, recordLength, new String[]{}));
    } finally {
        if (lr != null) {
            lr.close();
        }
    }
    return splits;
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());

    boolean skipFirstLine = false;
    if (codec != null) {
        // compressed input cannot be split: decode the whole stream
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            // not the first split: back up one byte and skip the partial
            // first line, which belongs to the previous split
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        start += in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
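// A hedged sketch of the matching nextKeyValue(), following the classic
// line-record-reader pattern; `pos`, `key`, and `value` are assumed fields.
public boolean nextKeyValue() throws IOException {
    if (key == null) {
        key = new LongWritable();
    }
    key.set(pos);
    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    while (pos < end) {
        newSize = in.readLine(value, maxLineLength,
                (int) Math.min((long) Integer.MAX_VALUE, end - pos));
        if (newSize == 0) {
            break;                       // end of stream
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            break;                       // got a complete line
        }
        // line longer than maxLineLength: keep consuming until it ends
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    }
    return true;
}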
@Override
protected void doSetup(Context context) throws IOException {
    tmpBuf = ByteBuffer.allocate(4096);

    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
    List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();

    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    String colName = fileSplit.getPath().getParent().getName();

    for (int i = 0; i < uhcColumns.size(); i++) {
        if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
            index = i;
            break;
        }
    }
    type = uhcColumns.get(index).getType();

    // for debugging
    logger.info("column name: " + colName);
    logger.info("index: " + index);
    logger.info("type: " + type);
}
public XmlRecordReader(FileSplit split, Configuration conf) throws IOException {
    end_tag = ENDING_TAG.getBytes(Charsets.UTF_8);
    start_tag = STARTING_TAG.getBytes(Charsets.UTF_8);

    // record the byte range covered by this split
    start = split.getStart();
    end = start + split.getLength();

    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FileStatus fStatus = fs.getFileStatus(file);
    blocks = fs.getFileBlockLocations(fStatus, 0, fStatus.getLen());

    // open the file and seek to the start of the split
    fsin = fs.open(split.getPath());
    fsin.seek(start);
}
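// The core of this record-reader style is usually a readUntilMatch() helper
// (as popularized by Mahout's XmlInputFormat): scan the stream byte by byte
// until the given tag is matched. A hedged sketch, assuming a
// DataOutputBuffer field named `buffer` that accumulates record bytes:
private boolean readUntilMatch(byte[] match, boolean withinBlock) throws IOException {
    int i = 0;
    while (true) {
        int b = fsin.read();
        if (b == -1) {
            return false;                    // end of file: no match
        }
        if (withinBlock) {
            buffer.write(b);                 // keep bytes that belong to the record
        }
        if (b == match[i]) {
            i++;
            if (i >= match.length) {
                return true;                 // matched the whole tag
            }
        } else {
            i = 0;                           // mismatch: restart tag matching
        }
        // outside a record and past the split end: stop scanning
        if (!withinBlock && i == 0 && fsin.getPos() >= end) {
            return false;
        }
    }
}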
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Path filePath = fileSplit.getPath();
    FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
    shpInputStream = fileSys.open(filePath);
    // hand the input stream to the parser and read the file header to initialize it
    parser = new ShpFileParser(shpInputStream);
    parser.parseShapeFileHead();
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(split.getPath());
    reader = new Reader(fileIn, fs.getFileStatus(file).getLen(), job);
    scanner = reader.createScannerByByteRange(start, split.getLength());
}
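// A hedged sketch of consuming the byte-range scanner created above via the
// TFile Scanner API; the key/value handling is an assumption for illustration.
while (!scanner.atEnd()) {
    Reader.Scanner.Entry entry = scanner.entry();
    BytesWritable k = new BytesWritable();
    BytesWritable v = new BytesWritable();
    entry.get(k, v);     // copy the current key/value pair out of the TFile
    // ... process k and v ...
    scanner.advance();   // move to the next entry in the byte range
}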
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context)
        throws IOException {
    initializeEvent(context.getConfiguration());
    if (genericSplit instanceof FileSplit) {
        final Path p = ((FileSplit) genericSplit).getPath();
        final FileSystem sys = p.getFileSystem(context.getConfiguration());
        rawFileName = p.toString();
        rawFileTimeStamp = sys.getFileStatus(p).getModificationTime();
    }
}
@Override
public void initialize(InputSplit split, TaskAttemptContext task) throws IOException {
    this.fsplit = (FileSplit) split;
    this.ratio = task.getConfiguration().getFloat("ratio", 0.01f);
    this.conf = task.getConfiguration();
    this.seed = conf.getLong("seed", System.currentTimeMillis());
    this.lindex.setup(conf);

    FileSystem fs = fsplit.getPath().getFileSystem(conf);
    this.in = fs.open(fsplit.getPath());
    this.lindexEnd = fsplit.getStart() + fsplit.getLength();
    moveToNextLocalIndex();
}
@Override
public List<InputSplit> getSplits(JobContext ctx) throws IOException, InterruptedException {
    List<InputSplit> res = new ArrayList<>(BLOCK_CNT);

    for (int i = 0; i < BLOCK_CNT; i++) {
        try {
            res.add(new FileSplit(new Path(new URI("someFile")), i, i + 1, new String[] {"localhost"}));
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
    }

    return res;
}