private FileInputSplit createTempFile(String content) throws IOException { this.tempFile = File.createTempFile("test_contents", "tmp"); this.tempFile.deleteOnExit(); DataOutputStream dos = new DataOutputStream(new FileOutputStream(tempFile)); dos.writeBytes(content); dos.close(); return new FileInputSplit(0, new Path(this.tempFile.toURI().toString()), 0, this.tempFile.length(), new String[] {"localhost"}); }
this.splitStart = fileSplit.getStart(); this.splitLength = fileSplit.getLength(); LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]"); this.stream = isot.waitForCompletion(); if(fileSplit.getPath().getName().endsWith(DEFLATE_SUFFIX)) { this.stream = new InflaterInputStreamFSInputWrapper(stream); throw new IOException("Error opening the Input Split " + fileSplit.getPath() + " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);
final String[] hostNames = split.getHostNames(); if (hostNames == null) { instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE));
@Override public void open(FileInputSplit split) throws IOException { super.open(split); DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(); SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength()); LOG.info("Opening split " + split); dataFileReader = DataFileReader.openReader(in, datumReader); dataFileReader.sync(split.getStart()); }
@Override public void run() { try { final FileSystem fs = FileSystem.get(this.split.getPath().toUri()); this.fdis = fs.open(this.split.getPath()); // check for canceling and close the stream in that case, because no one will obtain it if (this.aborted) { final FSDataInputStream f = this.fdis; this.fdis = null; f.close(); } } catch (Throwable t) { this.error = t; } }
@Test public void testCreateInputSplitsWithOneFile() throws IOException { // create temporary file with 3 blocks final File tempFile = File.createTempFile("binary_input_format_test", "tmp"); tempFile.deleteOnExit(); final int blockInfoSize = new BlockInfo().getInfoSize(); final int blockSize = blockInfoSize + 8; final int numBlocks = 3; FileOutputStream fileOutputStream = new FileOutputStream(tempFile); for(int i = 0; i < blockSize * numBlocks; i++) { fileOutputStream.write(new byte[]{1}); } fileOutputStream.close(); final Configuration config = new Configuration(); config.setLong(BinaryInputFormat.BLOCK_SIZE_PARAMETER_KEY, blockSize); final BinaryInputFormat<Record> inputFormat = new MyBinaryInputFormat(); inputFormat.setFilePath(tempFile.toURI().toString()); inputFormat.configure(config); FileInputSplit[] inputSplits = inputFormat.createInputSplits(numBlocks); Assert.assertEquals("Returns requested numbers of splits.", numBlocks, inputSplits.length); Assert.assertEquals("1. split has block size length.", blockSize, inputSplits[0].getLength()); Assert.assertEquals("2. split has block size length.", blockSize, inputSplits[1].getLength()); Assert.assertEquals("3. split has block size length.", blockSize, inputSplits[2].getLength()); }
@Override public void open(FileInputSplit split) throws IOException { super.open(split); DatumReader<E> datumReader; if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) { datumReader = new SpecificDatumReader<E>(avroValueType); } else { datumReader = new ReflectDatumReader<E>(avroValueType); } LOG.info("Opening split " + split); SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength()); dataFileReader = DataFileReader.openReader(in, datumReader); dataFileReader.sync(split.getStart()); }
@Override public void run() { try { final FileSystem fs = FileSystem.get(this.split.getPath().toUri()); this.fdis = fs.open(this.split.getPath()); // check for canceling and close the stream in that case, because no one will obtain it if (this.aborted) { final FSDataInputStream f = this.fdis; this.fdis = null; f.close(); } } catch (Throwable t) { this.error = t; } }
this.splitStart = fileSplit.getStart(); this.splitLength = fileSplit.getLength(); LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]"); this.stream = isot.waitForCompletion(); if(fileSplit.getPath().getName().endsWith(DEFLATE_SUFFIX)) { this.stream = new InflaterInputStreamFSInputWrapper(stream); throw new IOException("Error opening the Input Split " + fileSplit.getPath() + " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public void open(FileInputSplit split) throws IOException { super.open(split); this.wrapper = InstantiationUtil.instantiate(avroWrapperTypeClass, AvroBaseValue.class); DatumReader<E> datumReader; if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) { datumReader = new SpecificDatumReader<E>(avroValueType); } else { datumReader = new ReflectDatumReader<E>(avroValueType); } LOG.info("Opening split " + split); SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength()); dataFileReader = DataFileReader.openReader(in, datumReader); dataFileReader.sync(split.getStart()); reuseAvroValue = null; }
final String[] hostNames = split.getHostNames(); if (hostNames == null) { instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE));
/** * Checks if the expected input splits were created */ @Test public void checkInputSplits() throws IOException { FileInputSplit[] inputSplits = this.createInputFormat().createInputSplits(0); Arrays.sort(inputSplits, new InputSplitSorter()); int splitIndex = 0; for (int fileIndex = 0; fileIndex < this.degreeOfParallelism; fileIndex++) { List<FileInputSplit> sameFileSplits = new ArrayList<FileInputSplit>(); Path lastPath = inputSplits[splitIndex].getPath(); for (; splitIndex < inputSplits.length; splitIndex++) { if (!inputSplits[splitIndex].getPath().equals(lastPath)) { break; } sameFileSplits.add(inputSplits[splitIndex]); } Assert.assertEquals(this.getExpectedBlockCount(fileIndex), sameFileSplits.size()); long lastBlockLength = this.rawDataSizes[fileIndex] % (this.blockSize - this.info.getInfoSize()) + this.info.getInfoSize(); for (int index = 0; index < sameFileSplits.size(); index++) { Assert.assertEquals(this.blockSize * index, sameFileSplits.get(index).getStart()); if (index < sameFileSplits.size() - 1) { Assert.assertEquals(this.blockSize, sameFileSplits.get(index).getLength()); } } Assert.assertEquals(lastBlockLength, sameFileSplits.get(sameFileSplits.size() - 1).getLength()); } }
private FileInputSplit createTempFile(String contents) throws IOException { this.tempFile = File.createTempFile("test_contents", "tmp"); this.tempFile.deleteOnExit(); OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(this.tempFile)); wrt.write(contents); wrt.close(); return new FileInputSplit(0, new Path(this.tempFile.toURI().toString()), 0, this.tempFile.length(), new String[] {"localhost"}); }
Assert.assertEquals(4, splits.length); for(FileInputSplit split : splits) { Assert.assertEquals(-1L, split.getLength()); // unsplittable deflate files have this size as a flag for "read whole file" Assert.assertEquals(0L, split.getStart()); // always read from the beginning. Assert.assertEquals(5, splitsMixed.length); for(FileInputSplit split : splitsMixed) { if(split.getPath().getName().endsWith(".deflate")) { Assert.assertEquals(-1L, split.getLength()); // unsplittable deflate files have this size as a flag for "read whole file" Assert.assertEquals(0L, split.getStart()); // always read from the beginning. } else { Assert.assertEquals(0L, split.getStart()); Assert.assertTrue("split size not correct", split.getLength() > 0);
@Override public FileInputSplit[] createInputSplits(final int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (final FileStatus file : files) { final long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { final long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); final FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
len = READ_WHOLE_SPLIT_FLAG; FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len, hosts.toArray(new String[hosts.size()])); inputSplits.add(fis); FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize, blocks[blockIndex].getHosts()); inputSplits.add(fis); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, bytesUnassigned, blocks[blockIndex].getHosts()); inputSplits.add(fis); hosts = new String[0]; final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts); inputSplits.add(fis);