org.apache.flink.core.fs.FileInputSplit.getPath java code examples

/**
 * Closes this input format. Before delegating to {@code super.close()}, the number of
 * skipped invalid lines is reported at WARN level and the number of skipped comment lines
 * at INFO level. Each report is only emitted when its counter is positive and the
 * corresponding log level is enabled.
 *
 * @throws IOException declared for the delegation to {@code super.close()}
 */
@Override
public void close() throws IOException {
  // Messages are assembled inside the guards so the split is only touched when a report is due.
  if (this.invalidLineCount > 0 && LOG.isWarnEnabled()) {
    final String invalidLinesReport = "In file \"" + currentSplit.getPath()
        + "\" (split start: " + this.splitStart + ") "
        + this.invalidLineCount + " invalid line(s) were skipped.";
    LOG.warn(invalidLinesReport);
  }

  if (this.commentCount > 0 && LOG.isInfoEnabled()) {
    final String commentLinesReport = "In file \"" + currentSplit.getPath()
        + "\" (split start: " + this.splitStart + ") "
        + this.commentCount + " comment line(s) were skipped.";
    LOG.info(commentLinesReport);
  }

  super.close();
}

/**
 * Opens the input stream for the split's path and stores it in {@code fdis}.
 *
 * <p>If {@code aborted} is set by the time the stream has been opened, the stream is closed
 * again and {@code fdis} is reset to {@code null}. Any {@link Throwable} raised while
 * resolving the file system, opening or closing the stream is recorded in {@code error}
 * instead of being propagated to the caller of {@code run()}.
 */
@Override
public void run() {
  try {
    // resolve the file system from the split's URI and open the file
    final FileSystem fs = FileSystem.get(this.split.getPath().toUri());
    this.fdis = fs.open(this.split.getPath());
    
    // check for canceling and close the stream in that case, because no one will obtain it
    if (this.aborted) {
      // The field is cleared before the stream is closed.
      // NOTE(review): presumably so that no other thread can pick up a closed stream
      // from 'fdis' - confirm against the code that reads the field.
      final FSDataInputStream f = this.fdis;
      this.fdis = null;
      f.close();
    }
  }
  catch (Throwable t) {
    // remember the failure; NOTE(review): presumably re-thrown by whoever waits for this thread - confirm
    this.error = t;
  }
}

/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of the file
 * each split belongs to. The groups are kept in a {@link TreeMap}, so iterating the
 * result visits them <b>in ascending modification time</b>. Splits whose file is not
 * contained in {@code eligibleFiles} are left out.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return The splits of the eligible files grouped by modification time; an empty map
 *         if {@code eligibleFiles} is empty.
 * @throws IOException declared for the split creation of the underlying format.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
      Map<Path, FileStatus> eligibleFiles) throws IOException {
  final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

  // only ask the format for splits when there is at least one file to process
  if (!eligibleFiles.isEmpty()) {
    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
      final FileStatus status = eligibleFiles.get(candidate.getPath());
      if (status == null) {
        // split belongs to a file that is not eligible
        continue;
      }

      final Long timestamp = status.getModificationTime();
      List<TimestampedFileInputSplit> bucket = grouped.get(timestamp);
      if (bucket == null) {
        bucket = new ArrayList<>();
        grouped.put(timestamp, bucket);
      }

      bucket.add(new TimestampedFileInputSplit(
        timestamp,
        candidate.getSplitNumber(),
        candidate.getPath(),
        candidate.getStart(),
        candidate.getLength(),
        candidate.getHostnames()));
    }
  }

  return grouped;
}

/**
 * Creates the Avro {@code DataFileReader} for the given split.
 *
 * <p>The datum reader is picked from {@code avroValueType}: a generic reader when the type
 * is exactly {@code GenericRecord}, a specific reader when it is assignable to
 * {@code SpecificRecordBase}, and a reflect reader otherwise. As a side effect the fields
 * {@code end} (split start plus split length) and {@code recordsReadSinceLastSync} (zero)
 * are reset.
 *
 * @param split the split to open
 * @return the reader opened on top of the already opened {@code stream}
 * @throws IOException declared for the file status lookup and for opening the reader
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
  final DatumReader<E> recordReader;
  if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
    recordReader = new GenericDatumReader<E>();
  } else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
    recordReader = new SpecificDatumReader<E>(avroValueType);
  } else {
    recordReader = new ReflectDatumReader<E>(avroValueType);
  }

  if (LOG.isInfoEnabled()) {
    LOG.info("Opening split {}", split);
  }

  // the wrapper needs the total length of the file, not just the length of the split
  final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
  final SeekableInput seekableInput = new FSDataInputStreamWrapper(stream, fileLength);
  final DataFileReader<E> avroReader =
    (DataFileReader) DataFileReader.openReader(seekableInput, recordReader);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Loaded SCHEMA: {}", avroReader.getSchema());
  }

  end = split.getStart() + split.getLength();
  recordsReadSinceLastSync = 0;
  return avroReader;
}

LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]");
throw new IOException("Error opening the Input Split " + fileSplit.getPath() + 
    " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t);

/**
 * This method allows to wrap/decorate the raw {@link FSDataInputStream} for a certain file split, e.g., for decoding.
 * When overriding this method, also consider adapting {@link FileInputFormat#testForUnsplittable} if your
 * stream decoration renders the input file unsplittable. Also consider calling existing superclass implementations.
 *
 * <p>This implementation looks up an inflater factory for the path of {@code fileSplit}. If one is found,
 * {@code inputStream} is wrapped in the decompressing stream created by that factory; otherwise
 * {@code inputStream} is returned unchanged.
 *
 * @param inputStream is the input stream to be decorated
 * @param fileSplit   is the file split for which the input stream shall be decorated
 * @return the decorated input stream
 * @throws Throwable if the decoration fails
 * @see org.apache.flink.api.common.io.InputStreamFSInputWrapper
 */
protected FSDataInputStream decorateInputStream(FSDataInputStream inputStream, FileInputSplit fileSplit) throws Throwable {
  // Wrap stream in a extracting (decompressing) stream if file ends with a known compression file extension.
  InflaterInputStreamFactory<?> inflaterInputStreamFactory = getInflaterInputStreamFactory(fileSplit.getPath());
  if (inflaterInputStreamFactory != null) {
    // Decorate the stream that was handed in, not the 'stream' field: otherwise the parameter is
    // ignored whenever a compression format is detected, and callers or overriding subclasses that
    // pass a different (e.g. already decorated) stream would get the wrong one wrapped.
    return new InputStreamFSInputWrapper(inflaterInputStreamFactory.create(inputStream));
  }
  return inputStream;
}

      + "ParserError " + parser.getErrorState() + " \n"
      + "Expect field types: "+fieldTypesToString() + " \n"
      + "in file: " + currentSplit.getPath());
  throw new ParseException("Line could not be parsed: '" + lineAsString+"'\n"
      + "Expect field types: "+fieldTypesToString()+" \n"
      + "in file: " + currentSplit.getPath());
} else {
  return false;

final URI uri1 = splits[0].getPath().toUri();
final URI uri2 = splits[1].getPath().toUri();

/**
 * Checks if the expected input splits were created: after sorting, the splits are walked
 * file by file, and for every file the number of splits, the start offset of each split,
 * the length of all but the last split, and the length of the last split are verified.
 */
@Test
public void checkInputSplits() throws IOException {
  final FileInputSplit[] allSplits = this.createInputFormat().createInputSplits(0);
  Arrays.sort(allSplits, new InputSplitSorter());

  int cursor = 0;
  for (int fileIndex = 0; fileIndex < this.parallelism; fileIndex++) {
    // gather the consecutive run of splits sharing the path of the split at the cursor
    final List<FileInputSplit> splitsOfFile = new ArrayList<FileInputSplit>();
    final Path currentPath = allSplits[cursor].getPath();
    while (cursor < allSplits.length && allSplits[cursor].getPath().equals(currentPath)) {
      splitsOfFile.add(allSplits[cursor]);
      cursor++;
    }

    Assert.assertEquals(this.getExpectedBlockCount(fileIndex), splitsOfFile.size());

    long lastBlockLength =
      this.rawDataSizes[fileIndex] % (this.blockSize - getInfoSize()) + getInfoSize();

    final int lastPosition = splitsOfFile.size() - 1;
    for (int position = 0; position <= lastPosition; position++) {
      final FileInputSplit current = splitsOfFile.get(position);
      Assert.assertEquals(this.blockSize * position, current.getStart());
      // every split except the last one must span a full block
      if (position != lastPosition) {
        Assert.assertEquals(this.blockSize, current.getLength());
      }
    }

    Assert.assertEquals(lastBlockLength, splitsOfFile.get(splitsOfFile.size() - 1).getLength());
  }
}

paths.add(split.getPath().toString());

/**
 * Verifies that files matching the exclude pattern of a {@code GlobFilePathFilter} are not
 * turned into input splits: of the two files created in the temporary folder only
 * {@code dataFile1.txt} may produce a split, {@code another_file.bin} is excluded.
 */
@Test
public void testExcludeFiles() {
  try {
    final String contents = "CONTENTS";
    // create some accepted, some ignored files
    File child1 = temporaryFolder.newFile("dataFile1.txt");
    File child2 = temporaryFolder.newFile("another_file.bin");
    File[] files = { child1, child2 };
    createTempFiles(contents.getBytes(ConfigConstants.DEFAULT_CHARSET), files);
    // test that only the valid files are accepted
    Configuration configuration = new Configuration();
    final DummyFileInputFormat format = new DummyFileInputFormat();
    format.setFilePath(temporaryFolder.getRoot().toURI().toString());
    format.configure(configuration);
    format.setFilesFilter(new GlobFilePathFilter(
      Collections.singletonList("**"),
      Collections.singletonList("**/another_file.bin")));
    FileInputSplit[] splits = format.createInputSplits(1);
    Assert.assertEquals(1, splits.length);
    final URI uri1 = splits[0].getPath().toUri();
    final URI childUri1 = child1.toURI();
    // JUnit expects (expected, actual): the accepted file is the expectation, the split is
    // the value under test. The reversed order would mislabel both values on failure.
    Assert.assertEquals(childUri1, uri1);
  }
  catch (Exception e) {
    System.err.println(e.getMessage());
    e.printStackTrace();
    Assert.fail(e.getMessage());
  }
}

@Override
public void open(FileInputSplit fileSplit) throws IOException {
  LOG.debug("Opening ORC file {}", fileSplit.getPath());
  org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(fileSplit.getPath().getPath());
  Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));

for (int i = 0; i < inputSplits.length; i++) {
  Assert.assertEquals(String.format("%d. split has block size length.", i), blockSize, inputSplits[i].getLength());
  if (inputSplits[i].getPath().toString().equals(pathFile1)) {
    numSplitsFile1++;
  } else if (inputSplits[i].getPath().toString().equals(pathFile2)) {
    numSplitsFile2++;
  } else {

/**
 * Verifies that requesting two splits for a single small file yields exactly two splits
 * and that both of them point to that file.
 */
@Test
public void testCreateInputSplitSingleFile() throws IOException {
  String tempFile = TestFileUtils.createTempFile("Hello World");
  // Wildcard instead of the raw type: the record type is irrelevant for split creation,
  // and the raw type would switch off generic type checking for every call on 'fif'.
  FileInputFormat<?> fif = new DummyFileInputFormat();
  fif.setFilePath(tempFile);

  fif.configure(new Configuration());
  FileInputSplit[] splits = fif.createInputSplits(2);

  Assert.assertEquals(2, splits.length);
  Assert.assertEquals(tempFile, splits[0].getPath().toString());
  Assert.assertEquals(tempFile, splits[1].getPath().toString());
}

/**
 * Converts a plain file split into a {@code TimestampedFileInputSplit} that carries the
 * given modification time. Split number, path, start, length and host names are copied
 * from the given split.
 *
 * @param modTime the modification time to attach to the split
 * @param split the split to convert; checked with {@code Preconditions.checkNotNull}
 * @return the timestamped copy of {@code split}
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
  Preconditions.checkNotNull(split);

  final int splitNumber = split.getSplitNumber();
  final long start = split.getStart();
  final long length = split.getLength();
  final String[] hostnames = split.getHostnames();

  return new TimestampedFileInputSplit(modTime, splitNumber, split.getPath(), start, length, hostnames);
}

for (FileInputSplit fis : splits) {
  Assert.assertEquals(0, fis.getStart());
  if (fis.getPath().toString().equals(tempFile1)) {
    numSplitsFile1++;
    Assert.assertEquals(21, fis.getLength());
  } else if (fis.getPath().toString().equals(tempFile2)) {
    numSplitsFile2++;
    Assert.assertEquals(22, fis.getLength());
  } else if (fis.getPath().toString().equals(tempFile3)) {
    numSplitsFile3++;
    Assert.assertEquals(23, fis.getLength());

FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames());
FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames());

final FileInputSplit split = createTempFile(myString);
FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames());
FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames());

Assert.assertEquals(5, splitsMixed.length);
for(FileInputSplit split : splitsMixed) {
  if(split.getPath().getName().endsWith(".deflate")) {

/**
 * Tests that the records are read correctly when the split boundary is in the middle of a record:
 * the file is cut at half of its length, the first split must still return the record that
 * straddles the boundary, and the second split must only return the remaining record.
 */
@Test
public void testReadOverSplitBoundariesUnaligned() throws IOException {
  final String myString = "value1\nvalue2\nvalue3";
  final FileInputSplit wholeFile = createTempFile(myString);

  final FileInputSplit firstHalf = new FileInputSplit(
    0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
  // NOTE(review): the second split is given the length of the whole file rather than the
  // remaining length - kept as is, confirm whether that is intended.
  final FileInputSplit secondHalf = new FileInputSplit(
    1, wholeFile.getPath(), firstHalf.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

  format.configure(new Configuration());

  // first split: ends inside "value2" but must still deliver it completely
  format.open(firstHalf);
  assertEquals("value1", format.nextRecord(null));
  assertEquals("value2", format.nextRecord(null));
  assertNull(format.nextRecord(null));
  assertTrue(format.reachedEnd());
  format.close();

  // second split: starts inside "value2" and must only deliver "value3"
  format.open(secondHalf);
  assertEquals("value3", format.nextRecord(null));
  assertNull(format.nextRecord(null));
  assertTrue(format.reachedEnd());
  format.close();
}

Javadoc

Returns the path of the file containing this split's data.

Popular methods of FileInputSplit

getLength
Returns the number of bytes in the file to process.
getStart
Returns the position of the first byte in the file to process.
<init>
Constructs a split with host information.
getHostnames
getSplitNumber
equals
hashCode

Popular in Java

Finding current android device location
putExtra (Intent)
runOnUiThread (Activity)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided supplier.
Socket (java.net)
Provides a client-side TCP socket.
Locale (java.util)
Locale represents a language/country/variant combination. Locales are used to alter the presentation of information such as numbers or dates to suit the conventions in the region they describe.
HttpServletRequest (javax.servlet.http)
Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
Menu (java.awt)
JPanel (javax.swing)
Response (javax.ws.rs.core)
Defines the contract between a returned instance and the runtime when an application needs to provide meta-data to the runtime.
From CI to AI: The AI layer in your organization

How to use the getPath method in org.apache.flink.core.fs.FileInputSplit

Best Java code snippets using org.apache.flink.core.fs.FileInputSplit.getPath (Showing top 20 results out of 315)

How to use the getPath method in org.apache.flink.core.fs.FileInputSplit