@Override
public CubeSegment findSourceSegment(FileSplit fileSplit, CubeInstance cube) {
    String filePath = fileSplit.getPath().toString();
    String jobID = JobBuilderSupport.extractJobIDFromPath(filePath);
    return CubeInstance.findSegmentWithJobId(jobID, cube);
}
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    conf = context.getConfiguration();
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    LOG.info("Initialize HFileRecordReader for {}", path);
    this.in = HFile.createReader(fs, path, conf);

    // The file info must be loaded before the scanner can be used.
    // This seems like a bug in HBase, but it's easily worked around.
    this.in.loadFileInfo();
    this.scanner = in.getScanner(false, false);
}
@Override
protected void doSetup(Context context) throws IOException {
    tmpBuf = ByteBuffer.allocate(4096);

    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
    List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();

    // The parent directory of this split is named after the UHC column being processed.
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    String colName = fileSplit.getPath().getParent().getName();

    for (int i = 0; i < uhcColumns.size(); i++) {
        if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
            index = i;
            break;
        }
    }
    type = uhcColumns.get(index).getType();

    // for debug
    logger.info("column name: " + colName);
    logger.info("index: " + index);
    logger.info("type: " + type);
}
/**
 * @param clsName Input split class name.
 * @param in Input stream.
 * @param hosts Optional hosts.
 * @return File block or {@code null} if it is not a {@link FileSplit} instance.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopFileBlock readFileBlock(String clsName, DataInput in, @Nullable String[] hosts)
    throws IgniteCheckedException {
    if (!FileSplit.class.getName().equals(clsName))
        return null;

    FileSplit split = new FileSplit();

    try {
        split.readFields(in);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }

    if (hosts == null)
        hosts = EMPTY_HOSTS;

    return new HadoopFileBlock(hosts, split.getPath().toUri(), split.getStart(), split.getLength());
}
FileSplit s = (FileSplit) nativeSplit;

res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fSplit = (FileSplit) split;
    Path path = fSplit.getPath();
    Configuration conf = context.getConfiguration();
    this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
    this.end = fSplit.getStart() + fSplit.getLength();

    // Skip ahead to the first sync marker at or after the split's start offset.
    if (fSplit.getStart() > in.getPosition()) {
        in.sync(fSplit.getStart());
    }

    this.start = in.getPosition();
    more = start < end;

    key = new LongWritable();
    value = new BytesRefArrayWritable();
}
WorkUnit workUnit = WorkUnit.create(extract);
workUnit.setProp(FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
workUnit.setProp(FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
workUnits.add(workUnit);
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    final FileSplit file = (FileSplit) inputSplit;
    context.setStatus(file.toString());

    final AvroColumnReader.Params params =
        new AvroColumnReader.Params(new HadoopInput(file.getPath(), context.getConfiguration()));
    params.setModel(ReflectData.get());

    if (AvroJob.getInputKeySchema(context.getConfiguration()) != null) {
        params.setSchema(AvroJob.getInputKeySchema(context.getConfiguration()));
    }

    reader = new AvroColumnReader<>(params);
    rows = reader.getRowCount();
}
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(inputSplit instanceof FileSplit)) {
        throw new IllegalArgumentException("Only compatible with FileSplits.");
    }
    FileSplit fileSplit = (FileSplit) inputSplit;

    // Open a seekable input stream to the Avro container file.
    SeekableInput seekableFileInput = createSeekableInput(context.getConfiguration(), fileSplit.getPath());

    // Wrap the seekable input stream in an Avro DataFileReader.
    Configuration conf = context.getConfiguration();
    GenericData dataModel = AvroSerialization.createDataModel(conf);
    DatumReader<T> datumReader = dataModel.createDatumReader(mReaderSchema);
    mAvroFileReader = createAvroFileReader(seekableFileInput, datumReader);

    // Initialize the start and end offsets into the file based on the boundaries of the
    // input split we're responsible for. We will read the first block that begins
    // after the input split start boundary, and we will read up to but not including the
    // first block that starts after the input split end boundary.

    // Sync to the closest block/record boundary just after the beginning of our input split.
    mAvroFileReader.sync(fileSplit.getStart());

    // Initialize the start position to the beginning of the first block of the input split.
    mStartPosition = mAvroFileReader.previousSync();

    // Initialize the end position to the end of the input split (this isn't necessarily
    // on a block boundary, so using it for reporting progress will be approximate).
    mEndPosition = fileSplit.getStart() + fileSplit.getLength();
}
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
    return new OrcRecordReader(OrcFile.createReader(path, OrcFile.readerOptions(conf)),
        ShimLoader.getHadoopShims().getConfiguration(context),
        fileSplit.getStart(), fileSplit.getLength());
}
public class LineMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    protected long lines = 0;

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        lines++;
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Emit one (file name, line count) pair for this split once all records have been mapped.
        FileSplit split = (FileSplit) context.getInputSplit();
        String filename = split.getPath().toString();
        context.write(new Text(filename), new LongWritable(lines));
    }
}
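For context, here is a minimal driver sketch showing how a mapper like LineMapper above could be wired into a job. The driver class name, the job name, and the use of LongSumReducer to aggregate the per-split counts are illustrative assumptions, not part of the original source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

// Hypothetical driver: counts lines per input file using LineMapper.
public class LineCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "line-count-per-file");
        job.setJarByClass(LineCountDriver.class);
        job.setMapperClass(LineMapper.class);
        // Sum the counts emitted for the same file name across splits.
        job.setReducerClass(LongSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}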
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
@Override
public String apply(@Nullable InputSplit input) {
    return ((FileSplit) input).getPath().toString();
}
});
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!processed) {
        // Read the entire file backing this split into a single value.
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
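The nextKeyValue above relies on its enclosing RecordReader having captured the split and configuration during initialize. A minimal sketch of such a wrapper is shown below; the class name and the bodies of the other methods are assumptions for illustration, while the fields (fileSplit, conf, value, processed) come from the snippet itself.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Sketch of a whole-file record reader; the class name is illustrative.
public class WholeFileRecordReader extends RecordReader<NullWritable, BytesWritable> {
    private FileSplit fileSplit;
    private Configuration conf;
    private final BytesWritable value = new BytesWritable();
    private boolean processed = false;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // Capture the split and configuration that nextKeyValue() needs.
        this.fileSplit = (FileSplit) split;
        this.conf = context.getConfiguration();
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        // Same logic as the snippet above: read the whole file as a single record.
        if (processed) {
            return false;
        }
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }

    @Override
    public NullWritable getCurrentKey() {
        return NullWritable.get();
    }

    @Override
    public BytesWritable getCurrentValue() {
        return value;
    }

    @Override
    public float getProgress() {
        // The whole file is one record: either nothing or everything has been read.
        return processed ? 1.0f : 0.0f;
    }

    @Override
    public void close() throws IOException {
        // Nothing to close here; the stream is closed in nextKeyValue().
    }
}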
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();

    config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cubeManager = CubeManager.getInstance(config);
    cube = cubeManager.getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    mergedCubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    // int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    newKeyBuf = new byte[256]; // size will auto-grow

    // decide which source segment
    InputSplit inputSplit = context.getInputSplit();
    String filePath = ((FileSplit) inputSplit).getPath().toString();
    System.out.println("filePath:" + filePath);
    String jobID = extractJobIDFromPath(filePath);
    System.out.println("jobID:" + jobID);
    sourceCubeSegment = findSegmentWithUuid(jobID, cube);
    System.out.println(sourceCubeSegment);

    this.rowKeySplitter = new RowKeySplitter(sourceCubeSegment, 65, 255);
}
/**
 * Initialize the ORC file reader and batch record reader.
 * Please note that `initBatch` needs to be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
        fileSplit.getPath(),
        OrcFile.readerOptions(conf)
            .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
            .filesystem(fileSplit.getPath().getFileSystem(conf)));

    Reader.Options options =
        OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
    final FileSplit split = (FileSplit) genericSplit;
    final Configuration configuration = context.getConfiguration();
    if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(
            ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.gryoReader = GryoReader.build().mapper(
        GryoMapper.build().addRegistries(
            IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create();

    long start = split.getStart();
    final Path file = split.getPath();
    if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
        throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
    }

    // open the file and seek to the start of the split
    this.inputStream = file.getFileSystem(configuration).open(split.getPath());
    this.splitLength = split.getLength();
    if (this.splitLength > 0)
        this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}