public ByteBuffer getMetadataToCache(FileSystem fs, Path path, ByteBuffer[] addedVals)
    throws IOException {
  // For now, there's nothing special to return in addedVals. Just return the footer.
  return OrcFile.createReader(fs, path).getSerializedFileFooter();
}
static void checkFile(Configuration conf, Path inputPath) throws IOException {
  FileSystem fs = inputPath.getFileSystem(conf);
  Reader reader = OrcFile.createReader(fs, inputPath);
  if (OrcInputFormat.isOriginal(reader)) {
    System.out.println(inputPath + " is not an acid file");
    return;
  }
  boolean validIndex = isAcidKeyIndexValid(reader);
  System.out.println("Checking " + inputPath + " - acid key index is "
      + (validIndex ? "valid" : "invalid"));
}
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files)
    throws IOException {
  if (files.size() <= 0) {
    return false;
  }
  for (FileStatus file : files) {
    try {
      OrcFile.createReader(file.getPath(), OrcFile.readerOptions(conf).filesystem(fs));
    } catch (IOException e) {
      return false;
    }
  }
  return true;
}
private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
  //create reader, look at footer
  //no need to check side file since it can only be in a streaming ingest delta
  Reader orcReader = OrcFile.createReader(bucket.getPath(),
      OrcFile.readerOptions(fs.getConf()).filesystem(fs));
  if (orcReader.hasMetadataValue(ACID_STATS)) {
    try {
      ByteBuffer val = orcReader.getMetadataValue(ACID_STATS).duplicate();
      String acidStats = utf8Decoder.decode(val).toString();
      String[] parts = acidStats.split(",");
      long updates = Long.parseLong(parts[1]);
      long deletes = Long.parseLong(parts[2]);
      return deletes > 0 || updates > 0;
    } catch (CharacterCodingException e) {
      throw new IllegalArgumentException("Bad string encoding for " + ACID_STATS, e);
    }
  } else {
    throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
  }
}
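// Illustrative sketch (not part of the snippets above): the ACID_STATS metadata value read in
// needsCompaction() is assumed to decode to a comma-separated "inserts,updates,deletes" string,
// so the compaction check reduces to parsing fields 1 and 2. The class name and sample value
// below are hypothetical.
public class AcidStatsParseSketch {
  public static void main(String[] args) {
    String acidStats = "100,2,0"; // hypothetical decoded ACID_STATS value
    String[] parts = acidStats.split(",");
    long updates = Long.parseLong(parts[1]);
    long deletes = Long.parseLong(parts[2]);
    System.out.println("needs compaction: " + (deletes > 0 || updates > 0));
  }
}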
/**
 * Finds the next file of the logical bucket.
 * @return {@code null} if there are no more files
 */
private Reader advanceToNextFile() throws IOException {
  while (nextFileIndex < originalFiles.size()) {
    int bucketIdFromPath = AcidUtils.parseBucketId(
        originalFiles.get(nextFileIndex).getFileStatus().getPath());
    if (bucketIdFromPath == bucketId) {
      break;
    }
    //not the bucket we care about here
    nextFileIndex++;
  }
  if (originalFiles.size() <= nextFileIndex) {
    return null; //no more files for current bucket
  }
  return OrcFile.createReader(originalFiles.get(nextFileIndex++).getFileStatus().getPath(),
      OrcFile.readerOptions(conf));
}
public static boolean isRawFormatFile(Path dataFile, FileSystem fs) throws IOException {
  try {
    Reader reader = OrcFile.createReader(dataFile, OrcFile.readerOptions(fs.getConf()));
    /* acid file would have schema like <op, owid, writerId, rowid, cwid, <f1, ... fn>>
       so could check it this way once/if OrcRecordUpdater.ACID_KEY_INDEX_NAME is removed
       TypeDescription schema = reader.getSchema();
       List<String> columns = schema.getFieldNames(); */
    return OrcInputFormat.isOriginal(reader);
  } catch (FileFormatException ex) {
    //We may be parsing a delta for Insert-only table which may not even be an ORC file so
    //cannot have ROW_IDs in it.
    LOG.debug("isRawFormat() called on " + dataFile + " which is not an ORC file: "
        + ex.getMessage());
    return true;
  }
}
/**
 * This is smart enough to handle streaming ingest where there could be a
 * {@link OrcAcidUtils#DELTA_SIDE_FILE_SUFFIX} side file.
 * @param dataFile - ORC acid data file
 * @return version property from file if there,
 *         {@link #ORC_ACID_VERSION_DEFAULT} otherwise
 */
@VisibleForTesting
public static int getAcidVersionFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  FileStatus fileStatus = fs.getFileStatus(dataFile);
  Reader orcReader = OrcFile.createReader(dataFile,
      OrcFile.readerOptions(fs.getConf())
          .filesystem(fs)
          //make sure to check for side file in case streaming ingest died
          .maxLength(getLogicalLength(fs, fileStatus)));
  if (orcReader.hasMetadataValue(ACID_VERSION_KEY)) {
    char[] versionChar = UTF8.decode(orcReader.getMetadataValue(ACID_VERSION_KEY)).array();
    String version = new String(versionChar);
    return Integer.valueOf(version);
  }
  return ORC_ACID_VERSION_DEFAULT;
}
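// Illustrative caller sketch for getAcidVersionFromDataFile() above; the bucket path is a
// made-up example and the helper is assumed to be in scope of the same class.
Configuration conf = new Configuration();
Path dataFile = new Path("/warehouse/t/delta_0000001_0000001/bucket_00000"); // hypothetical path
FileSystem fs = dataFile.getFileSystem(conf);
int acidVersion = getAcidVersionFromDataFile(dataFile, fs);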
public OrcFileStripeMergeRecordReader(Configuration conf, FileSplit split) throws IOException {
  path = split.getPath();
  start = split.getStart();
  end = start + split.getLength();
  FileSystem fs = path.getFileSystem(conf);
  this.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
  this.iter = reader.getStripes().iterator();
  this.stripeIdx = 0;
  this.stripeStatistics = ((ReaderImpl) reader).getOrcProtoStripeStatistics();
}
private static void assertFileContentsOrcHive(Type type, TempFile tempFile,
    Iterable<?> expectedValues) throws Exception {
  JobConf configuration = new JobConf(new Configuration(false));
  configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
  configuration.setBoolean(READ_ALL_COLUMNS, false);
  Reader reader = OrcFile.createReader(
      new Path(tempFile.getFile().getAbsolutePath()),
      new ReaderOptions(configuration));
  org.apache.hadoop.hive.ql.io.orc.RecordReader recordReader = reader.rows();
  StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
  StructField field = rowInspector.getStructFieldRef("test");
  Iterator<?> iterator = expectedValues.iterator();
  Object rowData = null;
  while (recordReader.hasNext()) {
    rowData = recordReader.next(rowData);
    Object expectedValue = iterator.next();
    Object actualValue = rowInspector.getStructFieldData(rowData, field);
    actualValue = decodeRecordReaderValue(type, actualValue);
    assertColumnValueEquals(type, actualValue, expectedValue);
  }
  assertFalse(iterator.hasNext());
}
private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
  org.apache.hadoop.fs.FileSystem fs =
      org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
  Reader reader = OrcFile.createReader(orcFile,
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector();
  System.out.format("Found Bucket File : %s \n", orcFile.getName());
  ArrayList<SampleRec> result = new ArrayList<SampleRec>();
  while (rows.hasNext()) {
    Object row = rows.next(null);
    SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
    result.add(rec);
  }
  return result;
}
static Reader createOrcReaderForSplit(Configuration conf, OrcSplit orcSplit) throws IOException {
  Path path = orcSplit.getPath();
  Reader reader;
  if (orcSplit.hasBase()) {
    OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf);
    readerOptions.maxLength(orcSplit.getFileLength());
    if (orcSplit.hasFooter()) {
      readerOptions.orcTail(orcSplit.getOrcTail());
    }
    reader = OrcFile.createReader(path, readerOptions);
  } else {
    reader = null;
  }
  return reader;
}
@Override
public RecordReader<NullWritable, OrcStruct> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Path path = fileSplit.getPath();
  Configuration conf = ShimLoader.getHadoopShims().getConfiguration(context);
  return new OrcRecordReader(OrcFile.createReader(path, OrcFile.readerOptions(conf)),
      ShimLoader.getHadoopShims().getConfiguration(context),
      fileSplit.getStart(), fileSplit.getLength());
}
@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(InputSplit inputSplit,
    JobConf conf, Reporter reporter) throws IOException {
  FileSplit fSplit = (FileSplit) inputSplit;
  reporter.setStatus(fSplit.toString());
  Path path = fSplit.getPath();
  OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
  if (fSplit instanceof OrcSplit) {
    OrcSplit orcSplit = (OrcSplit) fSplit;
    if (orcSplit.hasFooter()) {
      opts.orcTail(orcSplit.getOrcTail());
    }
    opts.maxLength(orcSplit.getFileLength());
  }
  Reader reader = OrcFile.createReader(path, opts);
  return new VectorizedOrcRecordReader(reader, conf, fSplit);
}
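// Standalone sketch of the reader-opening pattern shared by the snippets above: build
// OrcFile.ReaderOptions from the Configuration, bind the FileSystem, then call
// OrcFile.createReader(). The class name and the command-line argument are illustrative only.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;

public class OrcReaderOpenSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]); // path to an ORC file
    FileSystem fs = path.getFileSystem(conf);
    Reader reader = OrcFile.createReader(path,
        OrcFile.readerOptions(conf).filesystem(fs));
    System.out.println("rows: " + reader.getNumberOfRows());
  }
}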