public static TreeReaderFactory.TreeReader createTreeReader(int colId,
    Configuration conf,
    List<OrcProto.Type> fileSchema,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  final boolean isAcid = checkAcidSchema(fileSchema);
  final List<OrcProto.Type> originalFileSchema;
  if (isAcid) {
    // skip past the ACID event wrapper struct; the user's row schema
    // starts right after the wrapper's subtype entries
    originalFileSchema = fileSchema.subList(fileSchema.get(0).getSubtypesCount(),
        fileSchema.size());
  } else {
    originalFileSchema = fileSchema;
  }
  final int numCols = originalFileSchema.get(0).getSubtypesCount();
  List<OrcProto.Type> schemaOnRead = getSchemaOnRead(numCols, conf);
  List<OrcProto.Type> schemaUsed = getMatchingSchema(fileSchema, schemaOnRead);
  if (schemaUsed == null) {
    // no usable schema-on-read: read with the file's own schema
    return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt);
  } else {
    // a promoted schema exists: use the converting readers
    return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt);
  }
}
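// A minimal sketch (assumes hive-exec's OrcProto classes on the classpath)
// of the flattened type layout that the subList()/getSubtypesCount()
// arithmetic above relies on: the root STRUCT at index 0 lists its children's
// type ids, so the root's subtype count is the number of top-level columns.
// The schema struct<a:int,b:string> used here is an illustrative example.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class FlattenedSchemaSketch {
  public static void main(String[] args) {
    // struct<a:int,b:string> flattens to [STRUCT(1,2), INT, STRING]
    OrcProto.Type root = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("a", "b"))
        .addSubtypes(1).addSubtypes(2)
        .build();
    OrcProto.Type intType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.INT).build();
    OrcProto.Type strType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRING).build();
    List<OrcProto.Type> fileSchema = Arrays.asList(root, intType, strType);
    // the root's subtype count is the number of top-level columns
    System.out.println(fileSchema.get(0).getSubtypesCount()); // prints 2
  }
}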
List<String> fieldNames = type.getFieldNamesList();
int fieldIdx = 0;
for (String colName : colNames) {
  if (fieldNames.contains(colName)) {
    fieldIdx = fieldNames.indexOf(colName);
  }
  // a field may span several columns; find its start and end column indices
  int idxStart = type.getSubtypes(fieldIdx);
  int idxEnd;
  if (fieldIdx + 1 > fieldNames.size() - 1) {
    // last field: the range runs to the end of the column list
    idxEnd = getLastIdx() + 1;
  } else {
    idxEnd = type.getSubtypes(fieldIdx + 1);
private static boolean checkAcidSchema(List<OrcProto.Type> fileSchema) {
  if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) {
    List<String> acidFields = OrcRecordUpdater.getAcidEventFields();
    List<String> rootFields = fileSchema.get(0).getFieldNamesList();
    if (acidFields.equals(rootFields)) {
      return true;
    }
  }
  return false;
}
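// Hedged illustration of the root-field comparison above. The ACID event
// wrapper's field names (operation, originalTransaction, bucket, rowId,
// currentTransaction, row) are hard-coded here for the sketch; the real
// check obtains them from OrcRecordUpdater.getAcidEventFields().
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class AcidSchemaSketch {
  public static void main(String[] args) {
    OrcProto.Type root = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("operation", "originalTransaction",
            "bucket", "rowId", "currentTransaction", "row"))
        .build();
    List<String> acidFields = Arrays.asList("operation", "originalTransaction",
        "bucket", "rowId", "currentTransaction", "row");
    // mirrors checkAcidSchema: a root STRUCT with exactly these field names
    System.out.println(root.getFieldNamesList().equals(acidFields)); // true
  }
}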
private static void writeTypes(OrcProto.Footer.Builder builder,
    TreeWriter treeWriter) {
  OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
  switch (treeWriter.inspector.getCategory()) {
    case PRIMITIVE:
private static void getOrcTypesImpl(List<OrcProto.Type> result,
    ObjectInspector inspector) {
  OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
  switch (inspector.getCategory()) {
    case PRIMITIVE:
compressBuffSize = k.getCompressBufferSize();
version = k.getVersion();
columnCount = k.getTypes().get(0).getSubtypesCount();
rowIndexStride = k.getRowIndexStride();
static ObjectInspector createObjectInspector(int columnId,
    List<OrcProto.Type> types) {
  OrcProto.Type type = types.get(columnId);
  switch (type.getKind()) {
    case FLOAT:
      return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    case STRING:
      return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    case CHAR:
      if (!type.hasMaximumLength()) {
        throw new UnsupportedOperationException(
            "Illegal use of char type without length in ORC type definition.");
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getCharTypeInfo(type.getMaximumLength()));
    case VARCHAR:
      if (!type.hasMaximumLength()) {
        throw new UnsupportedOperationException(
            "Illegal use of varchar type without length in ORC type definition.");
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getVarcharTypeInfo(type.getMaximumLength()));
    case TIMESTAMP:
      return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    case DATE:
      return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
    case DECIMAL:
      // fall back to Hive's system defaults when the file carries no precision/scale
      int precision = type.hasPrecision()
          ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
      int scale = type.hasScale()
          ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(precision, scale));
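// A small standalone sketch of the DECIMAL fallback above: when the ORC type
// carries no precision/scale, Hive's system defaults apply. It restates the
// ternary from createObjectInspector rather than calling the package-private
// method itself.
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class DecimalDefaultsSketch {
  public static void main(String[] args) {
    OrcProto.Type bare = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL).build(); // no precision/scale set
    int precision = bare.hasPrecision() ? bare.getPrecision()
        : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
    int scale = bare.hasScale() ? bare.getScale()
        : HiveDecimal.SYSTEM_DEFAULT_SCALE;
    System.out.println(precision + "," + scale); // 38,18
  }
}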
static TreeReader createTreeReader(int columnId,
    List<OrcProto.Type> types,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  OrcProto.Type type = types.get(columnId);
  switch (type.getKind()) {
    case BOOLEAN:
      return new BooleanTreeReader(columnId);
    case STRING:
      return new StringTreeReader(columnId);
    case CHAR:
      if (!type.hasMaximumLength()) {
        throw new IllegalArgumentException("ORC char type has no length specified");
      }
      return new CharTreeReader(columnId, type.getMaximumLength());
    case VARCHAR:
      if (!type.hasMaximumLength()) {
        throw new IllegalArgumentException("ORC varchar type has no length specified");
      }
      return new VarcharTreeReader(columnId, type.getMaximumLength());
    case BINARY:
      return new BinaryTreeReader(columnId);
    case DECIMAL:
      int precision = type.hasPrecision()
          ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
      int scale = type.hasScale()
          ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
      return new DecimalTreeReader(columnId, precision, scale);
    case STRUCT:
      return new StructTreeReader(columnId, types, included, skipCorrupt);
    default:
      throw new IllegalArgumentException("Unsupported type " +
          type.getKind());
  }
}
OrcProto.Type fColType = fileSchema.get(i);
OrcProto.Type rColType = schemaOnRead.get(i);
if (!fColType.getKind().equals(rColType.getKind())) {
  if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) {
    if (rColType.getKind().equals(OrcProto.Type.Kind.INT) ||
        rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
      // type promotion is possible: widen SHORT to INT/LONG
      result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
      canPromoteType = true;
    } else {
      canPromoteType = false;
    }
  } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) {
    if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
      // type promotion is possible: widen INT to LONG
      result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
      canPromoteType = true;
    } else {
      canPromoteType = false;
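// The promotion rules above reduce to a tiny table: SHORT widens to INT or
// LONG, and INT widens to LONG. canPromote below is a hypothetical helper
// restating that logic for clarity; it is not part of the Hive source.
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind;

public class PromotionSketch {
  static boolean canPromote(Kind file, Kind read) {
    switch (file) {
      case SHORT:
        return read == Kind.INT || read == Kind.LONG;
      case INT:
        return read == Kind.LONG;
      default:
        return false;
    }
  }

  public static void main(String[] args) {
    System.out.println(canPromote(Kind.SHORT, Kind.LONG)); // true
    System.out.println(canPromote(Kind.INT, Kind.SHORT));  // false: no narrowing
  }
}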
if (!getSubtypesList().isEmpty()) {
  // packed repeated field: one tag byte plus a varint length prefix;
  // dataSize is the packed subtypes payload size, computed earlier (elided here)
  size += 1;
  size += com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream
      .computeInt32SizeNoTag(dataSize);
}
// one tag byte per fieldNames entry
size += 1 * getFieldNamesList().size();
if (((bitField0_ & 0x00000008) == 0x00000008)) {
  size += com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream
      .computeUInt32Size(6, scale_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
writer.value(null);
} else {
  switch (type.getKind()) {
    case STRUCT:
      printStruct(writer, (OrcStruct) obj, types, type);
public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.Type other) {
  if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.getDefaultInstance()) return this;
  if (other.hasKind()) {
    setKind(other.getKind());
  }
  if (other.hasMaximumLength()) {
    setMaximumLength(other.getMaximumLength());
  }
  if (other.hasPrecision()) {
    setPrecision(other.getPrecision());
  }
  if (other.hasScale()) {
    setScale(other.getScale());
  }
  this.mergeUnknownFields(other.getUnknownFields());
  return this;
}
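// Quick illustration of the merge semantics above: fields present on `other`
// overwrite the builder's values, absent ones are left alone. Uses the
// unshaded org.apache.hadoop.hive.ql.io.orc.OrcProto for brevity.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class MergeFromSketch {
  public static void main(String[] args) {
    OrcProto.Type other = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.VARCHAR).setMaximumLength(50).build();
    OrcProto.Type merged = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.CHAR) // overwritten by other's kind
        .setPrecision(10)                 // kept: other has no precision
        .mergeFrom(other)
        .build();
    System.out.println(merged.getKind());          // VARCHAR
    System.out.println(merged.getMaximumLength()); // 50
    System.out.println(merged.getPrecision());     // 10
  }
}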
Type type = footer.getTypes(colIdx);
switch (type.getKind()) {
  case BINARY:
public org.apache.hadoop.hive.ql.io.orc.OrcProto.Type buildPartial() {
  org.apache.hadoop.hive.ql.io.orc.OrcProto.Type result =
      new org.apache.hadoop.hive.ql.io.orc.OrcProto.Type(this);
  int from_bitField0_ = bitField0_;
  int to_bitField0_ = 0;
public void writeTo(com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream output)
    throws java.io.IOException {
  getSerializedSize();
  if (((bitField0_ & 0x00000001) == 0x00000001)) {
    output.writeEnum(1, kind_.getNumber());
  }
  if (getSubtypesList().size() > 0) {
    // packed repeated field 2: raw tag (2 << 3 | 2 = 18) plus payload length
    output.writeRawVarint32(18);
    output.writeRawVarint32(subtypesMemoizedSerializedSize);
  }
  for (int i = 0; i < subtypes_.size(); i++) {
    output.writeUInt32NoTag(subtypes_.get(i));
  }
  for (int i = 0; i < fieldNames_.size(); i++) {
    output.writeBytes(3, fieldNames_.getByteString(i));
  }
  if (((bitField0_ & 0x00000002) == 0x00000002)) {
    output.writeUInt32(4, maximumLength_);
  }
  if (((bitField0_ & 0x00000004) == 0x00000004)) {
    output.writeUInt32(5, precision_);
  }
  if (((bitField0_ & 0x00000008) == 0x00000008)) {
    output.writeUInt32(6, scale_);
  }
  getUnknownFields().writeTo(output);
}
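// Hedged round-trip check for the generated writeTo above: protobuf's
// toByteArray() drives writeTo, and parseFrom must recover an equal message.
// Uses the unshaded org.apache.hadoop.hive.ql.io.orc.OrcProto for brevity.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class TypeRoundTripSketch {
  public static void main(String[] args) throws Exception {
    OrcProto.Type original = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .setPrecision(10).setScale(2)
        .build();
    byte[] wire = original.toByteArray();        // invokes writeTo()
    OrcProto.Type parsed = OrcProto.Type.parseFrom(wire);
    System.out.println(parsed.equals(original)); // true
  }
}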
private boolean checkCompatibility(OrcFileKeyWrapper k) {
  // check compatibility with subsequent files
  if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
    LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath());
    return false;
  }

  if (!k.getCompression().equals(compression)) {
    LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath());
    return false;
  }

  if (k.getCompressBufferSize() != compressBuffSize) {
    LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath());
    return false;
  }

  if (!k.getVersion().equals(version)) {
    LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath());
    return false;
  }

  if (k.getRowIndexStride() != rowIndexStride) {
    LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath());
    return false;
  }

  return true;
}
static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
    boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
    OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
    boolean hasNull, long offset, long length, DiskRangeListCreateHelper list,
    boolean doMergeBuffers) {
  for (int group = 0; group < includedRowGroups.length; ++group) {
    if (!includedRowGroups[group]) continue;
    // position of this stream's offset within the row-group index entry
    int posn = getIndexPosition(
        encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
    long start = index.getEntry(group).getPositions(posn);
    final long nextGroupOffset;
    boolean isLast = group == (includedRowGroups.length - 1);
    nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);

    start += offset;
    long end = offset + estimateRgEndOffset(
        isCompressed, isLast, nextGroupOffset, length, compressionSize);
    list.addOrMerge(start, end, doMergeBuffers, true);
  }
}
/**
 * Recurse down into a type subtree turning on all of the sub-columns.
 * @param types the types of the file
 * @param result the global view of columns that should be included
 * @param typeId the root of tree to enable
 * @param rootColumn the top column
 */
private static void includeColumnRecursive(List<OrcProto.Type> types,
    boolean[] result, int typeId, int rootColumn) {
  result[typeId - rootColumn] = true;
  OrcProto.Type type = types.get(typeId);
  int children = type.getSubtypesCount();
  for (int i = 0; i < children; ++i) {
    includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn);
  }
}
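// Standalone sketch of the recursion above for struct<a:int,b:struct<c:string,d:double>>.
// The flattened types are built by hand, and includeColumnRecursive is copied
// verbatim so it can run outside its private enclosing class.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class IncludeColumnsSketch {
  static void includeColumnRecursive(List<OrcProto.Type> types, boolean[] result,
      int typeId, int rootColumn) {
    result[typeId - rootColumn] = true;
    OrcProto.Type type = types.get(typeId);
    for (int i = 0; i < type.getSubtypesCount(); ++i) {
      includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn);
    }
  }

  public static void main(String[] args) {
    List<OrcProto.Type> types = Arrays.asList(
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
            .addAllFieldNames(Arrays.asList("a", "b"))
            .addSubtypes(1).addSubtypes(2).build(),                             // 0: root
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build(),     // 1: a
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
            .addAllFieldNames(Arrays.asList("c", "d"))
            .addSubtypes(3).addSubtypes(4).build(),                             // 2: b
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build(),  // 3: b.c
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DOUBLE).build()); // 4: b.d
    boolean[] included = new boolean[types.size()];
    includeColumnRecursive(types, included, 2, 0); // select column b and its children
    System.out.println(Arrays.toString(included)); // [false, false, true, true, true]
  }
}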
StructTreeReader(int columnId,
    List<OrcProto.Type> types,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  super(columnId);
  OrcProto.Type type = types.get(columnId);
  int fieldCount = type.getFieldNamesCount();
  this.fields = new TreeReader[fieldCount];
  this.fieldNames = new String[fieldCount];
  for (int i = 0; i < fieldCount; ++i) {
    int subtype = type.getSubtypes(i);
    if (included == null || included[subtype]) {
      // only materialize readers for included children; excluded ones stay null
      this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt);
    }
    this.fieldNames[i] = type.getFieldNames(i);
  }
}