public static TreeReaderFactory.TreeReader createTreeReader(int colId,
    Configuration conf,
    List<OrcProto.Type> fileSchema,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  final boolean isAcid = checkAcidSchema(fileSchema);
  final List<OrcProto.Type> originalFileSchema;
  if (isAcid) {
    // skip past the ACID event wrapper struct; the user's row schema
    // starts right after the wrapper's subtype entries
    originalFileSchema = fileSchema.subList(fileSchema.get(0).getSubtypesCount(),
        fileSchema.size());
  } else {
    originalFileSchema = fileSchema;
  }
  final int numCols = originalFileSchema.get(0).getSubtypesCount();
  List<OrcProto.Type> schemaOnRead = getSchemaOnRead(numCols, conf);
  List<OrcProto.Type> schemaUsed = getMatchingSchema(fileSchema, schemaOnRead);
  if (schemaUsed == null) {
    // no usable schema-on-read: read with the file's own schema
    return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt);
  } else {
    // a promoted schema exists: use the converting readers
    return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt);
  }
}
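// A minimal sketch (assumes hive-exec's OrcProto classes on the classpath)
// of the flattened type layout that the subList()/getSubtypesCount()
// arithmetic above relies on: the root STRUCT at index 0 lists its children's
// type ids, so the root's subtype count is the number of top-level columns.
// The schema struct<a:int,b:string> used here is an illustrative example.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class FlattenedSchemaSketch {
  public static void main(String[] args) {
    // struct<a:int,b:string> flattens to [STRUCT(1,2), INT, STRING]
    OrcProto.Type root = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("a", "b"))
        .addSubtypes(1).addSubtypes(2)
        .build();
    OrcProto.Type intType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.INT).build();
    OrcProto.Type strType = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRING).build();
    List<OrcProto.Type> fileSchema = Arrays.asList(root, intType, strType);
    // the root's subtype count is the number of top-level columns
    System.out.println(fileSchema.get(0).getSubtypesCount()); // prints 2
  }
}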
List<String> fieldNames = type.getFieldNamesList();
int fieldIdx = 0;
for (String colName : colNames) {
  if (fieldNames.contains(colName)) {
    fieldIdx = fieldNames.indexOf(colName);
  }
  // a field may span several columns; find its start and end column indices
  int idxStart = type.getSubtypes(fieldIdx);
  int idxEnd;
  if (fieldIdx + 1 > fieldNames.size() - 1) {
    // last field: the range runs to the end of the column list
    idxEnd = getLastIdx() + 1;
  } else {
    idxEnd = type.getSubtypes(fieldIdx + 1);
private static boolean checkAcidSchema(List<OrcProto.Type> fileSchema) {
  if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) {
    List<String> acidFields = OrcRecordUpdater.getAcidEventFields();
    List<String> rootFields = fileSchema.get(0).getFieldNamesList();
    if (acidFields.equals(rootFields)) {
      return true;
    }
  }
  return false;
}
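// Hedged illustration of the root-field comparison above. The ACID event
// wrapper's field names (operation, originalTransaction, bucket, rowId,
// currentTransaction, row) are hard-coded here for the sketch; the real
// check obtains them from OrcRecordUpdater.getAcidEventFields().
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class AcidSchemaSketch {
  public static void main(String[] args) {
    OrcProto.Type root = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.STRUCT)
        .addAllFieldNames(Arrays.asList("operation", "originalTransaction",
            "bucket", "rowId", "currentTransaction", "row"))
        .build();
    List<String> acidFields = Arrays.asList("operation", "originalTransaction",
        "bucket", "rowId", "currentTransaction", "row");
    // mirrors checkAcidSchema: a root STRUCT with exactly these field names
    System.out.println(root.getFieldNamesList().equals(acidFields)); // true
  }
}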
private static void writeTypes(OrcProto.Footer.Builder builder,
    TreeWriter treeWriter) {
  OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
  switch (treeWriter.inspector.getCategory()) {
    case PRIMITIVE:
private static void getOrcTypesImpl(List<OrcProto.Type> result,
    ObjectInspector inspector) {
  OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
  switch (inspector.getCategory()) {
    case PRIMITIVE:
compressBuffSize = k.getCompressBufferSize();
version = k.getVersion();
columnCount = k.getTypes().get(0).getSubtypesCount();
rowIndexStride = k.getRowIndexStride();
static ObjectInspector createObjectInspector(int columnId,
    List<OrcProto.Type> types) {
  OrcProto.Type type = types.get(columnId);
  switch (type.getKind()) {
    case FLOAT:
      return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
    case STRING:
      return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    case CHAR:
      if (!type.hasMaximumLength()) {
        throw new UnsupportedOperationException(
            "Illegal use of char type without length in ORC type definition.");
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getCharTypeInfo(type.getMaximumLength()));
    case VARCHAR:
      if (!type.hasMaximumLength()) {
        throw new UnsupportedOperationException(
            "Illegal use of varchar type without length in ORC type definition.");
      }
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getVarcharTypeInfo(type.getMaximumLength()));
    case TIMESTAMP:
      return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    case DATE:
      return PrimitiveObjectInspectorFactory.writableDateObjectInspector;
    case DECIMAL:
      // fall back to Hive's system defaults when the file carries no precision/scale
      int precision = type.hasPrecision()
          ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
      int scale = type.hasScale()
          ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getDecimalTypeInfo(precision, scale));
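// A small standalone sketch of the DECIMAL fallback above: when the ORC type
// carries no precision/scale, Hive's system defaults apply. It restates the
// ternary from createObjectInspector rather than calling the package-private
// method itself.
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class DecimalDefaultsSketch {
  public static void main(String[] args) {
    OrcProto.Type bare = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL).build(); // no precision/scale set
    int precision = bare.hasPrecision() ? bare.getPrecision()
        : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
    int scale = bare.hasScale() ? bare.getScale()
        : HiveDecimal.SYSTEM_DEFAULT_SCALE;
    System.out.println(precision + "," + scale); // 38,18
  }
}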
static TreeReader createTreeReader(int columnId,
    List<OrcProto.Type> types,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  OrcProto.Type type = types.get(columnId);
  switch (type.getKind()) {
    case BOOLEAN:
      return new BooleanTreeReader(columnId);
    case STRING:
      return new StringTreeReader(columnId);
    case CHAR:
      if (!type.hasMaximumLength()) {
        throw new IllegalArgumentException("ORC char type has no length specified");
      }
      return new CharTreeReader(columnId, type.getMaximumLength());
    case VARCHAR:
      if (!type.hasMaximumLength()) {
        throw new IllegalArgumentException("ORC varchar type has no length specified");
      }
      return new VarcharTreeReader(columnId, type.getMaximumLength());
    case BINARY:
      return new BinaryTreeReader(columnId);
    case DECIMAL:
      int precision = type.hasPrecision()
          ? type.getPrecision() : HiveDecimal.SYSTEM_DEFAULT_PRECISION;
      int scale = type.hasScale()
          ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
      return new DecimalTreeReader(columnId, precision, scale);
    case STRUCT:
      return new StructTreeReader(columnId, types, included, skipCorrupt);
    default:
      throw new IllegalArgumentException("Unsupported type " +
          type.getKind());
  }
}
OrcProto.Type fColType = fileSchema.get(i);
OrcProto.Type rColType = schemaOnRead.get(i);
if (!fColType.getKind().equals(rColType.getKind())) {
  if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) {
    if (rColType.getKind().equals(OrcProto.Type.Kind.INT) ||
        rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
      // type promotion is possible: widen SHORT to INT/LONG
      result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
      canPromoteType = true;
    } else {
      canPromoteType = false;
    }
  } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) {
    if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) {
      // type promotion is possible: widen INT to LONG
      result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build());
      canPromoteType = true;
    } else {
      canPromoteType = false;
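// The promotion rules above reduce to a tiny table: SHORT widens to INT or
// LONG, and INT widens to LONG. canPromote below is a hypothetical helper
// restating that logic for clarity; it is not part of the Hive source.
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.Kind;

public class PromotionSketch {
  static boolean canPromote(Kind file, Kind read) {
    switch (file) {
      case SHORT:
        return read == Kind.INT || read == Kind.LONG;
      case INT:
        return read == Kind.LONG;
      default:
        return false;
    }
  }

  public static void main(String[] args) {
    System.out.println(canPromote(Kind.SHORT, Kind.LONG)); // true
    System.out.println(canPromote(Kind.INT, Kind.SHORT));  // false: no narrowing
  }
}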
if (!getSubtypesList().isEmpty()) {
  // packed repeated field: one tag byte plus a varint length prefix;
  // dataSize is the packed subtypes payload size, computed earlier (elided here)
  size += 1;
  size += com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream
      .computeInt32SizeNoTag(dataSize);
}
// one tag byte per fieldNames entry
size += 1 * getFieldNamesList().size();
if (((bitField0_ & 0x00000008) == 0x00000008)) {
  size += com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream
      .computeUInt32Size(6, scale_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
writer.value(null);
} else {
  switch (type.getKind()) {
    case STRUCT:
      printStruct(writer, (OrcStruct) obj, types, type);
public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.Type other) {
  if (other == org.apache.hadoop.hive.ql.io.orc.OrcProto.Type.getDefaultInstance()) return this;
  if (other.hasKind()) {
    setKind(other.getKind());
  }
  if (other.hasMaximumLength()) {
    setMaximumLength(other.getMaximumLength());
  }
  if (other.hasPrecision()) {
    setPrecision(other.getPrecision());
  }
  if (other.hasScale()) {
    setScale(other.getScale());
  }
  this.mergeUnknownFields(other.getUnknownFields());
  return this;
}
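// Quick illustration of the merge semantics above: fields present on `other`
// overwrite the builder's values, absent ones are left alone. Uses the
// unshaded org.apache.hadoop.hive.ql.io.orc.OrcProto for brevity.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class MergeFromSketch {
  public static void main(String[] args) {
    OrcProto.Type other = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.VARCHAR).setMaximumLength(50).build();
    OrcProto.Type merged = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.CHAR) // overwritten by other's kind
        .setPrecision(10)                 // kept: other has no precision
        .mergeFrom(other)
        .build();
    System.out.println(merged.getKind());          // VARCHAR
    System.out.println(merged.getMaximumLength()); // 50
    System.out.println(merged.getPrecision());     // 10
  }
}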
Type type = footer.getTypes(colIdx);
switch (type.getKind()) {
  case BINARY:
public org.apache.hadoop.hive.ql.io.orc.OrcProto.Type buildPartial() {
  org.apache.hadoop.hive.ql.io.orc.OrcProto.Type result =
      new org.apache.hadoop.hive.ql.io.orc.OrcProto.Type(this);
  int from_bitField0_ = bitField0_;
  int to_bitField0_ = 0;
public void writeTo(com.facebook.presto.hive.$internal.com.google.protobuf.CodedOutputStream output)
    throws java.io.IOException {
  getSerializedSize();
  if (((bitField0_ & 0x00000001) == 0x00000001)) {
    output.writeEnum(1, kind_.getNumber());
  }
  if (getSubtypesList().size() > 0) {
    // packed repeated field 2: raw tag (2 << 3 | 2 = 18) plus payload length
    output.writeRawVarint32(18);
    output.writeRawVarint32(subtypesMemoizedSerializedSize);
  }
  for (int i = 0; i < subtypes_.size(); i++) {
    output.writeUInt32NoTag(subtypes_.get(i));
  }
  for (int i = 0; i < fieldNames_.size(); i++) {
    output.writeBytes(3, fieldNames_.getByteString(i));
  }
  if (((bitField0_ & 0x00000002) == 0x00000002)) {
    output.writeUInt32(4, maximumLength_);
  }
  if (((bitField0_ & 0x00000004) == 0x00000004)) {
    output.writeUInt32(5, precision_);
  }
  if (((bitField0_ & 0x00000008) == 0x00000008)) {
    output.writeUInt32(6, scale_);
  }
  getUnknownFields().writeTo(output);
}
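// Hedged round-trip check for the generated writeTo above: protobuf's
// toByteArray() drives writeTo, and parseFrom must recover an equal message.
// Uses the unshaded org.apache.hadoop.hive.ql.io.orc.OrcProto for brevity.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;

public class TypeRoundTripSketch {
  public static void main(String[] args) throws Exception {
    OrcProto.Type original = OrcProto.Type.newBuilder()
        .setKind(OrcProto.Type.Kind.DECIMAL)
        .setPrecision(10).setScale(2)
        .build();
    byte[] wire = original.toByteArray();        // invokes writeTo()
    OrcProto.Type parsed = OrcProto.Type.parseFrom(wire);
    System.out.println(parsed.equals(original)); // true
  }
}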
private boolean checkCompatibility(OrcFileKeyWrapper k) {
  // check compatibility with subsequent files
  if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) {
    LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath());
    return false;
  }

  if (!k.getCompression().equals(compression)) {
    LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath());
    return false;
  }

  if (k.getCompressBufferSize() != compressBuffSize) {
    LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath());
    return false;
  }

  if (!k.getVersion().equals(version)) {
    LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath());
    return false;
  }

  if (k.getRowIndexStride() != rowIndexStride) {
    LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath());
    return false;
  }

  return true;
}
static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
    boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
    OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
    boolean hasNull, long offset, long length, DiskRangeListCreateHelper list,
    boolean doMergeBuffers) {
  for (int group = 0; group < includedRowGroups.length; ++group) {
    if (!includedRowGroups[group]) continue;
    // position of this stream's offset within the row-group index entry
    int posn = getIndexPosition(
        encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
    long start = index.getEntry(group).getPositions(posn);
    final long nextGroupOffset;
    boolean isLast = group == (includedRowGroups.length - 1);
    nextGroupOffset = isLast ? length : index.getEntry(group + 1).getPositions(posn);

    start += offset;
    long end = offset + estimateRgEndOffset(
        isCompressed, isLast, nextGroupOffset, length, compressionSize);
    list.addOrMerge(start, end, doMergeBuffers, true);
  }
}
/**
 * Recurse down into a type subtree turning on all of the sub-columns.
 * @param types the types of the file
 * @param result the global view of columns that should be included
 * @param typeId the root of tree to enable
 * @param rootColumn the top column
 */
private static void includeColumnRecursive(List<OrcProto.Type> types,
    boolean[] result, int typeId, int rootColumn) {
  result[typeId - rootColumn] = true;
  OrcProto.Type type = types.get(typeId);
  int children = type.getSubtypesCount();
  for (int i = 0; i < children; ++i) {
    includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn);
  }
}
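// Standalone sketch of the recursion above for struct<a:int,b:struct<c:string,d:double>>.
// The flattened types are built by hand, and includeColumnRecursive is copied
// verbatim so it can run outside its private enclosing class.
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import java.util.Arrays;
import java.util.List;

public class IncludeColumnsSketch {
  static void includeColumnRecursive(List<OrcProto.Type> types, boolean[] result,
      int typeId, int rootColumn) {
    result[typeId - rootColumn] = true;
    OrcProto.Type type = types.get(typeId);
    for (int i = 0; i < type.getSubtypesCount(); ++i) {
      includeColumnRecursive(types, result, type.getSubtypes(i), rootColumn);
    }
  }

  public static void main(String[] args) {
    List<OrcProto.Type> types = Arrays.asList(
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
            .addAllFieldNames(Arrays.asList("a", "b"))
            .addSubtypes(1).addSubtypes(2).build(),                             // 0: root
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build(),     // 1: a
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
            .addAllFieldNames(Arrays.asList("c", "d"))
            .addSubtypes(3).addSubtypes(4).build(),                             // 2: b
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build(),  // 3: b.c
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.DOUBLE).build()); // 4: b.d
    boolean[] included = new boolean[types.size()];
    includeColumnRecursive(types, included, 2, 0); // select column b and its children
    System.out.println(Arrays.toString(included)); // [false, false, true, true, true]
  }
}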
StructTreeReader(int columnId,
    List<OrcProto.Type> types,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  super(columnId);
  OrcProto.Type type = types.get(columnId);
  int fieldCount = type.getFieldNamesCount();
  this.fields = new TreeReader[fieldCount];
  this.fieldNames = new String[fieldCount];
  for (int i = 0; i < fieldCount; ++i) {
    int subtype = type.getSubtypes(i);
    if (included == null || included[subtype]) {
      // only materialize readers for included children; excluded ones stay null
      this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt);
    }
    this.fieldNames[i] = type.getFieldNames(i);
  }
}