OrcUnionObjectInspector(int columnId, List<OrcProto.Type> types) { OrcProto.Type type = types.get(columnId); children = new ArrayList<ObjectInspector>(type.getSubtypesCount()); for(int i=0; i < type.getSubtypesCount(); ++i) { children.add(OrcStruct.createObjectInspector(type.getSubtypes(i), types)); } }
/**
 * Compute which physical file columns must be read.
 *
 * @param types the flattened type descriptions of the file
 * @param included the top-level (root-relative) column ids the reader wants
 * @param isOriginal whether the file uses the original (non-ACID) layout
 * @return one flag per physical column, true when it should be read
 */
public static boolean[] genIncludedColumns(
    List<OrcProto.Type> types, List<Integer> included, boolean isOriginal) {
  int rootColumn = getRootColumn(isOriginal);
  int numColumns = types.size() - rootColumn;
  boolean[] result = new boolean[numColumns];
  // The root struct itself is always read.
  result[0] = true;
  OrcProto.Type root = types.get(rootColumn);
  // Copy the requested ids into a set so each membership check is O(1)
  // instead of a linear scan of the list per top-level column.
  java.util.Set<Integer> includedSet = new java.util.HashSet<Integer>(included);
  for (int i = 0; i < root.getSubtypesCount(); ++i) {
    if (includedSet.contains(i)) {
      includeColumnRecursive(types, result, root.getSubtypes(i), rootColumn);
    }
  }
  return result;
} /**
OrcStructInspector(int columnId, List<OrcProto.Type> types) {
  // One StructField per subtype, paired with its declared field name.
  final OrcProto.Type structType = types.get(columnId);
  final int fieldCount = structType.getSubtypesCount();
  fields = new ArrayList<StructField>(fieldCount);
  for (int f = 0; f < fieldCount; ++f) {
    final int subtypeId = structType.getSubtypes(f);
    fields.add(new Field(structType.getFieldNames(f),
        createObjectInspector(subtypeId, types), f));
  }
}
// Cache the first file's layout parameters; subsequent files are compared
// against these in checkCompatibility(). NOTE(review): assumes types.get(0)
// is the root struct, so its subtype count is the column count — confirm.
compressBuffSize = k.getCompressBufferSize(); version = k.getVersion(); columnCount = k.getTypes().get(0).getSubtypesCount(); rowIndexStride = k.getRowIndexStride();
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
/**
 * Create the tree reader for a column, choosing between the plain reader
 * and the conversion reader depending on whether a read schema is set.
 *
 * @param colId the column to read
 * @param conf the configuration (may supply a schema-on-read)
 * @param fileSchema the flattened type list of the file
 * @param included per-column read flags, or null for all
 * @param skipCorrupt whether to skip corrupt data
 * @return the reader for the requested column
 * @throws IOException on reader construction failure
 */
public static TreeReaderFactory.TreeReader createTreeReader(int colId,
    Configuration conf,
    List<OrcProto.Type> fileSchema,
    boolean[] included,
    boolean skipCorrupt) throws IOException {
  // NOTE(review): for ACID files the user columns appear to sit past the
  // wrapper types, so the wrapper prefix is stripped before counting —
  // confirm against the ACID layout.
  final List<OrcProto.Type> originalFileSchema = checkAcidSchema(fileSchema)
      ? fileSchema.subList(fileSchema.get(0).getSubtypesCount(), fileSchema.size())
      : fileSchema;
  final int numCols = originalFileSchema.get(0).getSubtypesCount();
  List<OrcProto.Type> schemaOnRead = getSchemaOnRead(numCols, conf);
  List<OrcProto.Type> schemaUsed = getMatchingSchema(fileSchema, schemaOnRead);
  // Without a matching read schema, read with the file's own schema;
  // otherwise adapt file types to the read schema via conversion readers.
  return schemaUsed == null
      ? TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt)
      : ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt);
}
/**
 * Turn on a column and every column nested underneath it.
 *
 * @param types the types of the file
 * @param result the global view of columns that should be included
 * @param typeId the root of the subtree to enable
 * @param rootColumn the top column; result is indexed relative to it
 */
private static void includeColumnRecursive(List<OrcProto.Type> types,
                                           boolean[] result,
                                           int typeId,
                                           int rootColumn) {
  // Mark this column, then descend into each of its children.
  result[typeId - rootColumn] = true;
  final OrcProto.Type type = types.get(typeId);
  final int childCount = type.getSubtypesCount();
  for (int child = 0; child < childCount; ++child) {
    includeColumnRecursive(types, result, type.getSubtypes(child), rootColumn);
  }
}
UnionTreeReader(int columnId, List<OrcProto.Type> types,
                boolean[] included, boolean skipCorrupt) throws IOException {
  super(columnId);
  // One child reader per union branch; branches whose columns are not
  // selected are left null.
  final OrcProto.Type unionType = types.get(columnId);
  final int branchCount = unionType.getSubtypesCount();
  this.fields = new TreeReader[branchCount];
  for (int branch = 0; branch < branchCount; ++branch) {
    final int subtype = unionType.getSubtypes(branch);
    if (included == null || included[subtype]) {
      this.fields[branch] = createTreeReader(subtype, types, included, skipCorrupt);
    }
  }
}
int getColumns() {
  // Width of the "row" struct. NOTE(review): assumes the type at index
  // OrcRecordUpdater.ROW + 1 is the row struct of the ACID layout — confirm.
  final OrcProto.Type rowType = reader.getTypes().get(OrcRecordUpdater.ROW + 1);
  return rowType.getSubtypesCount();
}
}
public SparkOrcNewRecordReader(Reader file, Configuration conf, long offset, long length) throws IOException { List<OrcProto.Type> types = file.getTypes(); numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); value = new OrcStruct(numColumns); this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length); this.objectInspector = file.getObjectInspector(); }
public SparkOrcNewRecordReader(Reader file, Configuration conf, long offset, long length) throws IOException { List<OrcProto.Type> types = file.getTypes(); numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); value = new OrcStruct(numColumns); this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length); this.objectInspector = file.getObjectInspector(); }
@Override
int getColumns() {
  // Number of top-level columns = subtype count of the root struct (type 0).
  return reader.getTypes().get(0).getSubtypesCount();
}
}
OrcRecordReader(Reader file, Configuration conf, FileSplit split) throws IOException { List<OrcProto.Type> types = file.getTypes(); this.file = file; numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); this.offset = split.getStart(); this.length = split.getLength(); this.reader = createReaderFromFile(file, conf, offset, length); this.stats = new SerDeStats(); }
OrcRecordReader(Reader file, Configuration conf, long offset, long length) throws IOException { List<OrcProto.Type> types = file.getTypes(); numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount(); value = new OrcStruct(numColumns); this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length); }