/**
 * Generates the per-column include flags for the given file schema by
 * translating this source's physical column ids, passing along the ACID
 * struct column id.
 *
 * @param fileSchema the schema of the file being read
 * @return the include flags produced by
 *         {@code OrcInputFormat.genIncludedColumns}
 */
@Override
public boolean[] generateFileIncludes(TypeDescription fileSchema) {
  return OrcInputFormat.genIncludedColumns(
      fileSchema, filePhysicalColumnIds, acidStructColumnId);
}
/**
 * Figures out which columns to include from the reader schema and the list
 * of logical column ids. Delegates to the three-argument overload with a
 * null final argument.
 *
 * @param readerSchema the types for the reader
 * @param included the logical column ids to include
 * @return the include flags
 */
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           List<Integer> included) {
  return genIncludedColumns(readerSchema, included, null);
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param readerSchema the types for the reader
 * @param conf the configuration
 * @return the include flags, or null when every column should be read
 */
static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                    Configuration conf) {
  if (ColumnProjectionUtils.isReadAllColumns(conf)) {
    // No projection configured: a null include array means "read everything".
    return null;
  }
  List<Integer> projectedIds = ColumnProjectionUtils.getReadColumnIDs(conf);
  return genIncludedColumns(readerSchema, projectedIds);
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param readerSchema the types for the reader
 * @param conf the configuration
 * @return the include flags, or null when every column should be read
 */
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           Configuration conf) {
  boolean[] result = null;
  if (!ColumnProjectionUtils.isReadAllColumns(conf)) {
    // A projection is in place; translate the configured column ids.
    result = genIncludedColumns(readerSchema,
        ColumnProjectionUtils.getReadColumnIDs(conf));
  }
  return result;
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf) .include(readerIncludes).includeAcidColumns(includeAcidColumns); return new SchemaEvolution(fileSchema, readerSchema, options); }
/**
 * Validates that the includes derived from this source's column ids cover
 * every column the writer was configured to include.
 *
 * @param footer the ORC footer providing the file's type list
 * @throws IOException when the translated includes do not cover the writer's
 *         includes
 */
public void validateIncludes(OrcProto.Footer footer) throws IOException {
  if (doesSourceHaveIncludes) return; // Irrelevant.
  // Translate this source's column ids against the file's schema from the footer.
  boolean[] translatedIncludes = columnIds == null ? null
      : OrcInputFormat.genIncludedColumns(
          OrcUtils.convertTypeFromProtobuf(footer.getTypesList(), 0),
          columnIds);
  if (translatedIncludes == null) {
    // NOTE(review): this relies on throwIncludesMismatchError always throwing;
    // if it ever returned normally, the length dereference below would NPE —
    // confirm against its definition.
    throwIncludesMismatchError(translatedIncludes);
  }
  // Compare over the common prefix of the two include arrays.
  int len = Math.min(translatedIncludes.length, writerIncludes.length);
  for (int i = 0; i < len; ++i) {
    // Translated includes may be a superset of writer includes due to cache.
    if (!translatedIncludes[i] && writerIncludes[i]) {
      throwIncludesMismatchError(translatedIncludes);
    }
  }
  // Any writer include beyond the translated range is by definition uncovered.
  if (translatedIncludes.length < writerIncludes.length) {
    for (int i = len; i < writerIncludes.length; ++i) {
      if (writerIncludes[i]) {
        throwIncludesMismatchError(translatedIncludes);
      }
    }
  }
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options(conf).schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); //todo: last param is bogus. why is this hardcoded? OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options().schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
consumer.setUseDecimal64ColumnVectors(useDecimal64ColumnVectors); this.schema = schema; this.writerIncludes = OrcInputFormat.genIncludedColumns(schema, columnIds); SchemaEvolution evolution = new SchemaEvolution(schema, null, new Reader.Options(jobConf).include(writerIncludes));
Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { readerIncluded = genIncludedColumns(readerSchema, context.conf); evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) {
Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { readerIncluded = genIncludedColumns(readerSchema, context.conf); evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) {
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
/**
 * Creates a record reader over the given ORC file, honoring any
 * schema-on-read and column projection found in the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  // Full-ACID scans must not go through this plain reader path.
  if (AcidUtils.isFullAcidScan(conf)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Is there a schema-on-read in the configuration variables?
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options options =
      new Reader.Options(conf).range(offset, length).schema(schema);
  boolean isOriginal = isOriginal(file);
  if (schema == null) {
    // No schema-on-read: fall back to the schema stored in the file.
    schema = file.getSchema();
  }
  options.include(genIncludedColumns(schema, conf));
  setSearchArgument(options, OrcUtils.getOrcTypes(schema), conf, isOriginal);
  return file.rowsOptions(options, conf);
}
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
/**
 * Creates a record reader over the given ORC file, honoring any
 * schema-on-read and column projection found in the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  // Transactional tables must be read through the ACID reader instead.
  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Is a schema-on-read present in the configuration variables?
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options readerOptions =
      new Reader.Options().range(offset, length).schema(schema);
  boolean originalLayout = isOriginal(file);
  if (schema == null) {
    // No schema-on-read: fall back to the schema stored in the file.
    schema = file.getSchema();
  }
  List<OrcProto.Type> orcTypes = OrcUtils.getOrcTypes(schema);
  readerOptions.include(genIncludedColumns(schema, conf));
  setSearchArgument(readerOptions, orcTypes, conf, originalLayout);
  return file.rowsOptions(readerOptions);
}
/**
 * Generates the per-column include flags for the given file schema by
 * translating this source's physical column ids, passing along the ACID
 * struct column id.
 *
 * @param fileSchema the schema of the file being read
 * @return the include flags produced by
 *         {@code OrcInputFormat.genIncludedColumns}
 */
@Override
public boolean[] generateFileIncludes(TypeDescription fileSchema) {
  boolean[] fileIncludes = OrcInputFormat.genIncludedColumns(
      fileSchema, filePhysicalColumnIds, acidStructColumnId);
  return fileIncludes;
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param types the types for the file
 * @param conf the configuration
 * @param isOriginal is the file in the original format?
 * @return the include flags, or null when every column should be read
 */
public static boolean[] genIncludedColumns(
    List<OrcProto.Type> types, Configuration conf, boolean isOriginal) {
  if (ColumnProjectionUtils.isReadAllColumns(conf)) {
    // No projection configured; null tells the reader to include everything.
    return null;
  }
  return genIncludedColumns(types,
      ColumnProjectionUtils.getReadColumnIDs(conf), isOriginal);
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf).include(readerIncludes); return new SchemaEvolution(fileSchema, readerSchema, options); }
/**
 * Creates a record reader over the given ORC file, applying the column
 * projection and search argument from the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  boolean originalFormat = isOriginal(file);
  List<OrcProto.Type> fileTypes = file.getTypes();
  Reader.Options options = new Reader.Options().range(offset, length);
  options.include(genIncludedColumns(fileTypes, conf, originalFormat));
  setSearchArgument(options, fileTypes, conf, originalFormat);
  return file.rowsOptions(options);
}
/**
 * Constructs a vectorized reader over the given split, applying the column
 * projection and search argument from the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param fileSplit the split describing the byte range to read
 * @throws IOException when the row reader cannot be created
 */
VectorizedOrcRecordReader(Reader file, Configuration conf,
                          FileSplit fileSplit) throws IOException {
  this.offset = fileSplit.getStart();
  this.length = fileSplit.getLength();
  List<OrcProto.Type> types = file.getTypes();
  Reader.Options options = new Reader.Options().range(offset, length);
  // NOTE(review): isOriginal is hard-coded to true for both calls below —
  // confirm this reader is only used on non-ACID (original format) files.
  options.include(OrcInputFormat.genIncludedColumns(types, conf, true));
  OrcInputFormat.setSearchArgument(options, types, conf, true);
  this.reader = file.rowsOptions(options);
  try {
    rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(conf, fileSplit);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}