columns[9] = true; // text column
rows = reader.rowsOptions(new Reader.Options()
    .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
    .include(columns));
rows.seekToRow(lastRowOfStripe2);
for (int i = 0; i < 2; ++i) {
  // ... (loop body elided in the original snippet)
}
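The offsetOfStripe2, offsetOfStripe4, and lastRowOfStripe2 values above come from the file's stripe metadata. A minimal sketch of deriving them, assuming a Reader named reader; the derivation itself is illustrative, not part of the original snippet:

List<StripeInformation> stripes = reader.getStripes();
long offsetOfStripe2 = stripes.get(2).getOffset();
long offsetOfStripe4 = stripes.get(4).getOffset();
// Last row of stripe 2, counted from the start of the file (row indexes are zero-based):
long lastRowOfStripe2 = stripes.get(0).getNumberOfRows()
    + stripes.get(1).getNumberOfRows()
    + stripes.get(2).getNumberOfRows() - 1;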
Reader.Options options = new Reader.Options().schema(schema);
options.range(offset, length);
options.include(OrcInputFormat.genIncludedColumns(schema, conf));
OrcInputFormat.setSearchArgument(options, types, conf, true);
// Constructor fragment; the leading parameters were elided in the original snippet.
                          Path[] deltaDirectory,
                          Options mergerOptions) throws IOException {
  this.collapse = collapseEvents;
  this.offset = options.getOffset();
  this.length = options.getLength();
  this.validWriteIdList = validWriteIdList;
  ReaderKey baseKey = new ReaderKey();
  if (isOriginal) {
    options = options.clone();
    // ... (handling of pre-ACID "original" files elided in the original snippet)
  }
  if (mergerOptions.isCompacting()) {
    assert mergerOptions.isMajorCompaction();
    // ...
  }
  // Delta events are read in full, with predicate pushdown disabled:
  Reader.Options deltaEventOptions = eventOptions.clone()
      .searchArgument(null, null).range(0, Long.MAX_VALUE);
  for (Path delta : deltaDirectory) {
    if (!mergerOptions.isCompacting() && !AcidUtils.isDeleteDelta(delta)) {
      // ... (per-delta reader construction elided in the original snippet)
protected RecordReaderImpl(ReaderImpl fileReader,
                           Reader.Options options) throws IOException {
  this.writerVersion = fileReader.getWriterVersion();
  if (options.getSchema() == null) {
    if (LOG.isInfoEnabled()) {
      // options.getSchema() is null in this branch, so log the file schema instead
      LOG.info("Reader schema not provided -- using file schema " +
          fileReader.getSchema());
    }
    // ...
  }
  if (LOG.isDebugEnabled() && evolution.hasConversion()) {
    LOG.debug("ORC file " + fileReader.path.toString() +
        " has data type conversion --\n" +
        "reader schema: " + options.getSchema().toString() + "\n" +
        "file schema: " + fileReader.getSchema());
  }
  this.ignoreNonUtf8BloomFilter =
      OrcConf.IGNORE_NON_UTF8_BLOOM_FILTERS.getBoolean(fileReader.conf);
  SearchArgument sarg = options.getSearchArgument();
  if (sarg != null && rowIndexStride != 0) {
    sargApp = new SargApplier(sarg, /* ... remaining arguments elided ... */);
  }
  // ...
  long offset = options.getOffset();
  long maxOffset = options.getMaxOffset();
  for (StripeInformation stripe : fileReader.getStripes()) {
    long stripeStart = stripe.getOffset();
    // ... (stripe selection elided in the original snippet)
  }
  Boolean zeroCopy = options.getUseZeroCopy();
  if (zeroCopy == null) {
    zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
  }
public SchemaEvolution(TypeDescription fileSchema,
                       TypeDescription readerSchema,
                       Reader.Options options) {
  boolean allowMissingMetadata = options.getTolerateMissingSchema();
  boolean[] includedCols = options.getInclude();
  this.isSchemaEvolutionCaseAware = options.getIsSchemaEvolutionCaseAware();
  this.readerIncluded = includedCols == null ? null :
      Arrays.copyOf(includedCols, includedCols.length);
  this.fileSchema = fileSchema;
  isAcid = checkAcidSchema(fileSchema);
  includeAcidColumns = options.getIncludeAcidColumns();
  this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
  if (readerSchema != null) {
    // ... (assignment target elided in the original snippet:)
    //     new TypeDescription[this.readerSchema.getMaximumId() + 1];
    int positionalLevels = 0;
    if (options.getForcePositionalEvolution()) {
      positionalLevels = isAcid ? 2 : 1;
    }
    buildConversion(fileSchema, this.readerSchema, positionalLevels);
  }
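SchemaEvolution is what maps a reader schema onto the file schema when the two differ. A minimal sketch of triggering it through the public API, assuming a file written as struct&lt;x:int&gt;; both schemas here are illustrative, not from the snippet above:

TypeDescription readerSchema =
    TypeDescription.fromString("struct<x:int,y:string>");
Reader.Options options = reader.options().schema(readerSchema);
// Column y does not exist in the file, so it is read back as nulls;
// matching columns are converted where reader and file types differ.
RecordReader rows = reader.rows(options);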
this.writerIncludes = OrcInputFormat.genIncludedColumns(schema, columnIds);
SchemaEvolution evolution = new SchemaEvolution(schema, null,
    new Reader.Options(jobConf).include(writerIncludes));
consumer.setSchemaEvolution(evolution);
if (hillviewSchema != null)
  hillviewDesc = hillviewSchema.getColumnDescriptions();
boolean[] include = options.getInclude();
List<IAppendableColumn> toCreate = new ArrayList<IAppendableColumn>();
  result = Table.createLazyTable(desc, (int) rowCount, this.filename, lazyLoader);
} else {
  Reader.Options options = new Reader.Options();
  List<IAppendableColumn> cols = readColumns(reader, options, this.hillviewSchema);
  // ... (table construction elided in the original snippet)
  this.close(null);
TypeDescription schema =
    TypeDescription.fromString(OrcConf.MAPRED_INPUT_SCHEMA.getString(conf));
Reader.Options options = reader.options()
    .range(start, length)
    .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
    .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
    .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
if (schema != null) {
  options.schema(schema);
} else {
  schema = reader.getSchema();
}
options.include(parseInclude(schema, OrcConf.INCLUDE_COLUMNS.getString(conf)));
String kryoSarg = OrcConf.KRYO_SARG.getString(conf);
// ... (sargColumns and sargBytes are derived from the configuration; elided in the original snippet)
SearchArgument sarg =
    new Kryo().readObject(new Input(sargBytes), SearchArgumentImpl.class);
options.searchArgument(sarg, sargColumns.split(","));
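Options built this way feed straight into Reader.rows(...). A minimal sketch of the resulting read loop; the path example.orc and the absence of a column projection are illustrative assumptions, not part of the snippet above:

Configuration conf = new Configuration();
Reader reader = OrcFile.createReader(new Path("example.orc"),
    OrcFile.readerOptions(conf));
Reader.Options options = reader.options()
    .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf));
RecordReader rows = reader.rows(options);
VectorizedRowBatch batch = reader.getSchema().createRowBatch();
while (rows.nextBatch(batch)) {
  // batch.size rows are now populated in batch.cols
}
rows.close();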
final Reader.Options options = new Reader.Options();
long offset = fSplit.getStart();
long length = fSplit.getLength();
options.schema(fSplit.isOriginal() ? hiveReader.getSchema() :
    hiveReader.getSchema().getChildren().get(TRANS_ROW_COLUMN_INDEX));
options.range(offset, length);
boolean[] include = OrcInputFormat.genIncludedColumns(types, jobConf, fSplit.isOriginal());
options.include(include);
options.zeroCopyPoolShim(new HiveORCZeroCopyShim(context.getAllocator()));
final ORCScanFilter orcScanFilter = (ORCScanFilter) filter;
final SearchArgument sarg = orcScanFilter.getSarg();
options.searchArgument(sarg, OrcInputFormat.getSargColumnNames(
    selectedColNames, types, options.getInclude(), fSplit.isOriginal()));
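Where no engine-specific filter object is available, a SearchArgument can also be built directly. A minimal sketch, assuming a LONG column named x; the column and predicate are illustrative:

SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
    .lessThan("x", PredicateLeaf.Type.LONG, 100L)
    .end()
    .build();
options.searchArgument(sarg, new String[]{"x"});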
public OrcIterator build() {
  Preconditions.checkNotNull(schema, "Schema is required");
  try {
    Path path = new Path(file.location());
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    ColumnIdMap columnIds = new ColumnIdMap();
    TypeDescription orcSchema = TypeConversion.toOrc(schema, columnIds);
    Reader.Options options = reader.options();
    if (start != null) {
      options.range(start, length);
    }
    options.schema(orcSchema);
    return new OrcIterator(path, orcSchema, reader.rows(options));
  } catch (IOException e) {
    throw new RuntimeException("Can't open " + file.location(), e);
  }
}
@Override
public List<IColumn> loadColumns(List<String> names) {
  try {
    boolean[] toRead = OrcFileLoader.this.project(names);
    Reader.Options options = new Reader.Options();
    options = options.include(toRead);
    Reader reader = OrcFile.createReader(new Path(filename),
        OrcFile.readerOptions(OrcFileLoader.this.conf));
    List<IAppendableColumn> result = readColumns(
        reader, options, OrcFileLoader.this.hillviewSchema);
    return Linq.map(result, e -> e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
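The boolean array handed to include(...) is indexed by the schema's flattened type ids, with the root struct at id 0 and each column's subtree following it. A sketch of a hypothetical helper (includeByName is not part of any snippet above) that marks named top-level columns of a struct schema:

static boolean[] includeByName(TypeDescription schema, List<String> names) {
  boolean[] include = new boolean[schema.getMaximumId() + 1];
  include[0] = true; // the root struct is always read
  List<String> fieldNames = schema.getFieldNames();
  List<TypeDescription> children = schema.getChildren();
  for (int i = 0; i < fieldNames.size(); ++i) {
    if (names.contains(fieldNames.get(i))) {
      // mark the column and its whole subtree (relevant for nested types)
      for (int id = children.get(i).getId();
           id <= children.get(i).getMaximumId(); ++id) {
        include[id] = true;
      }
    }
  }
  return include;
}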
public void internalInit(InputSplit inputSplit, JobConf jobConf,
                         ValueVector[] vectors) throws IOException {
  if (filter != null && filter instanceof ORCScanFilter) {
    final ORCScanFilter scanFilter = (ORCScanFilter) filter;
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
        jobConf.get(serdeConstants.LIST_COLUMNS));
    jobConf.set(ConvertAstToSearchArg.SARG_PUSHDOWN,
        scanFilter.getKryoBase64EncodedFilter());
  }
  final Reader.Options options = new Reader.Options()
      .zeroCopyPoolShim(new HiveORCZeroCopyShim(context.getAllocator()));
  reader = ((OrcInputFormat) jobConf.getInputFormat())
      .getRecordReader(inputSplit, jobConf, Reporter.NULL, options);
  if (logger.isTraceEnabled()) {
    logger.trace("hive reader created: {} for inputSplit {}",
        reader.getClass().getName(), inputSplit.toString());
  }
  this.key = reader.createKey();
  this.value = reader.createValue();
}
public OrcRecordReader(CombineFileSplit split, TaskAttemptContext context,
                       Integer index) throws IOException {
  this.path = split.getPath(index);
  this.offset = split.getOffset(index);
  this.end = offset + split.getLength(index);
  final Reader reader = OrcFile.createReader(path,
      OrcFile.readerOptions(context.getConfiguration()));
  final Reader.Options options = new Reader.Options();
  options.range(offset, split.getLength(index));
  in = reader.rows(options);
  schema = reader.getSchema();
  this.batch = schema.createRowBatch();
  rowInBatch = 0;
  this.row = (OrcStruct) OrcStruct.createValue(schema);
}
@Override
public Options options() {
  return new Options(conf);
}
@Override
public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) {
  if (readerSchema == null) {
    readerSchema = fileSchema;
  }
  // TODO: will this work correctly with ACID?
  boolean[] readerIncludes = OrcInputFormat.genIncludedColumns(
      readerSchema, readerLogicalColumnIds);
  Reader.Options options = new Reader.Options(jobConf).include(readerIncludes);
  return new SchemaEvolution(fileSchema, readerSchema, options);
}
private static Options buildReaderOptions(TableMeta meta) {
  return new Options()
      .useZeroCopy(Boolean.parseBoolean(meta.getProperty(
          OrcConf.USE_ZEROCOPY.getAttribute(),
          String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()))))
      .skipCorruptRecords(Boolean.parseBoolean(meta.getProperty(
          OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
          String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()))));
}
public OrcMapreduceRecordReader(Reader fileReader,
                                Reader.Options options) throws IOException {
  this.batchReader = fileReader.rows(options);
  if (options.getSchema() == null) {
    schema = fileReader.getSchema();
  } else {
    schema = options.getSchema();
  }
  this.batch = schema.createRowBatch();
  rowInBatch = 0;
  this.row = (V) OrcStruct.createValue(schema);
}
private static Options buildReaderOptions(TableMeta meta) {
  return new Options()
      .useZeroCopy(Boolean.parseBoolean(meta.getOption(
          OrcConf.USE_ZEROCOPY.getAttribute(),
          String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()))))
      .skipCorruptRecords(Boolean.parseBoolean(meta.getOption(
          OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
          String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()))));
}
protected OrcMapredRecordReader(Reader fileReader,
                                Reader.Options options) throws IOException {
  this.batchReader = fileReader.rows(options);
  if (options.getSchema() == null) {
    schema = fileReader.getSchema();
  } else {
    schema = options.getSchema();
  }
  this.batch = schema.createRowBatch();
  rowInBatch = 0;
}
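A minimal sketch of consuming such a mapred-style reader, assuming it was obtained as recordReader (the variable name is illustrative); the key is a NullWritable and the value is typically an OrcStruct:

NullWritable key = recordReader.createKey();
OrcStruct value = (OrcStruct) recordReader.createValue();
while (recordReader.next(key, value)) {
  // one row is now materialized in value
}
recordReader.close();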