// Fragment of a larger method: builds the writer-side include mask from the
// projected column ids, then creates a SchemaEvolution with a null reader
// schema (i.e. the file schema is used as-is) and hands it to the consumer.
// NOTE(review): enclosing method not visible here — confirm `columnIds` and
// `consumer` semantics against the full definition.
this.writerIncludes = OrcInputFormat.genIncludedColumns(schema, columnIds); SchemaEvolution evolution = new SchemaEvolution(schema, null, new Reader.Options(jobConf).include(writerIncludes)); consumer.setSchemaEvolution(evolution);
// Fragment, presumably test setup: projects the columns "url,cost" via the
// configuration, sets an explicit 5-entry include mask, applies the search
// argument (isOriginal=false), and reads back the resulting column names.
// NOTE(review): enclosing method not visible — verify this is test code.
conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "url,cost"); options.include(new boolean[]{true, true, false, true, false}); OrcInputFormat.setSearchArgument(options, types, conf, false); String[] colNames = options.getColumnNames();
// Fragment (cut off mid-else): with no reader types the include mask is derived
// from the file schema and evolution uses a null reader schema; otherwise the
// supplied reader schema is used. NOTE(review): in the else branch,
// `readerIncluded` is assigned outside this fragment — confirm in the full
// method. The trailing `if (!isOriginal) {` continues past this span.
if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) {
// Fragment of a reader constructor/initializer: records the split length,
// restricts the read range to [offset, length), derives the include mask from
// the schema and configuration, and applies the search argument
// (isOriginal hardcoded to true). NOTE(review): `offset` and `schema` are
// defined outside this fragment — confirm against the full method.
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
// Fragment of a reader constructor/initializer (duplicate of an earlier
// fragment, presumably from a different file revision): sets the split length
// and read range, projects columns from the schema, and applies the search
// argument with isOriginal hardcoded to true — TODO confirm the hardcoding is
// intentional in the enclosing method.
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
// Fragment, presumably test setup: creates the root path on the filesystem,
// builds an OriginalReaderPairToRead over [minKey, maxKey] for the fixed
// BUCKET with the given include mask, an empty ValidReaderWriteIdList, and
// statement id 0, then obtains its record reader. NOTE(review): `key`,
// `reader`, `minKey`, `maxKey`, `includes` come from outside this fragment.
fs.create(root); ReaderPair pair = new OrcRawRecordMerger.OriginalReaderPairToRead(key, reader, BUCKET, minKey, maxKey, new Reader.Options().include(includes), new OrcRawRecordMerger.Options().rootPath(root), conf, new ValidReaderWriteIdList(), 0); RecordReader recordReader = pair.getRecordReader();
/**
 * Builds a {@link RecordReader} over the given byte range of an ORC file,
 * applying any schema-on-read, column projection and search argument found
 * in the configuration.
 *
 * @param file   the ORC reader for the underlying file
 * @param conf   configuration consulted for schema, projection and SARG
 * @param offset first byte of the range to read
 * @param length number of bytes in the range to read
 * @return a record reader positioned on the requested range
 * @throws IOException if the record reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file,
                                                Configuration conf,
                                                long offset,
                                                long length
                                                ) throws IOException {
  // ACID tables must be read through the ACID-aware reader, not this path.
  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Schema-on-read from the configuration; may be null when not configured.
  TypeDescription desiredSchema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options readerOptions = new Reader.Options().range(offset, length);
  // Intentionally set before the null fallback below: a null schema here
  // means "use the file's own schema".
  readerOptions.schema(desiredSchema);
  boolean fileIsOriginal = isOriginal(file);
  TypeDescription effectiveSchema = desiredSchema == null ? file.getSchema() : desiredSchema;
  List<OrcProto.Type> orcTypes = OrcUtils.getOrcTypes(effectiveSchema);
  readerOptions.include(genIncludedColumns(effectiveSchema, conf));
  setSearchArgument(readerOptions, orcTypes, conf, fileIsOriginal);
  return file.rowsOptions(readerOptions);
}
/** * Convert from the row include/sarg/columnNames to the event equivalent * for the underlying file. * @param options options for the row reader * @return a cloned options object that is modified for the event reader */ static Reader.Options createEventOptions(Reader.Options options) { Reader.Options result = options.clone(); result.range(options.getOffset(), Long.MAX_VALUE); result.include(options.getInclude()); // slide the column names down by 6 for the name array if (options.getColumnNames() != null) { String[] orig = options.getColumnNames(); String[] cols = new String[orig.length + OrcRecordUpdater.FIELDS]; for(int i=0; i < orig.length; ++i) { cols[i + OrcRecordUpdater.FIELDS] = orig[i]; } result.searchArgument(options.getSearchArgument(), cols); } return result; }
/** * Convert from the row include/sarg/columnNames to the event equivalent * for the underlying file. * @param options options for the row reader * @param rowSchema schema of the row, excluding ACID columns * @return a cloned options object that is modified for the event reader */ static Reader.Options createEventOptions(Reader.Options options, TypeDescription rowSchema) { Reader.Options result = options.clone(); result.include(options.getInclude()); // slide the column names down by 6 for the name array if (options.getColumnNames() != null) { String[] orig = options.getColumnNames(); String[] cols = new String[orig.length + OrcRecordUpdater.FIELDS]; for(int i=0; i < orig.length; ++i) { cols[i + OrcRecordUpdater.FIELDS] = orig[i]; } result.searchArgument(options.getSearchArgument(), cols); } // schema evolution will insert the acid columns to row schema for ACID read result.schema(rowSchema); return result; }
/**
 * Builds a {@link RecordReader} over the given byte range of an ORC file,
 * applying any schema-on-read, column projection and search argument found
 * in the configuration. Full-ACID scans are rejected here.
 *
 * @param file   the ORC reader for the underlying file
 * @param conf   configuration consulted for schema, projection and SARG
 * @param offset first byte of the range to read
 * @param length number of bytes in the range to read
 * @return a record reader positioned on the requested range
 * @throws IOException if the record reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file,
                                                Configuration conf,
                                                long offset,
                                                long length
                                                ) throws IOException {
  // Full-ACID tables must be read through the ACID-aware reader instead.
  if (AcidUtils.isFullAcidScan(conf)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Schema-on-read from the configuration; may be null when not configured.
  TypeDescription desiredSchema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options readerOptions = new Reader.Options(conf).range(offset, length);
  // Intentionally set before the null fallback below: a null schema here
  // means "use the file's own schema".
  readerOptions.schema(desiredSchema);
  boolean fileIsOriginal = isOriginal(file);
  TypeDescription effectiveSchema = desiredSchema == null ? file.getSchema() : desiredSchema;
  List<OrcProto.Type> orcTypes = OrcUtils.getOrcTypes(effectiveSchema);
  readerOptions.include(genIncludedColumns(effectiveSchema, conf));
  setSearchArgument(readerOptions, orcTypes, conf, fileIsOriginal);
  return file.rowsOptions(readerOptions, conf);
}
// Fragment: applies the computed projection mask as the include list.
// NOTE(review): computeProjectionMask() is defined outside this view —
// confirm its mask layout matches what include() expects.
options.include(computeProjectionMask());
/**
 * Opens a row reader over the whole file with the given column projection.
 *
 * @param include column include mask; semantics delegated to
 *                {@code rowsOptions}
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(boolean[] include) throws IOException {
  Options rowOptions = new Options();
  rowOptions.include(include);
  return rowsOptions(rowOptions);
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options().schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
/**
 * Opens a row reader over the given byte range with a column projection and
 * search argument.
 *
 * @param offset      first byte of the range to read
 * @param length      number of bytes in the range to read
 * @param include     column include mask
 * @param sarg        search argument used to skip row groups
 * @param columnNames column names the search argument refers to
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include,
                         SearchArgument sarg, String[] columnNames
                         ) throws IOException {
  Options rowOptions = new Options()
      .range(offset, length)
      .include(include)
      .searchArgument(sarg, columnNames);
  return rowsOptions(rowOptions);
}
/**
 * Opens a row reader over the given byte range with a column projection.
 *
 * @param offset  first byte of the range to read
 * @param length  number of bytes in the range to read
 * @param include column include mask
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include
                         ) throws IOException {
  Options rowOptions = new Options()
      .range(offset, length)
      .include(include);
  return rowsOptions(rowOptions);
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options(conf).schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); //todo: last param is bogus. why is this hardcoded? OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
/**
 * Opens a row reader over the whole file, projecting the given columns.
 *
 * @param include column include mask; semantics delegated to
 *                {@code rowsOptions}
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(boolean[] include) throws IOException {
  Options projected = new Options().include(include);
  return rowsOptions(projected);
}
/**
 * Opens a row reader over the given byte range, projecting the given columns
 * and filtering row groups with the search argument.
 *
 * @param offset      first byte of the range to read
 * @param length      number of bytes in the range to read
 * @param include     column include mask
 * @param sarg        search argument used to skip row groups
 * @param columnNames column names the search argument refers to
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include,
                         SearchArgument sarg, String[] columnNames
                         ) throws IOException {
  Options rangedOptions = new Options();
  rangedOptions.include(include);
  rangedOptions.range(offset, length);
  rangedOptions.searchArgument(sarg, columnNames);
  return rowsOptions(rangedOptions);
}
/**
 * Opens a row reader over the given byte range, projecting the given columns.
 *
 * @param offset  first byte of the range to read
 * @param length  number of bytes in the range to read
 * @param include column include mask
 * @throws IOException if the reader cannot be created
 */
@Override
public RecordReader rows(long offset, long length, boolean[] include
                         ) throws IOException {
  Options rangedOptions = new Options();
  rangedOptions.include(include);
  rangedOptions.range(offset, length);
  return rowsOptions(rangedOptions);
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf) .include(readerIncludes).includeAcidColumns(includeAcidColumns); return new SchemaEvolution(fileSchema, readerSchema, options); }