/**
 * Generates the per-column include flags for the given file schema by
 * translating this source's physical column ids, passing along the ACID
 * struct column id.
 *
 * @param fileSchema the schema of the file being read
 * @return the include flags produced by
 *         {@code OrcInputFormat.genIncludedColumns}
 */
@Override
public boolean[] generateFileIncludes(TypeDescription fileSchema) {
  return OrcInputFormat.genIncludedColumns(
      fileSchema, filePhysicalColumnIds, acidStructColumnId);
}
/**
 * Figures out which columns to include from the reader schema and the list
 * of logical column ids. Delegates to the three-argument overload with a
 * null final argument.
 *
 * @param readerSchema the types for the reader
 * @param included the logical column ids to include
 * @return the include flags
 */
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           List<Integer> included) {
  return genIncludedColumns(readerSchema, included, null);
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param readerSchema the types for the reader
 * @param conf the configuration
 * @return the include flags, or null when every column should be read
 */
static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                    Configuration conf) {
  if (ColumnProjectionUtils.isReadAllColumns(conf)) {
    // No projection configured: a null include array means "read everything".
    return null;
  }
  List<Integer> projectedIds = ColumnProjectionUtils.getReadColumnIDs(conf);
  return genIncludedColumns(readerSchema, projectedIds);
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param readerSchema the types for the reader
 * @param conf the configuration
 * @return the include flags, or null when every column should be read
 */
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           Configuration conf) {
  boolean[] result = null;
  if (!ColumnProjectionUtils.isReadAllColumns(conf)) {
    // A projection is in place; translate the configured column ids.
    result = genIncludedColumns(readerSchema,
        ColumnProjectionUtils.getReadColumnIDs(conf));
  }
  return result;
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf) .include(readerIncludes).includeAcidColumns(includeAcidColumns); return new SchemaEvolution(fileSchema, readerSchema, options); }
/**
 * Validates that the includes derived from this source's column ids cover
 * every column the writer was configured to include.
 *
 * @param footer the ORC footer providing the file's type list
 * @throws IOException when the translated includes do not cover the writer's
 *         includes
 */
public void validateIncludes(OrcProto.Footer footer) throws IOException {
  if (doesSourceHaveIncludes) return; // Irrelevant.
  // Translate this source's column ids against the file's schema from the footer.
  boolean[] translatedIncludes = columnIds == null ? null
      : OrcInputFormat.genIncludedColumns(
          OrcUtils.convertTypeFromProtobuf(footer.getTypesList(), 0),
          columnIds);
  if (translatedIncludes == null) {
    // NOTE(review): this relies on throwIncludesMismatchError always throwing;
    // if it ever returned normally, the length dereference below would NPE —
    // confirm against its definition.
    throwIncludesMismatchError(translatedIncludes);
  }
  // Compare over the common prefix of the two include arrays.
  int len = Math.min(translatedIncludes.length, writerIncludes.length);
  for (int i = 0; i < len; ++i) {
    // Translated includes may be a superset of writer includes due to cache.
    if (!translatedIncludes[i] && writerIncludes[i]) {
      throwIncludesMismatchError(translatedIncludes);
    }
  }
  // Any writer include beyond the translated range is by definition uncovered.
  if (translatedIncludes.length < writerIncludes.length) {
    for (int i = len; i < writerIncludes.length; ++i) {
      if (writerIncludes[i]) {
        throwIncludesMismatchError(translatedIncludes);
      }
    }
  }
}
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options(conf).schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); //todo: last param is bogus. why is this hardcoded? OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
static Reader.Options createOptionsForReader(Configuration conf) { /** * Do we have schema on read in the configuration variables? */ TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(conf, true, Integer.MAX_VALUE); Reader.Options readerOptions = new Reader.Options().schema(schema); // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. final List<OrcProto.Type> schemaTypes = OrcUtils.getOrcTypes(schema); readerOptions.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(readerOptions, schemaTypes, conf, true); return readerOptions; }
consumer.setUseDecimal64ColumnVectors(useDecimal64ColumnVectors); this.schema = schema; this.writerIncludes = OrcInputFormat.genIncludedColumns(schema, columnIds); SchemaEvolution evolution = new SchemaEvolution(schema, null, new Reader.Options(jobConf).include(writerIncludes));
Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { readerIncluded = genIncludedColumns(readerSchema, context.conf); evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) {
Reader.Options readerOptions = new Reader.Options(context.conf); if (readerTypes == null) { readerIncluded = genIncludedColumns(fileSchema, context.conf); evolution = new SchemaEvolution(fileSchema, null, readerOptions.include(readerIncluded)); } else { readerIncluded = genIncludedColumns(readerSchema, context.conf); evolution = new SchemaEvolution(fileSchema, readerSchema, readerOptions.include(readerIncluded)); if (!isOriginal) {
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
/**
 * Creates a record reader over the given ORC file, honoring any
 * schema-on-read and column projection found in the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  // Full-ACID scans must not go through this plain reader path.
  if (AcidUtils.isFullAcidScan(conf)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Is there a schema-on-read in the configuration variables?
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options options =
      new Reader.Options(conf).range(offset, length).schema(schema);
  boolean isOriginal = isOriginal(file);
  if (schema == null) {
    // No schema-on-read: fall back to the schema stored in the file.
    schema = file.getSchema();
  }
  options.include(genIncludedColumns(schema, conf));
  setSearchArgument(options, OrcUtils.getOrcTypes(schema), conf, isOriginal);
  return file.rowsOptions(options, conf);
}
this.length = fileSplit.getLength(); options.range(offset, length); options.include(OrcInputFormat.genIncludedColumns(schema, conf)); OrcInputFormat.setSearchArgument(options, types, conf, true);
/**
 * Creates a record reader over the given ORC file, honoring any
 * schema-on-read and column projection found in the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  // Transactional tables must be read through the ACID reader instead.
  if (HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN)) {
    raiseAcidTablesMustBeReadWithAcidReaderException(conf);
  }
  // Is a schema-on-read present in the configuration variables?
  TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
  Reader.Options readerOptions =
      new Reader.Options().range(offset, length).schema(schema);
  boolean originalLayout = isOriginal(file);
  if (schema == null) {
    // No schema-on-read: fall back to the schema stored in the file.
    schema = file.getSchema();
  }
  List<OrcProto.Type> orcTypes = OrcUtils.getOrcTypes(schema);
  readerOptions.include(genIncludedColumns(schema, conf));
  setSearchArgument(readerOptions, orcTypes, conf, originalLayout);
  return file.rowsOptions(readerOptions);
}
/**
 * Generates the per-column include flags for the given file schema by
 * translating this source's physical column ids, passing along the ACID
 * struct column id.
 *
 * @param fileSchema the schema of the file being read
 * @return the include flags produced by
 *         {@code OrcInputFormat.genIncludedColumns}
 */
@Override
public boolean[] generateFileIncludes(TypeDescription fileSchema) {
  boolean[] fileIncludes = OrcInputFormat.genIncludedColumns(
      fileSchema, filePhysicalColumnIds, acidStructColumnId);
  return fileIncludes;
}
/**
 * Take the configuration and figure out which columns we need to include.
 *
 * @param types the types for the file
 * @param conf the configuration
 * @param isOriginal is the file in the original format?
 * @return the include flags, or null when every column should be read
 */
public static boolean[] genIncludedColumns(
    List<OrcProto.Type> types, Configuration conf, boolean isOriginal) {
  if (ColumnProjectionUtils.isReadAllColumns(conf)) {
    // No projection configured; null tells the reader to include everything.
    return null;
  }
  return genIncludedColumns(types,
      ColumnProjectionUtils.getReadColumnIDs(conf), isOriginal);
}
@Override public SchemaEvolution createSchemaEvolution(TypeDescription fileSchema) { if (readerSchema == null) { readerSchema = fileSchema; } // TODO: will this work correctly with ACID? boolean[] readerIncludes = OrcInputFormat.genIncludedColumns( readerSchema, readerLogicalColumnIds); Reader.Options options = new Reader.Options(jobConf).include(readerIncludes); return new SchemaEvolution(fileSchema, readerSchema, options); }
/**
 * Creates a record reader over the given ORC file, applying the column
 * projection and search argument from the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param offset the start of the byte range to read
 * @param length the length of the byte range to read
 * @return a record reader over the requested range
 * @throws IOException when the reader cannot be created
 */
public static RecordReader createReaderFromFile(Reader file, Configuration conf,
                                                long offset, long length
                                                ) throws IOException {
  boolean originalFormat = isOriginal(file);
  List<OrcProto.Type> fileTypes = file.getTypes();
  Reader.Options options = new Reader.Options().range(offset, length);
  options.include(genIncludedColumns(fileTypes, conf, originalFormat));
  setSearchArgument(options, fileTypes, conf, originalFormat);
  return file.rowsOptions(options);
}
/**
 * Constructs a vectorized reader over the given split, applying the column
 * projection and search argument from the configuration.
 *
 * @param file the ORC reader for the file
 * @param conf the configuration
 * @param fileSplit the split describing the byte range to read
 * @throws IOException when the row reader cannot be created
 */
VectorizedOrcRecordReader(Reader file, Configuration conf,
                          FileSplit fileSplit) throws IOException {
  this.offset = fileSplit.getStart();
  this.length = fileSplit.getLength();
  List<OrcProto.Type> types = file.getTypes();
  Reader.Options options = new Reader.Options().range(offset, length);
  // NOTE(review): isOriginal is hard-coded to true for both calls below —
  // confirm this reader is only used on non-ACID (original format) files.
  options.include(OrcInputFormat.genIncludedColumns(types, conf, true));
  OrcInputFormat.setSearchArgument(options, types, conf, true);
  this.reader = file.rowsOptions(options);
  try {
    rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(conf, fileSplit);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}