/**
 * Computes the ORC projection mask of the fields to include from the selected fields.
 *
 * @return The ORC projection mask.
 */
private boolean[] computeProjectionMask() {
  // mask with all fields of the schema
  boolean[] projectionMask = new boolean[schema.getMaximumId() + 1];
  // for each selected field
  for (int inIdx : selectedFields) {
    // set all nested fields of a selected field to true
    TypeDescription fieldSchema = schema.getChildren().get(inIdx);
    for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) {
      projectionMask[i] = true;
    }
  }
  return projectionMask;
}
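/*
 * A minimal standalone sketch (not the Flink method above) showing how the mask lines
 * up with ORC's flattened type ids. The schema string, class name, and the printed
 * result are illustrative assumptions; the loop mirrors computeProjectionMask() for
 * one selected top-level field.
 */
import java.util.Arrays;
import org.apache.orc.TypeDescription;

public class ProjectionMaskSketch {
  public static void main(String[] args) {
    // Hypothetical schema: root struct = id 0, a = 1, b = 2, b.c = 3, b.d = 4.
    TypeDescription schema =
        TypeDescription.fromString("struct<a:int,b:struct<c:string,d:double>>");
    int[] selectedFields = {1}; // select the second top-level field ("b")

    // Same sizing and marking as computeProjectionMask() above.
    boolean[] projectionMask = new boolean[schema.getMaximumId() + 1];
    for (int inIdx : selectedFields) {
      TypeDescription fieldSchema = schema.getChildren().get(inIdx);
      for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) {
        projectionMask[i] = true;
      }
    }
    // Prints: [false, false, true, true, true]
    System.out.println(Arrays.toString(projectionMask));
  }
}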
private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
  for (int col = child.getId(); col <= child.getMaximumId(); ++col) {
    result[col] = true;
  }
}
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           List<Integer> included) {
  boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
  if (included == null) {
    Arrays.fill(result, true);
    return result;
  }
  result[0] = true;
  List<TypeDescription> children = readerSchema.getChildren();
  for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
    if (included.contains(columnNumber)) {
      TypeDescription child = children.get(columnNumber);
      for (int col = child.getId(); col <= child.getMaximumId(); ++col) {
        result[col] = true;
      }
    }
  }
  return result;
}
TypeDescription child = children.get(columnNumber);
int id = child.getId();
int maxId = child.getMaximumId();
if (id >= included.length || maxId >= included.length) {
  throw new AssertionError("Inconsistent includes: " + included.length
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
                                           List<Integer> included,
                                           Integer recursiveStruct) {
  boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
  if (included == null) {
    Arrays.fill(result, true);
    return result;
  }
  result[0] = true;
  List<TypeDescription> children = readerSchema.getChildren();
  for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
    if (included.contains(columnNumber)) {
      addColumnToIncludes(children.get(columnNumber), result);
    } else if (recursiveStruct != null && recursiveStruct == columnNumber) {
      // This assumes all struct cols immediately follow the struct.
      List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren();
      for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) {
        int columnNumberNested = columnNumber + 1 + columnNumberDelta;
        if (included.contains(columnNumberNested)) {
          addColumnToIncludes(nestedChildren.get(columnNumberDelta), result);
        }
      }
    }
  }
  return result;
}
assertEquals(0, writer.getSchema().getMaximumId());
boolean[] expected = new boolean[] {false};
boolean[] included = OrcUtils.includeColumns("", writer.getSchema());
assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, true};
boolean[] included = OrcUtils.includeColumns("string1", schema);
assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, true, false};
boolean[] included = OrcUtils.includeColumns("int1", schema);
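/*
 * A hedged usage sketch tying the assertions above to OrcUtils.includeColumns: it
 * takes a comma separated list of top-level field names and returns the same kind of
 * flattened mask, with the root (id 0) left false. The schema here is an assumption
 * matching the two-column tests above (int1 at id 1, string1 at id 2).
 */
import java.util.Arrays;
import org.apache.orc.OrcUtils;
import org.apache.orc.TypeDescription;

public class IncludeColumnsSketch {
  public static void main(String[] args) {
    // Assumed schema mirroring the tests above: root = 0, int1 = 1, string1 = 2.
    TypeDescription schema =
        TypeDescription.fromString("struct<int1:int,string1:string>");
    // Selecting both fields marks every id except the root.
    boolean[] included = OrcUtils.includeColumns("int1,string1", schema);
    // Prints: [false, true, true]
    System.out.println(Arrays.toString(included));
  }
}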
assertEquals(23, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false, false, false, false, false, false,
private void ensureRawDataReader(boolean isOpen) throws IOException {
  ensureOrcReader();
  if (rawDataReader != null) {
    if (!isRawDataReaderOpen && isOpen) {
      long startTime = counters.startTimeCounter();
      rawDataReader.open();
      counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
    }
    return;
  }
  long startTime = counters.startTimeCounter();
  boolean useZeroCopy = (daemonConf != null) && OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
  rawDataReader = RecordReaderUtils.createDefaultDataReader(
      DataReaderProperties.builder().withBufferSize(orcReader.getCompressionSize())
          .withCompression(orcReader.getCompressionKind())
          .withFileSystem(fs).withPath(path)
          .withTypeCount(orcReader.getSchema().getMaximumId() + 1)
          .withZeroCopy(useZeroCopy)
          .build());
  if (isOpen) {
    rawDataReader.open();
    isRawDataReaderOpen = true;
  }
  counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
}
sarg.getLeaves(), evolution);
sargColumns = new boolean[evolution.getFileSchema().getMaximumId() + 1];
for (int i : filterColumns) {
assertEquals(5, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false, false};
boolean[] included = OrcUtils.includeColumns("", schema);
/**
 * Convert a string with a comma separated list of column ids into the
 * array of booleans that matches the schema.
 * @param schema the schema for the reader
 * @param columnsStr the comma separated list of column ids
 * @return a boolean array
 */
public static boolean[] parseInclude(TypeDescription schema, String columnsStr) {
  if (columnsStr == null ||
      schema.getCategory() != TypeDescription.Category.STRUCT) {
    return null;
  }
  boolean[] result = new boolean[schema.getMaximumId() + 1];
  result[0] = true;
  if (StringUtils.isBlank(columnsStr)) {
    return result;
  }
  List<TypeDescription> types = schema.getChildren();
  for (String idString : columnsStr.split(",")) {
    TypeDescription type = types.get(Integer.parseInt(idString));
    for (int c = type.getId(); c <= type.getMaximumId(); ++c) {
      result[c] = true;
    }
  }
  return result;
}
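/*
 * A hedged usage sketch of the id arithmetic in parseInclude(): the entries in
 * columnsStr index the top-level fields, while the returned mask is over flattened
 * type ids, so selecting one struct field marks its whole subtree. The helper below
 * re-implements the loop standalone (skipping the null/blank checks) rather than
 * calling the class above; the schema and field names are illustrative assumptions.
 */
import java.util.Arrays;
import java.util.List;
import org.apache.orc.TypeDescription;

public class ParseIncludeSketch {
  static boolean[] include(TypeDescription schema, String columnsStr) {
    boolean[] result = new boolean[schema.getMaximumId() + 1];
    result[0] = true; // the root struct is always included
    List<TypeDescription> types = schema.getChildren();
    for (String idString : columnsStr.split(",")) {
      TypeDescription type = types.get(Integer.parseInt(idString));
      for (int c = type.getId(); c <= type.getMaximumId(); ++c) {
        result[c] = true;
      }
    }
    return result;
  }

  public static void main(String[] args) {
    // Hypothetical schema: root = 0, name = 1, ptr = 2, ptr.x = 3, ptr.y = 4, z = 5.
    TypeDescription schema = TypeDescription.fromString(
        "struct<name:string,ptr:struct<x:int,y:int>,z:double>");
    // Selecting top-level field 1 ("ptr") marks ids 2 through 4.
    // Prints: [true, false, true, true, true, false]
    System.out.println(Arrays.toString(include(schema, "1")));
  }
}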
int numFlattenedCols = schema.getMaximumId();
boolean[] results = new boolean[numFlattenedCols + 1];
if ("*".equals(selectedColumns)) {
TypeDescription col = findColumn(column, fieldNames, fields);
if (col != null) {
  for (int i = col.getId(); i <= col.getMaximumId(); ++i) {
    results[i] = true;
private boolean[] populatePpdSafeConversion() {
  if (fileSchema == null || readerSchema == null || readerFileTypes == null) {
    return null;
  }
  boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
  boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
  result[readerSchema.getId()] = safePpd;
  return populatePpdSafeConversionForChildern(result, readerSchema.getChildren());
}
@Override
public Metrics metrics() {
  try {
    long rows = writer.getNumberOfRows();
    ColumnStatistics[] stats = writer.getStatistics();
    // we don't currently have columnSizes or distinct counts.
    Map<Integer, Long> valueCounts = new HashMap<>();
    Map<Integer, Long> nullCounts = new HashMap<>();
    Integer[] icebergIds = new Integer[orcSchema.getMaximumId() + 1];
    for (TypeDescription type : columnIds.keySet()) {
      icebergIds[type.getId()] = columnIds.get(type);
    }
    for (int c = 1; c < stats.length; ++c) {
      if (icebergIds[c] != null) {
        valueCounts.put(icebergIds[c], stats[c].getNumberOfValues());
      }
    }
    for (TypeDescription child : orcSchema.getChildren()) {
      int c = child.getId();
      if (icebergIds[c] != null) {
        nullCounts.put(icebergIds[c], rows - stats[c].getNumberOfValues());
      }
    }
    return new Metrics(rows, null, valueCounts, nullCounts);
  } catch (IOException e) {
    throw new RuntimeException("Can't get statistics " + path, e);
  }
}
public PhysicalFsWriter(FileSystem fs, Path path, OrcFile.WriterOptions opts) throws IOException {
  this.path = path;
  long defaultStripeSize = opts.getStripeSize();
  this.addBlockPadding = opts.getBlockPadding();
  if (opts.isEnforceBufferSize()) {
    this.bufferSize = opts.getBufferSize();
  } else {
    this.bufferSize = WriterImpl.getEstimatedBufferSize(defaultStripeSize,
        opts.getSchema().getMaximumId() + 1, opts.getBufferSize());
  }
  this.compress = opts.getCompress();
  this.maxPadding = (int) (opts.getPaddingTolerance() * defaultStripeSize);
  this.blockSize = opts.getBlockSize();
  LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
      " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
      compress, bufferSize);
  rawWriter = fs.create(path, opts.getOverwrite(), HDFS_BUFFER_SIZE,
      fs.getDefaultReplication(path), blockSize);
  blockOffset = 0;
  codec = OrcCodecPool.getCodec(compress);
  writer = new OutStream("metadata", bufferSize, codec, new DirectStream(rawWriter));
  protobufWriter = CodedOutputStream.newInstance(writer);
  writeVariableLengthBlocks = opts.getWriteVariableLengthBlocks();
  shims = opts.getHadoopShims();
}
buildIndex = rowIndexStride > 0;
codec = createCodec(compress);
int numColumns = schema.getMaximumId() + 1;
this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
if (version == OrcFile.Version.V_0_11) {
  this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
} else {
  this.bloomFilterColumns =
private void ensureRawDataReader(boolean isOpen) throws IOException {
  ensureOrcReader();
  if (rawDataReader != null) {
    if (!isRawDataReaderOpen && isOpen) {
      long startTime = counters.startTimeCounter();
      rawDataReader.open();
      counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
    }
    return;
  }
  long startTime = counters.startTimeCounter();
  boolean useZeroCopy = (daemonConf != null) && OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
  rawDataReader = RecordReaderUtils.createDefaultDataReader(
      DataReaderProperties.builder().withBufferSize(orcReader.getCompressionSize())
          .withCompression(orcReader.getCompressionKind())
          .withFileSystem(fs).withPath(path)
          .withTypeCount(orcReader.getSchema().getMaximumId() + 1)
          .withZeroCopy(useZeroCopy)
          .build());
  if (isOpen) {
    rawDataReader.open();
    isRawDataReaderOpen = true;
  }
  counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
}