public static TypeDescription[] genIncludedTypes(TypeDescription fileSchema, List<Integer> included, Integer recursiveStruct) { TypeDescription[] result = new TypeDescription[included.size()]; List<TypeDescription> children = fileSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { int indexInBatchCols = included.indexOf(columnNumber); if (indexInBatchCols >= 0) { result[indexInBatchCols] = children.get(columnNumber); } else if (recursiveStruct != null && recursiveStruct == columnNumber) { // This assumes all struct cols immediately follow struct List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren(); for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) { int columnNumberNested = columnNumber + 1 + columnNumberDelta; int nestedIxInBatchCols = included.indexOf(columnNumberNested); if (nestedIxInBatchCols >= 0) { result[nestedIxInBatchCols] = nestedChildren.get(columnNumberDelta); } } } } return result; }
/**
 * Builds a converter for an ORC LIST column by creating a converter
 * for the list's single element type.
 *
 * @param schema the LIST type description; its first child is the element type.
 */
public ListColumnConverter(TypeDescription schema) {
    TypeDescription elementType = schema.getChildren().get(0);
    childrenConverter = createConverter(elementType);
}
/** * Fills an ORC batch into an array of Row. * * @param rows The batch of rows need to be filled. * @param schema The schema of the ORC data. * @param batch The ORC data. * @param selectedFields The list of selected ORC fields. * @return The number of rows that were filled. */ static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) { int rowsToRead = Math.min((int) batch.count(), rows.length); List<TypeDescription> fieldTypes = schema.getChildren(); // read each selected field for (int fieldIdx = 0; fieldIdx < selectedFields.length; fieldIdx++) { int orcIdx = selectedFields[fieldIdx]; readField(rows, fieldIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], rowsToRead); } return rowsToRead; }
public StructColumnConverter(TypeDescription schema) { List<TypeDescription> kids = schema.getChildren(); childrenConverters = new JsonConverter[kids.size()]; for (int c = 0; c < childrenConverters.length; ++c) { childrenConverters[c] = createConverter(kids.get(c)); } fieldNames = schema.getFieldNames(); }
/** * Creates an OrcRowInputFormat. * * @param path The path to read ORC files from. * @param orcSchema The schema of the ORC files as ORC TypeDescription. * @param orcConfig The configuration to read the ORC files with. * @param batchSize The number of Row objects to read in a batch. */ public OrcRowInputFormat(String path, TypeDescription orcSchema, Configuration orcConfig, int batchSize) { super(new Path(path)); // configure OrcRowInputFormat this.schema = orcSchema; this.rowType = (RowTypeInfo) OrcBatchReader.schemaToTypeInfo(schema); this.conf = orcConfig; this.batchSize = batchSize; // set default selection mask, i.e., all fields. this.selectedFields = new int[this.schema.getChildren().size()]; for (int i = 0; i < selectedFields.length; i++) { this.selectedFields[i] = i; } }
public static boolean[] genIncludedColumns(TypeDescription readerSchema, List<Integer> included, Integer recursiveStruct) { boolean[] result = new boolean[readerSchema.getMaximumId() + 1]; if (included == null) { Arrays.fill(result, true); return result; } result[0] = true; List<TypeDescription> children = readerSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { if (included.contains(columnNumber)) { addColumnToIncludes(children.get(columnNumber), result); } else if (recursiveStruct != null && recursiveStruct == columnNumber) { // This assumes all struct cols immediately follow struct List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren(); for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) { int columnNumberNested = columnNumber + 1 + columnNumberDelta; if (included.contains(columnNumberNested)) { addColumnToIncludes(nestedChildren.get(columnNumberDelta), result); } } } } return result; }
private static void readNonNullStructColumn(Object[] vals, int fieldIdx, StructColumnVector structVector, TypeDescription schema, int childCount) { List<TypeDescription> childrenTypes = schema.getChildren(); int numFields = childrenTypes.size(); // create a batch of Rows to read the structs Row[] structs = new Row[childCount]; // TODO: possible improvement: reuse existing Row objects for (int i = 0; i < childCount; i++) { structs[i] = new Row(numFields); } // read struct fields // we don't have to handle isRepeating because ORC assumes that it is propagated into the children. for (int i = 0; i < numFields; i++) { readField(structs, i, childrenTypes.get(i), structVector.fields[i], childCount); } if (fieldIdx == -1) { // set struct as an object System.arraycopy(structs, 0, vals, 0, childCount); } else { // set struct as a field of Row Row[] rows = (Row[]) vals; for (int i = 0; i < childCount; i++) { rows[i].setField(fieldIdx, structs[i]); } } }
return Row.class; case LIST: Class<?> childClass = getClassForType(schema.getChildren().get(0)); return Array.newInstance(childClass, 0).getClass(); case MAP:
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ private boolean[] computeProjectionMask() { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
return OrcBatchReader::copyBinary; case STRUCT: List<TypeDescription> fieldTypes = schema.getChildren(); Function<Object, Object>[] copyFields = new Function[fieldTypes.size()]; for (int i = 0; i < fieldTypes.size(); i++) { TypeDescription entryType = schema.getChildren().get(0); Function<Object, Object> copyEntry = getCopyFunction(entryType); Class entryClass = getClassForType(entryType); return new CopyList(copyEntry, entryClass); case MAP: TypeDescription keyType = schema.getChildren().get(0); TypeDescription valueType = schema.getChildren().get(1); Function<Object, Object> copyKey = getCopyFunction(keyType); Function<Object, Object> copyValue = getCopyFunction(valueType);
/**
 * Writes one LIST cell as a JSON array, emitting each element via
 * {@code setValue} with the list's element type.
 *
 * @param writer destination JSON writer.
 * @param vector the ORC list column.
 * @param schema the LIST type description; its first child is the element type.
 * @param row    the row to write.
 * @throws JSONException if the writer fails.
 */
private static void setList(JSONWriter writer, ListColumnVector vector, TypeDescription schema, int row) throws JSONException {
    writer.array();
    TypeDescription elementType = schema.getChildren().get(0);
    // Elements of this row live at [offsets[row], offsets[row] + lengths[row]) in the child vector.
    int start = (int) vector.offsets[row];
    for (int i = 0; i < vector.lengths[row]; ++i) {
        setValue(writer, vector.child, elementType, start + i);
    }
    writer.endArray();
}
/**
 * Builds the per-column-id include mask for an ORC read from a list of
 * selected top-level columns.
 *
 * @param readerSchema the root (struct) schema being read.
 * @param included     top-level column ids to include, or {@code null} to include everything.
 * @return a boolean per column id ({@code true} = read this column), sized to the
 *         schema's maximum id + 1.
 */
public static boolean[] genIncludedColumns(TypeDescription readerSchema, List<Integer> included) {
    boolean[] include = new boolean[readerSchema.getMaximumId() + 1];
    if (included == null) {
        // No selection supplied: read every column.
        Arrays.fill(include, true);
        return include;
    }
    // The root struct itself is always read.
    include[0] = true;
    List<TypeDescription> topLevel = readerSchema.getChildren();
    for (int col = 0; col < topLevel.size(); ++col) {
        if (!included.contains(col)) {
            continue;
        }
        // A field's subtree occupies the contiguous id range [getId(), getMaximumId()].
        TypeDescription selected = topLevel.get(col);
        for (int id = selected.getId(); id <= selected.getMaximumId(); ++id) {
            include[id] = true;
        }
    }
    return include;
}
return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO; case STRUCT: List<TypeDescription> fieldSchemas = schema.getChildren(); TypeInformation[] fieldTypes = new TypeInformation[fieldSchemas.size()]; for (int i = 0; i < fieldSchemas.size(); i++) { return new RowTypeInfo(fieldTypes, fieldNames); case LIST: TypeDescription elementSchema = schema.getChildren().get(0); TypeInformation<?> elementType = schemaToTypeInfo(elementSchema); TypeDescription keySchema = schema.getChildren().get(0); TypeDescription valSchema = schema.getChildren().get(1); TypeInformation<?> keyType = schemaToTypeInfo(keySchema); TypeInformation<?> valType = schemaToTypeInfo(valSchema);
/**
 * Writes one STRUCT cell as a JSON object: one key per field name, each value
 * emitted via {@code setValue} with the field's type.
 *
 * @param writer destination JSON writer.
 * @param batch  the ORC struct column.
 * @param schema the STRUCT type description.
 * @param row    the row to write.
 * @throws JSONException if the writer fails.
 */
private static void setStruct(JSONWriter writer, StructColumnVector batch, TypeDescription schema, int row) throws JSONException {
    writer.object();
    List<String> names = schema.getFieldNames();
    List<TypeDescription> types = schema.getChildren();
    for (int field = 0; field < types.size(); ++field) {
        writer.key(names.get(field));
        setValue(writer, batch.fields[field], types.get(field), row);
    }
    writer.endObject();
}
}
private static void readNonNullListColumn(Object[] vals, int fieldIdx, ListColumnVector list, TypeDescription schema, int childCount) { TypeDescription fieldType = schema.getChildren().get(0);
/**
 * Reads one UNION cell, reusing {@code previous} as the result holder when it
 * is already an {@code OrcUnion}.
 *
 * @param vector   the union column vector.
 * @param row      the row to read; collapsed to 0 when the vector repeats.
 * @param schema   the UNION type description.
 * @param previous previously returned object, reused when possible.
 * @return the populated union, or {@code null} when the cell is null.
 */
static OrcUnion nextUnion(ColumnVector vector, int row, TypeDescription schema, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (!vector.noNulls && vector.isNull[row]) {
        return null;
    }
    // Reuse the previous holder only when it is exactly an OrcUnion.
    OrcUnion result = (previous != null && previous.getClass() == OrcUnion.class)
            ? (OrcUnion) previous
            : new OrcUnion();
    UnionColumnVector unionVector = (UnionColumnVector) vector;
    // The tag selects which union branch holds this row's value.
    byte tag = (byte) unionVector.tags[row];
    List<TypeDescription> childTypes = schema.getChildren();
    result.set(tag, nextValue(unionVector.fields[tag], row, childTypes.get(tag), result.getObject()));
    return result;
}
/**
 * Creates an ORC writer for the topic's schema, with one JSON converter per
 * top-level column and a reusable row batch.
 *
 * @param logFilePath the log file location (also determines the topic/schema).
 * @param codec       compression codec to translate into an ORC compression kind.
 * @throws IOException if the ORC writer cannot be created.
 */
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> columnTypes = schema.getChildren();
    converters = new JsonConverter[columnTypes.size()];
    for (int i = 0; i < converters.length; i++) {
        converters[i] = VectorColumnFiller.createConverter(columnTypes.get(i));
    }
    writer = OrcFile.createWriter(path,
            OrcFile.writerOptions(conf)
                    .compress(resolveCompression(codec))
                    .setSchema(schema));
    batch = schema.createRowBatch();
}
/**
 * Writes one batch row as JSON. A STRUCT root becomes a JSON object keyed by
 * field name; any other root type is written directly from the first column.
 *
 * @param writer destination JSON writer.
 * @param batch  the row batch being printed.
 * @param schema the root schema of the batch.
 * @param row    the row to write.
 * @throws JSONException if the writer fails.
 */
public static void processRow(JSONWriter writer, VectorizedRowBatch batch, TypeDescription schema, int row) throws JSONException {
    if (schema.getCategory() != TypeDescription.Category.STRUCT) {
        // Non-struct root: the single column holds the value directly.
        setValue(writer, batch.cols[0], schema, row);
        return;
    }
    List<TypeDescription> types = schema.getChildren();
    List<String> names = schema.getFieldNames();
    writer.object();
    for (int col = 0; col < batch.cols.length; ++col) {
        writer.key(names.get(col));
        setValue(writer, batch.cols[col], types.get(col), row);
    }
    writer.endObject();
}
private static void readNonNullMapColumn(Object[] vals, int fieldIdx, MapColumnVector mapsVector, TypeDescription schema, int childCount) { List<TypeDescription> fieldType = schema.getChildren(); TypeDescription keyType = fieldType.get(0); TypeDescription valueType = fieldType.get(1);
/**
 * Returns the next row, reusing {@code previous} as the result holder when
 * possible. For a STRUCT root an {@code OrcStruct} is (re)populated field by
 * field; otherwise the single column's value is returned directly.
 *
 * @param previous previously returned object, reused when it matches.
 * @return the next row's value, or {@code null} when no rows remain.
 * @throws IOException if advancing to the next batch fails.
 */
@Override
public Object next(Object previous) throws IOException {
    if (!ensureBatch()) {
        // No more batches to read.
        return null;
    }
    if (schema.getCategory() != TypeDescription.Category.STRUCT) {
        previous = nextValue(batch.cols[0], rowInBatch, schema, previous);
    } else {
        List<TypeDescription> children = schema.getChildren();
        int fieldCount = children.size();
        OrcStruct struct;
        // Reuse the previous holder only when it is exactly an OrcStruct.
        if (previous != null && previous.getClass() == OrcStruct.class) {
            struct = (OrcStruct) previous;
            if (struct.getNumFields() != fieldCount) {
                struct.setNumFields(fieldCount);
            }
        } else {
            struct = new OrcStruct(fieldCount);
            previous = struct;
        }
        for (int field = 0; field < fieldCount; ++field) {
            struct.setFieldValue(field,
                    nextValue(batch.cols[field], rowInBatch, children.get(field), struct.getFieldValue(field)));
        }
    }
    rowInBatch += 1;
    return previous;
}