/**
 * Constructs a root from a schema and its corresponding field vectors.
 *
 * @param schema       schema describing the vectors
 * @param fieldVectors the vectors, one per schema field, in schema order
 * @param rowCount     number of rows held by the vectors
 * @throws IllegalArgumentException if the vector count differs from the schema's field count
 */
public VectorSchemaRoot(Schema schema, List<FieldVector> fieldVectors, int rowCount) {
  int fieldCount = schema.getFields().size();
  if (fieldCount != fieldVectors.size()) {
    throw new IllegalArgumentException("Fields must match field vectors. Found "
        + fieldVectors.size() + " vectors and " + fieldCount + " fields");
  }
  this.schema = schema;
  this.rowCount = rowCount;
  this.fieldVectors = fieldVectors;
  // Index each vector by its field name for name-based lookup.
  for (int idx = 0; idx < fieldCount; idx++) {
    fieldVectorsMap.put(schema.getFields().get(idx).getName(), fieldVectors.get(idx));
  }
}
/**
 * Creates a {@link VectorSchemaRoot} with freshly allocated vectors, one per field in
 * {@code schema}, and a row count of zero.
 *
 * @param schema    schema to create vectors for
 * @param allocator allocator used to create each field vector
 * @return a new root with empty vectors
 * @throws IllegalArgumentException if the number of created vectors does not match the schema
 */
public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) {
  // Pre-size: exactly one vector is created per field, so no resizing is needed.
  List<FieldVector> fieldVectors = new ArrayList<>(schema.getFields().size());
  for (Field field : schema.getFields()) {
    FieldVector vector = field.createVector(allocator);
    fieldVectors.add(vector);
  }
  // Defensive check retained from the original contract.
  if (fieldVectors.size() != schema.getFields().size()) {
    throw new IllegalArgumentException("The root vector did not create the right number of children. found "
        + fieldVectors.size() + " expected " + schema.getFields().size());
  }
  return new VectorSchemaRoot(schema, fieldVectors, 0);
}
/**
 * Resolves {@code path} against the schema's fields, returning the first matching id.
 *
 * @param schema  schema whose fields are searched in order
 * @param path    path to resolve
 * @param isHyper whether the lookup is against a hyper container
 * @return the matching field id, or null if no field matches
 */
public static TypedFieldId getFieldId(Schema schema, SchemaPath path, boolean isHyper) {
  List<Field> fields = schema.getFields();
  for (int index = 0; index < fields.size(); index++) {
    TypedFieldId id = getFieldId(fields.get(index), index, path, isHyper);
    if (id != null) {
      return id;
    }
  }
  return null;
}
/**
 * Search for a field by name in this Schema.
 *
 * <p>Delegates to the static {@code findField(List, String)} overload over this
 * schema's own field list.
 *
 * @param name the name of the field to return
 * @return the corresponding field
 * @throws IllegalArgumentException if the field was not found
 */
public Field findField(String name) { return findField(getFields(), name); }
/**
 * Convert an {@link org.apache.arrow.vector.types.pojo.Schema}
 * to a datavec {@link Schema}.
 *
 * @param schema the input arrow schema
 * @return the equivalent datavec schema
 */
public static Schema toDatavecSchema(org.apache.arrow.vector.types.pojo.Schema schema) {
  Schema.Builder builder = new Schema.Builder();
  int numFields = schema.getFields().size();
  // Map each arrow field to a datavec column, preserving order.
  for (int col = 0; col < numFields; col++) {
    builder.addColumn(metaDataFromField(schema.getFields().get(col)));
  }
  return builder.build();
}
/**
 * Renders this root's contents as a tab-separated table: a header row of field
 * names followed by one row per record.
 *
 * @return the TSV representation of the data
 */
public String contentToTSVString() {
  StringBuilder out = new StringBuilder();
  List<Object> cells = new ArrayList<>(schema.getFields().size());
  // Header row: field names in schema order.
  for (Field field : schema.getFields()) {
    cells.add(field.getName());
  }
  printRow(out, cells);
  // Data rows, reusing the same cell buffer for each record.
  for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    cells.clear();
    for (FieldVector vector : fieldVectors) {
      cells.add(vector.getObject(rowIndex));
    }
    printRow(out, cells);
  }
  return out.toString();
}
}
public void start(Schema schema, DictionaryProvider provider) throws IOException { List<Field> fields = new ArrayList<>(schema.getFields().size()); Set<Long> dictionaryIdsUsed = new HashSet<>(); this.schema = schema; // Store original Schema to ensure batches written match // Convert fields with dictionaries to have dictionary type for (Field field : schema.getFields()) { fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); } Schema updatedSchema = new Schema(fields, schema.getCustomMetadata()); generator.writeStartObject(); generator.writeObjectField("schema", updatedSchema); // Write all dictionaries that were used if (!dictionaryIdsUsed.isEmpty()) { writeDictionaryBatches(generator, dictionaryIdsUsed, provider); } // Start writing of record batches generator.writeArrayFieldStart("batches"); }
/**
 * Builds a hyper container from {@code schema}, skipping key fields.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose non-key fields are added
 * @param isKeyBits bit i set means field i is a key field and is NOT added
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema, BitSet isKeyBits) {
  super(allocator);
  this.isKeyBits = isKeyBits;
  List<Field> fields = schema.getFields();
  // Only fields whose key bit is clear become hyper vectors; key fields are ignored.
  for (int index = 0; index < fields.size(); index++) {
    if (!this.isKeyBits.get(index)) {
      this.addEmptyHyper(fields.get(index));
    }
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
/**
 * Deserializes a flatbuffer-encoded single-field schema into a {@link CompleteType}.
 *
 * @param bytes flatbuffer bytes of a Schema whose first field carries the type
 * @return the CompleteType of the schema's first field
 */
public static CompleteType deserialize(byte[] bytes) {
  ByteBuffer buffer = ByteBuffer.wrap(bytes);
  org.apache.arrow.vector.types.pojo.Schema pojoSchema =
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(Schema.getRootAsSchema(buffer));
  return CompleteType.fromField(pojoSchema.getFields().get(0));
}
/**
 * Builds a hyper container holding every field of {@code schema}.
 *
 * <p>Used for VECTORIZED_BIGINT mode, where all fields (including keys) are added,
 * so no key-bit filtering applies.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose fields are all added as hyper vectors
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema) {
  super(allocator);
  // Add all key fields for VECTORIZED_BIGINT mode; no filtering bit set is needed.
  this.isKeyBits = null;
  // Fix: removed dead local counter ("int i=0;") that was declared but never used.
  for (Field f : schema.getFields()) {
    this.addEmptyHyper(f);
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
/**
 * Replaces the current schema contents with the fields of {@code schema}.
 *
 * @param schema schema whose fields are added after clearing existing ones
 */
public void addSchema(Schema schema) {
  clearSchema();
  schema.getFields().forEach(this::addOrGet);
}
/**
 * Materializes Parquet records into the given complex writer.
 *
 * <p>Builds the root {@link StructGroupConverter} over the writer's root struct.
 * When {@code arrowSchema} is null, no arrow field hints are passed down.
 *
 * @param mutator       output mutator receiving materialized vectors
 * @param complexWriter writer the records are materialized into
 * @param schema        the Parquet message type being read
 * @param columns       the projected columns
 * @param options       option manager consulted by the converters
 * @param arrowSchema   optional arrow schema providing field-level type hints; may be null
 * @param schemaHelper  helper guiding schema derivation
 */
public ParquetRecordMaterializer(OutputMutator mutator, ComplexWriter complexWriter, MessageType schema, Collection<SchemaPath> columns, OptionManager options, Schema arrowSchema, SchemaDerivationHelper schemaHelper) { this.complexWriter = complexWriter; root = new StructGroupConverter( mutator, complexWriter.rootAsStruct(), schema, columns, options, arrowSchema == null ? null : arrowSchema.getFields(), schemaHelper ); }
/**
 * Deserializes flatbuffer schema bytes into a {@link BatchSchema} with no selection vector.
 *
 * @param bytes flatbuffer-encoded schema
 * @return a BatchSchema over the decoded fields, SelectionVectorMode.NONE
 */
public static BatchSchema deserialize(byte[] bytes) {
  Schema flatbufSchema = Schema.getRootAsSchema(ByteBuffer.wrap(bytes));
  return new BatchSchema(SelectionVectorMode.NONE,
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(flatbufSchema).getFields());
}
/**
 * Writes one record batch as a JSON object: its row count and, per schema field,
 * the corresponding vector's contents under "columns".
 *
 * @param recordBatch the batch to serialize
 * @throws IOException if the underlying generator fails
 */
private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
  generator.writeStartObject();
  generator.writeObjectField("count", recordBatch.getRowCount());
  generator.writeArrayFieldStart("columns");
  // One column entry per schema field, looked up by name.
  for (Field field : recordBatch.getSchema().getFields()) {
    writeFromVectorIntoJson(field, recordBatch.getVector(field.getName()));
  }
  generator.writeEndArray();
  generator.writeEndObject();
}
/**
 * Maps an Arrow schema onto a Parquet message type, field by field.
 * For now does not validate primitive type compatibility.
 *
 * @param arrowSchema   an Arrow schema
 * @param parquetSchema a Parquet message type
 * @return the mapping between the two schemas
 */
public SchemaMapping map(Schema arrowSchema, MessageType parquetSchema) {
  return new SchemaMapping(arrowSchema, parquetSchema,
      map(arrowSchema.getFields(), parquetSchema.getFields()));
}
/**
 * Creates a {@link VectorContainer} populated with one vector per field of {@code schema}.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose fields are added
 * @return a container with SelectionVectorMode.NONE and its schema built
 */
public static VectorContainer create(BufferAllocator allocator, Schema schema) {
  VectorContainer result = new VectorContainer(allocator);
  schema.getFields().forEach(result::addOrGet);
  result.buildSchema(SelectionVectorMode.NONE);
  return result;
}
public static String[] getColumnsLowerCase(DatasetConfig datasetConfig) { final ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig); if (schemaBytes != null) { Schema schema = Schema.getRootAsSchema(schemaBytes.asReadOnlyByteBuffer()); org.apache.arrow.vector.types.pojo.Schema s = org.apache.arrow.vector.types.pojo.Schema.convertSchema(schema); return s.getFields().stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } else { // If virtual dataset was created with view fields if (datasetConfig.getType() == DatasetType.VIRTUAL_DATASET) { final List<ViewFieldType> viewFieldTypes = datasetConfig.getVirtualDataset().getSqlFieldsList(); if (notEmpty(viewFieldTypes)) { return viewFieldTypes.stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } } } return new String[0]; }
/**
 * Deserializes a {@link ByteString}-wrapped flatbuffer schema into a {@link BatchSchema}.
 *
 * @param bytes serialized schema bytes
 * @return a BatchSchema over the decoded fields, SelectionVectorMode.NONE
 */
public static BatchSchema deserialize(ByteString bytes) {
  final Schema serializedSchema = Schema.getRootAsSchema(bytes.asReadOnlyByteBuffer());
  final org.apache.arrow.vector.types.pojo.Schema arrowSchema =
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(serializedSchema);
  return new BatchSchema(SelectionVectorMode.NONE, arrowSchema.getFields());
}
/**
 * Creates a Parquet schema from an Arrow one and returns the mapping between the two.
 *
 * @param arrowSchema the provided Arrow schema
 * @return the mapping between the Arrow schema and the generated Parquet "root" message
 */
public SchemaMapping fromArrow(Schema arrowSchema) {
  List<TypeMapping> mappedFields = fromArrow(arrowSchema.getFields());
  MessageType messageType = addToBuilder(mappedFields, Types.buildMessage()).named("root");
  return new SchemaMapping(arrowSchema, messageType, mappedFields);
}
/**
 * Reads the next record batch object from the JSON stream into {@code root}.
 *
 * @param root destination root; its row count and vectors are populated
 * @return true if a batch was read; false when the enclosing batch array ended
 * @throws IOException              if the parser fails
 * @throws IllegalArgumentException on an unexpected token
 */
public boolean read(VectorSchemaRoot root) throws IOException {
  JsonToken t = parser.nextToken();
  // Guard: end of the "batches" array means there is nothing left to read.
  if (t == END_ARRAY) {
    root.setRowCount(0);
    return false;
  }
  if (t != START_OBJECT) {
    throw new IllegalArgumentException("Invalid token: " + t);
  }
  root.setRowCount(readNextField("count", Integer.class));
  nextFieldIs("columns");
  readToken(START_ARRAY);
  // One column entry per schema field, in schema order.
  for (Field field : root.getSchema().getFields()) {
    readFromJsonIntoVector(field, root.getVector(field.getName()));
  }
  readToken(END_ARRAY);
  readToken(END_OBJECT);
  return true;
}