/**
 * Builds a single-column container holding the sorted, de-duplicated union of
 * binary dictionary values.
 *
 * <p>Every id from {@code 0} through {@code getMaxId()} (inclusive) of each
 * supplied dictionary is decoded to a {@code Binary} and added to a
 * {@link TreeSet}; if {@code existingDict} is non-null, the values of its
 * vector at index 0 are added as well. The set is then written, in its
 * iteration order, into a freshly allocated {@code VarBinaryVector}.
 *
 * @param dictionaries     dictionaries whose entries are merged
 * @param existingDict     previously built dictionary to merge in, or {@code null}
 * @param columnDescriptor descriptor whose path supplies the output field name
 * @param bufferAllocator  allocator backing the returned container
 * @return a container with one binary column, its record count set to the
 *         number of distinct values and its schema built with
 *         {@code SelectionVectorMode.NONE}
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String columnName = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(columnName, true, new ArrowType.Binary(), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final VarBinaryVector binaryVector = input.addOrGet(field);
  binaryVector.allocateNew();

  // A sorted set both removes duplicates and fixes the output order.
  final SortedSet<Binary> values = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    // getMaxId() is inclusive, hence "<=".
    for (int id = 0; id <= dictionary.getMaxId(); ++id) {
      values.add(dictionary.decodeToBinary(id));
    }
  }

  if (existingDict != null) {
    final VarBinaryVector existingDictValues =
        existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      final byte[] existingValue = existingDictValues.get(row);
      values.add(Binary.fromConstantByteArray(existingValue));
    }
  }

  int recordCount = 0;
  for (Binary value : values) {
    final byte[] data = value.getBytes();
    binaryVector.setSafe(recordCount, data, 0, data.length);
    recordCount++;
  }

  binaryVector.setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
// Declare a binary-typed field named from the simple path "c1" (second constructor argument is `true` —
// presumably the nullable flag; confirm against the Arrow Field constructor in use), fetch or create
// the matching VarBinaryVector in dict2, and allocate its buffers before any values are written.
final Field field2 = new Field(SchemaPath.getSimplePath("c1").getAsUnescapedPath(), true, new ArrowType.Binary(), null); final VarBinaryVector binaryVector = dict2.addOrGet(field2); binaryVector.allocateNew();
/**
 * Creates the Arrow {@link Field} that corresponds to the given
 * {@link ColumnType}.
 *
 * <p>Mapping used: {@code Long}/{@code Integer} map to 64/32-bit
 * {@code ArrowType.Int}; {@code Double}/{@code Float} map to double/single
 * precision {@code ArrowType.FloatingPoint}; {@code Boolean} maps to
 * {@code ArrowType.Bool}; {@code Categorical} and {@code String} map to
 * {@code ArrowType.Utf8}; {@code Time} maps to a millisecond
 * {@code ArrowType.Date}; {@code Bytes} and {@code NDArray} map to
 * {@code ArrowType.Binary}.
 *
 * @param name       the name of the field
 * @param columnType the column type to create a field for
 * @return the field built by {@code field(name, arrowType)} for the mapped type
 * @throws IllegalArgumentException if {@code columnType} has no mapping
 */
public static Field getFieldForColumn(String name,ColumnType columnType) {
    switch (columnType) {
        // NOTE(review): the second Int argument is `false`; in Arrow's ArrowType.Int this
        // looks like the isSigned flag, i.e. unsigned integers — confirm this is intended.
        case Long:
            return field(name, new ArrowType.Int(64, false));
        case Integer:
            return field(name, new ArrowType.Int(32, false));
        case Double:
            return field(name, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
        case Float:
            return field(name, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
        case Boolean:
            return field(name, new ArrowType.Bool());
        case Time:
            return field(name, new ArrowType.Date(DateUnit.MILLISECOND));
        // Textual column kinds share the UTF-8 string type.
        case Categorical:
        case String:
            return field(name, new ArrowType.Utf8());
        // Raw bytes and NDArray columns share the binary type.
        case Bytes:
        case NDArray:
            return field(name, new ArrowType.Binary());
        default:
            throw new IllegalArgumentException("Column type invalid " + columnType);
    }
}
return new ArrowType.Binary();
return new Field(name, true, new Binary(), null); case DECIMAL: { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) pTypeInfo;