/**
 * Appends the value found at {@code fieldIdx} of {@code src} to {@code dst}, dispatching on
 * the declared type {@code t}.
 *
 * <ul>
 *   <li>Array type: a null cell appends a null; otherwise {@code appendArray} is called with the
 *       element count and every element is appended to {@code dst.arrayData()}.</li>
 *   <li>Struct type: {@code appendStruct} is called with the cell's null flag; for a non-null
 *       cell each field is then appended recursively to the child vector at the same index.</li>
 *   <li>Any other type: the raw value is handed to the {@code Object}-valued overload.</li>
 * </ul>
 *
 * @param dst      vector receiving the value
 * @param t        declared type of the value
 * @param src      row holding the value
 * @param fieldIdx position of the value inside {@code src}
 */
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) t;
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
      return;
    }
    List<Object> elements = src.getList(fieldIdx);
    dst.appendArray(elements.size());
    for (Object element : elements) {
      appendValue(dst.arrayData(), arrayType.elementType(), element);
    }
    return;
  }

  if (t instanceof StructType) {
    StructType structType = (StructType) t;
    boolean structIsNull = src.isNullAt(fieldIdx);
    dst.appendStruct(structIsNull);
    if (structIsNull) {
      return;
    }
    Row nested = src.getStruct(fieldIdx);
    for (int child = 0; child < structType.fields().length; child++) {
      appendValue(dst.getChild(child), structType.fields()[child].dataType(), nested, child);
    }
    return;
  }

  // Neither array nor struct: defer to the overload that takes the raw value.
  appendValue(dst, t, src.get(fieldIdx));
}
/**
 * Appends the value found at {@code fieldIdx} of {@code src} to {@code dst}, dispatching on
 * the declared type {@code t}.
 *
 * <ul>
 *   <li>Array type: a null cell appends a null; otherwise {@code appendArray} is called with the
 *       element count and every element is appended to {@code dst.arrayData()}.</li>
 *   <li>Struct type: {@code appendStruct} is called with the cell's null flag; for a non-null
 *       cell each field is then appended recursively to the child column at the same index.</li>
 *   <li>Any other type: the raw value is handed to the {@code Object}-valued overload.</li>
 * </ul>
 *
 * @param dst      column receiving the value
 * @param t        declared type of the value
 * @param src      row holding the value
 * @param fieldIdx position of the value inside {@code src}
 */
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType arrayType = (ArrayType) t;
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
      return;
    }
    List<Object> elements = src.getList(fieldIdx);
    dst.appendArray(elements.size());
    for (Object element : elements) {
      appendValue(dst.arrayData(), arrayType.elementType(), element);
    }
    return;
  }

  if (t instanceof StructType) {
    StructType structType = (StructType) t;
    boolean structIsNull = src.isNullAt(fieldIdx);
    dst.appendStruct(structIsNull);
    if (structIsNull) {
      return;
    }
    Row nested = src.getStruct(fieldIdx);
    for (int child = 0; child < structType.fields().length; child++) {
      appendValue(dst.getChildColumn(child), structType.fields()[child].dataType(), nested, child);
    }
    return;
  }

  // Neither array nor struct: defer to the overload that takes the raw value.
  appendValue(dst, t, src.get(fieldIdx));
}
/**
 * Copies one cell of {@code src} (the one at {@code fieldIdx}) onto the end of {@code dst}.
 *
 * <p>The declared type {@code t} selects the strategy: arrays append either a null or the
 * element count followed by each element into {@code dst.arrayData()}; structs append a null
 * marker and, when present, recurse into each field using the child vector with the same index;
 * every other type is forwarded to the overload accepting a plain {@code Object}.
 *
 * @param dst      vector receiving the value
 * @param t        declared type of the value
 * @param src      row holding the value
 * @param fieldIdx position of the value inside {@code src}
 */
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType listType = (ArrayType) t;
    if (!src.isNullAt(fieldIdx)) {
      List<Object> items = src.getList(fieldIdx);
      dst.appendArray(items.size());
      for (Object item : items) {
        appendValue(dst.arrayData(), listType.elementType(), item);
      }
    } else {
      dst.appendNull();
    }
  } else if (t instanceof StructType) {
    StructType rowType = (StructType) t;
    if (!src.isNullAt(fieldIdx)) {
      dst.appendStruct(false);
      Row inner = src.getStruct(fieldIdx);
      int pos = 0;
      while (pos < rowType.fields().length) {
        appendValue(dst.getChild(pos), rowType.fields()[pos].dataType(), inner, pos);
        pos++;
      }
    } else {
      dst.appendStruct(true);
    }
  } else {
    // Leaf value: the Object-valued overload handles it.
    appendValue(dst, t, src.get(fieldIdx));
  }
}
int childCapacity = capacity; if (type instanceof ArrayType) { childType = ((ArrayType)type).elementType(); } else { childType = DataTypes.ByteType;
int childCapacity = capacity; if (type instanceof ArrayType) { childType = ((ArrayType)type).elementType(); } else { childType = DataTypes.ByteType;
int childCapacity = capacity; if (type instanceof ArrayType) { childType = ((ArrayType)type).elementType(); } else { childType = DataTypes.ByteType;
Object elementValue = toRowValue(obj, arrayType.elementType(), elementPath); if (elementValue == null && !arrayType.containsNull()) { throw new IllegalArgumentException("Null value is not allowed for array element at " + elementPath);
Object elementValue = toRowValue(obj, arrayType.elementType(), elementPath); if (elementValue == null && !arrayType.containsNull()) { throw new IllegalArgumentException("Null value is not allowed for array element at " + elementPath);
@Override public Type list(Types.ListType list, Supplier<Type> elementResult) { Preconditions.checkArgument(current instanceof ArrayType, "Not an array: %s", current); ArrayType array = (ArrayType) current; Preconditions.checkArgument(array.containsNull() || !list.isElementOptional(), "Cannot project an array of optional elements as required elements: %s", array); this.current = array.elementType(); try { Type elementType = elementResult.get(); if (list.elementType() == elementType) { return list; } // must be a projected element type, create a new list if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementType); } else { return Types.ListType.ofRequired(list.elementId(), elementType); } } finally { this.current = array; } }
@Override public Type list(Types.ListType list, Supplier<Type> elementResult) { Preconditions.checkArgument(current instanceof ArrayType, "Not an array: %s", current); ArrayType array = (ArrayType) current; Preconditions.checkArgument(array.containsNull() || !list.isElementOptional(), "Cannot project an array of optional elements as required elements: %s", array); this.current = array.elementType(); try { Type elementType = elementResult.get(); if (list.elementType() == elementType) { return list; } // must be a projected element type, create a new list if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementType); } else { return Types.ListType.ofRequired(list.elementId(), elementType); } } finally { this.current = array; } }
public static String exportSchemaToJson(StructType dfSchema) { //This would contain column name along with type of a dataframe List<Field> schema = new ArrayList<>(); for (StructField field : dfSchema.fields()) { if (field.dataType() instanceof StringType) { schema.add(new Field(field.name(), STRING)); } else if (field.dataType() instanceof BooleanType) { schema.add(new Field(field.name(), BOOLEAN)); } else if (field.dataType() instanceof VectorUDT) { schema.add(new Field(field.name(), DOUBLE_ARRAY)); } else if (field.dataType() instanceof DoubleType || field.dataType() instanceof DecimalType || field.dataType() instanceof FloatType || field.dataType() instanceof IntegerType || field.dataType() instanceof LongType || field.dataType() instanceof ShortType) { schema.add(new Field(field.name(), DOUBLE)); } else if (field.dataType() instanceof ArrayType) { if(((ArrayType)field.dataType()).elementType() instanceof StringType) { schema.add(new Field(field.name(), STRING_ARRAY)); }else if(((ArrayType)field.dataType()).elementType() instanceof DoubleType) { schema.add(new Field(field.name(), DOUBLE_ARRAY)); }else { throw new UnsupportedOperationException("Cannot support data of type " + field.dataType()); } } else { throw new UnsupportedOperationException("Cannot support data of type " + field.dataType()); } } return gson.toJson(schema); }
public static String exportToJson(Set<String> columns, StructType dfSchema) { //This would contain column name along with type of a dataframe List<Field> schema = new ArrayList<>(); for (String column : columns) { StructField field = dfSchema.fields()[ dfSchema.fieldIndex(column) ]; if (field.dataType() instanceof StringType) { schema.add(new Field(field.name(), STRING)); } else if (field.dataType() instanceof BooleanType) { schema.add(new Field(field.name(), BOOLEAN)); } else if (field.dataType() instanceof VectorUDT) { schema.add(new Field(field.name(), DOUBLE_ARRAY)); } else if (field.dataType() instanceof DoubleType || field.dataType() instanceof DecimalType || field.dataType() instanceof FloatType || field.dataType() instanceof IntegerType || field.dataType() instanceof LongType || field.dataType() instanceof ShortType) { schema.add(new Field(field.name(), DOUBLE)); } else if (field.dataType() instanceof ArrayType) { if(((ArrayType)field.dataType()).elementType() instanceof StringType) { schema.add(new Field(field.name(), STRING_ARRAY)); }else if(((ArrayType)field.dataType()).elementType() instanceof DoubleType) { schema.add(new Field(field.name(), DOUBLE_ARRAY)); }else { throw new UnsupportedOperationException("Cannot support data of type " + field.dataType()); } } else { throw new UnsupportedOperationException("Cannot support data of type " + field.dataType()); } } return gson.toJson(schema); }
if (arrayType.elementType() == DataTypes.ByteType) { return Schema.of(Schema.Type.BYTES); Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter); return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
/**
 * Walks {@code type} depth-first and returns the result that {@code visitor} produces for it.
 *
 * <p>Children are visited before their parent: struct fields in declaration order (each child
 * result is passed through {@code visitor.field}), a map's key type before its value type, and
 * an array's element type. Types that are none of struct, map, array or user-defined are passed
 * to {@code visitor.atomic}.
 *
 * @param type    the type to traverse
 * @param visitor callbacks invoked for each node
 * @param <T>     result type produced by the visitor
 * @return the visitor's result for {@code type}
 * @throws UnsupportedOperationException if a user-defined type is encountered
 */
static <T> T visit(DataType type, SparkTypeVisitor<T> visitor) {
  if (type instanceof StructType) {
    StructType struct = (StructType) type;
    StructField[] fields = struct.fields();
    List<T> fieldResults = Lists.newArrayListWithExpectedSize(fields.length);
    for (int i = 0; i < fields.length; i++) {
      StructField field = fields[i];
      T childResult = visit(field.dataType(), visitor);
      fieldResults.add(visitor.field(field, childResult));
    }
    return visitor.struct(struct, fieldResults);
  }

  if (type instanceof MapType) {
    MapType map = (MapType) type;
    // Key is visited before value, matching left-to-right argument evaluation.
    T keyResult = visit(map.keyType(), visitor);
    T valueResult = visit(map.valueType(), visitor);
    return visitor.map(map, keyResult, valueResult);
  }

  if (type instanceof ArrayType) {
    ArrayType array = (ArrayType) type;
    T elementResult = visit(array.elementType(), visitor);
    return visitor.array(array, elementResult);
  }

  if (type instanceof UserDefinedType) {
    throw new UnsupportedOperationException(
        "User-defined types are not supported");
  }

  return visitor.atomic(type);
}
if (arrayType.elementType() == DataTypes.ByteType) { return Schema.of(Schema.Type.BYTES); Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter); return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
if (arrayType.elementType() == DataTypes.ByteType) { return Schema.of(Schema.Type.BYTES); Schema componentSchema = dataTypeToSchema(arrayType.elementType(), recordCounter); return Schema.arrayOf(arrayType.containsNull() ? Schema.nullableOf(componentSchema) : componentSchema);
case MESSAGE: Row childRow = RowFactory.create(buildRowValues(field.getMessageType(), (Message) child, (StructType) arrayType.elementType()).toArray()); childValues.add(childRow); break; case ENUM: childValues.add(RowUtils.toRowValue(((Descriptors.EnumValueDescriptor) child).getName(), arrayType.elementType())); break; case GROUP: throw new IllegalStateException("GROUP type not permitted"); default: childValues.add(RowUtils.toRowValue(child, arrayType.elementType()));
/**
 * Copies one cell of {@code src} (the one at {@code fieldIdx}) onto the end of {@code dst}.
 *
 * <p>The declared type {@code t} selects the strategy: arrays append either a null or the
 * element count followed by each element into {@code dst.arrayData()}; structs append a null
 * marker and, when present, recurse into each field using the child column with the same index;
 * every other type is forwarded to the overload accepting a plain {@code Object}.
 *
 * @param dst      column receiving the value
 * @param t        declared type of the value
 * @param src      row holding the value
 * @param fieldIdx position of the value inside {@code src}
 */
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    ArrayType listType = (ArrayType) t;
    if (!src.isNullAt(fieldIdx)) {
      List<Object> items = src.getList(fieldIdx);
      dst.appendArray(items.size());
      for (Object item : items) {
        appendValue(dst.arrayData(), listType.elementType(), item);
      }
    } else {
      dst.appendNull();
    }
  } else if (t instanceof StructType) {
    StructType rowType = (StructType) t;
    if (!src.isNullAt(fieldIdx)) {
      dst.appendStruct(false);
      Row inner = src.getStruct(fieldIdx);
      int pos = 0;
      while (pos < rowType.fields().length) {
        appendValue(dst.getChildColumn(pos), rowType.fields()[pos].dataType(), inner, pos);
        pos++;
      }
    } else {
      dst.appendStruct(true);
    }
  } else {
    // Leaf value: the Object-valued overload handles it.
    appendValue(dst, t, src.get(fieldIdx));
  }
}
int childCapacity = capacity; if (type instanceof ArrayType) { childType = ((ArrayType)type).elementType(); } else { childType = DataTypes.ByteType;
case "array": ArrayType arrayType = (ArrayType) dataType; typeSchema = typeBuilder.array().items(typeFor(arrayType.elementType(), arrayType.containsNull(), recordCount)); break; case "map":