// Eagerly create the serializer/deserializer pair and store them on this instance.
this.avroSerializer = new AvroSerializer();
this.avroDeserializer = new AvroDeserializer();
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException { // Avro only allows maps with string keys if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) { throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString()); } ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector(); ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector(); TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo(); TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo(); Map<?,?> map = fieldOI.getMap(structFieldData); Schema valueType = schema.getValueType(); Map<Object, Object> deserialized = new LinkedHashMap<Object, Object>(fieldOI.getMapSize(structFieldData)); for (Map.Entry<?, ?> entry : map.entrySet()) { deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA), serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType)); } return deserialized; }
return serializeEnum(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); case MAP: assert fieldOI instanceof MapObjectInspector; assert typeInfo instanceof MapTypeInfo; return serializeMap((MapTypeInfo) typeInfo, (MapObjectInspector) fieldOI, structFieldData, schema); case LIST: assert fieldOI instanceof ListObjectInspector; assert typeInfo instanceof ListTypeInfo; return serializeList((ListTypeInfo) typeInfo, (ListObjectInspector) fieldOI, structFieldData, schema); case UNION: assert fieldOI instanceof UnionObjectInspector; assert typeInfo instanceof UnionTypeInfo; return serializeUnion((UnionTypeInfo) typeInfo, (UnionObjectInspector) fieldOI, structFieldData, schema); case STRUCT: assert fieldOI instanceof StructObjectInspector; assert typeInfo instanceof StructTypeInfo; return serializeStruct((StructTypeInfo) typeInfo, (StructObjectInspector) fieldOI, structFieldData, schema); default: throw new AvroSerdeException("Ran out of TypeInfo Categories: " + typeInfo.getCategory());
/**
 * Serializes a Hive row by delegating to the Avro serializer with this instance's column
 * names, column types and schema.
 *
 * <p>The serializer is obtained through {@code getSerializer()} rather than by reading the
 * {@code avroSerializer} field directly, so the call still works when the field has not
 * been populated yet and is created on first use by that accessor.
 *
 * @param o               the Hive row object
 * @param objectInspector inspector describing {@code o}
 * @return the {@link Writable} produced by the Avro serializer
 * @throws SerDeException     if the delegate serializer fails
 * @throws BadSchemaException if the {@code badSchema} flag is set on this instance
 */
@Override
public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException {
  // Refuse to serialize anything once the schema has been flagged as bad.
  if (badSchema) {
    throw new BadSchemaException();
  }
  return getSerializer().serialize(o, objectInspector, columnNames, columnTypes, schema);
}
/**
 * Serializes an enum-typed field.
 *
 * <p>The value is first serialized as a primitive; that result is then used as the key
 * for a two-step lookup in {@code enums}: by {@code schema}, then by the primitive value.
 * NOTE(review): presumably this yields the Avro enum symbol for the value; confirm
 * against the declaration of {@code enums}.
 *
 * @param typeInfo        Hive type information for the field
 * @param fieldOI         primitive inspector used to read the field
 * @param structFieldData the raw Hive field value
 * @param schema          Avro schema of the field; first-level lookup key
 * @return whatever the second-level lookup returns for the serialized primitive
 * @throws AvroSerdeException if primitive serialization or the lookup fails
 */
private Object serializeEnum(TypeInfo typeInfo,
                             PrimitiveObjectInspector fieldOI,
                             Object structFieldData,
                             Schema schema) throws AvroSerdeException {
  return enums
      .retrieve(schema)
      .retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
/**
 * Converts a Hive list value into a {@link List} of serialized elements.
 *
 * <p>When {@code isTransformedType(schema)} holds, the whole value is handed to
 * {@code serializeTransformedType} instead. Otherwise every element is serialized against
 * the element type of {@code schema} and collected, in order, into a new {@link ArrayList}.
 *
 * @param typeInfo        Hive type information for the list (supplies the element type info)
 * @param fieldOI         inspector used to read the list and its element inspector
 * @param structFieldData the raw Hive list object
 * @param schema          Avro schema of the list
 * @return the transformed value, or a new list of serialized elements
 * @throws AvroSerdeException if serializing the value or any element fails
 */
private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
  if (isTransformedType(schema)) {
    return serializeTransformedType(typeInfo, fieldOI, structFieldData, schema);
  }

  final List<?> hiveList = fieldOI.getList(structFieldData);
  final List<Object> converted = new ArrayList<Object>(hiveList.size());

  final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
  final ObjectInspector elementInspector = fieldOI.getListElementObjectInspector();
  final Schema elementSchema = schema.getElementType();

  // Appending in iteration order is the same as inserting element i at index i.
  for (Object element : hiveList) {
    converted.add(serialize(elementTypeInfo, elementInspector, element, elementSchema));
  }
  return converted;
}
private Object serializeUnion(UnionTypeInfo typeInfo, UnionObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException { byte tag = fieldOI.getTag(structFieldData); // Invariant that Avro's tag ordering must match Hive's. return serialize(typeInfo.getAllUnionObjectTypeInfos().get(tag), fieldOI.getObjectInspectors().get(tag), fieldOI.getField(structFieldData), schema.getTypes().get(tag)); }
/**
 * Serializes an enum-typed field.
 *
 * <p>The value is first serialized as a primitive; that result is then used as the key
 * for a two-step lookup in {@code enums}: by {@code schema}, then by the primitive value.
 * NOTE(review): presumably this yields the Avro enum symbol for the value; confirm
 * against the declaration of {@code enums}.
 *
 * @param typeInfo        Hive type information for the field
 * @param fieldOI         primitive inspector used to read the field
 * @param structFieldData the raw Hive field value
 * @param schema          Avro schema of the field; first-level lookup key
 * @return whatever the second-level lookup returns for the serialized primitive
 * @throws AvroSerdeException if primitive serialization or the lookup fails
 */
private Object serializeEnum(TypeInfo typeInfo,
                             PrimitiveObjectInspector fieldOI,
                             Object structFieldData,
                             Schema schema) throws AvroSerdeException {
  return enums
      .retrieve(schema)
      .retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
/**
 * Serializes one Hive value by dispatching on the category of its type info.
 *
 * <p>A {@code null} value is returned as {@code null} without dispatching. Without this
 * guard the map, list, struct and union paths would dereference the {@code null} that the
 * inspectors hand back for absent data.
 *
 * @param typeInfo        Hive type information for the value; its category selects the path
 * @param fieldOI         inspector for the value; cast to the category-specific inspector
 * @param structFieldData the raw Hive value, possibly {@code null}
 * @param schema          Avro schema for the value; forwarded to the non-primitive paths
 * @return the serialized value, or {@code null} when {@code structFieldData} is {@code null}
 * @throws AvroSerdeException if the category is not handled or a nested serialization fails
 */
private Object serialize(TypeInfo typeInfo, ObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
  // Nothing to convert for an absent value; avoid NPEs further down the dispatch.
  if (structFieldData == null) {
    return null;
  }

  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      assert fieldOI instanceof PrimitiveObjectInspector;
      // The primitive path does not consult the Avro schema.
      return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData);
    case MAP:
      assert fieldOI instanceof MapObjectInspector;
      assert typeInfo instanceof MapTypeInfo;
      return serializeMap((MapTypeInfo) typeInfo, (MapObjectInspector) fieldOI, structFieldData, schema);
    case LIST:
      assert fieldOI instanceof ListObjectInspector;
      assert typeInfo instanceof ListTypeInfo;
      return serializeList((ListTypeInfo) typeInfo, (ListObjectInspector) fieldOI, structFieldData, schema);
    case UNION:
      assert fieldOI instanceof UnionObjectInspector;
      assert typeInfo instanceof UnionTypeInfo;
      return serializeUnion((UnionTypeInfo) typeInfo, (UnionObjectInspector) fieldOI, structFieldData, schema);
    case STRUCT:
      assert fieldOI instanceof StructObjectInspector;
      assert typeInfo instanceof StructTypeInfo;
      return serializeStruct((StructTypeInfo) typeInfo, (StructObjectInspector) fieldOI, structFieldData, schema);
    default:
      throw new AvroSerdeException("Ran out of TypeInfo Categories: " + typeInfo.getCategory());
  }
}
/**
 * Converts a Hive struct value into an Avro {@link GenericData.Record}.
 *
 * <p>Iterates over the fields of {@code schema} by position; for position {@code i} it
 * pairs the schema field with the i-th struct type info, struct field ref and struct field
 * value, serializes the value and stores it in the record under the schema field's name.
 *
 * @param typeInfo Hive type information for the struct (supplies per-field type infos)
 * @param ssoi     inspector used to read the struct's field refs and field data
 * @param o        the raw Hive struct object
 * @param schema   Avro record schema; drives the number and names of the fields written
 * @return the populated record
 * @throws AvroSerdeException if serializing any field fails
 */
private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
  final int fieldCount = schema.getFields().size();
  final List<? extends StructField> fieldRefs = ssoi.getAllStructFieldRefs();
  final List<Object> fieldValues = ssoi.getStructFieldsDataAsList(o);
  final GenericData.Record result = new GenericData.Record(schema);
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();

  for (int pos = 0; pos < fieldCount; pos++) {
    final Field avroField = schema.getFields().get(pos);
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(pos);
    final ObjectInspector fieldInspector = fieldRefs.get(pos).getFieldObjectInspector();
    final Object rawValue = fieldValues.get(pos);

    result.put(avroField.name(),
        serialize(fieldTypeInfo, fieldInspector, rawValue, avroField.schema()));
  }
  return result;
}
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException { // Avro only allows maps with string keys if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) { throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString()); } ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector(); ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector(); TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo(); TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo(); Map<?,?> map = fieldOI.getMap(structFieldData); Schema valueType = schema.getValueType(); Map<Object, Object> deserialized = new HashMap<Object, Object>(fieldOI.getMapSize(structFieldData)); for (Map.Entry<?, ?> entry : map.entrySet()) { deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA), serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType)); } return deserialized; }
/**
 * Returns the {@code avroSerializer} field, creating and storing a new
 * {@link AvroSerializer} first when the field is still {@code null}.
 *
 * @return the stored serializer; never {@code null}
 */
private AvroSerializer getSerializer() {
  if (avroSerializer != null) {
    return avroSerializer;
  }
  // First use: create the serializer and keep it for subsequent calls.
  avroSerializer = new AvroSerializer();
  return avroSerializer;
}
}
/**
 * Serializes an enum-typed field.
 *
 * <p>The value is first serialized as a primitive; that result is then used as the key
 * for a two-step lookup in {@code enums}: by {@code schema}, then by the primitive value.
 * NOTE(review): presumably this yields the Avro enum symbol for the value; confirm
 * against the declaration of {@code enums}.
 *
 * @param typeInfo        Hive type information for the field
 * @param fieldOI         primitive inspector used to read the field
 * @param structFieldData the raw Hive field value
 * @param schema          Avro schema of the field; first-level lookup key
 * @return whatever the second-level lookup returns for the serialized primitive
 * @throws AvroSerdeException if primitive serialization or the lookup fails
 */
private Object serializeEnum(TypeInfo typeInfo,
                             PrimitiveObjectInspector fieldOI,
                             Object structFieldData,
                             Schema schema) throws AvroSerdeException {
  return enums
      .retrieve(schema)
      .retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
return serializeEnum(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); case MAP: assert fieldOI instanceof MapObjectInspector; assert typeInfo instanceof MapTypeInfo; return serializeMap((MapTypeInfo) typeInfo, (MapObjectInspector) fieldOI, structFieldData, schema); case LIST: assert fieldOI instanceof ListObjectInspector; assert typeInfo instanceof ListTypeInfo; return serializeList((ListTypeInfo) typeInfo, (ListObjectInspector) fieldOI, structFieldData, schema); case UNION: assert fieldOI instanceof UnionObjectInspector; assert typeInfo instanceof UnionTypeInfo; return serializeUnion((UnionTypeInfo) typeInfo, (UnionObjectInspector) fieldOI, structFieldData, schema); case STRUCT: assert fieldOI instanceof StructObjectInspector; assert typeInfo instanceof StructTypeInfo; return serializeStruct((StructTypeInfo) typeInfo, (StructObjectInspector) fieldOI, structFieldData, schema); default: throw new AvroSerdeException("Ran out of TypeInfo Categories: " + typeInfo.getCategory());
/**
 * Converts a Hive list value into an Avro {@link GenericData.Array} of serialized elements.
 *
 * <p>Every element is serialized against the element type of {@code schema} and placed at
 * the same index it had in the Hive list.
 *
 * @param typeInfo        Hive type information for the list (supplies the element type info)
 * @param fieldOI         inspector used to read the list and its element inspector
 * @param structFieldData the raw Hive list object
 * @param schema          Avro array schema; passed to the array and used for its element type
 * @return a new Avro array holding the serialized elements
 * @throws AvroSerdeException if serializing any element fails
 */
private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
  final List<?> hiveList = fieldOI.getList(structFieldData);
  final List<Object> avroArray = new GenericData.Array<Object>(hiveList.size(), schema);

  final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
  final ObjectInspector elementInspector = fieldOI.getListElementObjectInspector();
  final Schema elementSchema = schema.getElementType();

  int index = 0;
  while (index < hiveList.size()) {
    final Object converted =
        serialize(elementTypeInfo, elementInspector, hiveList.get(index), elementSchema);
    avroArray.add(index, converted);
    index++;
  }
  return avroArray;
}
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException { // Avro only allows maps with string keys if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString()); ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector(); ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector(); TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo(); TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo(); Map<?,?> map = fieldOI.getMap(structFieldData); Schema valueType = schema.getValueType(); Map<Object, Object> deserialized = new Hashtable<Object, Object>(fieldOI.getMapSize(structFieldData)); for (Map.Entry<?, ?> entry : map.entrySet()) { deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), null), // This works, but is a bit fragile. Construct a single String schema? serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType)); } return deserialized; }
/**
 * Returns the {@code avroSerializer} field, creating and storing a new
 * {@link AvroSerializer} first when the field is still {@code null}.
 *
 * @return the stored serializer; never {@code null}
 */
private AvroSerializer getSerializer() {
  if (avroSerializer != null) {
    return avroSerializer;
  }
  // First use: create the serializer and keep it for subsequent calls.
  avroSerializer = new AvroSerializer();
  return avroSerializer;
}
}
/**
 * Serializes an enum-typed field.
 *
 * <p>The value is first serialized as a primitive; that result is then used as the key
 * for a two-step lookup in {@code enums}: by {@code schema}, then by the primitive value.
 * NOTE(review): presumably this yields the Avro enum symbol for the value; confirm
 * against the declaration of {@code enums}.
 *
 * @param typeInfo        Hive type information for the field
 * @param fieldOI         primitive inspector used to read the field
 * @param structFieldData the raw Hive field value
 * @param schema          Avro schema of the field; first-level lookup key
 * @return whatever the second-level lookup returns for the serialized primitive
 * @throws AvroSerdeException if primitive serialization or the lookup fails
 */
private Object serializeEnum(TypeInfo typeInfo,
                             PrimitiveObjectInspector fieldOI,
                             Object structFieldData,
                             Schema schema) throws AvroSerdeException {
  return enums
      .retrieve(schema)
      .retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
return serializeEnum(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema); case MAP: assert fieldOI instanceof MapObjectInspector; assert typeInfo instanceof MapTypeInfo; return serializeMap((MapTypeInfo) typeInfo, (MapObjectInspector) fieldOI, structFieldData, schema); case LIST: assert fieldOI instanceof ListObjectInspector; assert typeInfo instanceof ListTypeInfo; return serializeList((ListTypeInfo) typeInfo, (ListObjectInspector) fieldOI, structFieldData, schema); case UNION: assert fieldOI instanceof UnionObjectInspector; assert typeInfo instanceof UnionTypeInfo; return serializeUnion((UnionTypeInfo) typeInfo, (UnionObjectInspector) fieldOI, structFieldData, schema); case STRUCT: assert fieldOI instanceof StructObjectInspector; assert typeInfo instanceof StructTypeInfo; return serializeStruct((StructTypeInfo) typeInfo, (StructObjectInspector) fieldOI, structFieldData, schema); default: throw new AvroSerdeException("Ran out of TypeInfo Categories: " + typeInfo.getCategory());
/**
 * Serializes a Hive row into an Avro record wrapped in the reusable {@code cache} writable.
 *
 * <p>The row's struct fields are matched to the schema's fields by position. Each value is
 * serialized with the corresponding entry of {@code columnTypes} and stored under the
 * schema field's name. The finished record is validated against {@code schema} before it
 * is placed in {@code cache}.
 *
 * @param o               the Hive row object
 * @param objectInspector inspector for {@code o}; cast to {@link StructObjectInspector}
 * @param columnNames     Hive column names; only their count is checked here
 * @param columnTypes     Hive column type infos, indexed in schema-field order
 * @param schema          Avro record schema to serialize against
 * @return the {@code cache} writable, now holding the new record
 * @throws AvroSerdeException       if the number of struct fields differs from the number
 *                                  of column names or of schema fields, or if a field
 *                                  fails to serialize
 * @throws SerializeToAvroException if the populated record does not validate against
 *                                  {@code schema}
 */
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  GenericData.Record record = new GenericData.Record(schema);

  // Fetched once and reused for both the sanity checks and the per-field loop.
  List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
  if (fieldRefs.size() != columnNames.size()) {
    throw new AvroSerdeException("Number of input columns was different than output columns (in = "
        + columnNames.size() + " vs out = " + fieldRefs.size() + ")");
  }

  List<Field> avroFields = schema.getFields();
  int size = avroFields.size();
  if (fieldRefs.size() != size) {
    throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted "
        + fieldRefs.size() + ", Avro expected " + size + ")");
  }

  List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
  for (int i = 0; i < size; i++) {
    Field field = avroFields.get(i);
    TypeInfo typeInfo = columnTypes.get(i);
    ObjectInspector fieldOI = fieldRefs.get(i).getFieldObjectInspector();
    Object structFieldData = structFieldsDataAsList.get(i);

    Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }

  // Fail here rather than hand an invalid record downstream.
  if (!GenericData.get().validate(schema, record)) {
    throw new SerializeToAvroException(schema, record);
  }

  cache.setRecord(record);
  return cache;
}