AvroDeserializer deserializer = new AvroDeserializer(); oiGenerator = new AvroObjectInspectorGenerator(rs); deserializedObject = deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs); } catch (SerDeException se) {
private Object worker(Object datum, Schema fileSchema, Schema recordSchema, TypeInfo columnType) throws AvroSerdeException { if (datum == null) { return null; } // Avro requires nullable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if (AvroSerdeUtils.isNullableType(recordSchema)) { recordSchema = AvroSerdeUtils.getOtherTypeFromNullableType(recordSchema); } if (fileSchema != null && AvroSerdeUtils.isNullableType(fileSchema)) { fileSchema = AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema); } switch(columnType.getCategory()) { case STRUCT: return deserializeStruct((GenericData.Record) datum, fileSchema, (StructTypeInfo) columnType); case UNION: return deserializeUnion(datum, fileSchema, recordSchema, (UnionTypeInfo) columnType); case LIST: return deserializeList(datum, fileSchema, recordSchema, (ListTypeInfo) columnType); case MAP: return deserializeMap(datum, fileSchema, recordSchema, (MapTypeInfo) columnType); case PRIMITIVE: return deserializePrimitive(datum, fileSchema, recordSchema, (PrimitiveTypeInfo) columnType); default: throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory()); } }
private Object deserializeMap(Object datum, Schema fileSchema, Schema mapSchema, MapTypeInfo columnType) throws AvroSerdeException { // Avro only allows maps with Strings for keys, so we only have to worry // about deserializing the values Map<String, Object> map = new HashMap<String, Object>(); Map<CharSequence, Object> mapDatum = (Map)datum; Schema valueSchema = mapSchema.getValueType(); TypeInfo valueTypeInfo = columnType.getMapValueTypeInfo(); for (CharSequence key : mapDatum.keySet()) { Object value = mapDatum.get(key); map.put(key.toString(), worker(value, fileSchema == null ? null : fileSchema.getValueType(), valueSchema, valueTypeInfo)); } return map; }
private Object deserializeStruct(GenericData.Record datum, Schema fileSchema, StructTypeInfo columnType) throws AvroSerdeException { // No equivalent Java type for the backing structure, need to recurse and build a list ArrayList<TypeInfo> innerFieldTypes = columnType.getAllStructFieldTypeInfos(); ArrayList<String> innerFieldNames = columnType.getAllStructFieldNames(); List<Object> innerObjectRow = new ArrayList<Object>(innerFieldTypes.size()); return workerBase(innerObjectRow, fileSchema, innerFieldNames, innerFieldTypes, datum); }
/**
 * Deserializes {@code writable} by delegating to {@code avroDeserializer} with this
 * object's column names, column types and schema.
 *
 * @param writable the serialized value to decode
 * @return whatever the delegate produces for {@code writable}
 * @throws SerDeException as declared by the delegate call
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
  if (!badSchema) {
    return avroDeserializer.deserialize(columnNames, columnTypes, writable, schema);
  }
  // The schema was flagged as bad, so refuse to deserialize.
  throw new BadSchemaException();
}
this.avroDeserializer = new AvroDeserializer();
workerBase(row, fileSchema, columnNames, columnTypes, r); return row;
/**
 * Decodes {@code writable} through the {@code avroDeserializer} field, unless the
 * {@code badSchema} flag is set.
 *
 * @param writable the serialized value to decode
 * @return the object returned by the delegate
 * @throws SerDeException as declared by the delegate call
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
  if (!badSchema) {
    return avroDeserializer.deserialize(columnNames, columnTypes, writable, schema);
  }
  // A bad schema makes deserialization impossible; signal it to the caller.
  throw new BadSchemaException();
}
/**
 * Returns the {@code avroDeserializer} field, creating and storing a new
 * instance the first time it is found to be {@code null}.
 * No synchronization is performed here.
 *
 * @return the stored deserializer instance
 */
private AvroDeserializer getDeserializer() {
  if (avroDeserializer != null) {
    return avroDeserializer;
  }
  avroDeserializer = new AvroDeserializer();
  return avroDeserializer;
}
private Object worker(Object datum, Schema recordSchema, TypeInfo columnType) throws AvroSerdeException { // Klaxon! Klaxon! Klaxon! // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if(AvroSerdeUtils.isNullableType(recordSchema)) return deserializeNullableUnion(datum, recordSchema, columnType); if(columnType == TypeInfoFactory.stringTypeInfo) return datum.toString(); // To workaround AvroUTF8 // This also gets us around the Enum issue since we just take the value // and convert it to a string. Yay! switch(columnType.getCategory()) { case STRUCT: return deserializeStruct((GenericData.Record) datum, (StructTypeInfo) columnType); case UNION: return deserializeUnion(datum, recordSchema, (UnionTypeInfo) columnType); case LIST: return deserializeList(datum, recordSchema, (ListTypeInfo) columnType); case MAP: return deserializeMap(datum, recordSchema, (MapTypeInfo) columnType); default: return datum; // Simple type. } }
/**
 * Deserializes every listed column of {@code record} and appends the results to
 * {@code objectRow} in column order.
 *
 * <p>{@code fileSchema} is optional. When it is {@code null}, or when it has no field
 * of a given name, {@code null} is passed to {@code worker} as the file-side schema
 * for that column.
 *
 * @param objectRow   list that receives one converted value per column
 * @param fileSchema  schema associated with the file, may be {@code null}
 * @param columnNames names of the columns to read from {@code record}
 * @param columnTypes type information for each column, parallel to {@code columnNames}
 * @param record      the Avro record supplying values and per-field schemas
 * @return {@code objectRow}, after all columns have been appended
 * @throws AvroSerdeException if a column cannot be converted
 */
private List<Object> workerBase(List<Object> objectRow, Schema fileSchema, List<String> columnNames,
                                List<TypeInfo> columnTypes, GenericRecord record)
    throws AvroSerdeException {
  // Strip the nullable wrapper from the file schema once instead of per column.
  // The null check matters: worker() is called with a null file schema whenever a
  // field is missing on the file side, and for a struct that null ends up here.
  Schema fileRecordSchema = fileSchema;
  if (fileRecordSchema != null && AvroSerdeUtils.isNullableType(fileRecordSchema)) {
    fileRecordSchema = AvroSerdeUtils.getOtherTypeFromNullableType(fileRecordSchema);
  }

  for (int i = 0; i < columnNames.size(); i++) {
    TypeInfo columnType = columnTypes.get(i);
    String columnName = columnNames.get(i);
    Object datum = record.get(columnName);
    Schema datumSchema = record.getSchema().getField(columnName).schema();
    Schema.Field fileField = fileRecordSchema == null ? null : fileRecordSchema.getField(columnName);
    objectRow.add(worker(datum, fileField == null ? null : fileField.schema(), datumSchema, columnType));
  }

  return objectRow;
}
AvroDeserializer deserializer = new AvroDeserializer(); oiGenerator = new AvroObjectInspectorGenerator(rs); deserializedObject = deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs); } catch (SerDeException se) {
private Object deserializeStruct(GenericData.Record datum, Schema fileSchema, StructTypeInfo columnType) throws AvroSerdeException { // No equivalent Java type for the backing structure, need to recurse and build a list ArrayList<TypeInfo> innerFieldTypes = columnType.getAllStructFieldTypeInfos(); ArrayList<String> innerFieldNames = columnType.getAllStructFieldNames(); List<Object> innerObjectRow = new ArrayList<Object>(innerFieldTypes.size()); return workerBase(innerObjectRow, fileSchema, innerFieldNames, innerFieldTypes, datum); }
/**
 * Deserializes {@code writable} using the deserializer obtained from
 * {@code getDeserializer()}, together with this object's column names,
 * column types and schema.
 *
 * @param writable the serialized value to decode
 * @return the object returned by the deserializer
 * @throws SerDeException as declared by the delegate call
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
  if (!badSchema) {
    return getDeserializer().deserialize(columnNames, columnTypes, writable, schema);
  }
  // The schema was flagged as bad, so refuse to deserialize.
  throw new BadSchemaException();
}
/**
 * Lazily supplies the deserializer: the {@code avroDeserializer} field is
 * populated on the first call that finds it {@code null} and returned thereafter.
 * No synchronization is performed here.
 *
 * @return the stored deserializer instance
 */
private AvroDeserializer getDeserializer() {
  if (avroDeserializer != null) {
    return avroDeserializer;
  }
  avroDeserializer = new AvroDeserializer();
  return avroDeserializer;
}
private Object worker(Object datum, Schema fileSchema, Schema recordSchema, TypeInfo columnType) throws AvroSerdeException { // Klaxon! Klaxon! Klaxon! // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. if(AvroSerdeUtils.isNullableType(recordSchema)) { return deserializeNullableUnion(datum, fileSchema, recordSchema); } switch(columnType.getCategory()) { case STRUCT: return deserializeStruct((GenericData.Record) datum, fileSchema, (StructTypeInfo) columnType); case UNION: return deserializeUnion(datum, fileSchema, recordSchema, (UnionTypeInfo) columnType); case LIST: return deserializeList(datum, fileSchema, recordSchema, (ListTypeInfo) columnType); case MAP: return deserializeMap(datum, fileSchema, recordSchema, (MapTypeInfo) columnType); case PRIMITIVE: return deserializePrimitive(datum, fileSchema, recordSchema, (PrimitiveTypeInfo) columnType); default: throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory()); } }
private Object deserializeUnion(Object datum, Schema fileSchema, Schema recordSchema, UnionTypeInfo columnType) throws AvroSerdeException { // Calculate tags individually since the schema can evolve and can have different tags. In worst case, both schemas are same // and we would end up doing calculations twice to get the same tag int fsTag = GenericData.get().resolveUnion(fileSchema, datum); // Determine index of value from fileSchema int rsTag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value from recordSchema Object desered = worker(datum, fileSchema == null ? null : fileSchema.getTypes().get(fsTag), recordSchema.getTypes().get(rsTag), columnType.getAllUnionObjectTypeInfos().get(rsTag)); return new StandardUnionObjectInspector.StandardUnion((byte)rsTag, desered); }
AvroDeserializer deserializer = new AvroDeserializer(); oiGenerator = new AvroObjectInspectorGenerator(rs); deserializedObject = deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs); } catch (SerDeException se) {
private Object deserializeStruct(GenericData.Record datum, StructTypeInfo columnType) throws AvroSerdeException { // No equivalent Java type for the backing structure, need to recurse and build a list ArrayList<TypeInfo> innerFieldTypes = columnType.getAllStructFieldTypeInfos(); ArrayList<String> innerFieldNames = columnType.getAllStructFieldNames(); List<Object> innerObjectRow = new ArrayList<Object>(innerFieldTypes.size()); return workerBase(innerObjectRow, innerFieldNames, innerFieldTypes, datum); }
/**
 * Decodes {@code writable} with the deserializer returned by
 * {@code getDeserializer()}, unless the {@code badSchema} flag is set.
 *
 * @param writable the serialized value to decode
 * @return the object returned by the deserializer
 * @throws SerDeException as declared by the delegate call
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
  if (!badSchema) {
    return getDeserializer().deserialize(columnNames, columnTypes, writable, schema);
  }
  // A bad schema makes deserialization impossible; signal it to the caller.
  throw new BadSchemaException();
}