private void verifySchemaIsARecord(Schema schema) throws SerDeException {
  if (!schema.getType().equals(Schema.Type.RECORD)) {
    throw new AvroSerdeException("Schema for table must be of type RECORD. " +
        "Received type: " + schema.getType());
  }
}
/**
 * Attempt to determine the schema via the usual means, but do not throw
 * an exception if we fail.  Instead, signal failure via a special
 * schema.  This is used because Hive calls init on the serde during
 * any call, including calls to update the serde properties, meaning
 * if the serde is in a bad state, there is no way to update that state.
 */
public Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) {
  try {
    configErrors = "";
    return AvroSerdeUtils.determineSchemaOrThrowException(conf, props);
  } catch (AvroSerdeException he) {
    LOG.warn("Encountered AvroSerdeException determining schema. Returning " +
        "signal schema to indicate problem", he);
    configErrors = "Encountered AvroSerdeException determining schema. Returning " +
        "signal schema to indicate problem: " + he.getMessage();
    return schema = SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
  } catch (Exception e) {
    LOG.warn("Encountered exception determining schema. Returning signal " +
        "schema to indicate problem", e);
    configErrors = "Encountered exception determining schema. Returning signal " +
        "schema to indicate problem: " + e.getMessage();
    return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA;
  }
}
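For context, a minimal caller-side sketch of how the returned signal schema might be checked; the check itself is hypothetical, and only SchemaResolutionProblem.SIGNAL_BAD_SCHEMA and configErrors come from the code above.

  // Hypothetical caller (not the actual AvroSerDe.initialize implementation):
  Schema schema = determineSchemaOrReturnErrorSchema(conf, props);
  if (schema == SchemaResolutionProblem.SIGNAL_BAD_SCHEMA) {
    // The serde is in a bad state: skip building object inspectors and surface
    // configErrors to the user instead of failing the whole call.
    LOG.error("Avro schema could not be determined: " + configErrors);
    return;
  }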
private static TypeInfo generateRecordTypeInfo(Schema schema,
    Set<Schema> seenSchemas) throws AvroSerdeException {
  assert schema.getType().equals(Schema.Type.RECORD);

  if (seenSchemas == null) {
    seenSchemas = Collections.newSetFromMap(new IdentityHashMap<Schema, Boolean>());
  } else if (seenSchemas.contains(schema)) {
    throw new AvroSerdeException(
        "Recursive schemas are not supported. Recursive schema was " + schema.getFullName());
  }
  seenSchemas.add(schema);

  List<Schema.Field> fields = schema.getFields();
  List<String> fieldNames = new ArrayList<String>(fields.size());
  List<TypeInfo> typeInfos = new ArrayList<TypeInfo>(fields.size());

  for (int i = 0; i < fields.size(); i++) {
    fieldNames.add(i, fields.get(i).name());
    typeInfos.add(i, generateTypeInfo(fields.get(i).schema(), seenSchemas));
  }

  return TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos);
}
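As an illustration of the mapping this produces, a record schema with two int fields is expected to become the corresponding Hive struct TypeInfo; the Point schema below is made up for the example, and the imports are the usual Avro and Hive serde2 classes.

  Schema record = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"Point\",\"fields\":[" +
      "{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}");

  // Expected equivalent struct TypeInfo: struct<x:int,y:int>
  TypeInfo expected = TypeInfoFactory.getStructTypeInfo(
      Arrays.asList("x", "y"),
      Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo));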
// Fragments from AvroSerdeUtils.determineSchemaOrThrowException; intervening lines elided ("// ...").
  if (columnNameProperty == null || columnNameProperty.isEmpty()
      || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
    throw new AvroSerdeException(EXCEPTION_MESSAGE);
  }
  // ... build the schema from the column names and types ...
  return schema;
} else if (schemaString.equals(SCHEMA_NONE)) {
  throw new AvroSerdeException(EXCEPTION_MESSAGE);
}

try {
  // ... read the schema from the URL in schemaString ...
} catch (IOException ioe) {
  throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe);
} catch (URISyntaxException urie) {
  throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie);
}
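For reference, a sketch of how a table might hand the serde its schema; the property keys are the standard avro.schema.literal / avro.schema.url ones, while the Point schema and the HDFS path are made up for the example.

  Properties props = new Properties();
  props.setProperty("avro.schema.literal",
      "{\"type\":\"record\",\"name\":\"Point\",\"fields\":[" +
      "{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}");
  // Or, instead of a literal: props.setProperty("avro.schema.url", "hdfs:///path/to/point.avsc");
  // With neither property set, the schema is built from the column names/types,
  // and the checks above throw EXCEPTION_MESSAGE when those are missing or empty.
  Schema s = AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(), props);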
public Writable serialize(Object o, ObjectInspector objectInspector,
    List<String> columnNames, List<TypeInfo> columnTypes, Schema schema)
    throws AvroSerdeException {
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  GenericData.Record record = new GenericData.Record(schema);

  List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
  if (outputFieldRefs.size() != columnNames.size()) {
    throw new AvroSerdeException("Number of input columns was different than output columns " +
        "(in = " + columnNames.size() + " vs out = " + outputFieldRefs.size() + ")");
  }

  int size = schema.getFields().size();
  if (outputFieldRefs.size() != size) {
    throw new AvroSerdeException("Hive passed in a different number of fields than the schema " +
        "expected: (Hive wanted " + outputFieldRefs.size() +
        ", Avro expected " + schema.getFields().size() + ")");
  }

  List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
  List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);

  for (int i = 0; i < size; i++) {
    Field field = schema.getFields().get(i);
    TypeInfo typeInfo = columnTypes.get(i);
    StructField structFieldRef = allStructFieldRefs.get(i);
    Object structFieldData = structFieldsDataAsList.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();

    Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }

  if (!GenericData.get().validate(schema, record)) {
    throw new SerializeToAvroException(schema, record);
  }

  cache.setRecord(record);
  return cache;
}
public GenericRecord reencode(GenericRecord r) throws AvroSerdeException {
  baos.reset();

  BinaryEncoder be = EncoderFactory.get().directBinaryEncoder(baos, null);
  gdw.setSchema(r.getSchema());

  try {
    gdw.write(r, be);
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());

    binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder(bais, binaryDecoder);

    return gdr.read(r, binaryDecoder);
  } catch (IOException e) {
    throw new AvroSerdeException("Exception trying to re-encode record to new schema", e);
  }
}
}
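The same round-trip can be sketched with plain Avro APIs (org.apache.avro.generic and org.apache.avro.io) as a stand-alone helper; the method name and locals are illustrative, whereas the class above reuses gdw, gdr, baos and binaryDecoder as fields.

  static GenericRecord reencodeWithReaderSchema(GenericRecord written, Schema readerSchema)
      throws IOException {
    // Serialize with the writer's own schema...
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    BinaryEncoder enc = EncoderFactory.get().directBinaryEncoder(baos, null);
    new GenericDatumWriter<GenericRecord>(written.getSchema()).write(written, enc);

    // ...then deserialize with the reader's schema so Avro applies its normal
    // schema-resolution rules (field matching by name, defaults, promotions).
    BinaryDecoder dec = DecoderFactory.get().binaryDecoder(
        new ByteArrayInputStream(baos.toByteArray()), null);
    return new GenericDatumReader<GenericRecord>(written.getSchema(), readerSchema)
        .read(null, dec);
  }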
// Fragments from the primitive deserialization path; intervening lines elided ("// ...").
      return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum);
    } else {
      throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " +
          recordSchema.getType());
    }
    // ... (decimal handling) ...
    if (fileSchema == null) {
      throw new AvroSerdeException("File schema is missing for decimal field. Reader schema is " +
          columnType);
    }
    try {
      scale = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).asInt();
    } catch (Exception ex) {
      throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex);
    }
    // ...
  case CHAR:
    if (fileSchema == null) {
      throw new AvroSerdeException("File schema is missing for char field. Reader schema is " +
          columnType);
    }
    try {
      maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
    } catch (Exception ex) {
      throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " +
          fileSchema, ex);
    }
    // ...
  case VARCHAR:
    if (fileSchema == null) {
      throw new AvroSerdeException("File schema is missing for varchar field. Reader schema is " +
          columnType);
    }
    try {
      maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
    } catch (Exception ex) {
      throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " +
          fileSchema, ex);
    }
    // ...
  case DATE:
    if (recordSchema.getType() != Type.INT) {
      throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " +
          recordSchema.getType());
    }
    Writable writable, Schema readerSchema) throws AvroSerdeException {
  if (!(writable instanceof AvroGenericRecordWritable)) {
    throw new AvroSerdeException("Expecting an AvroGenericRecordWritable");
private static TypeInfo generateTypeInfoWorker(Schema schema,
    Set<Schema> seenSchemas) throws AvroSerdeException {
  // Avro requires NULLable types to be defined as unions of some type T
  // and NULL.  This is annoying and we're going to hide it from the user.
  if (AvroSerdeUtils.isNullableType(schema)) {
    return generateTypeInfo(
        AvroSerdeUtils.getOtherTypeFromNullableType(schema), seenSchemas);
  }

  Schema.Type type = schema.getType();
  if (primitiveTypeToTypeInfo.containsKey(type)) {
    return primitiveTypeToTypeInfo.get(type);
  }

  switch (type) {
    case RECORD: return generateRecordTypeInfo(schema, seenSchemas);
    case MAP:    return generateMapTypeInfo(schema, seenSchemas);
    case ARRAY:  return generateArrayTypeInfo(schema, seenSchemas);
    case UNION:  return generateUnionTypeInfo(schema, seenSchemas);
    case ENUM:   return generateEnumTypeInfo(schema);
    default:     throw new AvroSerdeException("Do not yet support: " + schema);
  }
}
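For example, the nullable-union unwrapping means a column declared in Avro as ["null", "int"] surfaces in Hive as a plain int rather than a uniontype; the expected results below are noted as comments, not captured output.

  Schema nullableInt = new Schema.Parser().parse("[\"null\", \"int\"]");
  // AvroSerdeUtils.isNullableType(nullableInt)               -> true
  // AvroSerdeUtils.getOtherTypeFromNullableType(nullableInt) -> the "int" schema
  // Resulting Hive type: int (not uniontype<void,int>)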
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI,
    Object structFieldData, Schema schema) throws AvroSerdeException {
  // Avro only allows maps with string keys
  if (!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
    throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " +
        typeInfo.toString());
  }

  ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
  ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();
  TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo();
  TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo();
  Map<?, ?> map = fieldOI.getMap(structFieldData);
  Schema valueType = schema.getValueType();

  Map<Object, Object> deserialized =
      new LinkedHashMap<Object, Object>(fieldOI.getMapSize(structFieldData));

  for (Map.Entry<?, ?> entry : map.entrySet()) {
    deserialized.put(
        serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA),
        serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType));
  }

  return deserialized;
}
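As a concrete pairing (the column itself is hypothetical): a Hive map<string,int> column is serialized against an Avro map schema whose values are int, with STRING_SCHEMA used for every key, which is why non-string Hive keys are rejected above.

  Schema avroMap = new Schema.Parser().parse("{\"type\":\"map\",\"values\":\"int\"}");
  // avroMap.getValueType() -> the "int" schema passed to serialize() for each entry's value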
// Fragments from the decimal/char/varchar type handling; intervening lines elided ("// ...").
try {
  scale = schema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).getIntValue();
} catch (Exception ex) {
  throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex);
}
// ...
try {
  HiveDecimalUtils.validateParameter(precision, scale);
} catch (Exception ex) {
  throw new AvroSerdeException("Invalid precision or scale for decimal type", ex);
}
// ... (char) ...
try {
  maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
} catch (Exception ex) {
  throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex);
}
// ... (varchar) ...
try {
  maxLength = schema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
} catch (Exception ex) {
  throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex);
}
throw new AvroSerdeException("Don't yet support this type: " + ti); break; default: throw new AvroSerdeException("No Hive categories matched: " + ti);
// Fragments from the primitive serialization path; intervening lines elided ("// ...").
    return fixed;
  } else {
    throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + schema.getType());
  }
// ...
    return timestamp.toEpochMilli();
  case UNKNOWN:
    throw new AvroSerdeException("Received UNKNOWN primitive category.");
  case VOID:
    return null;
    return serializeStruct((StructTypeInfo) typeInfo, (StructObjectInspector) fieldOI,
        structFieldData, schema);
  default:
    throw new AvroSerdeException("Ran out of TypeInfo Categories: " + typeInfo.getCategory());
private Object worker(Object datum, Schema fileSchema, Schema recordSchema,
    TypeInfo columnType) throws AvroSerdeException {
  if (datum == null) {
    return null;
  }

  // Avro requires nullable types to be defined as unions of some type T
  // and NULL.  This is annoying and we're going to hide it from the user.
  if (AvroSerdeUtils.isNullableType(recordSchema)) {
    recordSchema = AvroSerdeUtils.getOtherTypeFromNullableType(recordSchema);
  }
  if (fileSchema != null && AvroSerdeUtils.isNullableType(fileSchema)) {
    fileSchema = AvroSerdeUtils.getOtherTypeFromNullableType(fileSchema);
  }

  switch (columnType.getCategory()) {
    case STRUCT:
      return deserializeStruct((GenericData.Record) datum, fileSchema, (StructTypeInfo) columnType);
    case UNION:
      return deserializeUnion(datum, fileSchema, recordSchema, (UnionTypeInfo) columnType);
    case LIST:
      return deserializeList(datum, fileSchema, recordSchema, (ListTypeInfo) columnType);
    case MAP:
      return deserializeMap(datum, fileSchema, recordSchema, (MapTypeInfo) columnType);
    case PRIMITIVE:
      return deserializePrimitive(datum, fileSchema, recordSchema, (PrimitiveTypeInfo) columnType);
    default:
      throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory());
  }
}