/**
 * Validates and returns the underlying {@link LogicalType} of the given {@link Schema.Field}.
 *
 * @param field field with logical type, or null
 * @param allowedTypes acceptable logical types
 * @return the underlying {@link LogicalType} for the field, or null when the field itself is null
 * @throws UnexpectedFormatException if the field has no {@link LogicalType}, or its logical type
 *         is not one of the acceptable types
 */
@Nullable
private static LogicalType validateAndGetLogicalType(Schema.Field field, Set<LogicalType> allowedTypes) {
  // A null field is tolerated; validation only applies to fields that exist.
  if (field == null) {
    return null;
  }
  final String name = field.getName();
  final LogicalType actual = getLogicalType(field.getSchema(), allowedTypes);
  if (actual == null) {
    throw new UnexpectedFormatException(String.format("Field %s does not have a logical type.", name));
  }
  if (!allowedTypes.contains(actual)) {
    throw new UnexpectedFormatException(
      String.format("Field %s must be of logical type %s, instead it is of type %s", name, allowedTypes, actual));
  }
  return actual;
}
/** * Sets the date value for {@link LogicalType#DATE} field * * @param fieldName name of the field to set * @param localDate value for the field * @return this builder * @throws UnexpectedFormatException if the field is not in the schema, or the field is not nullable but a null * value is given or if the provided date is an invalid date */ public Builder setDate(String fieldName, @Nullable LocalDate localDate) { validateAndGetLogicalType(validateAndGetField(fieldName, localDate), EnumSet.of(LogicalType.DATE)); if (localDate == null) { fields.put(fieldName, null); return this; } try { fields.put(fieldName, Math.toIntExact(localDate.toEpochDay())); } catch (ArithmeticException e) { // Highest integer is 2,147,483,647 which is Jan 1 2038. throw new UnexpectedFormatException(String.format("Field %s was set to a date that is too large." + "Valid date should be below Jan 1 2038", fieldName)); } return this; }
fields.put(fieldName, millis); } catch (ArithmeticException e) { throw new UnexpectedFormatException(String.format("Field %s was set to a time that is too large.", fieldName));
private int addNextLogEntry(String data, int start, List<String> parts) { int end = -1; if (data.charAt(start) == '"') { // Find the closing '"' and extract values within start = start + 1; end = findNext(data, start, '"'); } else if (data.charAt(start) == '[') { // find the closing ']' and extract values start = start + 1; end = findNext(data, start, ']'); } else { // find the next ' ' and extract values end = findNext(data, start + 1, ' '); } if (end == -1) { throw new UnexpectedFormatException(String.format("Could not parse data in CLF format. Entry %s", data)); } parts.add(data.substring(start, end)); return end + 1; }
/** * Sets the date value for {@link LogicalType#DATE} field * * @param fieldName name of the field to set * @param localDate value for the field * @return this builder * @throws UnexpectedFormatException if the field is not in the schema, or the field is not nullable but a null * value is given or if the provided date is an invalid date */ public Builder setDate(String fieldName, @Nullable LocalDate localDate) { validateAndGetLogicalType(validateAndGetField(fieldName, localDate), EnumSet.of(LogicalType.DATE)); if (localDate == null) { fields.put(fieldName, null); return this; } try { fields.put(fieldName, Math.toIntExact(localDate.toEpochDay())); } catch (ArithmeticException e) { // Highest integer is 2,147,483,647 which is Jan 1 2038. throw new UnexpectedFormatException(String.format("Field %s was set to a date that is too large." + "Valid date should be below Jan 1 2038", fieldName)); } return this; }
/**
 * Validates and returns the underlying {@link LogicalType} of the given {@link Schema.Field}.
 *
 * @param field field with logical type; null is tolerated and simply returns null
 * @param allowedTypes acceptable logical types
 * @return the underlying {@link LogicalType} for the field, or null when the field is null
 * @throws UnexpectedFormatException if the field provided does not have a {@link LogicalType} that is
 *         one of the acceptable types
 */
@Nullable
private static LogicalType validateAndGetLogicalType(Schema.Field field, Set<LogicalType> allowedTypes) {
  if (field == null) {
    return null;
  }
  String fieldName = field.getName();
  // getLogicalType resolves the logical type from the field's schema; returns null when absent.
  LogicalType logicalType = getLogicalType(field.getSchema(), allowedTypes);
  if (logicalType == null) {
    throw new UnexpectedFormatException(String.format("Field %s does not have a logical type.", fieldName));
  }
  // Double-check membership in case getLogicalType returned a type outside the allowed set.
  if (!allowedTypes.contains(logicalType)) {
    throw new UnexpectedFormatException(String.format("Field %s must be of logical type %s, instead it is of type %s",
                                                      fieldName, allowedTypes, logicalType));
  }
  return logicalType;
}
/**
 * Extracts the next entry from a CLF-formatted line starting at {@code start} and appends it
 * to {@code parts}.
 *
 * @param data the raw log line
 * @param start index at which the next entry begins
 * @param parts accumulator for extracted entries
 * @return the index just past the entry's closing delimiter
 * @throws UnexpectedFormatException if no closing delimiter is found
 */
private int addNextLogEntry(String data, int start, List<String> parts) {
  int end = -1;
  if (data.charAt(start) == '"') {
    // Find the closing '"' and extract values within
    start = start + 1;
    end = findNext(data, start, '"');
  } else if (data.charAt(start) == '[') {
    // find the closing ']' and extract values
    start = start + 1;
    end = findNext(data, start, ']');
  } else {
    // find the next ' ' and extract values
    end = findNext(data, start + 1, ' ');
  }
  if (end == -1) {
    throw new UnexpectedFormatException(String.format("Could not parse data in CLF format. Entry %s", data));
  }
  parts.add(data.substring(start, end));
  // Skip past the closing delimiter for the next call.
  return end + 1;
}
/**
 * Get a builder for creating a record with the given schema.
 *
 * @param schema schema for the record to build.
 * @return builder for creating a record with the given schema.
 * @throws UnexpectedFormatException if the given schema is null, not a record, or has no fields.
 */
public static Builder builder(Schema schema) throws UnexpectedFormatException {
  // isEmpty() is the idiomatic form of size() < 1; null check must come first to short-circuit.
  if (schema == null || schema.getType() != Schema.Type.RECORD || schema.getFields().isEmpty()) {
    throw new UnexpectedFormatException("Schema must be a record with at least one field.");
  }
  return new Builder(schema);
}
/**
 * Get a builder for creating a record with the given schema.
 *
 * @param schema schema for the record to build.
 * @return builder for creating a record with the given schema.
 * @throws UnexpectedFormatException if the given schema is not a record with at least one field.
 */
public static Builder builder(Schema schema) throws UnexpectedFormatException {
  // The schema must be a non-null RECORD containing at least one field.
  boolean isRecord = schema != null && schema.getType() == Schema.Type.RECORD;
  if (!isRecord || schema.getFields().size() < 1) {
    throw new UnexpectedFormatException("Schema must be a record with at least one field.");
  }
  return new Builder(schema);
}
/**
 * Converts a value against a UNION schema by trying each branch in order.
 *
 * @param value the value to convert; may be null
 * @param schemas the union's branch schemas, tried in declaration order
 * @return the converted value from the first branch that accepts it, or null if the value is
 *         null and the union contains a NULL branch
 * @throws UnexpectedFormatException if no branch can convert the value
 */
private static Object convertUnion(Object value, List<Schema> schemas) {
  boolean isNullable = false;
  for (Schema possibleSchema : schemas) {
    if (possibleSchema.getType() == Schema.Type.NULL) {
      isNullable = true;
      // A null value matches a NULL branch immediately.
      if (value == null) {
        return value;
      }
    } else {
      try {
        // First branch that converts successfully wins.
        return convertField(value, possibleSchema);
      } catch (Exception e) {
        // if we couldn't convert, move to the next possibility
      }
    }
  }
  // No non-null branch matched; a null is still acceptable if the union had a NULL branch
  // that appeared before a successful conversion attempt.
  if (isNullable) {
    return null;
  }
  throw new UnexpectedFormatException("Unable to determine the union type.");
}
/**
 * Looks up the named field in the schema and checks that {@code val} is assignable to it.
 *
 * @param fieldName name of the field to look up
 * @param val the candidate value; null is only legal for NULL-typed or nullable-union fields
 * @return the schema field for {@code fieldName}
 * @throws UnexpectedFormatException if the field is missing, or a null value is given for a
 *         field that cannot hold null
 */
private Schema.Field validateAndGetField(String fieldName, Object val) {
  Schema.Field field = schema.getField(fieldName);
  if (field == null) {
    throw new UnexpectedFormatException("field " + fieldName + " is not in the schema.");
  }
  // Non-null values need no nullability check.
  if (val != null) {
    return field;
  }
  Schema fieldSchema = field.getSchema();
  Schema.Type type = fieldSchema.getType();
  if (type == Schema.Type.NULL) {
    return field;
  }
  if (type == Schema.Type.UNION) {
    // A union accepts null only if one of its branches is NULL.
    for (Schema branch : fieldSchema.getUnionSchemas()) {
      if (branch.getType() == Schema.Type.NULL) {
        return field;
      }
    }
  }
  throw new UnexpectedFormatException("field " + fieldName + " cannot be set to a null value.");
}
}
/**
 * Decodes an Avro-encoded event body into a {@link StructuredRecord}.
 *
 * @param event the Avro-encoded bytes to decode
 * @return the decoded record
 * @throws UnexpectedFormatException if the bytes cannot be decoded as Avro
 */
@Override
public StructuredRecord read(ByteBuffer event) {
  try {
    // Reuse the existing decoder instance where possible to avoid reallocation.
    binaryDecoder = decoderFactory.binaryDecoder(byteBufferInput.reset(event), binaryDecoder);
    return datumReader.read(null, binaryDecoder);
  } catch (IOException e) {
    throw new UnexpectedFormatException("Unable to decode the stream body as avro.", e);
  }
}
/**
 * Looks up the named field in the schema and checks that {@code val} is assignable to it.
 *
 * @param fieldName name of the field to look up
 * @param val the candidate value; null is only legal for NULL-typed or nullable-union fields
 * @return the schema field for {@code fieldName}
 * @throws UnexpectedFormatException if the field is missing, or a null value is given for a
 *         field that cannot hold null
 */
private Schema.Field validateAndGetField(String fieldName, Object val) {
  Schema.Field field = schema.getField(fieldName);
  if (field == null) {
    throw new UnexpectedFormatException("field " + fieldName + " is not in the schema.");
  }
  Schema fieldSchema = field.getSchema();
  if (val == null) {
    // A NULL-typed field trivially accepts null.
    if (fieldSchema.getType() == Schema.Type.NULL) {
      return field;
    }
    // Anything other than a union cannot accept null at this point.
    if (fieldSchema.getType() != Schema.Type.UNION) {
      throw new UnexpectedFormatException("field " + fieldName + " cannot be set to a null value.");
    }
    // A union accepts null only if one of its branches is NULL.
    for (Schema unionSchema : fieldSchema.getUnionSchemas()) {
      if (unionSchema.getType() == Schema.Type.NULL) {
        return field;
      }
    }
    throw new UnexpectedFormatException("field " + fieldName + " cannot be set to a null value.");
  }
  return field;
}
}
@Override public StructuredRecord make(Schema schema, Iterator<String> bodyFields) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); Iterator<Schema.Field> fieldsIterator = schema.getFields().iterator(); while (fieldsIterator.hasNext()) { Schema.Field field = fieldsIterator.next(); Schema fieldSchema = field.getSchema(); String fieldName = field.getName(); if (isStringArray(fieldSchema)) { if (!fieldsIterator.hasNext()) { // only do varargs-style string array parsing on bodyField if it's the last field List<String> fields = Lists.newArrayList(bodyFields); builder.set(fieldName, fields.toArray(new String[fields.size()])); } else { throw new UnexpectedFormatException( String.format("string array type field '%s' must be the last schema field", fieldName)); } } else { // simple type (not string array) String bodyField = bodyFields.hasNext() ? bodyFields.next() : null; String val = parseBodyValue(bodyField, fieldSchema); builder.convertAndSet(fieldName, val); } } return builder.build(); } }
/**
 * Builds a {@link StructuredRecord} from delimited body fields, pairing each body value with
 * the corresponding schema field in order.
 *
 * @param schema the record schema
 * @param bodyFields the parsed body values, in schema-field order
 * @return the built record
 * @throws UnexpectedFormatException if a string-array field appears anywhere but last
 */
@Override
public StructuredRecord make(Schema schema, Iterator<String> bodyFields) {
  StructuredRecord.Builder builder = StructuredRecord.builder(schema);
  Iterator<Schema.Field> fieldsIterator = schema.getFields().iterator();
  while (fieldsIterator.hasNext()) {
    Schema.Field field = fieldsIterator.next();
    Schema fieldSchema = field.getSchema();
    String fieldName = field.getName();
    if (isStringArray(fieldSchema)) {
      if (!fieldsIterator.hasNext()) {
        // only do varargs-style string array parsing on bodyField if it's the last field
        List<String> fields = Lists.newArrayList(bodyFields);
        builder.set(fieldName, fields.toArray(new String[fields.size()]));
      } else {
        throw new UnexpectedFormatException(
          String.format("string array type field '%s' must be the last schema field", fieldName));
      }
    } else {
      // simple type (not string array): consume at most one body value, null if exhausted
      String bodyField = bodyFields.hasNext() ? bodyFields.next() : null;
      String val = parseBodyValue(bodyField, fieldSchema);
      builder.convertAndSet(fieldName, val);
    }
  }
  return builder.build();
}
}
private List<Object> flattenRecord(Object obj, List<String> fieldNames, List<TypeInfo> fieldTypes, Schema schema) throws NoSuchFieldException, IllegalAccessException { boolean isNullable = schema.isNullable(); if (obj == null) { if (isNullable) { return null; } else { throw new UnexpectedFormatException("Non-nullable field is null."); } } if (isNullable) { schema = schema.getNonNullable(); } Map<String, Schema.Field> fieldMap = getFieldMap(schema); List<Object> objectFields = Lists.newArrayListWithCapacity(fieldNames.size()); for (int i = 0; i < fieldNames.size(); i++) { String hiveName = fieldNames.get(i); TypeInfo fieldType = fieldTypes.get(i); Schema.Field schemaField = fieldMap.get(hiveName); // use the name from the schema field in case it is not all lowercase Object recordField = getRecordField(obj, schemaField.getName()); objectFields.add(deserializeField(recordField, fieldType, schemaField.getSchema())); } return objectFields; }
/**
 * Flattens a record object into a list of deserialized field values, ordered to match the
 * given Hive field names/types.
 *
 * @param obj the record object to flatten; may be null only if the schema is nullable
 * @param fieldNames Hive column names (typically lowercase)
 * @param fieldTypes Hive type info, parallel to {@code fieldNames}
 * @param schema the record's schema
 * @return the flattened field values, or null when {@code obj} is null and the schema allows it
 * @throws UnexpectedFormatException if a non-nullable record is null
 */
private List<Object> flattenRecord(Object obj, List<String> fieldNames, List<TypeInfo> fieldTypes,
                                   Schema schema) throws NoSuchFieldException, IllegalAccessException {
  boolean isNullable = schema.isNullable();
  if (obj == null) {
    if (isNullable) {
      return null;
    } else {
      throw new UnexpectedFormatException("Non-nullable field is null.");
    }
  }
  if (isNullable) {
    // Work with the concrete (non-null) branch of the nullable schema.
    schema = schema.getNonNullable();
  }
  Map<String, Schema.Field> fieldMap = getFieldMap(schema);
  List<Object> objectFields = Lists.newArrayListWithCapacity(fieldNames.size());
  for (int i = 0; i < fieldNames.size(); i++) {
    String hiveName = fieldNames.get(i);
    TypeInfo fieldType = fieldTypes.get(i);
    // NOTE(review): if a Hive column has no matching schema field, fieldMap.get returns
    // null and the next line throws a raw NPE — consider an explicit check.
    Schema.Field schemaField = fieldMap.get(hiveName);
    // use the name from the schema field in case it is not all lowercase
    Object recordField = getRecordField(obj, schemaField.getName());
    objectFields.add(deserializeField(recordField, fieldType, schemaField.getSchema()));
  }
  return objectFields;
}
/** * Build a {@link StructuredRecord} with the fields set by this builder. * * @return a {@link StructuredRecord} with the fields set by this builder * @throws UnexpectedFormatException if there is at least one non-nullable field without a value */ public StructuredRecord build() throws UnexpectedFormatException { // check that all non-nullable fields have a value. for (Schema.Field field : schema.getFields()) { String fieldName = field.getName(); if (!fields.containsKey(fieldName)) { // if the field is not nullable and there is no value set for the field, this is invalid. if (!field.getSchema().isNullable()) { throw new UnexpectedFormatException("Field " + fieldName + " must contain a value."); } else { // otherwise, set the value for the field to null fields.put(fieldName, null); } } } return new StructuredRecord(schema, fields); }
/**
 * Build a {@link StructuredRecord} with the fields set by this builder.
 *
 * @return a {@link StructuredRecord} with the fields set by this builder
 * @throws UnexpectedFormatException if there is at least one non-nullable field without a value
 */
public StructuredRecord build() throws UnexpectedFormatException {
  // check that all non-nullable fields have a value.
  for (Schema.Field field : schema.getFields()) {
    String fieldName = field.getName();
    if (!fields.containsKey(fieldName)) {
      // if the field is not nullable and there is no value set for the field, this is invalid.
      if (!field.getSchema().isNullable()) {
        throw new UnexpectedFormatException("Field " + fieldName + " must contain a value.");
      } else {
        // otherwise, set the value for the field to null
        fields.put(fieldName, null);
      }
    }
  }
  return new StructuredRecord(schema, fields);
}
/**
 * Decodes a stream event body as Avro into a {@link StructuredRecord}, honoring a per-event
 * writer schema carried in the event headers.
 *
 * @param event the stream event to decode
 * @return the decoded record
 * @throws UnexpectedFormatException if the body cannot be decoded as Avro
 */
@Override
public StructuredRecord read(StreamEvent event) {
  try {
    // Check if the event has different schema then the read schema. If it does update the datumReader
    String eventSchemaStr = event.getHeaders().get(SCHEMA);
    if (eventSchemaStr != null) {
      String eventSchemaHash = event.getHeaders().get(SCHEMA_HASH);
      // Only re-parse the schema when the hash differs from the cached one — parsing is
      // expensive, so the hash acts as a cheap change detector.
      if (!this.eventSchemaHash.equals(eventSchemaHash)) {
        org.apache.avro.Schema eventSchema = new org.apache.avro.Schema.Parser().parse(eventSchemaStr);
        datumReader.setSchema(eventSchema);
        this.eventSchemaHash = eventSchemaHash;
      }
    } else {
      // If no schema is available on the event, assume it's the same as read schema
      datumReader.setSchema(avroFormatSchema);
      eventSchemaHash = formatSchemaHash;
    }
    // Reuse the decoder instance across calls to avoid reallocation.
    binaryDecoder = decoderFactory.binaryDecoder(byteBufferInput.reset(event.getBody()), binaryDecoder);
    return datumReader.read(null, binaryDecoder);
  } catch (IOException e) {
    throw new UnexpectedFormatException("Unable to decode the stream body as avro.", e);
  }
}