/**
 * Determines whether the given data type / value pair should be treated as a Record.
 *
 * @param dataType the declared data type; must not be null
 * @param value the value to inspect; may be null
 * @return {@code true} if the declared field type is RECORD, or if the value itself
 *         is a {@code Record} instance; {@code false} otherwise
 */
public static boolean isRecord(final DataType dataType, final Object value) {
    // instanceof evaluates to false for null, so a null value can only qualify
    // through the declared field type.
    return dataType.getFieldType() == RecordFieldType.RECORD || value instanceof Record;
}
}
/**
 * Determines whether the value, interpreted under the given data type, is scalar,
 * i.e. neither an ARRAY, a MAP nor a RECORD.
 *
 * <p>For a CHOICE type the concrete sub-type is first selected via
 * {@code chooseDataType}; if no sub-type is selected the result is {@code false}.</p>
 *
 * @param dataType the declared data type; must not be null
 * @param value the value used to resolve a CHOICE type
 * @return {@code true} if the resolved field type is not ARRAY, MAP or RECORD
 */
public static boolean isScalarValue(final DataType dataType, final Object value) {
    RecordFieldType effectiveType = dataType.getFieldType();

    if (effectiveType == RecordFieldType.CHOICE) {
        final DataType resolved = chooseDataType(value, (ChoiceDataType) dataType);
        if (resolved == null) {
            return false;
        }
        effectiveType = resolved.getFieldType();
    }

    switch (effectiveType) {
        case ARRAY:
        case MAP:
        case RECORD:
            return false;
        default:
            return true;
    }
}
protected Object convert(final DataType fieldType, final Object rawValue, final String fieldName) { if (fieldType == null) { return rawValue; } if (rawValue == null) { return null; } // If string is empty then return an empty string if field type is STRING. If field type is // anything else, we can't really convert it so return null final boolean fieldEmpty = rawValue instanceof String && ((String) rawValue).isEmpty(); if (fieldEmpty && fieldType.getFieldType() != RecordFieldType.STRING) { return null; } return DataTypeUtils.convertType(rawValue, fieldType, fieldName); }
/**
 * Converts string data to the given data type when that type is one of the
 * simple types listed in the switch below.
 *
 * @param data the string content to convert
 * @param fieldName the field name passed through to {@code DataTypeUtils.convertType}
 * @param dataType the target data type; must not be null
 * @return the converted value, or null for any field type not listed below
 */
private Object parseStringForType(String data, String fieldName, DataType dataType) {
    switch (dataType.getFieldType()) {
        case BOOLEAN:
        case BYTE:
        case CHAR:
        case DOUBLE:
        case FLOAT:
        case INT:
        case LONG:
        case SHORT:
        case STRING:
        case DATE:
        case TIME:
        case TIMESTAMP:
            return DataTypeUtils.convertType(data, dataType, LAZY_DATE_FORMAT, LAZY_TIME_FORMAT, LAZY_TIMESTAMP_FORMAT, fieldName);
        default:
            // Any other field type is not handled here.
            return null;
    }
}
/**
 * Two data types are equal when they have the same field type and the same format
 * (two null formats count as equal).
 *
 * @param obj the object to compare against; may be null
 * @return {@code true} if {@code obj} is a {@code DataType} with an equal field type and format
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    // instanceof is false for null, so this also rejects a null argument.
    if (!(obj instanceof DataType)) {
        return false;
    }

    final DataType that = (DataType) obj;
    if (!getFieldType().equals(that.getFieldType())) {
        return false;
    }
    return Objects.equals(getFormat(), that.getFormat());
}
/**
 * Checks whether a value is compatible with the given data type, first accepting a
 * few reader-specific runtime representations (GenericRecord/SpecificRecord for
 * RECORD, Utf8 for STRING, Array/List/ByteBuffer for ARRAY, Map for MAP —
 * presumably Avro runtime types; confirm against the file's imports) and otherwise
 * deferring to {@code DataTypeUtils.isCompatibleDataType}.
 *
 * @param value the value to test; a null value is never compatible
 * @param dataType the data type to test against; must not be null
 * @return {@code true} if the value is compatible with the data type
 */
private static boolean isCompatibleDataType(final Object value, final DataType dataType) {
    if (value == null) {
        return false;
    }

    final boolean nativeMatch;
    switch (dataType.getFieldType()) {
        case RECORD:
            nativeMatch = value instanceof GenericRecord || value instanceof SpecificRecord;
            break;
        case STRING:
            nativeMatch = value instanceof Utf8;
            break;
        case ARRAY:
            nativeMatch = value instanceof Array || value instanceof List || value instanceof ByteBuffer;
            break;
        case MAP:
            nativeMatch = value instanceof Map;
            break;
        default:
            nativeMatch = false;
            break;
    }

    // Short-circuit: the generic check is only consulted when no native match was found.
    return nativeMatch || DataTypeUtils.isCompatibleDataType(value, dataType);
}
/**
 * Validates that the given parent can serve as the parent of an ArrayIndexFieldValue.
 *
 * @param parent the candidate parent field value
 * @return the same {@code parent} instance, once validated
 * @throws NullPointerException if {@code parent} is null
 * @throws IllegalArgumentException if the parent's field type is not ARRAY, or if the
 *         parent's value is null
 */
private static FieldValue validateParent(final FieldValue parent) {
    Objects.requireNonNull(parent, "Cannot create an ArrayIndexFieldValue without a parent");

    final RecordFieldType parentType = parent.getField().getDataType().getFieldType();
    if (parentType != RecordFieldType.ARRAY) {
        throw new IllegalArgumentException("Cannot create an ArrayIndexFieldValue with a parent of type " + parentType);
    }

    if (parent.getValue() == null) {
        throw new IllegalArgumentException("Cannot create an ArrayIndexFieldValue without a parent Record");
    }

    return parent;
}
/**
 * Renders this data type as its field type, followed by {@code ":"} and the format
 * when a format is set.
 *
 * @return the field type's string form, optionally suffixed with {@code ":<format>"}
 */
@Override
public String toString() {
    final String format = getFormat();
    final String typeName = getFieldType().toString();
    return format == null ? typeName : typeName + ":" + format;
}
}
/**
 * Selects the format string to use for the given field.
 *
 * @param field the field whose format is requested; must not be null
 * @return this instance's {@code dateFormat}, {@code timeFormat} or {@code timestampFormat}
 *         for DATE, TIME and TIMESTAMP fields respectively; for every other field type,
 *         the format carried by the field's own data type
 */
private String getFormat(final RecordField field) {
    final DataType fieldDataType = field.getDataType();

    switch (fieldDataType.getFieldType()) {
        case DATE:
            return dateFormat;
        case TIME:
            return timeFormat;
        case TIMESTAMP:
            return timestampFormat;
        default:
            return fieldDataType.getFormat();
    }
}
/**
 * Builds a predicate that accepts a {@code FieldValue} whose field type is either
 * {@code fieldType} or one of {@code alternativeTypes}.
 *
 * @param fieldType the primary field type to accept
 * @param alternativeTypes further field types to accept
 * @return a predicate returning {@code true} for field values of any of the given types
 */
public static Predicate<FieldValue> fieldTypeFilter(final RecordFieldType fieldType, final RecordFieldType... alternativeTypes) {
    return candidate -> {
        final RecordFieldType actualType = candidate.getField().getDataType().getFieldType();

        boolean accepted = actualType == fieldType;
        // The alternatives are only consulted when the primary type did not match.
        for (int i = 0; !accepted && i < alternativeTypes.length; i++) {
            accepted = actualType == alternativeTypes[i];
        }
        return accepted;
    };
}
/**
 * Computes a hash from the field type and the format; a null format contributes 0.
 *
 * @return the hash code of this data type
 */
@Override
public int hashCode() {
    final int typeHash = getFieldType().hashCode();
    final String format = getFormat();
    final int formatHash = (format == null) ? 0 : format.hashCode();
    return 31 + 41 * typeHash + 41 * formatHash;
}
/**
 * Resolves the 'canonical' data type for a value: for a CHOICE type this is the
 * sub-type selected by {@code DataTypeUtils.chooseDataType}; for any other type it is
 * the data type itself.
 *
 * <p>If no sub-type of a CHOICE is selected, an INVALID_FIELD validation error is
 * recorded on {@code result} and null is returned.</p>
 *
 * @param dataType the declared data type; must not be null
 * @param rawValue the value being validated; may be null
 * @param result the validation result that receives any error
 * @param fieldPrefix prefix used (together with {@code field}) to build the reported field name
 * @param field the field being validated
 * @return the canonical data type, or null if a CHOICE could not be resolved
 */
private DataType getCanonicalDataType(final DataType dataType, final Object rawValue, final StandardSchemaValidationResult result, final String fieldPrefix,
    final RecordField field) {

    if (dataType.getFieldType() != RecordFieldType.CHOICE) {
        return dataType;
    }

    final DataType canonicalDataType = DataTypeUtils.chooseDataType(rawValue, (ChoiceDataType) dataType);
    if (canonicalDataType == null) {
        // rawValue may be null (e.g. a null element inside an array); describe it
        // without dereferencing so that a validation error is reported instead of
        // a NullPointerException being thrown.
        final String actualType = (rawValue == null) ? "null" : rawValue.getClass().getName();
        result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
            "Value is of type " + actualType + " but was expected to be of type " + dataType));
    }

    return canonicalDataType;
}
private void verifyComplexType(final DataType dataType, final Object rawValue, final StandardSchemaValidationResult result, final String fieldPrefix, final RecordField field) { // If the field type is RECORD, or if the field type is a CHOICE that allows for a RECORD and the value is a RECORD, then we // need to dig into each of the sub-fields. To do this, we first need to determine the 'canonical data type'. final DataType canonicalDataType = getCanonicalDataType(dataType, rawValue, result, fieldPrefix, field); if (canonicalDataType == null) { return; } // Now that we have the 'canonical data type', we check if it is a Record. If so, we need to validate each sub-field. if (canonicalDataType.getFieldType() == RecordFieldType.RECORD) { verifyChildRecord(canonicalDataType, rawValue, dataType, result, field, fieldPrefix); } if (canonicalDataType.getFieldType() == RecordFieldType.ARRAY) { final ArrayDataType arrayDataType = (ArrayDataType) canonicalDataType; final DataType elementType = arrayDataType.getElementType(); final Object[] arrayObject = (Object[]) rawValue; int i=0; for (final Object arrayValue : arrayObject) { verifyComplexType(elementType, arrayValue, result, fieldPrefix + "[" + i + "]", field); i++; } } }
/**
 * Selects the first possible sub-type of a CHOICE that is compatible with the value.
 * When that sub-type is itself a CHOICE, the selection recurses into it and the
 * recursive result (possibly null) is returned.
 *
 * @param value the value to match against the possible sub-types
 * @param choiceType the CHOICE type whose sub-types are considered; must not be null
 * @return the chosen data type, or null if no sub-type is compatible
 */
public static DataType chooseDataType(final Object value, final ChoiceDataType choiceType) {
    for (final DataType candidate : choiceType.getPossibleSubTypes()) {
        if (!isCompatibleDataType(value, candidate)) {
            continue;
        }

        return candidate.getFieldType() == RecordFieldType.CHOICE
            ? chooseDataType(value, (ChoiceDataType) candidate)
            : candidate;
    }

    return null;
}
/**
 * Sets a single entry in the map held by the given MAP-typed field.
 *
 * <p>The call is a no-op if the schema has no field with the given name, or if the
 * value currently stored for the field is not a {@code Map}. If the field currently
 * has no value, a new map is created and stored for the field so that the entry is
 * retained. The value is coerced to the map's declared value type before it is
 * stored. The cached serialized form is cleared when the entry is new or changed.</p>
 *
 * @param fieldName the name of the MAP field
 * @param mapKey the key of the entry to set
 * @param value the value to store, coerced via {@code DataTypeUtils.convertType}
 * @throws IllegalTypeConversionException if the field is not of type MAP
 */
@Override
@SuppressWarnings("unchecked")
public void setMapValue(final String fieldName, final String mapKey, final Object value) {
    final Optional<RecordField> field = getSchema().getField(fieldName);
    if (!field.isPresent()) {
        return;
    }

    final RecordField recordField = field.get();
    final DataType dataType = recordField.getDataType();
    if (dataType.getFieldType() != RecordFieldType.MAP) {
        throw new IllegalTypeConversionException("Cannot set the value of map entry on Record because the field '" + fieldName
            + "' is of type '" + dataType + "' and cannot be coerced into an MAP type");
    }

    final String storedFieldName = recordField.getFieldName();
    final Object existing = values.get(storedFieldName);
    if (existing != null && !(existing instanceof Map)) {
        return;
    }

    final DataType valueDataType = ((MapDataType) dataType).getValueType();
    final Object coerced = DataTypeUtils.convertType(value, valueDataType, fieldName);

    final Map<String, Object> map;
    if (existing == null) {
        // No map yet for this field: create one AND register it in the record's values.
        // It is stored only after the value has been coerced, so a failed conversion
        // leaves the record untouched.
        map = new HashMap<>();
        values.put(storedFieldName, map);
    } else {
        map = (Map<String, Object>) existing;
    }

    final Object replaced = map.put(mapKey, coerced);
    if (replaced == null || !replaced.equals(coerced)) {
        serializedForm = Optional.empty();
    }
}
/**
 * Converts each SolrDocument into a Record of the given schema.
 *
 * <p>For every schema field, the document's value for that field name is copied into
 * the record when it is non-null. Values of ARRAY-typed fields are cast to
 * {@code List} and stored as an {@code Object[]}; all other values are stored as-is.</p>
 *
 * @param docs the Solr documents to convert
 * @param schema the schema that determines which fields are read
 * @return a record set containing one record per document, in input order
 */
public static RecordSet solrDocumentsToRecordSet(final List<SolrDocument> docs, final RecordSchema schema) {
    final List<Record> records = new ArrayList<>();

    for (final SolrDocument doc : docs) {
        final Map<String, Object> valuesByName = new LinkedHashMap<>();

        for (final RecordField field : schema.getFields()) {
            final String name = field.getFieldName();
            final Object solrValue = doc.getFieldValue(name);
            if (solrValue == null) {
                continue;
            }

            final boolean arrayField = field.getDataType().getFieldType().equals(RecordFieldType.ARRAY);
            valuesByName.put(name, arrayField ? ((List<?>) solrValue).toArray() : solrValue);
        }

        records.add(new MapRecord(schema, valuesByName));
    }

    return new ListRecordSet(schema, records);
}
.filter(Filters.fieldTypeFilter(RecordFieldType.MAP, RecordFieldType.ARRAY)) .flatMap(fieldValue -> { final RecordFieldType fieldType = fieldValue.getField().getDataType().getFieldType();
private void verifyChildRecord(final DataType canonicalDataType, final Object rawValue, final DataType expectedDataType, final StandardSchemaValidationResult result, final RecordField field, final String fieldPrefix) { // Now that we have the 'canonical data type', we check if it is a Record. If so, we need to validate each sub-field. if (canonicalDataType.getFieldType() == RecordFieldType.RECORD) { if (!(rawValue instanceof Record)) { // sanity check result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD, "Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + expectedDataType)); return; } final RecordDataType recordDataType = (RecordDataType) canonicalDataType; final RecordSchema childSchema = recordDataType.getChildSchema(); final String fullChildFieldName = concat(fieldPrefix, field); final SchemaValidationResult childValidationResult = validate((Record) rawValue, childSchema, fullChildFieldName); if (childValidationResult.isValid()) { return; } for (final ValidationError validationError : childValidationResult.getValidationErrors()) { result.addValidationError(validationError); } } }
/**
 * Maps a primitive record field type to the corresponding primitive type info
 * obtained from {@code TypeInfoFactory.getPrimitiveTypeInfo}.
 *
 * <p>Mapping: INT to "int", LONG to "bigint", BOOLEAN to "boolean", DOUBLE to "double",
 * FLOAT to "float", STRING to "string".</p>
 *
 * @param rawDataType the data type to map
 * @return the primitive type info for the data type's field type
 * @throws IllegalArgumentException if {@code rawDataType} is null or its field type is
 *         not one of the types listed above
 */
public static TypeInfo getPrimitiveOrcTypeFromPrimitiveFieldType(DataType rawDataType) throws IllegalArgumentException {
    if (rawDataType == null) {
        throw new IllegalArgumentException("Avro type is null");
    }

    final RecordFieldType fieldType = rawDataType.getFieldType();
    final String orcTypeName;

    if (fieldType == RecordFieldType.INT) {
        orcTypeName = "int";
    } else if (fieldType == RecordFieldType.LONG) {
        orcTypeName = "bigint";
    } else if (fieldType == RecordFieldType.BOOLEAN) {
        orcTypeName = "boolean";
    } else if (fieldType == RecordFieldType.DOUBLE) {
        orcTypeName = "double";
    } else if (fieldType == RecordFieldType.FLOAT) {
        orcTypeName = "float";
    } else if (fieldType == RecordFieldType.STRING) {
        orcTypeName = "string";
    } else {
        throw new IllegalArgumentException("Field type " + fieldType.name() + " is not a primitive type");
    }

    return TypeInfoFactory.getPrimitiveTypeInfo(orcTypeName);
}
/**
 * Walks the fields of the schema and writes the record's value for each one.
 *
 * <p>For every field the value is read from the record, a CHOICE type is resolved
 * via {@code DataTypeUtils.chooseDataType}, and the value is coerced to the resulting
 * type. A non-null coerced value is handed to {@code writeFieldForType}. For a null
 * coerced value, {@code writeAllTags} is called followed by
 * {@code writer.writeEndElement()} when null suppression is NEVER_SUPPRESS, or when
 * it is SUPPRESS_MISSING and {@code recordHasField} reports the field as present.</p>
 *
 * @param tagsToOpen passed through to the write helpers (presumably the tags still pending to be opened — confirm)
 * @param record the record whose values are written
 * @param schema the schema whose fields drive the iteration
 * @return {@code true} if anything was written for at least one field
 * @throws XMLStreamException declared for failures of the underlying writer
 */
private boolean iterateThroughRecordUsingSchema(Deque<String> tagsToOpen, Record record, RecordSchema schema) throws XMLStreamException {
    boolean wroteAnything = false;

    for (final RecordField field : schema.getFields()) {
        final String fieldName = field.getFieldName();
        DataType effectiveType = field.getDataType();
        final Object value = record.getValue(field);

        if (effectiveType.getFieldType() == RecordFieldType.CHOICE) {
            effectiveType = DataTypeUtils.chooseDataType(value, (ChoiceDataType) effectiveType);
        }

        final Object coercedValue = DataTypeUtils.convertType(value, effectiveType, LAZY_DATE_FORMAT, LAZY_TIME_FORMAT, LAZY_TIMESTAMP_FORMAT, fieldName);

        if (coercedValue != null) {
            if (writeFieldForType(tagsToOpen, coercedValue, effectiveType, fieldName)) {
                wroteAnything = true;
            }
            continue;
        }

        // Null value: '&&' binds tighter than '||', made explicit here with parentheses.
        final boolean writeEmptyElement = nullSuppression.equals(NullSuppression.NEVER_SUPPRESS)
            || (nullSuppression.equals(NullSuppression.SUPPRESS_MISSING) && recordHasField(field, record));

        if (writeEmptyElement) {
            writeAllTags(tagsToOpen, fieldName);
            writer.writeEndElement();
            wroteAnything = true;
        }
    }

    return wroteAnything;
}