/**
 * Two schemas are considered equal when they declare the same list of fields.
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    // instanceof is false for null, so no separate null check is needed.
    if (!(obj instanceof RecordSchema)) {
        return false;
    }
    final RecordSchema that = (RecordSchema) obj;
    return fields.equals(that.getFields());
}
// Creates a record writer that streams NiFi Records into an ORC file on HDFS.
// orcWriter       - the underlying ORC Writer to append rows to
// recordSchema    - the NiFi schema describing incoming records
// hiveTableName   - target Hive table name (used for DDL/attribute generation)
// hiveFieldNames  - when true, field names are normalized for Hive compatibility
// NOTE(review): NiFiOrcUtils.getOrcSchema(recordSchema, ...) is invoked before the
// recordSchema null-guard below; if callers can ever pass a null schema, that call
// would be reached first — confirm whether null is actually possible here.
public ORCHDFSRecordWriter(final Writer orcWriter, final RecordSchema recordSchema, final String hiveTableName, final boolean hiveFieldNames) {
    this.recordSchema = recordSchema;
    this.orcWriter = orcWriter;
    this.hiveFieldNames = hiveFieldNames;
    // Derive the ORC TypeInfo once up front so it isn't recomputed per record.
    this.orcSchema = NiFiOrcUtils.getOrcSchema(recordSchema, this.hiveFieldNames);
    this.hiveTableName = hiveTableName;
    this.recordFields = recordSchema != null ? recordSchema.getFields() : null;
    // -1 signals "no usable schema"; checked by the workingRow guard below.
    this.numRecordFields = recordFields != null ? recordFields.size() : -1;
    // Reuse row object
    this.workingRow = numRecordFields > -1 ? new Object[numRecordFields] : null;
}
/**
 * Collects every addressable column name in the schema: each field's primary
 * name followed by all of its aliases, in schema declaration order.
 */
private List<String> getColumnNames(final RecordSchema schema) {
    final List<String> names = new ArrayList<>();
    for (final RecordField field : schema.getFields()) {
        names.add(field.getFieldName());
        names.addAll(field.getAliases());
    }
    return names;
}
/**
 * Builds a Hive STRUCT type declaration (e.g. {@code STRUCT<name:STRING, age:INT>})
 * from the given record schema.
 *
 * @param recordSchema   the NiFi schema to convert
 * @param hiveFieldNames when true, field names are lower-cased for Hive compatibility
 * @return the STRUCT declaration, or null if the schema exposes no field list
 * @throws IllegalArgumentException if a field's data type cannot be mapped to a Hive type
 */
public static String getHiveSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields == null) {
        return null;
    }
    List<String> hiveFields = new ArrayList<>(recordFields.size());
    for (RecordField recordField : recordFields) {
        // Locale.ROOT avoids locale-sensitive lowercasing (e.g. Turkish dotless-i)
        // corrupting identifiers when the JVM runs under a non-English default locale.
        final String fieldName = hiveFieldNames
                ? recordField.getFieldName().toLowerCase(java.util.Locale.ROOT)
                : recordField.getFieldName();
        hiveFields.add(fieldName + ":" + getHiveTypeFromFieldType(recordField.getDataType(), hiveFieldNames));
    }
    return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">";
}
/**
 * Generates a {@code CREATE EXTERNAL TABLE IF NOT EXISTS ... STORED AS ORC} DDL
 * statement for the given schema. Column names are backtick-quoted.
 *
 * @param recordSchema   the NiFi schema describing the table columns
 * @param tableName      the Hive table name to create
 * @param hiveFieldNames when true, column names are lower-cased for Hive compatibility
 * @return the DDL statement as a String
 */
public static String generateHiveDDL(RecordSchema recordSchema, String tableName, boolean hiveFieldNames) {
    StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS `");
    sb.append(tableName);
    sb.append("` (");
    List<String> hiveColumns = new ArrayList<>();
    List<RecordField> fields = recordSchema.getFields();
    if (fields != null) {
        hiveColumns.addAll(
                fields.stream().map(field -> "`"
                        // Locale.ROOT prevents locale-sensitive lowercasing (Turkish-I problem)
                        // from corrupting column identifiers.
                        + (hiveFieldNames ? field.getFieldName().toLowerCase(java.util.Locale.ROOT) : field.getFieldName()) + "` "
                        + getHiveTypeFromFieldType(field.getDataType(), hiveFieldNames)).collect(Collectors.toList()));
    }
    sb.append(StringUtils.join(hiveColumns, ", "));
    sb.append(") STORED AS ORC");
    return sb.toString();
}
/**
 * Converts a NiFi RecordSchema into an Avro record schema named "nifiRecord"
 * in the "org.apache.nifi" namespace.
 */
private static Schema buildAvroSchema(final RecordSchema recordSchema) {
    final List<Field> fields = new ArrayList<>(recordSchema.getFieldCount());
    recordSchema.getFields().forEach(recordField -> fields.add(buildAvroField(recordField)));
    return Schema.createRecord("nifiRecord", null, "org.apache.nifi", false, fields);
}
/**
 * Converts a NiFi RecordSchema into an ORC struct TypeInfo, preserving field order.
 *
 * @param recordSchema   the NiFi schema to convert
 * @param hiveFieldNames when true, field names are lower-cased for Hive compatibility
 * @return the struct TypeInfo, or null if the schema exposes no field list
 * @throws IllegalArgumentException if a field's data type has no ORC equivalent
 */
public static TypeInfo getOrcSchema(RecordSchema recordSchema, boolean hiveFieldNames) throws IllegalArgumentException {
    List<RecordField> recordFields = recordSchema.getFields();
    if (recordFields == null) {
        return null;
    }
    List<String> orcFieldNames = new ArrayList<>(recordFields.size());
    List<TypeInfo> orcFields = new ArrayList<>(recordFields.size());
    for (RecordField recordField : recordFields) {
        // Locale.ROOT avoids locale-sensitive lowercasing (e.g. Turkish dotless-i)
        // producing field names that no longer match the Hive table definition.
        String fieldName = hiveFieldNames
                ? recordField.getFieldName().toLowerCase(java.util.Locale.ROOT)
                : recordField.getFieldName();
        orcFieldNames.add(fieldName);
        orcFields.add(getOrcField(recordField.getDataType(), hiveFieldNames));
    }
    return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
}
/**
 * Builds a Record from the current row of the ResultSet. Fields declared in the
 * schema but absent from the result set's columns are set to null.
 *
 * @param rs the ResultSet positioned on the row to read
 * @throws SQLException if reading a column value fails
 */
protected Record createRecord(final ResultSet rs) throws SQLException {
    final Map<String, Object> values = new HashMap<>(schema.getFieldCount());
    for (final RecordField field : schema.getFields()) {
        final String name = field.getFieldName();
        final Object value = rsColumnNames.contains(name)
                ? normalizeValue(rs.getObject(name))
                : null;
        values.put(name, value);
    }
    return new MapRecord(schema, values);
}
/**
 * Streams the non-null child fields of the given value, wrapped as FieldValues
 * with this value as their parent. Yields an empty stream for null or
 * non-record values.
 */
private Stream<FieldValue> getChildren(final FieldValue fieldValue) {
    final boolean notARecord = fieldValue == null
            || fieldValue.getValue() == null
            || !Filters.isRecord(fieldValue);
    if (notARecord) {
        return Stream.empty();
    }
    final Record record = (Record) fieldValue.getValue();
    return Filters.presentValues(record.getSchema().getFields().stream()
            .map(childField -> {
                final Object childValue = record.getValue(childField);
                if (childValue == null) {
                    return Optional.empty();
                }
                return Optional.of(new StandardFieldValue(childValue, childField, fieldValue));
            }));
}
/**
 * Returns the record's values in schema field order. When no explicit value is
 * present for a field, the field's default value (possibly null) is used.
 */
@Override
public Object[] getValues() {
    final Object[] result = new Object[schema.getFieldCount()];
    int index = 0;
    for (final RecordField field : schema.getFields()) {
        final Object explicit = getExplicitValue(field);
        result[index++] = explicit != null ? explicit : field.getDefaultValue();
    }
    return result;
}
public static GenericRecord createAvroRecord(final Record record, final Schema avroSchema, final Charset charset) throws IOException { final GenericRecord rec = new GenericData.Record(avroSchema); final RecordSchema recordSchema = record.getSchema(); for (final RecordField recordField : recordSchema.getFields()) { final Object rawValue = record.getValue(recordField); Pair<String, Field> fieldPair = lookupField(avroSchema, recordField); final String fieldName = fieldPair.getLeft(); final Field field = fieldPair.getRight(); if (field == null) { continue; } final Object converted = convertToAvroObject(rawValue, field.schema(), fieldName, charset); rec.put(fieldName, converted); } // see if the Avro schema has any fields that aren't in the RecordSchema, and if those fields have a default // value then we want to populate it in the GenericRecord being produced for (final Field field : avroSchema.getFields()) { final Optional<RecordField> recordField = recordSchema.getField(field.name()); if (!recordField.isPresent() && rec.get(field.name()) == null && field.defaultVal() != null) { rec.put(field.name(), field.defaultVal()); } } return rec; }
/**
 * Converts a list of SolrDocuments into a RecordSet conforming to the given schema.
 * Only fields present (non-null) in a document are populated; ARRAY-typed fields
 * are converted from List to Object[].
 *
 * @param docs   the Solr documents to convert
 * @param schema the record schema to apply to every document
 * @return a ListRecordSet containing one Record per document
 */
public static RecordSet solrDocumentsToRecordSet(final List<SolrDocument> docs, final RecordSchema schema) {
    final List<Record> records = new ArrayList<>();
    for (SolrDocument doc : docs) {
        // LinkedHashMap preserves schema field order in the resulting record values.
        final Map<String, Object> recordValues = new LinkedHashMap<>();
        for (RecordField field : schema.getFields()) {
            final Object fieldValue = doc.getFieldValue(field.getFieldName());
            if (fieldValue == null) {
                continue;
            }
            // Enum constants are singletons, so identity comparison is the idiomatic check.
            if (field.getDataType().getFieldType() == RecordFieldType.ARRAY) {
                // NOTE(review): unchecked cast — assumes Solr returns multi-valued
                // fields as a List when the schema declares ARRAY; verify upstream.
                recordValues.put(field.getFieldName(), ((List<Object>) fieldValue).toArray());
            } else {
                recordValues.put(field.getFieldName(), fieldValue);
            }
        }
        records.add(new MapRecord(schema, recordValues));
    }
    return new ListRecordSet(schema, records);
}
public static Map<String, Object> convertAvroRecordToMap(final GenericRecord avroRecord, final RecordSchema recordSchema, final Charset charset) { final Map<String, Object> values = new HashMap<>(recordSchema.getFieldCount()); for (final RecordField recordField : recordSchema.getFields()) {
/**
 * Deserializes a wrapped NiFi Record into a row (List of column values) laid out
 * according to this SerDe's column order. Record fields that do not map to a
 * target-table column (including partition columns) are silently skipped.
 * Increments the row-count statistic on success.
 *
 * @param writable an ObjectWritable wrapping the NiFi Record to convert
 * @throws SerDeException if any field extraction fails
 */
@Override public Object deserialize(Writable writable) throws SerDeException { ObjectWritable t = (ObjectWritable) writable; Record record = (Record) t.get();
    // Pre-fill with nulls so columns absent from the record stay null at their positions.
    List<Object> r = new ArrayList<>(Collections.nCopies(columnNames.size(), null)); try { RecordSchema recordSchema = record.getSchema(); for (RecordField field : recordSchema.getFields()) { String fieldName = field.getFieldName();
    // fieldPositionMap is keyed by lower-cased names, so normalize before lookup.
    // NOTE(review): relies on the map being built with the same default-locale
    // lowercasing — keep the two sides consistent if either changes.
    String normalizedFieldName = fieldName.toLowerCase(); // Get column position of field name, and set field value there Integer fpos = fieldPositionMap.get(normalizedFieldName); if(fpos == null || fpos == -1) { // This is either a partition column or not a column in the target table, ignore either way continue; } Object currField = extractCurrentField(record, field, schema.getStructFieldTypeInfo(normalizedFieldName)); r.set(fpos, currField); } stats.setRowCount(stats.getRowCount() + 1); } catch (Exception e) { log.warn("Error [{}] parsing Record [{}].", new Object[]{e.toString(), t}, e); throw new SerDeException(e); } return r; }
/**
 * Recursively collects all non-null descendant field values of the given value.
 * Each descendant is wrapped as a FieldValue whose parent is the value it was
 * found in; record-typed children are descended into. Returns an empty list for
 * null or non-record inputs.
 */
private List<FieldValue> findDescendants(final FieldValue fieldValue) {
    if (fieldValue == null || fieldValue.getValue() == null) {
        return Collections.emptyList();
    }
    if (!Filters.isRecord(fieldValue)) {
        return Collections.emptyList();
    }

    final Record record = (Record) fieldValue.getValue();
    final List<FieldValue> matchingValues = new ArrayList<>();

    for (final RecordField childField : record.getSchema().getFields()) {
        final Object value = record.getValue(childField);
        if (value == null) {
            continue;
        }

        final FieldValue descendantFieldValue = new StandardFieldValue(value, childField, fieldValue);
        matchingValues.add(descendantFieldValue);

        if (Filters.isRecord(childField.getDataType(), value)) {
            // Reuse the wrapper created above rather than constructing an
            // identical second StandardFieldValue for the recursive call.
            matchingValues.addAll(findDescendants(descendantFieldValue));
        }
    }

    return matchingValues;
}
}
final List<FieldValue> matchingValues = new ArrayList<>(); for (final RecordField childField : record.getSchema().getFields()) { if (childField.getFieldName().equals(descendantName) || childField.getAliases().contains(descendantName)) { final Object value = record.getValue(descendantName);
/**
 * Lazily derives the Calcite row type from the FlowFile's record schema.
 * The first successful derivation is cached in {@code relDataType} (and the
 * schema in {@code recordSchema}) and returned on subsequent calls.
 *
 * @param typeFactory the Calcite type factory (must be a JavaTypeFactory)
 * @throws ProcessException if the record reader cannot determine the schema
 */
@Override
public RelDataType getRowType(final RelDataTypeFactory typeFactory) {
    if (relDataType != null) {
        return relDataType;
    }

    RecordSchema schema;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReader recordParser = recordParserFactory.createRecordReader(flowFile, in, logger);
        schema = recordParser.getSchema();
    } catch (final Exception e) {
        throw new ProcessException("Failed to determine schema of data records for " + flowFile, e);
    }

    final List<String> names = new ArrayList<>();
    final List<RelDataType> types = new ArrayList<>();

    final JavaTypeFactory javaTypeFactory = (JavaTypeFactory) typeFactory;
    for (final RecordField field : schema.getFields()) {
        names.add(field.getFieldName());
        // Renamed from 'relDataType' to avoid shadowing the field of the same
        // name that this method checks and assigns.
        final RelDataType fieldType = getRelDataType(field.getDataType(), javaTypeFactory);
        types.add(javaTypeFactory.createTypeWithNullability(fieldType, field.isNullable()));
    }

    logger.debug("Found Schema: {}", new Object[] {schema});

    if (recordSchema == null) {
        recordSchema = schema;
    }

    relDataType = typeFactory.createStructType(Pair.zip(names, types));
    return relDataType;
}
@Override public Map<String, String> writeRecord(final Record record) throws IOException { // If we are not writing an active record set, then we need to ensure that we write the // schema information. if (!isActiveRecordSet()) { schemaWriter.writeHeader(recordSchema, getOutputStream()); } includeHeaderIfNecessary(record, true); int i = 0; for (final RecordField recordField : recordSchema.getFields()) { fieldValues[i++] = record.getAsString(recordField, getFormat(recordField)); } printer.printRecord(fieldValues); return schemaWriter.getAttributes(recordSchema); }
/**
 * Writes each schema field of the record as XML. CHOICE types are resolved to
 * the concrete data type that matches the value before conversion. Null values
 * produce an empty element when the null-suppression policy requires it.
 *
 * @param tagsToOpen parent tags not yet emitted; flushed when content is written
 * @return true if this invocation wrote at least one element
 * @throws XMLStreamException if the underlying XML writer fails
 */
private boolean iterateThroughRecordUsingSchema(Deque<String> tagsToOpen, Record record, RecordSchema schema) throws XMLStreamException {
    boolean loopHasWritten = false;
    for (RecordField field : schema.getFields()) {
        String fieldName = field.getFieldName();
        DataType dataType = field.getDataType();
        Object value = record.getValue(field);

        // For CHOICE fields, pick the member type that actually matches the value.
        final DataType chosenDataType = dataType.getFieldType() == RecordFieldType.CHOICE
                ? DataTypeUtils.chooseDataType(value, (ChoiceDataType) dataType)
                : dataType;
        final Object coercedValue = DataTypeUtils.convertType(value, chosenDataType, LAZY_DATE_FORMAT, LAZY_TIME_FORMAT, LAZY_TIMESTAMP_FORMAT, fieldName);

        if (coercedValue != null) {
            boolean hasWritten = writeFieldForType(tagsToOpen, coercedValue, chosenDataType, fieldName);
            if (hasWritten) {
                loopHasWritten = true;
            }
        } else {
            // Emit an empty element for a null value when nulls are never suppressed,
            // or when only *missing* fields are suppressed and this field is present.
            // Parentheses added for clarity; grouping matches the original &&-over-|| precedence.
            if (nullSuppression.equals(NullSuppression.NEVER_SUPPRESS)
                    || (nullSuppression.equals(NullSuppression.SUPPRESS_MISSING) && recordHasField(field, record))) {
                writeAllTags(tagsToOpen, fieldName);
                writer.writeEndElement();
                loopHasWritten = true;
            }
        }
    }
    return loopHasWritten;
}
/**
 * Validates the given values against the schema: a field may be null only if it
 * is nullable or has a default, and every non-null value must be coercible to
 * the field's declared data type.
 *
 * @return the same values map, unchanged, when validation passes
 * @throws SchemaValidationException on the first violation found
 */
private Map<String, Object> checkTypes(final Map<String, Object> values, final RecordSchema schema) {
    for (final RecordField field : schema.getFields()) {
        final Object value = getExplicitValue(field, values);

        if (value == null) {
            final boolean nullAllowed = field.isNullable() || field.getDefaultValue() != null;
            if (!nullAllowed) {
                throw new SchemaValidationException("Field " + field.getFieldName() + " cannot be null");
            }
            continue;
        }

        if (!DataTypeUtils.isCompatibleDataType(value, field.getDataType())) {
            throw new SchemaValidationException("Field " + field.getFieldName() + " has a value of " + value
                + ", which cannot be coerced into the appropriate data type of " + field.getDataType());
        }
    }
    return values;
}