@Override public void writeHeader(final RecordSchema schema, final OutputStream out) throws IOException { final SchemaIdentifier identifier = schema.getIdentifier(); final Long id = identifier.getIdentifier().getAsLong(); // This encoding follows the pattern that is provided for serializing data by the Confluent Schema Registry serializer // as it is provided at: // http://docs.confluent.io/current/schema-registry/docs/serializer-formatter.html#wire-format // The format consists of the first byte always being 0, to indicate a 'magic byte' followed by 4 bytes // representing the schema id. final ByteBuffer bb = ByteBuffer.allocate(5); bb.put((byte) 0); bb.putInt(id.intValue()); out.write(bb.array()); }
@Override public void writeHeader(final RecordSchema schema, final OutputStream out) throws IOException { final SchemaIdentifier identifier = schema.getIdentifier(); final Long id = identifier.getIdentifier().getAsLong(); final Integer version = identifier.getVersion().getAsInt(); // This decoding follows the pattern that is provided for serializing data by the Hortonworks Schema Registry serializer // as it is provided at: // https://github.com/hortonworks/registry/blob/master/schema-registry/serdes/src/main/java/com/hortonworks/registries/schemaregistry/serdes/avro/AvroSnapshotSerializer.java final ByteBuffer bb = ByteBuffer.allocate(13); bb.put((byte) LATEST_PROTOCOL_VERSION); bb.putLong(id); bb.putInt(version); out.write(bb.array()); }
/**
 * Builds the FlowFile attributes describing the Hortonworks schema reference:
 * schema id, schema version, protocol version, and (when present) the branch name.
 */
@Override
public Map<String, String> getAttributes(final RecordSchema schema) {
    // Up to 4 entries are added; with the default 0.75 load factor a capacity of 4 would
    // trigger a resize on the 4th put, so size the map to hold 4 entries without rehashing.
    final Map<String, String> attributes = new HashMap<>(8);
    final SchemaIdentifier id = schema.getIdentifier();

    // Primitive accessors avoid boxing values that are only converted to Strings.
    // NOTE(review): assumes validateSchema already guaranteed id and version are present — confirm.
    final long schemaId = id.getIdentifier().getAsLong();
    final int schemaVersion = id.getVersion().getAsInt();

    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_ID_ATTRIBUTE, String.valueOf(schemaId));
    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_VERSION_ATTRIBUTE, String.valueOf(schemaVersion));
    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_PROTOCOL_VERSION_ATTRIBUTE, String.valueOf(LATEST_PROTOCOL_VERSION));

    // The branch is optional; only emit the attribute when it is known.
    id.getBranch().ifPresent(branch -> attributes.put(SCHEMA_BRANCH_ATTRIBUTE, branch));

    return attributes;
}
/**
 * Builds the FlowFile attributes naming the schema: name, version, and branch,
 * each emitted only when the corresponding value is known.
 */
@Override
public Map<String, String> getAttributes(final RecordSchema schema) {
    final Map<String, String> attributes = new HashMap<>(3);
    final SchemaIdentifier identifier = schema.getIdentifier();

    identifier.getName().ifPresent(name -> attributes.put(SCHEMA_NAME_ATTRIBUTE, name));
    identifier.getVersion().ifPresent(version -> attributes.put(SCHEMA_VERSION_ATTRIBUTE, String.valueOf(version)));
    identifier.getBranch().ifPresent(branch -> attributes.put(SCHEMA_BRANCH_ATTRIBUTE, branch));

    return attributes;
}
/**
 * Verifies that the schema carries an identifier with a known name, since the
 * name is what gets written as the attribute value.
 *
 * @throws SchemaNotFoundException if the identifier or its name is missing
 */
@Override
public void validateSchema(final RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();
    if (identifier == null) {
        throw new SchemaNotFoundException("Cannot write Schema Name As Attribute because Schema Identifier is not known");
    }

    final Optional<String> name = identifier.getName();
    if (!name.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Name As Attribute because the Schema Name is not known");
    }
}
/**
 * Verifies that both the numeric schema id and the schema version are known,
 * since both are required to emit a Confluent Schema Registry reference.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    if (!identifier.getIdentifier().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Confluent Schema Registry Reference because the Schema Identifier is not known");
    }
    if (!identifier.getVersion().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Confluent Schema Registry Reference because the Schema Version is not known");
    }
}
/**
 * Verifies that both the numeric schema id and the schema version are known,
 * since both are required to emit an encoded schema reference.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    if (!identifier.getIdentifier().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Encoded Schema Reference because the Schema Identifier is not known");
    }
    if (!identifier.getVersion().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Encoded Schema Reference because the Schema Version is not known");
    }
}
/**
 * Verifies that the schema identifier carries both a numeric id and a version,
 * the two values required to write the schema reference as attributes.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(final RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    final OptionalLong schemaId = identifier.getIdentifier();
    if (!schemaId.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Reference as Attributes because it does not contain a Schema Identifier");
    }

    final OptionalInt schemaVersion = identifier.getVersion();
    if (!schemaVersion.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Reference as Attributes because it does not contain a Schema Version");
    }
}
this.recordTagName = recordTagName; } else { Optional<String> recordTagNameOptional = recordSchema.getSchemaName().isPresent()? recordSchema.getSchemaName() : recordSchema.getIdentifier().getName(); if (recordTagNameOptional.isPresent()) { this.recordTagName = recordTagNameOptional.get();
/**
 * Creates an ORC record writer for the given flow file.
 *
 * <p>Stripe size, buffer size, and compression come from processor properties;
 * the Hive table name is taken from the HIVE_TABLE_NAME property when set,
 * otherwise derived from the schema name (falling back to "unknown").
 */
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile, final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {
    final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
    final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
    final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());

    // HIVE_FIELD_NAMES is read once and reused both for schema normalization and the writer
    // flag; the original evaluated the same property twice into two identical booleans.
    final boolean hiveFieldNames = context.getProperty(HIVE_FIELD_NAMES).asBoolean();
    final TypeInfo orcSchema = NiFiOrcUtils.getOrcSchema(schema, hiveFieldNames);

    final Writer orcWriter = NiFiOrcUtils.createWriter(path, conf, orcSchema, stripeSize, compressionType, bufferSize);
    final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
            ? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
            : NiFiOrcUtils.normalizeHiveTableName(schema.getIdentifier().getName().orElse("unknown"));

    return new ORCHDFSRecordWriter(orcWriter, schema, hiveTableName, hiveFieldNames);
}
}
@Override public void writeHeader(final RecordSchema schema, final OutputStream out) throws IOException { final SchemaIdentifier identifier = schema.getIdentifier(); final Long id = identifier.getIdentifier().getAsLong(); // This encoding follows the pattern that is provided for serializing data by the Confluent Schema Registry serializer // as it is provided at: // http://docs.confluent.io/current/schema-registry/docs/serializer-formatter.html#wire-format // The format consists of the first byte always being 0, to indicate a 'magic byte' followed by 4 bytes // representing the schema id. final ByteBuffer bb = ByteBuffer.allocate(5); bb.put((byte) 0); bb.putInt(id.intValue()); out.write(bb.array()); }
@Override public void writeHeader(final RecordSchema schema, final OutputStream out) throws IOException { final SchemaIdentifier identifier = schema.getIdentifier(); final Long id = identifier.getIdentifier().getAsLong(); final Integer version = identifier.getVersion().getAsInt(); // This decoding follows the pattern that is provided for serializing data by the Hortonworks Schema Registry serializer // as it is provided at: // https://github.com/hortonworks/registry/blob/master/schema-registry/serdes/src/main/java/com/hortonworks/registries/schemaregistry/serdes/avro/AvroSnapshotSerializer.java final ByteBuffer bb = ByteBuffer.allocate(13); bb.put((byte) LATEST_PROTOCOL_VERSION); bb.putLong(id); bb.putInt(version); out.write(bb.array()); }
/**
 * Builds the FlowFile attributes describing the Hortonworks schema reference:
 * schema id, schema version, protocol version, and (when present) the branch name.
 */
@Override
public Map<String, String> getAttributes(final RecordSchema schema) {
    // Up to 4 entries are added; with the default 0.75 load factor a capacity of 4 would
    // trigger a resize on the 4th put, so size the map to hold 4 entries without rehashing.
    final Map<String, String> attributes = new HashMap<>(8);
    final SchemaIdentifier id = schema.getIdentifier();

    // Primitive accessors avoid boxing values that are only converted to Strings.
    // NOTE(review): assumes validateSchema already guaranteed id and version are present — confirm.
    final long schemaId = id.getIdentifier().getAsLong();
    final int schemaVersion = id.getVersion().getAsInt();

    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_ID_ATTRIBUTE, String.valueOf(schemaId));
    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_VERSION_ATTRIBUTE, String.valueOf(schemaVersion));
    attributes.put(HortonworksAttributeSchemaReferenceStrategy.SCHEMA_PROTOCOL_VERSION_ATTRIBUTE, String.valueOf(LATEST_PROTOCOL_VERSION));

    // The branch is optional; only emit the attribute when it is known.
    id.getBranch().ifPresent(branch -> attributes.put(SCHEMA_BRANCH_ATTRIBUTE, branch));

    return attributes;
}
/**
 * Builds the FlowFile attributes naming the schema: name, version, and branch,
 * each emitted only when the corresponding value is known.
 */
@Override
public Map<String, String> getAttributes(final RecordSchema schema) {
    final Map<String, String> attributes = new HashMap<>(3);
    final SchemaIdentifier identifier = schema.getIdentifier();

    identifier.getName().ifPresent(name -> attributes.put(SCHEMA_NAME_ATTRIBUTE, name));
    identifier.getVersion().ifPresent(version -> attributes.put(SCHEMA_VERSION_ATTRIBUTE, String.valueOf(version)));
    identifier.getBranch().ifPresent(branch -> attributes.put(SCHEMA_BRANCH_ATTRIBUTE, branch));

    return attributes;
}
/**
 * Verifies that the schema carries an identifier with a known name, since the
 * name is what gets written as the attribute value.
 *
 * @throws SchemaNotFoundException if the identifier or its name is missing
 */
@Override
public void validateSchema(final RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();
    if (identifier == null) {
        throw new SchemaNotFoundException("Cannot write Schema Name As Attribute because Schema Identifier is not known");
    }

    final Optional<String> name = identifier.getName();
    if (!name.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Name As Attribute because the Schema Name is not known");
    }
}
/**
 * Verifies that the schema identifier carries both a numeric id and a version,
 * the two values required to write the schema reference as attributes.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(final RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    final OptionalLong schemaId = identifier.getIdentifier();
    if (!schemaId.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Reference as Attributes because it does not contain a Schema Identifier");
    }

    final OptionalInt schemaVersion = identifier.getVersion();
    if (!schemaVersion.isPresent()) {
        throw new SchemaNotFoundException("Cannot write Schema Reference as Attributes because it does not contain a Schema Version");
    }
}
/**
 * Verifies that both the numeric schema id and the schema version are known,
 * since both are required to emit an encoded schema reference.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    if (!identifier.getIdentifier().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Encoded Schema Reference because the Schema Identifier is not known");
    }
    if (!identifier.getVersion().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Encoded Schema Reference because the Schema Version is not known");
    }
}
/**
 * Verifies that both the numeric schema id and the schema version are known,
 * since both are required to emit a Confluent Schema Registry reference.
 *
 * @throws SchemaNotFoundException if the id or version is missing
 */
@Override
public void validateSchema(RecordSchema schema) throws SchemaNotFoundException {
    final SchemaIdentifier identifier = schema.getIdentifier();

    if (!identifier.getIdentifier().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Confluent Schema Registry Reference because the Schema Identifier is not known");
    }
    if (!identifier.getVersion().isPresent()) {
        throw new SchemaNotFoundException("Cannot write Confluent Schema Registry Reference because the Schema Version is not known");
    }
}