private static void _addIdSchemaField( List<Schema.Field> fields, Set<String> names) { String safeIdFieldName = "_id"; names.add(safeIdFieldName); Schema.Field designField = new Schema.Field( safeIdFieldName, AvroUtils.wrapAsNullable(AvroUtils._string()), null, (Object)null); // This is the first column in the schema fields.add(0, designField); }
/**
 * Returns the schema used by this instance, resolving it on first call.
 *
 * <p>When no schema has been stored yet, the schema configured in
 * {@code liferayConnectionResourceBaseProperties} is taken. If
 * {@link AvroUtils#isIncludeAllFields(Schema)} reports true for it, the
 * schema is replaced by the endpoint schema obtained from the current
 * source. The resource URL handed to the source is only set when the
 * properties are {@code TLiferayInputProperties}; otherwise it is
 * {@code null}.
 *
 * @return the stored (possibly just resolved) schema
 * @throws IOException declared for the calls made while resolving the schema
 */
protected Schema getSchema() throws IOException {
    if (schema != null) {
        return schema;
    }

    schema = liferayConnectionResourceBaseProperties.getSchema();

    if (!AvroUtils.isIncludeAllFields(schema)) {
        return schema;
    }

    String endpointURL = null;

    if (liferayConnectionResourceBaseProperties instanceof
            TLiferayInputProperties) {

        endpointURL =
            liferayConnectionResourceBaseProperties.resource.
                resourceProperty.getResourceURL();
    }

    BoundedSource currentSource = getCurrentSource();

    schema = currentSource.getEndpointSchema(runtimeContainer, endpointURL);

    return schema;
}
/**
 * Prepares one converter for every field of the given schema.
 *
 * <p>The schema's fields are stored in {@code schemaFields} and
 * {@code avroConverters} is re-created with one slot per field. A field
 * whose unwrapped schema is a logical timestamp-millis gets a
 * {@code StringTimestampConverter} built from the field's
 * {@code TALEND_COLUMN_PATTERN} property; any other field gets whatever
 * {@code _converterRegistry} holds for its Avro type.
 *
 * @param schema design schema
 * @review
 */
protected void initConverters(Schema schema) {
    schemaFields = schema.getFields();

    final int fieldCount = schemaFields.size();

    avroConverters = new AvroConverter[fieldCount];

    for (int index = 0; index < fieldCount; index++) {
        Schema.Field schemaField = schemaFields.get(index);

        Schema unwrappedSchema = AvroUtils.unwrapIfNullable(
            schemaField.schema());

        if (!LogicalTypeUtils.isLogicalTimestampMillis(unwrappedSchema)) {
            // Plain types are resolved through the registry by Avro type
            avroConverters[index] = _converterRegistry.get(
                unwrappedSchema.getType());

            continue;
        }

        String timestampPattern = schemaField.getProp(
            SchemaConstants.TALEND_COLUMN_PATTERN);

        avroConverters[index] = new StringTimestampConverter(
            timestampPattern);
    }
}
Schema basicSchema = AvroUtils.unwrapIfNullable(f.schema()); if (AvroUtils.isSameType(basicSchema, AvroUtils._string())) { return Types.VARCHAR; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._int())) { return Types.INTEGER; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._date())) { return Types.DATE; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._decimal())) { return Types.DECIMAL; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._long())) { return Types.BIGINT; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._double())) { return Types.DOUBLE; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._float())) { return Types.FLOAT; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._boolean())) { return Types.BOOLEAN; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._short())) { return Types.SMALLINT; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._character())) { return Types.CHAR; } else if (AvroUtils.isSameType(basicSchema, AvroUtils._byte())) { return Types.TINYINT; } else {
/** * A helper method to convert the String representation of a datum in the DataPrep system to the Avro type that * matches the Schema generated for it. * * @param f is field in Avro Schema. * @return converter for a given type. */ public AvroConverter<String, ?> getConverterFromString(org.apache.avro.Schema.Field f) { Schema fieldSchema = AvroUtils.unwrapIfNullable(f.schema()); // FIXME use avro type to decide the converter is not correct if the user change the avro type, Date to String // for instance if (AvroUtils.isSameType(fieldSchema, AvroUtils._boolean())) { return new StringToBooleanConverter(f); } else if (AvroUtils.isSameType(fieldSchema, AvroUtils._decimal())) { return new StringToDecimalConverter(f); } else if (AvroUtils.isSameType(fieldSchema, AvroUtils._double())) { return new StringToDoubleConverter(f); } else if (AvroUtils.isSameType(fieldSchema, AvroUtils._int())) { return new StringToIntegerConverter(f); } else if (AvroUtils.isSameType(fieldSchema, AvroUtils._date())) { return new StringToDateConverter(f); } else if (AvroUtils.isSameType(fieldSchema, AvroUtils._string())) { return super.getConverter(String.class); } throw new UnsupportedOperationException("The type " + fieldSchema.getType() + " is not supported."); //$NON-NLS-1$ //$NON-NLS-2$ }
/**
 * Maps a Json node to an Avro schema according to the node class.
 *
 * Text, int, long, double and boolean nodes map to the matching primitive schema wrapped with
 * {@link AvroUtils#wrapAsNullable(Schema)}. Null nodes map to the same nullable string schema as text nodes.
 * Every other node is handed to {@code createSubRecord}.
 *
 * @param node Json node.
 * @return the Avro schema chosen for the node.
 */
@VisibleForTesting
Schema getAvroSchema(JsonNode node) {
    final Schema valueSchema;

    if (node instanceof TextNode) {
        valueSchema = AvroUtils._string();
    } else if (node instanceof IntNode) {
        valueSchema = AvroUtils._int();
    } else if (node instanceof LongNode) {
        valueSchema = AvroUtils._long();
    } else if (node instanceof DoubleNode) {
        valueSchema = AvroUtils._double();
    } else if (node instanceof BooleanNode) {
        valueSchema = AvroUtils._boolean();
    } else if (node instanceof NullNode) {
        // Null nodes are treated like text nodes
        valueSchema = AvroUtils._string();
    } else {
        // Anything else is described by a sub record and returned as is
        return createSubRecord(node);
    }

    return AvroUtils.wrapAsNullable(valueSchema);
}
/**
 * Builds the "DcpMessage" record schema (namespace "com.couchbase").
 *
 * Each field is stored at the array position given by its {@code EventSchemaField.*_IDX} constant, has no
 * default value and uses ascending sort order. The "content" field carries {@code TALEND_IS_LOCKED = "false"},
 * while the record itself carries {@code TALEND_IS_LOCKED = "true"}.
 *
 * NOTE(review): the array is sized for 10 fields and 10 positions are assigned; this assumes the index
 * constants cover 0..9 exactly - confirm against EventSchemaField.
 *
 * @return the newly created record schema.
 */
public static Schema getEventSchema() {
    final Schema.Field.Order ascending = Schema.Field.Order.ASCENDING;
    final Object noDefault = null;

    Schema.Field[] columns = new Schema.Field[10];

    columns[EventSchemaField.EVENT_IDX] =
            new Schema.Field("event", AvroUtils._string(), "Type of event", noDefault, ascending);
    columns[EventSchemaField.PARTITION_IDX] =
            new Schema.Field("partition", AvroUtils._short(), "Partition number", noDefault, ascending);
    columns[EventSchemaField.KEY_IDX] =
            new Schema.Field("key", AvroUtils._string(), "Key", noDefault, ascending);
    columns[EventSchemaField.CAS_IDX] =
            new Schema.Field("cas", AvroUtils._long(), "CAS", noDefault, ascending);
    columns[EventSchemaField.SEQNO_IDX] =
            new Schema.Field("bySeqno", AvroUtils._long(), "Sequence number", noDefault, ascending);
    columns[EventSchemaField.REV_SEQNO_IDX] =
            new Schema.Field("revSeqno", AvroUtils._long(), "Revision sequence number", noDefault, ascending);
    columns[EventSchemaField.EXPIRATION_IDX] =
            new Schema.Field("expiration", AvroUtils.wrapAsNullable(AvroUtils._int()), "Expiration", noDefault,
                    ascending);
    columns[EventSchemaField.FLAGS_IDX] =
            new Schema.Field("flags", AvroUtils.wrapAsNullable(AvroUtils._int()), "Flags", noDefault, ascending);
    columns[EventSchemaField.LOCK_TIME_IDX] =
            new Schema.Field("lockTime", AvroUtils.wrapAsNullable(AvroUtils._int()), "Lock time", noDefault,
                    ascending);

    // The content column is the only one explicitly marked as not locked
    Schema.Field contentColumn =
            new Schema.Field("content", AvroUtils.wrapAsNullable(AvroUtils._bytes()), "Content", noDefault,
                    ascending);
    contentColumn.addProp(TALEND_IS_LOCKED, "false");
    columns[EventSchemaField.CONTENT_IDX] = contentColumn;

    Schema eventSchema =
            Schema.createRecord("DcpMessage", "Couchbase DCP message", "com.couchbase", false,
                    Arrays.asList(columns));
    eventSchema.addProp(TALEND_IS_LOCKED, "true");
    return eventSchema;
}
/** * Infers an Avro schema for the given DataPrep Field. This can be an expensive operation so the schema should be * cached where possible. The return type will be the Avro Schema that can contain the field data without loss of * precision. * * @param field the Field to analyse. * @return the schema for data that the field describes. */ private Schema inferSchemaField(DataPrepField field) { Schema base; switch (field.getType()) { case "boolean": base = AvroUtils._boolean(); break; case "double": base = AvroUtils._double(); break; case "integer": base = AvroUtils._int(); break; case "float": base = AvroUtils._float(); break; default: base = AvroUtils._string(); break; } // TODO add handling for numeric, any and date. return base; }
public Schema getQueueSchema() { return SchemaBuilder.builder().record("Main").fields()// .name(AzureStorageQueueProperties.FIELD_MESSAGE_ID).prop(SchemaConstants.TALEND_COLUMN_IS_KEY, "true") .prop(SchemaConstants.TALEND_COLUMN_DB_LENGTH, "100").type(AvroUtils._string()).noDefault()// .name(AzureStorageQueueProperties.FIELD_MESSAGE_CONTENT).type(AvroUtils._string()).noDefault() // .name(AzureStorageQueueProperties.FIELD_INSERTION_TIME) .prop(SchemaConstants.TALEND_COLUMN_PATTERN, "yyyy-MM-dd hh:mm:ss").type(AvroUtils._date()).noDefault() // .name(AzureStorageQueueProperties.FIELD_EXPIRATION_TIME) .prop(SchemaConstants.TALEND_COLUMN_PATTERN, "yyyy-MM-dd hh:mm:ss").type(AvroUtils._date()).noDefault() // .name(AzureStorageQueueProperties.FIELD_NEXT_VISIBLE_TIME) .prop(SchemaConstants.TALEND_COLUMN_PATTERN, "yyyy-MM-dd hh:mm:ss").type(AvroUtils._date()).noDefault() // .name(AzureStorageQueueProperties.FIELD_DEQUEUE_COUNT).type(AvroUtils._int()).noDefault() // .name(AzureStorageQueueProperties.FIELD_POP_RECEIPT).type(AvroUtils._string()).noDefault() // .endRecord(); }
public static Schema createTestSchema5(String tablename) { FieldAssembler<Schema> builder = SchemaBuilder.builder().record(tablename).fields(); Schema schema = AvroUtils._int(); schema = wrap(schema); builder = builder.name("ID").prop(SchemaConstants.TALEND_COLUMN_DB_COLUMN_NAME, "ID").type(schema).noDefault(); schema = AvroUtils._string(); schema = wrap(schema); builder = builder.name("NAME").prop(SchemaConstants.TALEND_COLUMN_DB_COLUMN_NAME, "NAME").type(schema).noDefault(); schema = AvroUtils._string();// TODO : fix it as should be object type schema = wrap(schema); builder = builder.name("RESULTSET").prop(SchemaConstants.TALEND_COLUMN_DB_COLUMN_NAME, "RESULTSET").type(schema) .noDefault(); return builder.endRecord(); }
/**
 * Converts one field value into the object stored for the given Avro field schema.
 *
 * A null value of a nullable schema yields {@code null}. Otherwise the schema is unwrapped and the value is
 * handled by its attribute:
 * <ul>
 * <li>PRIMITIVE: a column whose DB type property is the BigQuery TIMESTAMP type is divided by 1,000,000 and the
 * text form of the result is passed to {@code formatTimestamp} (presumably microseconds to seconds - confirm);
 * double and boolean schemas use the typed getters; everything else returns the raw value.</li>
 * <li>REPEATED: each child is converted recursively against the element type of the schema.</li>
 * <li>RECORD: the record value is converted by {@code convertFileds}.</li>
 * </ul>
 *
 * @param fieldValue the value to convert.
 * @param fieldSchema the Avro schema of the field, possibly a nullable union.
 * @return the converted value, or {@code null} for a null value of a nullable field.
 * @throws TalendRuntimeException with UNEXPECTED_ARGUMENT if the attribute is none of the three handled ones.
 */
private Object convertField(FieldValue fieldValue, org.apache.avro.Schema fieldSchema) {
    if (AvroUtils.isNullable(fieldSchema) && fieldValue.isNull()) {
        return null;
    }

    fieldSchema = AvroUtils.unwrapIfNullable(fieldSchema);

    switch (fieldValue.getAttribute()) {
    case PRIMITIVE: {
        String dbType = fieldSchema.getProp(TALEND_COLUMN_DB_TYPE);
        if (BigQueryType.TIMESTAMP.toString().equals(dbType)) {
            Double scaled = ((Long) fieldValue.getTimestampValue()) / 1000000.0;
            return formatTimestamp(scaled.toString());
        }
        if (AvroUtils.isSameType(fieldSchema, AvroUtils._double())) {
            return fieldValue.getDoubleValue();
        }
        if (AvroUtils.isSameType(fieldSchema, AvroUtils._boolean())) {
            return fieldValue.getBooleanValue();
        }
        return fieldValue.getValue();
    }
    case REPEATED: {
        List<Object> converted = new ArrayList<>();
        List<FieldValue> children = fieldValue.getRepeatedValue();
        for (FieldValue child : children) {
            converted.add(convertField(child, fieldSchema.getElementType()));
        }
        return converted;
    }
    case RECORD:
        return convertFileds(fieldValue.getRecordValue(), fieldSchema);
    default:
        // Any attribute that is not handled above is rejected
        throw TalendRuntimeException.build(CommonErrorCodes.UNEXPECTED_ARGUMENT).create();
    }
}
/**
 * Creates the "Runtime" record schema used for delete operations.
 *
 * <p>The record holds a single string field named by
 * {@code AvroConstants.ID} whose {@code TALEND_IS_LOCKED} property is set
 * to "true".
 *
 * @return the newly created record schema
 */
private static Schema _getDeleteSchema() {
    Schema.Field idField = new Schema.Field(
        AvroConstants.ID, AvroUtils._string(), null, (Object)null);

    idField.addProp(SchemaConstants.TALEND_IS_LOCKED, "true");

    List<Schema.Field> deleteFields = new ArrayList<>(1);

    deleteFields.add(idField);

    return Schema.createRecord("Runtime", null, null, false, deleteFields);
}
protected Object getFieldValue(Object inputValue, Field field) { Schema s = AvroUtils.unwrapIfNullable(field.schema()); if (inputValue != null && inputValue instanceof String && ((String) inputValue).isEmpty()) { return emptyStringValue; } else if (null == inputValue || inputValue instanceof String) { return inputValue; } else if (AvroUtils.isSameType(s, AvroUtils._date())) {//TODO improve the performance as no need to get the runtimefield object from map every time s = AvroUtils.unwrapIfNullable(runtimeField.schema()); } else {
public static Schema getBasicSchema(String pattern) { return SchemaBuilder.builder().record("Schema").fields() // .name("TestBoolean").type().booleanType().noDefault() // .name("TestByte").type(AvroUtils._byte()).noDefault() // .name("TestBytes").type(AvroUtils._bytes()).noDefault() // .name("TestChar").type(AvroUtils._character()).noDefault() // .name("TestDate").prop(SchemaConstants.TALEND_COLUMN_PATTERN, pattern)// .type(AvroUtils._date()).noDefault() // .name("TestDouble").type().doubleType().noDefault() // .name("TestFloat").type().floatType().noDefault() // .name("TestBigDecimal").type(AvroUtils._decimal()).noDefault()// .name("TestInteger").type().intType().noDefault() // .name("TestLong").type().longType().noDefault() // .name("TestObject").type(AvroUtils._bytes()).noDefault().endRecord(); }
/**
 * Renders a value as text.
 *
 * <ul>
 * <li>{@code null} stays {@code null}.</li>
 * <li>If a thousands or decimal separator is configured, the plain decimal text of the value is passed to
 * {@code FormatterUtils.formatNumber} together with both separators.</li>
 * <li>Otherwise a {@link BigDecimal} is written in plain notation, rounded half-up to the scale stored in the
 * field's {@code TALEND_COLUMN_PRECISION} property when that property is set.</li>
 * <li>Otherwise, when the field's unwrapped schema is the decimal type and the property is set, the value is
 * parsed as a {@link BigDecimal} and rounded the same way.</li>
 * <li>In every remaining case {@link String#valueOf(Object)} is returned.</li>
 * </ul>
 *
 * @param value the value to render, may be {@code null}.
 * @return the text form of the value, or {@code null} for a {@code null} value.
 */
@Override
public String convertToDatum(T value) {
    if (value == null) {
        return null;
    }

    if (thousandsSepChar != null || decimalSepChar != null) {
        String plainNumber = new BigDecimal(String.valueOf(value)).toPlainString();
        return FormatterUtils.formatNumber(plainNumber, thousandsSepChar, decimalSepChar);
    }

    if (value instanceof BigDecimal) {
        BigDecimal decimal = (BigDecimal) value;
        String scale = field.getProp(SchemaConstants.TALEND_COLUMN_PRECISION);
        if (scale == null) {
            return decimal.toPlainString();
        }
        return decimal.setScale(Integer.parseInt(scale), RoundingMode.HALF_UP).toPlainString();
    }

    if (AvroUtils.isSameType(AvroUtils._decimal(), AvroUtils.unwrapIfNullable(field.schema()))) {
        String scale = field.getProp(SchemaConstants.TALEND_COLUMN_PRECISION);
        if (scale != null) {
            return new BigDecimal(String.valueOf(value)).setScale(Integer.parseInt(scale), RoundingMode.HALF_UP)
                    .toPlainString();
        }
    }

    return String.valueOf(value);
}
protected Schema inferSchemaDynamicTableEntity(DynamicTableEntity entity) { List<Field> fields = new ArrayList<>(); fields.add(new Field("PartitionKey", AvroUtils._string(), null, (Object) null)); fields.add(new Field("RowKey", AvroUtils._string(), null, (Object) null)); fields.add(new Field("Timestamp", AvroUtils._date(), null, (Object) null)); // FIXME set tableName properly and manage nameMappings String tableName = "schemaInfered"; for (Entry<String, EntityProperty> f : entity.getProperties().entrySet()) { String fieldName = f.getKey(); Field field = getAvroMapping(fieldName, f.getValue()); fields.add(field); } return Schema.createRecord(tableName, null, null, false, fields); }
/**
 * Creates the "main" record schema used by the tests.
 *
 * The record has three fields, each typed as a union of a value type and null: "integer_fld" (int, default 1),
 * "string_fld" (string, no default) and "date_fld" (logical date, no default).
 */
@Before
public void createSchema() {
    SchemaBuilder.FieldAssembler<Schema> columns = SchemaBuilder.builder().record("main").fields();

    Schema intOrNull = Schema.createUnion(AvroUtils._int(), Schema.create(Schema.Type.NULL));
    columns = columns.name("integer_fld").type(intOrNull).withDefault(1);

    Schema stringOrNull = Schema.createUnion(AvroUtils._string(), Schema.create(Schema.Type.NULL));
    columns = columns.name("string_fld").type(stringOrNull).noDefault();

    Schema dateOrNull = Schema.createUnion(AvroUtils._logicalDate(), Schema.create(Schema.Type.NULL));
    columns = columns.name("date_fld").type(dateOrNull).noDefault();

    schema = columns.endRecord();
}
/**
 * Generates the output field of an aggregate operation.
 *
 * The field type comes from {@code genFieldType} for the original field schema and the operation, wrapped as
 * nullable. The field name is chosen as follows:
 * <ul>
 * <li>no output field path set: the name built by {@code genOutputFieldNameByOpt} from the input field path and
 * the operation;</li>
 * <li>output field path containing a ".": the part after the last ".";</li>
 * <li>any other output field path: the path itself.</li>
 * </ul>
 *
 * @param originalField the field to copy
 * @param operationProps the operation to execute
 * @return a new field with the generated name and type, keeping the doc and default value of the original field
 */
public static Schema.Field genField(Schema.Field originalField, AggregateOperationProperties operationProps) {
    Schema nullableType =
            AvroUtils.wrapAsNullable(genFieldType(originalField.schema(), operationProps.operation.getValue()));

    String requestedPath = operationProps.outputFieldPath.getValue();

    final String fieldName;
    if (StringUtils.isEmpty(requestedPath)) {
        fieldName = genOutputFieldNameByOpt(operationProps.fieldPath.getValue(),
                operationProps.operation.getValue());
    } else if (requestedPath.contains(".")) {
        // Only the last element of the path names the field
        fieldName = StringUtils.substringAfterLast(requestedPath, ".");
    } else {
        fieldName = requestedPath;
    }

    return new Schema.Field(fieldName, nullableType, originalField.doc(), originalField.defaultVal());
}
/**
 * Checks that {@link AvroTypeConverter#convertToAvro(String, String)} returns the int Avro schema when
 * TalendType.INTEGER is passed with a {@code null} second argument
 */
@Test
public void testConvertToAvroInteger() {
    assertEquals(AvroUtils._int(), AvroTypeConverter.convertToAvro(TalendType.INTEGER, null));
}
/**
 * Returns the Avro schema handled by this instance.
 *
 * @return the schema provided by {@link AvroUtils#_double()}.
 */
@Override public org.apache.avro.Schema getSchema() { return AvroUtils._double(); }