/** * Searches column names by indexes on a given Parquet file schema, and returns its corresponded * Parquet schema types. * * @param schema Message schema where to search for column names. * @param colNames List of column names. * @param colIndexes List of column indexes. * @return A MessageType object of the column names found. */ public static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) { List<Type> schemaTypes = new ArrayList<Type>(); for (Integer i : colIndexes) { if (i < colNames.size()) { if (i < schema.getFieldCount()) { schemaTypes.add(schema.getType(i)); } else { //prefixing with '_mask_' to ensure no conflict with named //columns in the file schema schemaTypes.add( Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } } return new MessageType(schema.getName(), schemaTypes); }
/**
 * Projects a Parquet message schema by column name.
 *
 * <p>The projected fields are obtained from {@code getProjectedGroupFields} and assembled into
 * a new message that keeps the name of the input schema.
 *
 * @param schema Message type schema where to search for column names.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object of projected columns.
 */
public static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
  Type[] projected = getProjectedGroupFields(schema, colNames, colTypes).toArray(new Type[0]);
  return Types.buildMessage().addFields(projected).named(schema.getName());
}
return new MessageType(schema.getName(), schemaTypes);
String name = fullSchema.getName();
String name = fullSchema.getName();
/**
 * {@inheritDoc}
 *
 * <p>Writes {@code message <name>}, followed by the original type in parentheses when one is
 * set, then the members inside braces. The {@code indent} argument is not used here.
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  sb.append("message ").append(getName());
  if (getOriginalType() != null) {
    sb.append(" (" + getOriginalType() + ")");
  }
  sb.append(" {\n");
  membersDisplayString(sb, " ");
  sb.append("}\n");
}
/**
 * Creates a builder for the given expected schema and Parquet message type.
 *
 * @param schema expected schema; its struct is converted through {@code AvroSchemaUtil.convertTypes}
 * @param type Parquet message type; its name is passed to that conversion
 */
ReadBuilder(com.netflix.iceberg.Schema schema, MessageType type) {
  // Plain field copies first, then the field derived from both arguments.
  this.schema = schema;
  this.type = type;
  this.avroSchemas = AvroSchemaUtil.convertTypes(schema.asStruct(), type.getName());
}
/**
 * {@inheritDoc}
 *
 * <p>Writes {@code message <name>}, followed by the logical type annotation in parentheses
 * when one is set, then the members inside braces. The {@code indent} argument is not used here.
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  sb.append("message ").append(getName());
  if (getLogicalTypeAnnotation() != null) {
    sb.append(" (").append(getLogicalTypeAnnotation().toString()).append(")");
  }
  sb.append(" {\n");
  membersDisplayString(sb, " ");
  sb.append("}\n");
}
@Override public RecordMaterializer<T> prepareForRead(Configuration configuration, Map<String, String> fileMetadata, MessageType fileMessageType, ReadContext readContext) { // This is the type created in init that was based on the file's schema. The schema that this // will pass to the wrapped ReadSupport needs to match the expected schema's names. Rather than // renaming the file's schema, convert the expected schema to Parquet. This relies on writing // files with the correct schema. // TODO: this breaks when columns are reordered. MessageType readSchema = ParquetSchemaUtil.convert(expectedSchema, fileMessageType.getName()); return wrapped.prepareForRead(configuration, fileMetadata, readSchema, readContext); }
/**
 * Creates a reader for the given read schema.
 *
 * @param readSchema schema to read; converted with the file schema's message name
 * @param fileSchema Parquet file schema; only its name is used here
 * @param partitionValues partition values forwarded to {@code buildReader}
 */
public PigParquetReader(Schema readSchema, MessageType fileSchema, Map<Integer, Object> partitionValues) {
  String messageName = fileSchema.getName();
  this.reader = buildReader(convert(readSchema, messageName), readSchema, partitionValues);
}
/**
 * Merges the fields of another message type into this one.
 *
 * @param toMerge the message type whose fields are merged in
 * @param strict forwarded unchanged to {@code mergeFields}
 * @return a new MessageType with this message's name and the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
  final String messageName = this.getName();
  return new MessageType(messageName, mergeFields(toMerge, strict));
}
/**
 * Converts a Parquet message type by passing its name and fields to {@code convertFields}.
 *
 * @param parquetSchema the Parquet message type to convert
 * @return the schema produced by {@code convertFields}
 */
public Schema convert(MessageType parquetSchema) {
  final String recordName = parquetSchema.getName();
  return convertFields(recordName, parquetSchema.getFields());
}
/**
 * Produces the union of this message type and {@code toMerge}.
 *
 * @param toMerge the message type to merge with this one
 * @param strict passed through to {@code mergeFields}
 * @return a new MessageType named like this one, holding the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
  final String ownName = getName();
  return new MessageType(ownName, mergeFields(toMerge, strict));
}
/**
 * Rebuilds a message from its visited fields.
 *
 * <p>A field whose id is in {@code selectedIds} is kept as it appears in {@code message}.
 * Otherwise the visited field is used when it is non-null, and the field is dropped when it
 * is null. The original {@code message} instance is returned when no visited field was
 * substituted and every field of the message was kept.
 *
 * @param message the message being visited
 * @param fields visit results, positionally aligned with the message's fields
 * @return {@code message} itself when unchanged, otherwise a rebuilt message with the same name
 */
@Override
public Type message(MessageType message, List<Type> fields) {
  Types.MessageTypeBuilder builder = Types.buildMessage();
  boolean substituted = false;
  int kept = 0;

  for (int pos = 0; pos < fields.size(); pos++) {
    Type fromMessage = message.getType(pos);
    Type visited = fields.get(pos);

    if (selectedIds.contains(getId(fromMessage))) {
      builder.addField(fromMessage);
    } else if (visited != null) {
      builder.addField(visited);
      substituted = true;
    } else {
      // Neither selected nor produced by the visit: the field is dropped.
      continue;
    }
    kept++;
  }

  boolean unchanged = !substituted && message.getFieldCount() == kept;
  return unchanged ? message : builder.named(message.getName());
}
public static MessageType addFallbackIds(MessageType fileSchema) { MessageTypeBuilder builder = org.apache.parquet.schema.Types.buildMessage(); int ordinal = 1; // ids are assigned starting at 1 for (Type type : fileSchema.getFields()) { builder.addField(type.withId(ordinal)); ordinal += 1; } return builder.named(fileSchema.getName()); } }
/**
 * Intersects the file schema with the requested fields.
 *
 * <p>{@code Fields.UNKNOWN} is treated as {@code Fields.ALL}. Top-level fields of the file
 * schema whose names are contained in the requested fields are kept, in file-schema order.
 *
 * @param fileSchema the Parquet file schema
 * @param requestedFields the fields asked for
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  Fields wanted = (requestedFields == Fields.UNKNOWN) ? Fields.ALL : requestedFields;

  Fields matchedFields = Fields.NONE;
  List<Type> matchedTypes = new ArrayList<Type>();

  for (int i = 0, count = fileSchema.getFieldCount(); i < count; i++) {
    Type fieldType = fileSchema.getType(i);
    Fields fieldName = new Fields(fieldType.getName());
    if (!wanted.contains(fieldName)) {
      continue;
    }
    matchedFields = matchedFields.append(fieldName);
    matchedTypes.add(fieldType);
  }

  this.sourceFields = matchedFields;
  this.requestedSchema = new MessageType(fileSchema.getName(), matchedTypes);
}
/**
 * Computes the source fields and the requested Parquet schema as the overlap between the
 * file schema's top-level fields and the requested fields.
 *
 * <p>A request of {@code Fields.UNKNOWN} is handled as {@code Fields.ALL}.
 *
 * @param fileSchema the Parquet file schema
 * @param requestedFields the fields asked for
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  final Fields effectiveRequest;
  if (requestedFields == Fields.UNKNOWN) {
    effectiveRequest = Fields.ALL;
  } else {
    effectiveRequest = requestedFields;
  }

  Fields selected = Fields.NONE;
  List<Type> selectedTypes = new ArrayList<Type>();
  final int fieldCount = fileSchema.getFieldCount();

  int index = 0;
  while (index < fieldCount) {
    Type candidate = fileSchema.getType(index);
    Fields candidateName = new Fields(candidate.getName());
    if (effectiveRequest.contains(candidateName)) {
      selected = selected.append(candidateName);
      selectedTypes.add(candidate);
    }
    index++;
  }

  this.sourceFields = selected;
  this.requestedSchema = new MessageType(fileSchema.getName(), selectedTypes);
}
/**
 * Keeps the top-level fields of the file schema that are contained in the requested fields.
 *
 * <p>{@code Fields.UNKNOWN} is replaced by {@code Fields.ALL} before matching. The resulting
 * schema reuses the file schema's name.
 *
 * @param fileSchema the Parquet file schema
 * @param requestedFields the fields asked for
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  Fields request = requestedFields;
  if (request == Fields.UNKNOWN) {
    request = Fields.ALL;
  }

  Fields keptFields = Fields.NONE;
  List<Type> keptTypes = new ArrayList<Type>();

  final int total = fileSchema.getFieldCount();
  for (int pos = 0; pos < total; pos++) {
    Type current = fileSchema.getType(pos);
    Fields currentName = new Fields(current.getName());
    boolean requested = request.contains(currentName);
    if (requested) {
      keptFields = keptFields.append(currentName);
      keptTypes.add(current);
    }
  }

  this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
  this.sourceFields = keptFields;
}
/**
 * Creates the schema element for the message itself, copies the message's id into it when one
 * is set, and then visits the message's children as a group.
 */
@Override
public void visit(MessageType messageType) {
  String messageName = messageType.getName();
  SchemaElement rootElement = new SchemaElement(messageName);

  if (messageType.getId() != null) {
    int fieldId = messageType.getId().intValue();
    rootElement.setField_id(fieldId);
  }

  visitChildren(result, messageType.asGroupType(), rootElement);
}
/**
 * Emits a schema element named after the message, carrying the message id as its field id if
 * the message has one, then descends into the message's fields via {@code visitChildren}.
 */
@Override
public void visit(MessageType messageType) {
  final SchemaElement messageElement = new SchemaElement(messageType.getName());

  final boolean hasId = messageType.getId() != null;
  if (hasId) {
    messageElement.setField_id(messageType.getId().intValue());
  }

  visitChildren(result, messageType.asGroupType(), messageElement);
}