.collect(toList()); MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
/**
 * Writes this message type's textual schema representation into the builder,
 * e.g. {@code message Name (ORIGINAL_TYPE) { ... }}.
 *
 * <p>The {@code indent} argument is not applied to the top-level message line;
 * members are rendered with a fixed indent.
 *
 * @param sb     the builder to append to
 * @param indent indentation prefix (unused at the message level)
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  String annotation = getOriginalType() == null ? "" : " (" + getOriginalType() + ")";
  sb.append("message ");
  sb.append(getName());
  sb.append(annotation);
  sb.append(" {\n");
  membersDisplayString(sb, " ");
  sb.append("}\n");
}
/**
 * {@inheritDoc}
 *
 * <p>Renders this message type as a Parquet schema string, e.g.
 * {@code message Name (ORIGINAL_TYPE) { ... }}.
 *
 * <p>NOTE(review): the {@code indent} parameter is not used here; members are
 * always rendered with a fixed indent string — confirm this is intentional.
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  sb.append("message ")
      .append(getName())
      // the logical-type annotation is optional, e.g. " (MAP)"
      .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
      .append(" {\n");
  membersDisplayString(sb, " ");
  sb.append("}\n");
}
/**
 * Merges this schema with another, producing a new message type that carries
 * this schema's name and the combined fields.
 *
 * @param toMerge the schema to merge into this one
 * @param strict  whether incompatible field definitions should fail the merge
 * @return a new {@link MessageType} with the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
  List<Type> mergedFields = mergeFields(toMerge, strict);
  return new MessageType(getName(), mergedFields);
}
/**
 * Builds the Parquet read context by projecting the file schema down to the
 * requested non-partition columns.
 *
 * <p>Columns for which {@code getParquetType} returns {@code null} (i.e. they
 * could not be resolved against the file schema) are dropped from the
 * projection.
 *
 * @param configuration    Hadoop configuration (not used by this method)
 * @param keyValueMetaData file key/value metadata (not used by this method)
 * @param messageType      the Parquet file schema
 * @return a {@code ReadContext} carrying the projected schema
 */
@Override
@SuppressWarnings("deprecation") // the single-argument ReadContext constructor is deprecated
public ReadContext init(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType messageType) {
  // Partition keys are not stored in the Parquet data files, so they are
  // excluded from the requested projection.
  List<parquet.schema.Type> fields = columns.stream()
      .filter(column -> !column.isPartitionKey())
      // presumably resolves each column by name or by index depending on
      // useParquetColumnNames — verify against getParquetType
      .map(column -> getParquetType(column, messageType, useParquetColumnNames))
      .filter(Objects::nonNull)
      .collect(toList());
  MessageType requestedProjection = new MessageType(messageType.getName(), fields);
  return new ReadContext(requestedProjection);
}
/**
 * Converts the root message type: the message itself becomes one
 * {@code SchemaElement}, followed by elements for each of its children.
 */
@Override
public void visit(MessageType messageType) {
  visitChildren(result, messageType.asGroupType(), new SchemaElement(messageType.getName()));
}
/** * Searchs column names by index on a given Parquet file schema, and returns its corresponded * Parquet schema types. * * @param schema Message schema where to search for column names. * @param colNames List of column names. * @param colIndexes List of column indexes. * @return A MessageType object of the column names found. */ private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) { List<Type> schemaTypes = new ArrayList<Type>(); for (Integer i : colIndexes) { if (i < colNames.size()) { if (i < schema.getFieldCount()) { schemaTypes.add(schema.getType(i)); } else { //prefixing with '_mask_' to ensure no conflict with named //columns in the file schema schemaTypes.add(Types.optional(PrimitiveTypeName.BINARY).named("_mask_" + colNames.get(i))); } } } return new MessageType(schema.getName(), schemaTypes); }
/**
 * Filters a Parquet schema down to the fields requested by a Pig projection.
 *
 * @param schemaToFilter     the Parquet schema to be filtered
 * @param requestedPigSchema the Pig schema to filter it with
 * @param requiredFieldList  projected required fields
 * @return the resulting filtered schema
 * @throws RuntimeException if the schema cannot be filtered with the given
 *         Pig schema (wraps the underlying failure as its cause)
 */
public MessageType filter(MessageType schemaToFilter, Schema requestedPigSchema,
    RequiredFieldList requiredFieldList) {
  try {
    if (DEBUG) {
      LOG.debug("filtering schema:\n" + schemaToFilter
          + "\nwith requested pig schema:\n " + requestedPigSchema);
    }
    List<Type> filteredFields =
        columnAccess.filterTupleSchema(schemaToFilter, requestedPigSchema, requiredFieldList);
    if (DEBUG) {
      LOG.debug("schema:\n" + schemaToFilter + "\nfiltered to:\n" + filteredFields);
    }
    return new MessageType(schemaToFilter.getName(), filteredFields);
  } catch (RuntimeException e) {
    throw new RuntimeException(
        "can't filter " + schemaToFilter + " with " + requestedPigSchema, e);
  }
}
/**
 * Projects a Parquet message schema down to the named columns, returning the
 * projected Parquet schema types.
 *
 * @param schema   message type schema to search for column names
 * @param colNames list of column names
 * @param colTypes list of column types
 * @return a {@link MessageType} of the projected columns
 */
private static MessageType getSchemaByName(MessageType schema, List<String> colNames,
    List<TypeInfo> colTypes) {
  List<Type> projectedFields = getProjectedGroupFields(schema, colNames, colTypes);
  return Types.buildMessage()
      .addFields(projectedFields.toArray(new Type[projectedFields.size()]))
      .named(schema.getName());
}
/**
 * Creates a new instance.
 *
 * <p>Computes the column mappings for the descriptor, then builds the
 * materialization schema (one optional primitive field per mapped column)
 * and the converter over the mapped target properties. Unmapped columns
 * ({@code null} entries) are skipped.
 *
 * @param descriptor the target descriptor
 * @param schema the file schema
 * @param configuration the mapping configuration
 */
public DataModelMaterializer(
    DataModelDescriptor descriptor,
    MessageType schema,
    DataModelMapping configuration) {
  List<Type> fields = new ArrayList<>();
  List<PropertyDescriptor> properties = new ArrayList<>();
  for (Mapping mapping : computeMapping(descriptor, schema, configuration)) {
    if (mapping == null) {
      continue; // column has no mapping; excluded from materialization
    }
    fields.add(new PrimitiveType(
        Repetition.OPTIONAL,
        mapping.source.getType(),
        mapping.source.getPath()[0]));
    properties.add(mapping.target);
  }
  this.materializeSchema = new MessageType(schema.getName(), fields);
  this.root = new DataModelConverter(properties);
}
/**
 * Intersects the file schema with the requested fields, keeping only the
 * file-schema fields whose names appear in the request.
 *
 * <p>{@code Fields.UNKNOWN} is treated as {@code Fields.ALL}. The surviving
 * field names become {@code sourceFields} and their Parquet types form
 * {@code requestedSchema} (under the file schema's name).
 *
 * @param fileSchema      the Parquet schema of the file
 * @param requestedFields the fields requested by the query
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  Fields wanted = (requestedFields == Fields.UNKNOWN) ? Fields.ALL : requestedFields;
  Fields matchedFields = Fields.NONE;
  List<Type> matchedTypes = new ArrayList<Type>();
  for (int index = 0; index < fileSchema.getFieldCount(); index++) {
    Type fieldType = fileSchema.getType(index);
    Fields fieldName = new Fields(fieldType.getName());
    if (wanted.contains(fieldName)) {
      matchedFields = matchedFields.append(fieldName);
      matchedTypes.add(fieldType);
    }
  }
  this.sourceFields = matchedFields;
  this.requestedSchema = new MessageType(fileSchema.getName(), matchedTypes);
}
.collect(toList()); MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
.collect(toList()); MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
/**
 * Prints the file-level metadata of a Parquet file (creator, extra key/value
 * metadata, and the file schema) to the given writer.
 *
 * @param out  writer used to render the details
 * @param meta file metadata to display
 */
public static void showDetails(PrettyPrintWriter out, FileMetaData meta) {
  out.format("creator: %s%n", meta.getCreatedBy());

  Map<String, String> extra = meta.getKeyValueMetaData();
  if (extra != null) {
    // Iterate the local snapshot rather than re-invoking the getter: the
    // original called meta.getKeyValueMetaData() again inside the loop,
    // bypassing the null check performed on 'extra' above.
    for (Map.Entry<String, String> entry : extra.entrySet()) {
      out.print("extra: ");
      out.incrementTabLevel();
      out.format("%s = %s%n", entry.getKey(), entry.getValue());
      out.decrementTabLevel();
    }
  }

  out.println();
  out.format("file schema: %s%n", meta.getSchema().getName());
  out.rule('-');
  showDetails(out, meta.getSchema());
}