Refine search
/**
 * Builds the root Parquet schema for a Hive table from parallel lists of
 * column names and Hive type infos.
 *
 * <p>The root message is always named {@code "hive_schema"}.
 *
 * @param columnNames Hive column names, positionally aligned with {@code columnTypes}
 * @param columnTypes Hive type infos, one per column
 * @return the Parquet {@link MessageType} describing the Hive table
 */
public static MessageType convert(final List<String> columnNames, final List<TypeInfo> columnTypes) {
  // Field conversion is delegated; only the fixed root name is decided here.
  return new MessageType("hive_schema", convertTypes(columnNames, columnTypes));
}
public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (parquet.schema.Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; }
.collect(toList()); MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
/**
 * Resolves the positional index of a field in the file schema by name.
 *
 * <p>The name is first lower-cased (Hive stores column names in lower case);
 * if that exact lookup fails, the schema fields are scanned case-insensitively.
 *
 * @param fileSchema the Parquet file schema
 * @param name       the column name to resolve
 * @return the field's index, or {@code -1} when no field matches
 */
public static int getFieldIndex(MessageType fileSchema, String name) {
  try {
    // Hive convention: column names are lower-cased.
    return fileSchema.getFieldIndex(name.toLowerCase(Locale.ENGLISH));
  } catch (InvalidRecordException e) {
    // Exact lookup failed; retry with a case-insensitive scan.
    for (parquet.schema.Type field : fileSchema.getFields()) {
      if (field.getName().equalsIgnoreCase(name)) {
        return fileSchema.getFieldIndex(field.getName());
      }
    }
    return -1;
  }
}
/**
 * Resolves the Parquet type backing a Hive column, either by name or by
 * ordinal position.
 *
 * @param column                the Hive column handle to resolve
 * @param messageType           the Parquet file schema
 * @param useParquetColumnNames when {@code true}, match by (case-insensitive)
 *                              name; otherwise match by Hive column index
 * @return the matching Parquet type, or {@code null} when the column is not
 *         present in the schema
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
  if (useParquetColumnNames) {
    return getParquetTypeByName(column.getName(), messageType);
  }
  // Positional match: an out-of-range index means the column is absent.
  int ordinal = column.getHiveColumnIndex();
  return ordinal < messageType.getFieldCount() ? messageType.getType(ordinal) : null;
}
}
public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this columns.add(new ColumnDescriptor(path, getType(path).asPrimitiveType().getPrimitiveTypeName(), getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
/**
 * Computes the intersection between the columns present in a Parquet file
 * and the fields requested by the Cascading tap, preserving file-schema
 * order. The result is exposed as {@code sourceFields} (the Cascading
 * fields that matched) and {@code requestedSchema} (the pruned Parquet
 * schema).
 *
 * @param fileSchema      the schema read from the Parquet file
 * @param requestedFields the fields the caller asked for; {@code UNKNOWN}
 *                        is treated as "all fields"
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  // UNKNOWN means the selection was not narrowed: keep everything.
  if (requestedFields == Fields.UNKNOWN) {
    requestedFields = Fields.ALL;
  }
  Fields matchedFields = Fields.NONE;
  List<Type> matchedTypes = new ArrayList<Type>();
  int fieldCount = fileSchema.getFieldCount();
  for (int index = 0; index < fieldCount; index++) {
    Type fieldType = fileSchema.getType(index);
    Fields fieldName = new Fields(fieldType.getName());
    if (requestedFields.contains(fieldName)) {
      matchedFields = matchedFields.append(fieldName);
      matchedTypes.add(fieldType);
    }
  }
  this.sourceFields = matchedFields;
  this.requestedSchema = new MessageType(fileSchema.getName(), matchedTypes);
}
/**
 * Writes one Cascading tuple as a single Parquet record.
 *
 * <p>Fields whose value is {@code null} are skipped, so a record with missing
 * values produces a partial Parquet record; a {@code null} tuple produces an
 * empty record. Only primitive Parquet fields are supported.
 *
 * @param record the tuple to write; may be {@code null}
 * @throws UnsupportedOperationException if the schema contains a non-primitive
 *                                       (group) field with a non-null value
 */
@Override
public void write(TupleEntry record) {
  recordConsumer.startMessage();
  // Hoisted out of the loop: a null record can never contribute a field, so
  // there is no point probing every field name on each iteration — the
  // original checked `record == null` once per field.
  if (record != null) {
    final List<Type> fields = rootSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
      Type field = fields.get(i);
      if (record.getObject(field.getName()) == null) {
        // Absent value: omit the field entirely (Parquet optional semantics).
        continue;
      }
      recordConsumer.startField(field.getName(), i);
      if (field.isPrimitive()) {
        writePrimitive(record, field.asPrimitiveType());
      } else {
        throw new UnsupportedOperationException("Complex type not implemented");
      }
      recordConsumer.endField(field.getName(), i);
    }
  }
  recordConsumer.endMessage();
}
.toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath,
/**
 * Returns the equivalent Hive table schema read from a Parquet file.
 *
 * @param messageType the Parquet schema to translate
 * @return ordered map of Hive-compatible column name to Hive type string
 * @throws IOException if a field cannot be converted
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
  Map<String, String> hiveSchema = Maps.newLinkedHashMap();
  for (Type parquetField : messageType.getFields()) {
    StringBuilder hiveType = new StringBuilder();
    if (parquetField.isRepetition(Type.Repetition.REPEATED)) {
      // REPEATED Parquet fields surface as Hive arrays.
      hiveType.append(createHiveArray(parquetField, ""));
    } else {
      hiveType.append(convertField(parquetField));
    }
    hiveSchema.put(hiveCompatibleFieldName(parquetField.getName(), false), hiveType.toString());
  }
  return hiveSchema;
}
/**
 * Creates one {@code ParquetColumnReader} per column of the requested schema
 * and registers it in {@code columnReadersMap}, keyed by its descriptor.
 */
private void initializeColumnReaders() {
  for (ColumnDescriptor descriptor : requestedSchema.getColumns()) {
    columnReadersMap.put(descriptor, ParquetColumnReader.createReader(descriptor));
  }
}
}
public MessageType getConvertedMessageType() { // the root should be a GroupType if (currentType == null) return new MessageType(currentName, new ArrayList<Type>()); GroupType rootType = currentType.asGroupType(); return new MessageType(currentName, rootType.getFields()); }
/**
 * Indexes every column of the schema by its {@link ColumnPath} and records
 * the declared {@link OriginalType} where one is present, for later
 * compatibility checks against filter predicates.
 *
 * @param schema the Parquet schema whose columns are indexed
 */
private SchemaCompatibilityValidator(MessageType schema) {
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    ColumnPath path = ColumnPath.get(descriptor.getPath());
    columnsAccordingToSchema.put(path, descriptor);
    OriginalType originalType = schema.getType(descriptor.getPath()).getOriginalType();
    if (originalType != null) {
      originalTypes.put(path, originalType);
    }
  }
}
private static List<Mapping> computeMappingByPosition( DataModelDescriptor target, MessageType source) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "Mapping columns by their position: model={0}", //$NON-NLS-1$ target.getDataModelClass().getName())); } List<ColumnDescriptor> sources = source.getColumns(); List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors(); List<Mapping> mappings = new ArrayList<>(); int limit = Math.min(sources.size(), targets.size()); for (int i = 0; i < limit; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); PropertyDescriptor t = targets.get(i); mappings.add(new Mapping(s, sType, t)); } for (int i = limit, n = sources.size(); i < n; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); mappings.add(new Mapping(s, sType, null)); } for (int i = limit, n = targets.size(); i < n; i++) { mappings.add(new Mapping(null, null, targets.get(i))); } return mappings; }
/**
 * Merges this message type with another, keeping this schema's name.
 *
 * @param toMerge the schema to union with this one
 * @param strict  whether incompatible field definitions should fail the merge
 * @return a new {@link MessageType} containing the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
  // Field-level conflict handling is delegated to mergeFields.
  return new MessageType(getName(), mergeFields(toMerge, strict));
}
/**
 * {@inheritDoc}
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  // Header: message <name> [(<originalType>)] {
  sb.append("message ").append(getName());
  if (getOriginalType() != null) {
    sb.append(" (").append(getOriginalType()).append(")");
  }
  sb.append(" {\n");
  // Members are indented one level under the root message.
  membersDisplayString(sb, "  ");
  sb.append("}\n");
}
/**
 * Prepares this record reader for reading: records both schemas, builds the
 * record converter via the {@code ReadSupport}, opens a
 * {@code ParquetFileReader} over the requested columns, and totals the row
 * counts of all row groups.
 *
 * @param requestedSchema     the projection to read
 * @param fileSchema          the full schema of the file
 * @param extraMetadata       file key/value metadata
 * @param readSupportMetadata metadata produced by the read support's init
 * @param file                the Parquet file being read
 * @param blocks              the row groups to read
 * @param configuration       the Hadoop configuration
 * @throws IOException if the file reader cannot be opened
 */
public void initialize(MessageType requestedSchema, MessageType fileSchema,
    Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  this.requestedSchema = requestedSchema;
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, extraMetadata, fileSchema,
      new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
  // Only the projected columns are handed to the file reader.
  reader = new ParquetFileReader(configuration, file, blocks, requestedSchema.getColumns());
  // Sum row counts across all row groups for progress reporting.
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
MessageType parsed = MessageTypeParser.parseMessageType(example); MessageType manuallyMade = new MessageType("Document", new PrimitiveType(REQUIRED, INT64, "DocId"), new GroupType(OPTIONAL, "Links", assertEquals(manuallyMade, parsed); MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString()); new MessageType("m", new GroupType(REQUIRED, "a", new PrimitiveType(REQUIRED, BINARY, "b")), parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
/**
 * Prints the details of every top-level field of the given Parquet schema.
 *
 * @param out  the destination pretty-print writer
 * @param type the root message type whose fields are dumped
 */
public static void showDetails(PrettyPrintWriter out, MessageType type) {
  // Shared, mutable container tracking the current column path while recursing.
  List<String> containingPath = new ArrayList<String>();
  for (Type fieldType : type.getFields()) {
    showDetails(out, fieldType, 0, type, containingPath);
  }
}