public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (parquet.schema.Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; }
/**
 * Resolves the index of the named field within the file schema.
 *
 * <p>First attempts a lookup with the English-locale lower-cased name; if the
 * schema rejects it, falls back to a case-insensitive scan over all fields.
 *
 * @param fileSchema the Parquet message schema to search
 * @param name the (possibly differently-cased) field name
 * @return the field's index, or {@code -1} when no field matches
 */
public static int getFieldIndex(MessageType fileSchema, String name) {
    String lowerCased = name.toLowerCase(Locale.ENGLISH);
    try {
        return fileSchema.getFieldIndex(lowerCased);
    } catch (InvalidRecordException e) {
        // Lower-cased lookup failed: scan every field, ignoring case.
        for (parquet.schema.Type field : fileSchema.getFields()) {
            if (field.getName().equalsIgnoreCase(name)) {
                return fileSchema.getFieldIndex(field.getName());
            }
        }
        // Sentinel for "not present in this file's schema".
        return -1;
    }
}
// Resolve the physical Parquet column name: directly by name when
// useParquetColumnNames is set, otherwise positionally via the Hive column
// index into the file schema's field list.
// NOTE(review): assumes column.getHiveColumnIndex() is a valid index into
// fileSchema.getFields() — confirm the caller guarantees this.
String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName(); fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
/**
 * Prints the details of every top-level field of the given message type.
 *
 * @param out the pretty-print writer to emit the details to
 * @param type the Parquet message schema whose fields are printed
 */
public static void showDetails(PrettyPrintWriter out, MessageType type) {
    // Shared container path, reused across all top-level fields.
    List<String> containerPath = new ArrayList<String>();
    for (Type field : type.getFields()) {
        showDetails(out, field, 0, type, containerPath);
    }
}
/**
 * Converts a Parquet schema into a Pig schema by converting its
 * top-level fields.
 *
 * @param parquetSchema the Parquet schema to convert to a Pig schema
 * @return the resulting Pig schema
 */
public Schema convert(MessageType parquetSchema) {
    return convertFields(parquetSchema.getFields());
}
/**
 * Converts a Parquet schema to a Tajo schema by converting its
 * top-level fields.
 *
 * @param parquetSchema the Parquet schema to convert
 * @return the resulting Tajo schema
 */
public Schema convert(MessageType parquetSchema) {
    return convertFields(parquetSchema.getFields());
}
/**
 * Builds the Hive table schema equivalent to a Parquet schema.
 *
 * @param messageType the Parquet schema to translate
 * @return an insertion-ordered map from Hive-compatible column name to the
 *         Hive type string for that column
 * @throws IOException if a field cannot be converted
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
    Map<String, String> schema = Maps.newLinkedHashMap();
    for (Type field : messageType.getFields()) {
        StringBuilder hiveType = new StringBuilder();
        // REPEATED fields become Hive arrays; everything else converts directly.
        if (field.isRepetition(Type.Repetition.REPEATED)) {
            hiveType.append(createHiveArray(field, ""));
        } else {
            hiveType.append(convertField(field));
        }
        schema.put(hiveCompatibleFieldName(field.getName(), false), hiveType.toString());
    }
    return schema;
}
/**
 * Looks up a field in the given Parquet schema by column name, exactly first
 * and then case-insensitively (Parquet is case-sensitive; Hive lower-cases
 * all column names).
 *
 * @param columnName the column name to resolve
 * @param messageType the Parquet message schema to search
 * @return the matching field type, or {@code null} when no field matches
 */
private static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) {
    if (messageType.containsField(columnName)) {
        return messageType.getType(columnName);
    }
    // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase
    // check for direct match above but if no match found, try case-insensitive match
    for (Type type : messageType.getFields()) {
        if (type.getName().equalsIgnoreCase(columnName)) {
            return type;
        }
    }
    return null;
}
}
// Cache the schema fields and field count from the first file's metadata.
// NOTE(review): assumes fileMetaDataList is non-empty and that all listed
// files share the same schema — confirm upstream guarantees both.
this.fields = this.fileMetaDataList.get(0).getFileMetaData().getSchema().getFields();
this.columnCount = this.fileMetaDataList.get(0).getFileMetaData().getSchema().getFieldCount();
/**
 * Writes one tuple as a Parquet record.
 *
 * <p>A {@code null} tuple still produces a (field-less) record: the message
 * is always started and ended. Fields whose value is absent from the tuple
 * are simply omitted. Only primitive Parquet fields are supported.
 *
 * @param record the tuple to write; may be {@code null}
 * @throws UnsupportedOperationException if the schema contains a non-primitive field
 */
@Override
public void write(TupleEntry record) {
    recordConsumer.startMessage();
    // Hoisted out of the loop: the original re-checked "record == null" on
    // every iteration even though it is loop-invariant.
    if (record != null) {
        final List<Type> fields = rootSchema.getFields();
        for (int i = 0; i < fields.size(); i++) {
            Type field = fields.get(i);
            // Absent values are skipped rather than written.
            if (record.getObject(field.getName()) == null) {
                continue;
            }
            recordConsumer.startField(field.getName(), i);
            if (field.isPrimitive()) {
                writePrimitive(record, field.asPrimitiveType());
            } else {
                throw new UnsupportedOperationException("Complex type not implemented");
            }
            recordConsumer.endField(field.getName(), i);
        }
    }
    recordConsumer.endMessage();
}