Refine search
/**
 * Builds the root Parquet schema for a Hive table from parallel lists of
 * column names and Hive type infos.
 *
 * <p>The root message is always named {@code "hive_schema"}.
 *
 * @param columnNames Hive column names, positionally aligned with {@code columnTypes}
 * @param columnTypes Hive type infos, one per column
 * @return the Parquet {@link MessageType} describing the Hive table
 */
public static MessageType convert(final List<String> columnNames, final List<TypeInfo> columnTypes) {
  // Field conversion is delegated; only the fixed root name is decided here.
  return new MessageType("hive_schema", convertTypes(columnNames, columnTypes));
}
public static parquet.schema.Type getParquetTypeByName(String columnName, MessageType messageType) { if (messageType.containsField(columnName)) { return messageType.getType(columnName); } // parquet is case-sensitive, but hive is not. all hive columns get converted to lowercase // check for direct match above but if no match found, try case-insensitive match for (parquet.schema.Type type : messageType.getFields()) { if (type.getName().equalsIgnoreCase(columnName)) { return type; } } return null; }
.collect(toList()); MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
/**
 * Resolves the positional index of a field in the file schema by name.
 *
 * <p>The name is first lower-cased (Hive stores column names in lower case);
 * if that exact lookup fails, the schema fields are scanned case-insensitively.
 *
 * @param fileSchema the Parquet file schema
 * @param name       the column name to resolve
 * @return the field's index, or {@code -1} when no field matches
 */
public static int getFieldIndex(MessageType fileSchema, String name) {
  try {
    // Hive convention: column names are lower-cased.
    return fileSchema.getFieldIndex(name.toLowerCase(Locale.ENGLISH));
  } catch (InvalidRecordException e) {
    // Exact lookup failed; retry with a case-insensitive scan.
    for (parquet.schema.Type field : fileSchema.getFields()) {
      if (field.getName().equalsIgnoreCase(name)) {
        return fileSchema.getFieldIndex(field.getName());
      }
    }
    return -1;
  }
}
/**
 * Resolves the Parquet type backing a Hive column, either by name or by
 * ordinal position.
 *
 * @param column                the Hive column handle to resolve
 * @param messageType           the Parquet file schema
 * @param useParquetColumnNames when {@code true}, match by (case-insensitive)
 *                              name; otherwise match by Hive column index
 * @return the matching Parquet type, or {@code null} when the column is not
 *         present in the schema
 */
public static parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) {
  if (useParquetColumnNames) {
    return getParquetTypeByName(column.getName(), messageType);
  }
  // Positional match: an out-of-range index means the column is absent.
  int ordinal = column.getHiveColumnIndex();
  return ordinal < messageType.getFieldCount() ? messageType.getType(ordinal) : null;
}
}
public List<ColumnDescriptor> getColumns() { List<String[]> paths = this.getPaths(0); List<ColumnDescriptor> columns = new ArrayList<ColumnDescriptor>(paths.size()); for (String[] path : paths) { // TODO: optimize this columns.add(new ColumnDescriptor(path, getType(path).asPrimitiveType().getPrimitiveTypeName(), getMaxRepetitionLevel(path), getMaxDefinitionLevel(path))); } return columns; }
/**
 * Computes the intersection between the columns present in a Parquet file
 * and the fields requested by the Cascading tap, preserving file-schema
 * order. The result is exposed as {@code sourceFields} (the Cascading
 * fields that matched) and {@code requestedSchema} (the pruned Parquet
 * schema).
 *
 * @param fileSchema      the schema read from the Parquet file
 * @param requestedFields the fields the caller asked for; {@code UNKNOWN}
 *                        is treated as "all fields"
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  // UNKNOWN means the selection was not narrowed: keep everything.
  if (requestedFields == Fields.UNKNOWN) {
    requestedFields = Fields.ALL;
  }
  Fields matchedFields = Fields.NONE;
  List<Type> matchedTypes = new ArrayList<Type>();
  int fieldCount = fileSchema.getFieldCount();
  for (int index = 0; index < fieldCount; index++) {
    Type fieldType = fileSchema.getType(index);
    Fields fieldName = new Fields(fieldType.getName());
    if (requestedFields.contains(fieldName)) {
      matchedFields = matchedFields.append(fieldName);
      matchedTypes.add(fieldType);
    }
  }
  this.sourceFields = matchedFields;
  this.requestedSchema = new MessageType(fileSchema.getName(), matchedTypes);
}
/**
 * Writes one Cascading tuple as a single Parquet record.
 *
 * <p>Fields whose value is {@code null} are skipped, so a record with missing
 * values produces a partial Parquet record; a {@code null} tuple produces an
 * empty record. Only primitive Parquet fields are supported.
 *
 * @param record the tuple to write; may be {@code null}
 * @throws UnsupportedOperationException if the schema contains a non-primitive
 *                                       (group) field with a non-null value
 */
@Override
public void write(TupleEntry record) {
  recordConsumer.startMessage();
  // Hoisted out of the loop: a null record can never contribute a field, so
  // there is no point probing every field name on each iteration — the
  // original checked `record == null` once per field.
  if (record != null) {
    final List<Type> fields = rootSchema.getFields();
    for (int i = 0; i < fields.size(); i++) {
      Type field = fields.get(i);
      if (record.getObject(field.getName()) == null) {
        // Absent value: omit the field entirely (Parquet optional semantics).
        continue;
      }
      recordConsumer.startField(field.getName(), i);
      if (field.isPrimitive()) {
        writePrimitive(record, field.asPrimitiveType());
      } else {
        throw new UnsupportedOperationException("Complex type not implemented");
      }
      recordConsumer.endField(field.getName(), i);
    }
  }
  recordConsumer.endMessage();
}
.toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath,
/**
 * Returns the equivalent Hive table schema read from a Parquet file.
 *
 * @param messageType the Parquet schema to translate
 * @return ordered map of Hive-compatible column name to Hive type string
 * @throws IOException if a field cannot be converted
 */
public static Map<String, String> convertParquetSchemaToHiveSchema(MessageType messageType) throws IOException {
  Map<String, String> hiveSchema = Maps.newLinkedHashMap();
  for (Type parquetField : messageType.getFields()) {
    StringBuilder hiveType = new StringBuilder();
    if (parquetField.isRepetition(Type.Repetition.REPEATED)) {
      // REPEATED Parquet fields surface as Hive arrays.
      hiveType.append(createHiveArray(parquetField, ""));
    } else {
      hiveType.append(convertField(parquetField));
    }
    hiveSchema.put(hiveCompatibleFieldName(parquetField.getName(), false), hiveType.toString());
  }
  return hiveSchema;
}
/**
 * Creates one {@code ParquetColumnReader} per column of the requested schema
 * and registers it in {@code columnReadersMap}, keyed by its descriptor.
 */
private void initializeColumnReaders() {
  for (ColumnDescriptor descriptor : requestedSchema.getColumns()) {
    columnReadersMap.put(descriptor, ParquetColumnReader.createReader(descriptor));
  }
}
}
public MessageType getConvertedMessageType() { // the root should be a GroupType if (currentType == null) return new MessageType(currentName, new ArrayList<Type>()); GroupType rootType = currentType.asGroupType(); return new MessageType(currentName, rootType.getFields()); }
/**
 * Indexes every column of the schema by its {@link ColumnPath} and records
 * the declared {@link OriginalType} where one is present, for later
 * compatibility checks against filter predicates.
 *
 * @param schema the Parquet schema whose columns are indexed
 */
private SchemaCompatibilityValidator(MessageType schema) {
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    ColumnPath path = ColumnPath.get(descriptor.getPath());
    columnsAccordingToSchema.put(path, descriptor);
    OriginalType originalType = schema.getType(descriptor.getPath()).getOriginalType();
    if (originalType != null) {
      originalTypes.put(path, originalType);
    }
  }
}
private static List<Mapping> computeMappingByPosition( DataModelDescriptor target, MessageType source) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format( "Mapping columns by their position: model={0}", //$NON-NLS-1$ target.getDataModelClass().getName())); } List<ColumnDescriptor> sources = source.getColumns(); List<? extends PropertyDescriptor> targets = target.getPropertyDescriptors(); List<Mapping> mappings = new ArrayList<>(); int limit = Math.min(sources.size(), targets.size()); for (int i = 0; i < limit; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); PropertyDescriptor t = targets.get(i); mappings.add(new Mapping(s, sType, t)); } for (int i = limit, n = sources.size(); i < n; i++) { ColumnDescriptor s = sources.get(i); Type sType = source.getType(s.getPath()); mappings.add(new Mapping(s, sType, null)); } for (int i = limit, n = targets.size(); i < n; i++) { mappings.add(new Mapping(null, null, targets.get(i))); } return mappings; }
/**
 * Merges this message type with another, keeping this schema's name.
 *
 * @param toMerge the schema to union with this one
 * @param strict  whether incompatible field definitions should fail the merge
 * @return a new {@link MessageType} containing the merged fields
 */
public MessageType union(MessageType toMerge, boolean strict) {
  // Field-level conflict handling is delegated to mergeFields.
  return new MessageType(getName(), mergeFields(toMerge, strict));
}
/**
 * {@inheritDoc}
 */
@Override
public void writeToStringBuilder(StringBuilder sb, String indent) {
  // Header: message <name> [(<originalType>)] {
  sb.append("message ").append(getName());
  if (getOriginalType() != null) {
    sb.append(" (").append(getOriginalType()).append(")");
  }
  sb.append(" {\n");
  // Members are indented one level under the root message.
  membersDisplayString(sb, "  ");
  sb.append("}\n");
}
/**
 * Prepares this record reader for reading: records both schemas, builds the
 * record converter via the {@code ReadSupport}, opens a
 * {@code ParquetFileReader} over the requested columns, and totals the row
 * counts of all row groups.
 *
 * @param requestedSchema     the projection to read
 * @param fileSchema          the full schema of the file
 * @param extraMetadata       file key/value metadata
 * @param readSupportMetadata metadata produced by the read support's init
 * @param file                the Parquet file being read
 * @param blocks              the row groups to read
 * @param configuration       the Hadoop configuration
 * @throws IOException if the file reader cannot be opened
 */
public void initialize(MessageType requestedSchema, MessageType fileSchema,
    Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  this.requestedSchema = requestedSchema;
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, extraMetadata, fileSchema,
      new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));
  // Only the projected columns are handed to the file reader.
  reader = new ParquetFileReader(configuration, file, blocks, requestedSchema.getColumns());
  // Sum row counts across all row groups for progress reporting.
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
MessageType parsed = MessageTypeParser.parseMessageType(example); MessageType manuallyMade = new MessageType("Document", new PrimitiveType(REQUIRED, INT64, "DocId"), new GroupType(OPTIONAL, "Links", assertEquals(manuallyMade, parsed); MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString()); new MessageType("m", new GroupType(REQUIRED, "a", new PrimitiveType(REQUIRED, BINARY, "b")), parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
/**
 * Prints the details of every top-level field of the given Parquet schema.
 *
 * @param out  the destination pretty-print writer
 * @param type the root message type whose fields are dumped
 */
public static void showDetails(PrettyPrintWriter out, MessageType type) {
  // Shared, mutable container tracking the current column path while recursing.
  List<String> containingPath = new ArrayList<String>();
  for (Type fieldType : type.getFields()) {
    showDetails(out, fieldType, 0, type, containingPath);
  }
}