/**
 * Walks down from {@code groupColumnIO} through single-child groups and returns the
 * first group whose child count is not exactly one.
 *
 * <p>Each sole child is cast to {@link GroupColumnIO} without a type check, so a
 * {@link ClassCastException} is thrown if a single child is not a group.
 *
 * @param groupColumnIO the group to start descending from
 * @return the first group on the path whose child count differs from one
 */
public static GroupColumnIO getMapKeyValueColumn(GroupColumnIO groupColumnIO) {
    GroupColumnIO current = groupColumnIO;
    while (current.getChildrenCount() == 1) {
        // Exactly one child: step into it and keep looking.
        current = (GroupColumnIO) current.getChild(0);
    }
    return current;
}
/**
 * Finds a direct child of {@code groupColumnIO} by name.
 *
 * <p>Parquet column names are case-sensitive, whereas Hive converts all column names to
 * lowercase. The lookup therefore tries an exact-name match first and only then falls
 * back to scanning the children for a case-insensitive match.
 *
 * @param groupColumnIO the group whose children are searched
 * @param columnName the column name to look for
 * @return the exact match if there is one, otherwise the first child whose name equals
 *         {@code columnName} ignoring case, otherwise {@code null}
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName) {
    ColumnIO exactMatch = groupColumnIO.getChild(columnName);
    if (exactMatch != null) {
        return exactMatch;
    }

    // No exact match: fall back to a linear, case-insensitive scan in child order.
    for (int index = 0; index < groupColumnIO.getChildrenCount(); index++) {
        ColumnIO candidate = groupColumnIO.getChild(index);
        if (candidate.getName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }
    return null;
}
MapType mapType = (MapType) type; GroupColumnIO keyValueColumnIO = getMapKeyValueColumn(groupColumnIO); if (keyValueColumnIO.getChildrenCount() != 2) { return Optional.empty(); GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO; List<Type> types = type.getTypeParameters(); if (groupColumnIO.getChildrenCount() != 1) { return Optional.empty();
/**
 * Resolves the column that holds the elements of an array column.
 *
 * @param columnIO the column to start from
 * @return the element column: either the single child of the repeated group (3-level
 *         layout) or the column the descent stopped at (2-level layout and the
 *         {@code array} / {@code <parent>_tuple} naming cases)
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) {
    ColumnIO current = columnIO;

    // Follow first children until we hit a non-group column or a REPEATED one.
    while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
        current = ((GroupColumnIO) current).getChild(0);
    }

    /*
     * Backward-compatibility support for 2-level arrays, where the repeated field is
     * not a group and is therefore the element itself:
     *   optional group my_list (LIST) {
     *     repeated int32 element;
     *   }
     */
    if (!(current instanceof GroupColumnIO)) {
        return current;
    }

    /*
     * Standard 3-level structure, where the middle level is a repeated group wrapping
     * a single field:
     *   optional group my_list (LIST) {
     *     repeated group element {
     *       required binary str (UTF8);
     *     };
     *   }
     * The wrapper is only unwrapped when it has no original type, has exactly one
     * child, and is named neither "array" nor "<parent name>_tuple".
     */
    GroupColumnIO repeatedGroup = (GroupColumnIO) current;
    boolean wrapsSingleElement = current.getType().getOriginalType() == null
            && repeatedGroup.getChildrenCount() == 1
            && !current.getName().equals("array")
            && !current.getName().equals(current.getParent().getName() + "_tuple");
    if (wrapsSingleElement) {
        return repeatedGroup.getChild(0);
    }

    return current;
}
/**
 * Creates a {@link PrimitiveColumnIO} for {@code primitiveType} and registers it both
 * as a child of the group currently being built and in the list of leaves.
 *
 * @param primitiveType the primitive schema type being visited
 */
@Override
public void visit(PrimitiveType primitiveType) {
    // Both positions are taken before the new column is registered anywhere.
    int positionInParent = current.getChildrenCount();
    int positionInLeaves = leaves.size();

    PrimitiveColumnIO leaf =
            new PrimitiveColumnIO(primitiveType, current, positionInParent, positionInLeaves);
    current.add(leaf);
    leaves.add(leaf);
}
/**
 * Creates a {@link GroupColumnIO} for {@code groupType}, appends it to the group
 * currently being built, and then visits the new group's children.
 *
 * <p>Repeated and non-repeated groups are constructed in exactly the same way, so no
 * branch on the group's repetition is needed.
 *
 * @param groupType the group schema type being visited
 */
@Override
public void visit(GroupType groupType) {
    // The index passed in is the parent's child count before this group is added.
    GroupColumnIO newIO = new GroupColumnIO(groupType, current, current.getChildrenCount());
    current.add(newIO);
    visitChildren(newIO, groupType);
}
/**
 * Begins a new message: makes the enclosing {@code MessageColumnIO} the current column,
 * sets {@code r[0]} to 0, and resets the level-0 written-fields tracker with the number
 * of children of the root.
 */
@Override
public void startMessage() {
    if (DEBUG) {
        log("< MESSAGE START >");
    }

    currentColumnIO = MessageColumnIO.this;
    r[0] = 0;

    int rootFieldCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
    fieldsWritten[0].reset(rootFieldCount);

    if (DEBUG) {
        printState();
    }
}
/**
 * Enters a group: moves one level deeper, copies the previous level's {@code r} entry
 * into the new level, and resets the new level's written-fields tracker with the
 * child count of the current column.
 */
@Override
public void startGroup() {
    if (DEBUG) {
        log("startGroup()");
    }

    currentLevel++;
    // The new level starts from the value held by the level just above it.
    r[currentLevel] = r[currentLevel - 1];

    GroupColumnIO enteredGroup = (GroupColumnIO) currentColumnIO;
    int groupFieldCount = enteredGroup.getChildrenCount();
    fieldsWritten[currentLevel].reset(groupFieldCount);

    if (DEBUG) {
        printState();
    }
}
/**
 * Ends the current message by calling {@code writeNullForMissingFields} with the index
 * of the last child of the current column.
 */
@Override
public void endMessage() {
    GroupColumnIO messageGroup = (GroupColumnIO) currentColumnIO;
    int lastFieldIndex = messageGroup.getChildrenCount() - 1;
    writeNullForMissingFields(lastFieldIndex);

    if (DEBUG) {
        log("< MESSAGE END >");
        printState();
    }
}
/**
 * Leaves the current group: calls {@code writeNullForMissingFields} with the index of
 * the group's last child, moves one level back up, and then calls
 * {@code setRepetitionLevel()}.
 */
@Override
public void endGroup() {
    if (DEBUG) {
        log("endGroup()");
    }

    GroupColumnIO closingGroup = (GroupColumnIO) currentColumnIO;
    writeNullForMissingFields(closingGroup.getChildrenCount() - 1);

    currentLevel--;
    setRepetitionLevel();

    if (DEBUG) {
        printState();
    }
}
/**
 * Writes a null for every child of the current column that is not marked as written
 * at the current level.
 *
 * <p>The null is written with the repetition value stored for the current level and the
 * definition level of the current column.
 *
 * @throws ParquetEncodingException wrapping any {@link RuntimeException} raised while
 *         writing the null for a field
 */
private void writeNullForMissingFieldsAtCurrentLevel() {
    GroupColumnIO group = (GroupColumnIO) currentColumnIO;
    int fieldCount = group.getChildrenCount();

    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
        if (fieldsWritten[currentLevel].isWritten(fieldIndex)) {
            continue;
        }
        try {
            ColumnIO missingField = group.getChild(fieldIndex);
            // Read inside the try so that a failure here is wrapped like any other.
            int definitionLevel = currentColumnIO.getDefinitionLevel();
            if (DEBUG) {
                log(Arrays.toString(missingField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + definitionLevel + ")");
            }
            writeNull(missingField, r[currentLevel], definitionLevel);
        } catch (RuntimeException e) {
            throw new ParquetEncodingException("error while writing nulls for fields of indexes " + fieldIndex + " . current index: " + fieldsWritten[currentLevel], e);
        }
    }
}
/**
 * Writes a null for {@code undefinedField}: directly when its type is primitive,
 * otherwise recursively for each of its children in order.
 *
 * @param undefinedField the column to write a null for
 * @param r the first value forwarded to the column writer's {@code writeNull}
 * @param d the second value forwarded to the column writer's {@code writeNull}
 */
private void writeNull(ColumnIO undefinedField, int r, int d) {
    if (!undefinedField.getType().isPrimitive()) {
        // Not primitive: treat it as a group and push the null down to every child.
        GroupColumnIO group = (GroupColumnIO) undefinedField;
        for (int index = 0, count = group.getChildrenCount(); index < count; index++) {
            writeNull(group.getChild(index), r, d);
        }
        return;
    }

    // Primitive: the column's id selects its writer.
    PrimitiveColumnIO leaf = (PrimitiveColumnIO) undefinedField;
    columnWriter[leaf.getId()].writeNull(r, d);
}
/**
 * Writes a null for {@code undefinedField}: directly when its type is primitive,
 * otherwise recursively for each of its children in order.
 *
 * @param undefinedField the column to write a null for
 * @param r the first value forwarded to the column writer's {@code writeNull}
 * @param d the second value forwarded to the column writer's {@code writeNull}
 */
private void writeNull(ColumnIO undefinedField, int r, int d) {
    if (!undefinedField.getType().isPrimitive()) {
        // Not primitive: treat it as a group and push the null down to every child.
        GroupColumnIO group = (GroupColumnIO) undefinedField;
        for (int index = 0, count = group.getChildrenCount(); index < count; index++) {
            writeNull(group.getChild(index), r, d);
        }
        return;
    }

    // Primitive: the column's id selects its writer.
    PrimitiveColumnIO leaf = (PrimitiveColumnIO) undefinedField;
    columnWriter[leaf.getId()].writeNull(r, d);
}