/**
 * Walks down from the given group for as long as the current group has exactly one child,
 * and returns the first group reached whose child count is not one.
 *
 * <p>Each single child is cast to {@link GroupColumnIO}; if an only child is not a group
 * the cast fails with a {@link ClassCastException}.
 *
 * @param groupColumnIO the group to start descending from
 * @return the first group on the way down whose number of children differs from one
 */
public static GroupColumnIO getMapKeyValueColumn(GroupColumnIO groupColumnIO)
{
    GroupColumnIO cursor = groupColumnIO;
    while (true) {
        if (cursor.getChildrenCount() != 1) {
            return cursor;
        }
        // Exactly one child: step into it and look again.
        cursor = (GroupColumnIO) cursor.getChild(0);
    }
}
/**
 * Resolves the column that holds the elements of an array column.
 *
 * <p>First descends through first children until reaching a column that is either not a
 * group or is a repeated group. The result then depends on the shape found there.
 *
 * @param columnIO the column to start from
 * @return the element column, or the repeated column itself for the legacy layouts
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO)
{
    ColumnIO node = columnIO;
    while (node instanceof GroupColumnIO && !node.getType().isRepetition(REPEATED)) {
        node = ((GroupColumnIO) node).getChild(0);
    }

    /* Backward-compatibility support for 2-level arrays where a repeated field is not a group:
     *   optional group my_list (LIST) {
     *     repeated int32 element;
     *   }
     */
    if (!(node instanceof GroupColumnIO)) {
        return node;
    }

    GroupColumnIO repeatedGroup = (GroupColumnIO) node;
    if (repeatedGroup.getType().getOriginalType() != null) {
        return node;
    }
    if (repeatedGroup.getChildrenCount() != 1) {
        return node;
    }
    // Repeated groups named "array" or "<parent>_tuple" are returned as the element itself.
    if (node.getName().equals("array")) {
        return node;
    }
    if (node.getName().equals(node.getParent().getName() + "_tuple")) {
        return node;
    }

    /* Standard 3-level structure with a middle-level repeated group holding a single field:
     *   optional group my_list (LIST) {
     *     repeated group element {
     *       required binary str (UTF8);
     *     };
     *   }
     */
    return repeatedGroup.getChild(0);
}
/**
 * Builds the protocol used to write a list.
 *
 * <p>The list content is the first child of {@code columnIO}. The protocol for the content
 * is obtained from {@code getProtocol} with a callback that counts completed elements and,
 * once the count equals {@code size}, makes this list protocol current again and resets
 * the counter.
 *
 * @param columnIO the group column of the list; its first child is used as the content
 * @param values the Thrift field describing the list values
 * @param returnClause events passed to the superclass constructor
 */
public ListWriteProtocol(GroupColumnIO columnIO, ThriftField values, Events returnClause) {
  super(returnClause);
  this.listContent = columnIO.getChild(0);

  final Events elementEvents = new Events() {
    // Number of elements finished since the counter was last reset.
    int finishedElements = 0;

    @Override
    public void start() {
      // nothing to do when an element starts
    }

    @Override
    public void end() {
      finishedElements += 1;
      if (finishedElements != size) {
        return;
      }
      // All expected elements were consumed: hand control back to the list protocol.
      currentProtocol = ListWriteProtocol.this;
      finishedElements = 0;
    }
  };

  this.contentProtocol = getProtocol(values, listContent, elementEvents);
}
/**
 * Creates the column IO node for a group type, attaches it to the current parent and then
 * visits the group's children under the new node.
 *
 * <p>The node is built the same way whatever the group's repetition is, so no branching on
 * the repetition is needed.
 *
 * @param groupType the group type being visited
 */
@Override
public void visit(GroupType groupType) {
  // The new node's index is the number of children the parent has before it is added.
  final GroupColumnIO newIO = new GroupColumnIO(groupType, current, current.getChildrenCount());
  current.add(newIO);
  visitChildren(newIO, groupType);
}
/**
 * Enters a nested group: moves one level deeper, carries the previous level's {@code r}
 * value over to the new level and resets that level's written-fields tracker to the number
 * of children of the current group column.
 */
@Override
public void startGroup() {
  if (DEBUG) {
    log("startGroup()");
  }

  final int level = ++currentLevel;
  // The new level starts with the value held by its enclosing level.
  r[level] = r[level - 1];

  final int childCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
  fieldsWritten[level].reset(childCount);

  if (DEBUG) {
    printState();
  }
}
/**
 * Returns the type of this column IO narrowed to {@link MessageType}.
 *
 * @return the value of {@code super.getType()} cast to {@code MessageType}
 */
@Override public MessageType getType() { return (MessageType)super.getType(); } }
/**
 * Creates the column IO node for a primitive type, attaches it to the current parent and
 * records it in the list of leaves.
 *
 * @param primitiveType the primitive type being visited
 */
@Override
public void visit(PrimitiveType primitiveType) {
  // Both positions are taken before the new node is added anywhere.
  final int positionInParent = current.getChildrenCount();
  final int positionInLeaves = leaves.size();

  final PrimitiveColumnIO leaf =
      new PrimitiveColumnIO(primitiveType, current, positionInParent, positionInLeaves);
  current.add(leaf);
  leaves.add(leaf);
}
/**
 * Finds the column at repetition level {@code r}, starting from this column and walking up
 * through its parents.
 *
 * @param r the repetition level to look for
 * @return this column if it is repeated and its repetition level equals {@code r};
 *         otherwise whatever the parent returns for the same level
 * @throws InvalidRecordException if this column does not match and there is no parent, or
 *         the parent's definition level is below {@code r}
 */
ColumnIO getParent(int r) {
  if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) {
    return this;
  }

  // Climbing is only possible through a parent whose definition level is at least r.
  if (getParent() == null || getParent().getDefinitionLevel() < r) {
    throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath()));
  }

  return getParent().getParent(r);
}
/**
 * Creates the column IO node for a group type from the file schema, matched against the
 * currently requested type.
 *
 * <p>If the requested type is primitive, {@code incompatibleSchema} is invoked first.
 * The new node is then created at the requested index, attached to the current parent,
 * and the group's children are visited against the requested type viewed as a group.
 *
 * @param groupType the group type being visited
 */
@Override
public void visit(GroupType groupType) {
  final boolean requestedIsPrimitive = currentRequestedType.isPrimitive();
  if (requestedIsPrimitive) {
    // A group in the file cannot be matched against a requested primitive.
    incompatibleSchema(groupType, currentRequestedType);
  }

  final GroupColumnIO groupIO = new GroupColumnIO(groupType, current, currentRequestedIndex);
  current.add(groupIO);

  visitChildren(groupIO, groupType, currentRequestedType.asGroupType());
}
/**
 * Returns the column names by delegating to the superclass implementation.
 *
 * @return the list returned by {@code super.getColumnNames()}
 */
public List<String[]> getColumnNames() { return super.getColumnNames(); }
/**
 * Creates the column IO node for a primitive type from the file schema, matched against
 * the currently requested type.
 *
 * <p>{@code incompatibleSchema} is invoked when the requested type is not primitive, or
 * when strict type checking is enabled and the two primitive type names differ. The new
 * node is then created at the requested index, attached to the current parent and
 * recorded in the list of leaves.
 *
 * @param primitiveType the primitive type being visited
 */
@Override
public void visit(PrimitiveType primitiveType) {
  boolean incompatible = !currentRequestedType.isPrimitive();
  if (!incompatible && this.strictTypeChecking) {
    // Only reached when the requested type is primitive, so asPrimitiveType() is applicable.
    incompatible = currentRequestedType.asPrimitiveType().getPrimitiveTypeName()
        != primitiveType.getPrimitiveTypeName();
  }
  if (incompatible) {
    incompatibleSchema(primitiveType, currentRequestedType);
  }

  final int positionInLeaves = leaves.size();
  final PrimitiveColumnIO leaf =
      new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, positionInLeaves);
  current.add(leaf);
  leaves.add(leaf);
}
public MapWriteProtocol(GroupColumnIO columnIO, MapType type, Events returnClause) { super(returnClause); this.mapContent = (GroupColumnIO)columnIO.getChild(0); this.key = mapContent.getChild(0); this.value = mapContent.getChild(1); this.keyProtocol = getProtocol(type.getKey(), this.key, new Events() { @Override
/**
 * Begins a new message: makes the enclosing {@code MessageColumnIO} the current column,
 * sets {@code r[0]} to zero and resets the level-0 written-fields tracker to the number of
 * children of that column.
 */
@Override
public void startMessage() {
  if (DEBUG) {
    log("< MESSAGE START >");
  }

  currentColumnIO = MessageColumnIO.this;
  r[0] = 0;

  final int topLevelFieldCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
  fieldsWritten[0].reset(topLevelFieldCount);

  if (DEBUG) {
    printState();
  }
}
/**
 * Returns the type of this column IO narrowed to {@link MessageType}.
 *
 * @return the value of {@code super.getType()} cast to {@code MessageType}
 */
@Override public MessageType getType() { return (MessageType)super.getType(); } }
/**
 * Finds the column at repetition level {@code r}, starting from this column and walking up
 * through its parents.
 *
 * @param r the repetition level to look for
 * @return this column if its repetition is {@code REPEATED} and its repetition level
 *         equals {@code r}; otherwise whatever the parent returns for the same level
 * @throws InvalidRecordException if this column does not match and there is no parent, or
 *         the parent's definition level is below {@code r}
 */
ColumnIO getParent(int r) {
  final boolean repeatedAtLevel =
      getRepetitionLevel() == r && getType().getRepetition() == Repetition.REPEATED;
  if (repeatedAtLevel) {
    return this;
  }

  final boolean canAscend = getParent()!=null && getParent().getDefinitionLevel()>=r;
  if (!canAscend) {
    throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath()));
  }

  // Delegate the search to the parent.
  return getParent().getParent(r);
}
/**
 * Returns the column names by delegating to the superclass implementation.
 *
 * @return the list returned by {@code super.getColumnNames()}
 */
public List<String[]> getColumnNames() { return super.getColumnNames(); }
/**
 * Looks up a direct child of a group by name.
 *
 * <p>Parquet column names are case-sensitive, whereas Hive converts all column names to
 * lowercase. The lookup therefore tries an exact match first and only then falls back to
 * the first child, in child order, whose name matches ignoring case.
 *
 * @param groupColumnIO the group whose children are searched
 * @param columnName the name to look for
 * @return the matching child, or {@code null} if no child matches either way
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName)
{
    ColumnIO exactMatch = groupColumnIO.getChild(columnName);
    if (exactMatch != null) {
        return exactMatch;
    }

    // No exact match: scan the children for a case-insensitive one.
    for (int position = 0; position < groupColumnIO.getChildrenCount(); position++) {
        ColumnIO candidate = groupColumnIO.getChild(position);
        if (candidate.getName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Enters a field of the current group: the child at {@code index} becomes the current
 * column and the field is flagged as empty.
 *
 * @param field the field name, used for debug logging and error messages
 * @param index the index of the child column to move to
 * @throws ParquetEncodingException wrapping any {@link RuntimeException} raised while
 *         switching to the field
 */
@Override
public void startField(String field, int index) {
  try {
    if (DEBUG) {
      log("startField(" + field + ", " + index + ")");
    }

    final GroupColumnIO enclosingGroup = (GroupColumnIO) currentColumnIO;
    currentColumnIO = enclosingGroup.getChild(index);
    emptyField = true;

    if (DEBUG) {
      printState();
    }
  } catch (RuntimeException e) {
    throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
  }
}
/**
 * Ends the current message: calls {@code writeNullForMissingFields} with the index of the
 * last child of the current group column, then logs when debugging is enabled.
 */
@Override
public void endMessage() {
  final int lastChildIndex = ((GroupColumnIO) currentColumnIO).getChildrenCount() - 1;
  writeNullForMissingFields(lastChildIndex);

  if (DEBUG) {
    log("< MESSAGE END >");
  }
  if (DEBUG) {
    printState();
  }
}
/**
 * Starts writing a Thrift field.
 *
 * <p>A {@code STOP} field is ignored. Otherwise the Parquet field registered for the Thrift
 * field id becomes the current type, the record consumer is told the field starts, and the
 * child protocol at that field's index becomes the current protocol.
 *
 * @param field the Thrift field being started
 * @throws ParquetEncodingException if no Parquet field is registered for the field id, or
 *         an array lookup inside this method is out of bounds (the original
 *         {@link ArrayIndexOutOfBoundsException} is attached as the cause)
 * @throws TException declared by the overridden method
 */
@Override
public void writeFieldBegin(TField field) throws TException {
  if (field.type == TType.STOP) {
    return;
  }
  try {
    currentType = thriftFieldIdToParquetField[field.id];
    if (currentType == null) {
      throw new ParquetEncodingException("field " + field.id + " was not found in " + thriftType + " and " + schema.getType());
    }
    final int index = currentType.getIndex();
    recordConsumer.startField(currentType.getName(), index);
    currentProtocol = children[index];
  } catch (ArrayIndexOutOfBoundsException e) {
    // Chain the original exception so the failing index and stack trace are not lost.
    throw new ParquetEncodingException("field " + field.id + " was not found in " + thriftType + " and " + schema.getType(), e);
  }
}