/**
 * Locates the column node that carries the elements of an array column.
 *
 * <p>Starting at {@code columnIO}, the first child is followed for as long as the
 * current node is a group that is not REPEATED. The node reached is then either
 * unwrapped one more level (standard 3-level layout) or returned as-is (legacy
 * 2-level layout).
 *
 * @param columnIO the column node to start descending from
 * @return the only child of the repeated group when it looks like a 3-level
 *         wrapper; otherwise the node at which the descent stopped
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO)
{
    // Descend through first children until a REPEATED node or a non-group node is reached.
    ColumnIO current = columnIO;
    while (current instanceof GroupColumnIO) {
        if (current.getType().isRepetition(REPEATED)) {
            break;
        }
        current = ((GroupColumnIO) current).getChild(0);
    }

    /* Backward-compatibility support for 2-level arrays where a repeated field is not a group:
     *   optional group my_list (LIST) {
     *     repeated int32 element;
     *   }
     */
    if (!(current instanceof GroupColumnIO)) {
        return current;
    }

    /* If array has a standard 3-level structure with middle level repeated group with a single field:
     *   optional group my_list (LIST) {
     *     repeated group element {
     *       required binary str (UTF8);
     *     };
     *   }
     * The group must carry no original type, have exactly one child, and be named
     * neither "array" nor "<parent name>_tuple".
     */
    GroupColumnIO group = (GroupColumnIO) current;
    boolean isThreeLevelWrapper = current.getType().getOriginalType() == null
            && group.getChildrenCount() == 1
            && !current.getName().equals("array")
            && !current.getName().equals(current.getParent().getName() + "_tuple");

    return isThreeLevelWrapper ? group.getChild(0) : current;
}
return Optional.empty(); boolean required = columnIO.getType().getRepetition() != OPTIONAL; int repetitionLevel = columnIO.getRepetitionLevel(); int definitionLevel = columnIO.getDefinitionLevel(); if (ROW.equals(type.getTypeSignature().getBase())) { GroupColumnIO groupColumnIO = (GroupColumnIO) columnIO; RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType()); return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
/**
 * Finds the node, starting at this one and walking up through parents, that is
 * REPEATED and whose repetition level equals {@code r}.
 *
 * @param r the repetition level to look for
 * @return this node or the matching ancestor
 * @throws InvalidRecordException if there is no parent, or the parent's
 *         definition level is below {@code r}
 */
ColumnIO getParent(int r) {
    // This node itself is the answer when it is the repeated node at level r.
    if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) {
        return this;
    }

    // Stop when the search cannot continue upward.
    // NOTE(review): the parent's *definition* level is compared against r — confirm intended.
    if (getParent() == null || getParent().getDefinitionLevel() < r) {
        throw new InvalidRecordException("no parent(" + r + ") for " + Arrays.toString(this.getFieldPath()));
    }

    return getParent().getParent(r);
}
/**
 * Registers {@code child} under this node: appends it to the ordered child list,
 * indexes it by its type name, and bumps the child counter.
 *
 * @param child the child column node to attach
 */
void add(ColumnIO child) {
    children.add(child);

    // A later child with the same type name replaces the earlier entry in the name index.
    final String childName = child.getType().getName();
    childrenByName.put(childName, child);

    childrenSize += 1;
}
/**
 * Writes a float value to the current column's writer, using the repetition level
 * recorded for the current nesting level and the current column's definition
 * level, then updates the recorded repetition level.
 *
 * @param value the float to write
 */
@Override
public void addFloat(float value) {
    if (DEBUG) {
        log("addFloat(" + value + ")");
    }

    final int repetitionLevel = r[currentLevel];
    final int definitionLevel = currentColumnIO.getDefinitionLevel();
    getColumnWriter().write(value, repetitionLevel, definitionLevel);

    setRepetitionLevel();

    if (DEBUG) {
        printState();
    }
}
/**
 * Writes a null for each child of the current group, from the current field index
 * at this nesting level up to and including {@code to}. The current field index is
 * advanced as fields are processed and ends at {@code to + 1} on success.
 *
 * @param to index of the last child (inclusive) to write a null for
 * @throws ParquetEncodingException wrapping any runtime failure raised while
 *         writing a null
 */
private void writeNullForMissingFields(final int to) {
    final int from = currentIndex[currentLevel];

    while (currentIndex[currentLevel] <= to) {
        try {
            GroupColumnIO group = (GroupColumnIO) currentColumnIO;
            ColumnIO undefinedField = group.getChild(currentIndex[currentLevel]);
            // The null is recorded at the enclosing group's definition level.
            int d = currentColumnIO.getDefinitionLevel();
            int repetition = r[currentLevel];
            if (DEBUG) {
                log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + repetition + "," + d + ")");
            }
            writeNull(undefinedField, repetition, d);
        } catch (RuntimeException e) {
            throw new ParquetEncodingException(
                    "error while writing nulls from " + from + " to " + to
                            + ". current index: " + currentIndex[currentLevel],
                    e);
        }
        ++currentIndex[currentLevel];
    }
}
@Override void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) { super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path); for (ColumnIO child : this.children) { String[] newFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1); int[] newIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1); newFieldPath[fieldPath.length] = child.getType().getName(); newIndexFieldPath[indexFieldPath.length] = child.getIndex(); List<ColumnIO> newRepetition; if (child.getType().isRepetition(REPEATED)) { newRepetition = new ArrayList<ColumnIO>(repetition); newRepetition.add(child); } else { newRepetition = repetition; } List<ColumnIO> newPath = new ArrayList<ColumnIO>(path); newPath.add(child); child.setLevels( // the type repetition level increases whenever there's a possible repetition child.getType().isRepetition(REPEATED) ? r + 1 : r, // the type definition level increases whenever a field can be missing (not required) !child.getType().isRepetition(REQUIRED) ? d + 1 : d, newFieldPath, newIndexFieldPath, newRepetition, newPath ); } }
public void printState() { log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + Arrays.toString(currentColumnIO.getFieldPath()) + " r:" + r[currentLevel]); if (r[currentLevel] > currentColumnIO.getRepetitionLevel()) { // sanity check throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)"); } }
@Override void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) { super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path); for (ColumnIO child : this.children) { String[] newFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1); int[] newIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1); newFieldPath[fieldPath.length] = child.getType().getName(); newIndexFieldPath[indexFieldPath.length] = this.getType().asGroupType().getFieldIndex(child.getType().getName()); List<ColumnIO> newRepetition; if (child.getType().getRepetition() == REPEATED) { newRepetition = new ArrayList<ColumnIO>(repetition); newRepetition.add(child); } else { newRepetition = repetition; } List<ColumnIO> newPath = new ArrayList<ColumnIO>(path); newPath.add(child); child.setLevels( // the type repetition level increases whenever there's a possible repetition child.getType().getRepetition() == REPEATED ? r + 1 : r, // the type definition level increases whenever a field can be missing (not required) child.getType().getRepetition() != REQUIRED ? d + 1 : d, newFieldPath, newIndexFieldPath, newRepetition, newPath ); } }
/**
 * Closes the current field: moves back to the parent column, records the next
 * expected field index at this nesting level, and resets this level's repetition
 * level to that of the enclosing level (or 0 at the top level).
 *
 * @param field name of the field being closed (used only for debug logging here)
 * @param index index of the field being closed
 */
@Override
public void endField(String field, int index) {
    if (DEBUG) {
        log("endField(" + field + ", " + index + ")");
    }

    currentColumnIO = currentColumnIO.getParent();
    currentIndex[currentLevel] = index + 1;

    if (currentLevel == 0) {
        r[currentLevel] = 0;
    } else {
        r[currentLevel] = r[currentLevel - 1];
    }

    if (DEBUG) {
        printState();
    }
}
/**
 * Records the current column's repetition level as the repetition level for the
 * current nesting level.
 */
private void setRepetitionLevel() {
    final int level = currentColumnIO.getRepetitionLevel();
    r[currentLevel] = level;
    if (DEBUG) {
        log("r: " + level);
    }
}
/**
 * Returns the last primitive column under this node by delegating to the last child.
 *
 * @return the result of {@code getLast()} on the final entry of {@code children}
 */
PrimitiveColumnIO getLast() {
    final int lastIndex = children.size() - 1;
    return children.get(lastIndex).getLast();
}
/**
 * Returns the first primitive column under this node by delegating to the first child.
 *
 * @return the result of {@code getFirst()} on the first entry of {@code children}
 */
PrimitiveColumnIO getFirst() {
    final int firstIndex = 0;
    return children.get(firstIndex).getFirst();
}
/**
 * Finds the position in {@code columns} of the column whose node path matches
 * {@code path}, comparing names case-insensitively.
 *
 * <p>Entry {@code i} of {@code path} is compared with node {@code i + 1} of the
 * column's path, so node 0 is never compared. Columns whose path has no node at
 * position {@code path.size()} are ignored. When several columns match, the
 * last one wins.
 *
 * @param columns candidate primitive columns
 * @param path    names to match; expected to be non-empty
 * @return index of the last matching column, or -1 when none matches
 */
private static int getPathIndex(List<PrimitiveColumnIO> columns, List<String> path)
{
    final int depth = path.size();
    int found = -1;

    for (int position = 0; position < columns.size(); position++) {
        ColumnIO[] nodes = columns.get(position).getPath();

        // Check the deepest name first; skip columns that are too shallow or differ there.
        if (nodes.length > depth && nodes[depth].getName().equalsIgnoreCase(path.get(depth - 1))) {
            boolean matches = true;
            for (int level = 0; matches && level < depth - 1; level++) {
                matches = nodes[level + 1].getName().equalsIgnoreCase(path.get(level));
            }
            if (matches) {
                found = position;
            }
        }
    }

    return found;
}
/**
 * Collects the column names reported by every child, in child order.
 *
 * @return a new list containing the concatenation of each child's
 *         {@code getColumnNames()} result
 */
@Override
List<String[]> getColumnNames() {
    final List<String[]> names = new ArrayList<String[]>();
    for (ColumnIO child : children) {
        List<String[]> childNames = child.getColumnNames();
        names.addAll(childNames);
    }
    return names;
}
/**
 * Finds the node, starting at this one and walking up through parents, whose
 * repetition is REPEATED and whose repetition level equals {@code r}.
 *
 * @param r the repetition level to look for
 * @return this node or the matching ancestor
 * @throws InvalidRecordException if there is no parent, or the parent's
 *         definition level is below {@code r}
 */
ColumnIO getParent(int r) {
    final boolean atRequestedLevel = getRepetitionLevel() == r;
    if (atRequestedLevel && getType().getRepetition() == Repetition.REPEATED) {
        return this;
    }

    // Give up when there is nothing above, or the parent's definition level is below r.
    if (getParent() == null || getParent().getDefinitionLevel() < r) {
        throw new InvalidRecordException("no parent(" + r + ") for " + Arrays.toString(this.getFieldPath()));
    }

    return getParent().getParent(r);
}
/**
 * Attaches {@code child} to this node: it is appended to {@code children},
 * stored in {@code childrenByName} under its type name, and counted in
 * {@code childrenSize}.
 *
 * @param child the child column node to attach
 */
void add(ColumnIO child) {
    children.add(child);

    final String key = child.getType().getName();
    childrenByName.put(key, child);

    childrenSize = childrenSize + 1;
}
/**
 * Writes an int value to the current column's writer, using the repetition level
 * recorded for the current nesting level and the current column's definition
 * level. Clears the {@code emptyField} flag first and updates the recorded
 * repetition level afterwards.
 *
 * @param value the int to write
 */
@Override
public void addInteger(int value) {
    if (DEBUG) {
        log("addInt(" + value + ")");
    }

    // A value is being written, so the field is not empty.
    emptyField = false;

    final int repetitionLevel = r[currentLevel];
    final int definitionLevel = currentColumnIO.getDefinitionLevel();
    getColumnWriter().write(value, repetitionLevel, definitionLevel);

    setRepetitionLevel();

    if (DEBUG) {
        printState();
    }
}
/**
 * Writes a null for every child of the current group that has not been marked as
 * written at the current nesting level.
 *
 * @throws ParquetEncodingException wrapping any runtime failure raised while
 *         writing a null
 */
private void writeNullForMissingFieldsAtCurrentLevel() {
    final int fieldCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();

    for (int index = 0; index < fieldCount; index++) {
        // Fields that already received a value need no null.
        if (fieldsWritten[currentLevel].isWritten(index)) {
            continue;
        }

        try {
            ColumnIO undefinedField = ((GroupColumnIO) currentColumnIO).getChild(index);
            // The null is recorded at the enclosing group's definition level.
            int d = currentColumnIO.getDefinitionLevel();
            if (DEBUG) {
                log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")");
            }
            writeNull(undefinedField, r[currentLevel], d);
        } catch (RuntimeException e) {
            throw new ParquetEncodingException(
                    "error while writing nulls for fields of indexes " + index
                            + " . current index: " + fieldsWritten[currentLevel],
                    e);
        }
    }
}