/**
 * Creates a record consumer that writes into the given column write store.
 *
 * <p>For every leaf of the enclosing {@code MessageColumnIO} this looks up the
 * matching {@code ColumnWriter}, stores it in {@code columnWriter} at the leaf's id,
 * and registers it with each ancestor group via {@code buildGroupToLeafWriterMap}.
 * The per-level arrays {@code fieldsWritten} and {@code r} are sized to the longest
 * leaf field path.
 *
 * @param columns store providing one writer per leaf column descriptor
 */
public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
  this.columns = columns;
  this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()];

  // Track the longest field path while wiring up the leaf writers.
  int deepest = 0;
  for (PrimitiveColumnIO leaf : MessageColumnIO.this.getLeaves()) {
    ColumnWriter leafWriter = columns.getColumnWriter(leaf.getColumnDescriptor());
    int pathLength = leaf.getFieldPath().length;
    if (pathLength > deepest) {
      deepest = pathLength;
    }
    columnWriter[leaf.getId()] = leafWriter;
    buildGroupToLeafWriterMap(leaf, leafWriter);
  }

  // One slot per nesting level for the repetition level and the written-fields marker.
  r = new int[deepest];
  fieldsWritten = new FieldsMarker[deepest];
  for (int level = 0; level < deepest; level++) {
    fieldsWritten[level] = new FieldsMarker();
  }
}
/**
 * Registers a leaf's writer with every group above it.
 *
 * <p>Starting at the leaf's direct parent and walking up until a group has no
 * parent, the writer is added to the collection returned by
 * {@code getLeafWriters} for each group on the way.
 *
 * @param primitive the leaf column whose ancestors are visited
 * @param writer the writer belonging to that leaf
 */
private void buildGroupToLeafWriterMap(PrimitiveColumnIO primitive, ColumnWriter writer) {
  GroupColumnIO ancestor = primitive.getParent();
  while (true) {
    // The direct parent is always processed, even before any null check.
    getLeafWriters(ancestor).add(writer);
    ancestor = ancestor.getParent();
    if (ancestor == null) {
      return;
    }
  }
}
/**
 * Writes a null for each child of the current group that is not marked as written
 * in {@code fieldsWritten[currentLevel]}.
 *
 * <p>The null is written with the repetition level stored for the current level and
 * the definition level of the current (parent) column.
 *
 * @throws ParquetEncodingException wrapping any {@code RuntimeException} raised while
 *     writing the null for a given child index
 */
private void writeNullForMissingFieldsAtCurrentLevel() {
  int childCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
  for (int index = 0; index < childCount; index++) {
    if (fieldsWritten[currentLevel].isWritten(index)) {
      continue;
    }
    try {
      ColumnIO missing = ((GroupColumnIO) currentColumnIO).getChild(index);
      int definitionLevel = currentColumnIO.getDefinitionLevel();
      if (DEBUG) {
        log(Arrays.toString(missing.getFieldPath())
            + ".writeNull(" + r[currentLevel] + "," + definitionLevel + ")");
      }
      writeNull(missing, r[currentLevel], definitionLevel);
    } catch (RuntimeException e) {
      throw new ParquetEncodingException(
          "error while writing nulls for fields of indexes " + index
              + " . current index: " + fieldsWritten[currentLevel],
          e);
    }
  }
}
/**
 * Creates a record consumer that writes into the given column write store.
 *
 * <p>For every leaf of the enclosing {@code MessageColumnIO} this looks up the
 * matching {@code ColumnWriter}, stores it in {@code columnWriter} at the leaf's id,
 * and registers it with each ancestor group via {@code buildGroupToLeafWriterMap}.
 * The per-level arrays {@code fieldsWritten} and {@code r} are sized to the longest
 * leaf field path.
 *
 * @param columns store providing one writer per leaf column descriptor
 */
public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
  this.columns = columns;
  this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()];

  // Track the longest field path while wiring up the leaf writers.
  int deepest = 0;
  for (PrimitiveColumnIO leaf : MessageColumnIO.this.getLeaves()) {
    ColumnWriter leafWriter = columns.getColumnWriter(leaf.getColumnDescriptor());
    int pathLength = leaf.getFieldPath().length;
    if (pathLength > deepest) {
      deepest = pathLength;
    }
    columnWriter[leaf.getId()] = leafWriter;
    buildGroupToLeafWriterMap(leaf, leafWriter);
  }

  // One slot per nesting level for the repetition level and the written-fields marker.
  r = new int[deepest];
  fieldsWritten = new FieldsMarker[deepest];
  for (int level = 0; level < deepest; level++) {
    fieldsWritten[level] = new FieldsMarker();
  }
}
@Override public void startGroup() { if (DEBUG) log("startGroup()"); GroupColumnIO group = (GroupColumnIO) currentColumnIO; // current group is not null, need to flush all the nulls that were cached before if (hasNullCache(group)) { flushCachedNulls(group); } ++currentLevel; r[currentLevel] = r[currentLevel - 1]; int fieldsCount = ((GroupColumnIO) currentColumnIO).getChildrenCount(); fieldsWritten[currentLevel].reset(fieldsCount); if (DEBUG) printState(); }
/**
 * Writes a null for each child of the current group that is not marked as written
 * in {@code fieldsWritten[currentLevel]}.
 *
 * <p>The null is written with the repetition level stored for the current level and
 * the definition level of the current (parent) column.
 *
 * @throws ParquetEncodingException wrapping any {@code RuntimeException} raised while
 *     writing the null for a given child index
 */
private void writeNullForMissingFieldsAtCurrentLevel() {
  int childCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
  for (int index = 0; index < childCount; index++) {
    if (fieldsWritten[currentLevel].isWritten(index)) {
      continue;
    }
    try {
      ColumnIO missing = ((GroupColumnIO) currentColumnIO).getChild(index);
      int definitionLevel = currentColumnIO.getDefinitionLevel();
      if (DEBUG) {
        log(Arrays.toString(missing.getFieldPath())
            + ".writeNull(" + r[currentLevel] + "," + definitionLevel + ")");
      }
      writeNull(missing, r[currentLevel], definitionLevel);
    } catch (RuntimeException e) {
      throw new ParquetEncodingException(
          "error while writing nulls for fields of indexes " + index
              + " . current index: " + fieldsWritten[currentLevel],
          e);
    }
  }
}
@Override public void startGroup() { if (DEBUG) log("startGroup()"); GroupColumnIO group = (GroupColumnIO) currentColumnIO; // current group is not null, need to flush all the nulls that were cached before if (hasNullCache(group)) { flushCachedNulls(group); } ++currentLevel; r[currentLevel] = r[currentLevel - 1]; int fieldsCount = ((GroupColumnIO) currentColumnIO).getChildrenCount(); fieldsWritten[currentLevel].reset(fieldsCount); if (DEBUG) printState(); }
/**
 * Stores the current column's repetition level as the repetition level of the
 * current nesting level, and logs the new value when debugging is enabled.
 */
private void setRepetitionLevel() {
  r[currentLevel] = currentColumnIO.getRepetitionLevel();
  if (DEBUG) {
    log("r: {}", r[currentLevel]);
  }
}
private void flushCachedNulls(GroupColumnIO group) { //flush children first for (int i = 0; i < group.getChildrenCount(); i++) { ColumnIO child = group.getChild(i); if (child instanceof GroupColumnIO) { flushCachedNulls((GroupColumnIO) child); } } //then flush itself writeNullToLeaves(group); }
/**
 * Flushes the nulls cached for all groups.
 *
 * <p>Delegates to {@code flushCachedNulls} with the enclosing {@code MessageColumnIO}
 * instance as the starting group.
 */
// NOTE(review): the final closing brace on the line below ends an enclosing scope
// whose opening is outside this excerpt - presumably the consumer class; confirm.
@Override public void flush() { flushCachedNulls(MessageColumnIO.this); } }
/**
 * Registers a leaf's writer with every group above it.
 *
 * <p>Starting at the leaf's direct parent and walking up until a group has no
 * parent, the writer is added to the collection returned by
 * {@code getLeafWriters} for each group on the way.
 *
 * @param primitive the leaf column whose ancestors are visited
 * @param writer the writer belonging to that leaf
 */
private void buildGroupToLeafWriterMap(PrimitiveColumnIO primitive, ColumnWriter writer) {
  GroupColumnIO ancestor = primitive.getParent();
  while (true) {
    // The direct parent is always processed, even before any null check.
    getLeafWriters(ancestor).add(writer);
    ancestor = ancestor.getParent();
    if (ancestor == null) {
      return;
    }
  }
}
/**
 * Enters the field at the given index of the current group.
 *
 * <p>Makes that child the current column and marks the field as empty until a value
 * is added.
 *
 * @param field name of the field, used for logging and error messages
 * @param index position of the field among the current group's children
 * @throws ParquetEncodingException wrapping any {@code RuntimeException} raised while
 *     switching to the child column
 */
@Override
public void startField(String field, int index) {
  try {
    if (DEBUG) {
      log("startField({}, {})", field, index);
    }
    GroupColumnIO parentGroup = (GroupColumnIO) currentColumnIO;
    currentColumnIO = parentGroup.getChild(index);
    emptyField = true;
    if (DEBUG) {
      printState();
    }
  } catch (RuntimeException e) {
    throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
  }
}
/**
 * Finishes the current message: writes nulls for the fields that were not written
 * at the current level, then signals the end of the record to the column store.
 */
@Override
public void endMessage() {
  writeNullForMissingFieldsAtCurrentLevel();
  columns.endRecord();
  if (DEBUG) {
    log("< MESSAGE END >");
    printState();
  }
}
/**
 * Stores the current column's repetition level as the repetition level of the
 * current nesting level, and logs the new value when debugging is enabled.
 */
private void setRepetitionLevel() {
  r[currentLevel] = currentColumnIO.getRepetitionLevel();
  if (DEBUG) {
    log("r: {}", r[currentLevel]);
  }
}
/**
 * Enters the field at the given index of the current group.
 *
 * <p>Makes that child the current column and marks the field as empty until a value
 * is added.
 *
 * @param field name of the field, used for logging and error messages
 * @param index position of the field among the current group's children
 * @throws ParquetEncodingException wrapping any {@code RuntimeException} raised while
 *     switching to the child column
 */
@Override
public void startField(String field, int index) {
  try {
    if (DEBUG) {
      log("startField({}, {})", field, index);
    }
    GroupColumnIO parentGroup = (GroupColumnIO) currentColumnIO;
    currentColumnIO = parentGroup.getChild(index);
    emptyField = true;
    if (DEBUG) {
      printState();
    }
  } catch (RuntimeException e) {
    throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
  }
}
/**
 * Writes a float value to the current column.
 *
 * <p>The value is written with the repetition level stored for the current nesting
 * level and the current column's definition level. The field is marked as non-empty
 * and the stored repetition level is then updated via {@code setRepetitionLevel}.
 *
 * @param value the value to write
 */
@Override
public void addFloat(float value) {
  if (DEBUG) {
    log("addFloat({})", value);
  }
  emptyField = false;
  getColumnWriter().write(
      value,
      r[currentLevel],
      currentColumnIO.getDefinitionLevel());
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
/**
 * Writes a binary value to the current column.
 *
 * <p>The value is written with the repetition level stored for the current nesting
 * level and the current column's definition level. The field is marked as non-empty
 * and the stored repetition level is then updated via {@code setRepetitionLevel}.
 *
 * @param value the value to write; its length is read for the debug log when
 *     debugging is enabled
 */
@Override
public void addBinary(Binary value) {
  if (DEBUG) {
    log("addBinary({} bytes)", value.length());
  }
  emptyField = false;
  getColumnWriter().write(
      value,
      r[currentLevel],
      currentColumnIO.getDefinitionLevel());
  setRepetitionLevel();
  if (DEBUG) {
    printState();
  }
}
private void printState() { if (DEBUG) { log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + Arrays.toString(currentColumnIO.getFieldPath()) + " r:" + r[currentLevel]); if (r[currentLevel] > currentColumnIO.getRepetitionLevel()) { // sanity check throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)"); } } }
/**
 * Flushes the nulls cached for all groups.
 *
 * <p>Delegates to {@code flushCachedNulls} with the enclosing {@code MessageColumnIO}
 * instance as the starting group.
 */
// NOTE(review): the final closing brace on the line below ends an enclosing scope
// whose opening is outside this excerpt - presumably the consumer class; confirm.
@Override public void flush() { flushCachedNulls(MessageColumnIO.this); } }
private void writeNull(ColumnIO undefinedField, int r, int d) { if (undefinedField.getType().isPrimitive()) { columnWriter[((PrimitiveColumnIO) undefinedField).getId()].writeNull(r, d); } else { GroupColumnIO groupColumnIO = (GroupColumnIO) undefinedField; // only cache the repetition level, the definition level should always be the definition level of the parent node cacheNullForGroup(groupColumnIO, r); } }