/**
 * Folds the per-column statistics of one stripe into the file-level
 * statistics of the corresponding column writers.
 *
 * <p>The i-th entry of the stripe's column statistics list is deserialized
 * and merged into the {@code fileStatistics} of the i-th writer returned by
 * {@code getAllColumnTreeWriters(treeWriter)}.
 *
 * @param stripeStatistics the serialized statistics of a single stripe
 */
private void updateFileStatistics(OrcProto.StripeStatistics stripeStatistics) {
  List<OrcProto.ColumnStatistics> stripeColumnStats = stripeStatistics.getColStatsList();
  List<TreeWriter> columnWriters = getAllColumnTreeWriters(treeWriter);
  int column = 0;
  for (TreeWriter columnWriter : columnWriters) {
    // Positional pairing: writer N receives the N-th serialized statistics entry.
    columnWriter.fileStatistics.merge(
        ColumnStatisticsImpl.deserialize(stripeColumnStats.get(column)));
    column++;
  }
}
private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) { long result = 0; long numVals = child.fileStatistics.getNumberOfValues(); switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) { case BOOLEAN:
/**
 * Records one value for this column.
 *
 * <p>A non-null value increments the index statistics; a null value marks
 * them via {@code setNull()}. When {@code isPresent} is non-null, 1 is
 * written to it for a non-null value and 0 for a null, and a null also sets
 * {@code foundNulls}.
 *
 * @param obj The value to write to the column
 * @throws IOException if an error occurs during add
 */
void write(Object obj) throws IOException {
  final boolean valueIsNull = (obj == null);
  if (valueIsNull) {
    indexStatistics.setNull();
  } else {
    indexStatistics.increment();
  }
  if (isPresent == null) {
    return;
  }
  isPresent.write(valueIsNull ? 0 : 1);
  if (valueIsNull) {
    foundNulls = true;
  }
}
/**
 * Appends the stripe-level statistics of {@code treeWriter} and, recursively,
 * of all of its children to {@code builder}.
 *
 * <p>For each writer visited, the stripe statistics are first merged into the
 * writer's file statistics, then serialized into the builder, then reset.
 *
 * @param builder the stripe statistics message being assembled
 * @param treeWriter the writer whose subtree is visited (this writer first,
 *                   then its children)
 */
private void writeStripeStatistics(OrcProto.StripeStatistics.Builder builder,
                                   TreeWriter treeWriter) {
  // Accumulate into the file totals before the stripe values are cleared below.
  treeWriter.fileStatistics.merge(treeWriter.stripeColStatistics);
  OrcProto.ColumnStatistics serializedStripeStats =
      treeWriter.stripeColStatistics.serialize().build();
  builder.addColStats(serializedStripeStats);
  treeWriter.stripeColStatistics.reset();
  for (TreeWriter childWriter : treeWriter.getChildrenWriters()) {
    writeStripeStatistics(builder, childWriter);
  }
}
/**
 * Adds the serialized file-level statistics of {@code writer} to the footer
 * builder, then does the same for each of its children recursively (parent
 * before children).
 *
 * @param builder the footer message being assembled
 * @param writer the writer whose subtree is visited
 * @throws IOException declared by the signature; propagated from recursive calls
 */
private void writeFileStatistics(OrcProto.Footer.Builder builder,
                                 TreeWriter writer) throws IOException {
  builder.addStatistics(writer.fileStatistics.serialize());
  for (TreeWriter descendant : writer.getChildrenWriters()) {
    writeFileStatistics(builder, descendant);
  }
}
/**
 * Returns these statistics to their empty state: runs the superclass reset,
 * then zeroes the sum, restores the min/max sentinels and clears
 * {@code hasMinimum}.
 */
@Override
void reset() {
  super.reset();
  sum = 0;
  // NOTE(review): Double.MIN_VALUE is the smallest POSITIVE double, not the
  // most negative one. Presumably hasMinimum guards any read of these
  // sentinels so the value never matters - confirm against the update path.
  maximum = Double.MIN_VALUE;
  minimum = Double.MAX_VALUE;
  hasMinimum = false;
}
/**
 * Merges another statistics object into this binary column's statistics.
 *
 * <p>When {@code other} is a {@code BinaryStatisticsImpl}, its {@code sum} is
 * added to this one's. Otherwise the merge is rejected if this object already
 * holds statistics ({@code isStatsExists()}) with a non-zero sum. In every
 * non-throwing case the superclass merge is then applied.
 *
 * @param other the statistics to fold into this object
 * @throws IllegalArgumentException if {@code other} is not binary statistics
 *         while this object already has a non-zero sum
 */
@Override
void merge(ColumnStatisticsImpl other) {
  // Test against the concrete class that is cast to below. Testing the
  // BinaryColumnStatistics interface instead would let a different
  // implementation through and fail with a ClassCastException.
  if (other instanceof BinaryStatisticsImpl) {
    BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
    sum += bin.sum;
  } else {
    if (isStatsExists() && sum != 0) {
      throw new IllegalArgumentException("Incompatible merging of binary column statistics");
    }
  }
  super.merge(other);
}
/**
 * Return the column statistics as an array, one deserialized entry per
 * element of {@code cs}, in the same order.
 *
 * @return column stats
 */
public ColumnStatistics[] getColumnStatistics() {
  final int columnCount = cs.size();
  ColumnStatistics[] stats = new ColumnStatistics[columnCount];
  int column = 0;
  while (column < columnCount) {
    stats[column] = ColumnStatisticsImpl.deserialize(cs.get(column));
    ++column;
  }
  return stats;
}
}
/**
 * Writes one boolean value. The superclass handles the shared bookkeeping
 * first; a non-null value is then extracted through the inspector, recorded
 * in the index statistics and written as 1 (true) or 0 (false).
 *
 * @param obj the value to write; a null is handled by the superclass only
 * @throws IOException if writing fails
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  final boolean flag = ((BooleanObjectInspector) inspector).get(obj);
  indexStatistics.updateBoolean(flag);
  final int encoded;
  if (flag) {
    encoded = 1;
  } else {
    encoded = 0;
  }
  writer.write(encoded);
}
/**
 * Writes one binary value. After the superclass bookkeeping, a non-null
 * value's bytes go to {@code stream}, its length to {@code length}, and the
 * value is recorded in the index statistics; when {@code createBloomFilter}
 * is set the bytes are also added to the bloom filter.
 *
 * @param obj the value to write; a null is handled by the superclass only
 * @throws IOException if writing fails
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  final BytesWritable binary =
      ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
  final byte[] payload = binary.getBytes();
  final int payloadLength = binary.getLength();
  // Only the first payloadLength bytes of the backing array are written.
  stream.write(payload, 0, payloadLength);
  length.write(payloadLength);
  indexStatistics.updateBinary(binary);
  if (createBloomFilter) {
    bloomFilter.addBytes(payload, payloadLength);
  }
}
/**
 * Create a row index entry carrying the current index statistics.
 *
 * <p>The index statistics are first merged into the stripe column statistics
 * (not directly into the file statistics), then serialized into the pending
 * row index entry and reset. The entry is appended to the row index and
 * cleared, a bloom filter entry is added, and the current position is
 * recorded into {@code rowIndexPosition}. Finally every child writer is asked
 * to create its own entry.
 *
 * @throws IOException if an error occurs during create
 */
void createRowIndexEntry() throws IOException {
  // Merge and serialize BEFORE the reset below, which clears indexStatistics.
  stripeColStatistics.merge(indexStatistics);
  rowIndexEntry.setStatistics(indexStatistics.serialize());
  indexStatistics.reset();
  rowIndex.addEntry(rowIndexEntry);
  // The same entry object is cleared right after being added.
  // NOTE(review): presumably a reused builder - confirm.
  rowIndexEntry.clear();
  addBloomFilterEntry();
  recordPosition(rowIndexPosition);
  for (TreeWriter child : childrenWriters) {
    child.createRowIndexEntry();
  }
}
/**
 * Serializes these statistics: the superclass builds the common part and the
 * binary-specific {@code sum} is attached as a {@code BinaryStatistics}
 * sub-message.
 *
 * @return the column statistics builder with binary statistics set
 */
@Override
OrcProto.ColumnStatistics.Builder serialize() {
  OrcProto.BinaryStatistics.Builder binaryPart = OrcProto.BinaryStatistics.newBuilder();
  binaryPart.setSum(sum);

  OrcProto.ColumnStatistics.Builder serialized = super.serialize();
  serialized.setBinaryStatistics(binaryPart);
  return serialized;
}
/**
 * Returns these statistics to their empty state: runs the superclass reset,
 * sets the sum back to {@code HiveDecimal.ZERO} and clears the minimum and
 * maximum to {@code null}.
 */
@Override
void reset() {
  super.reset();
  sum = HiveDecimal.ZERO;
  maximum = null;
  minimum = null;
}
/**
 * Merges another statistics object into this boolean column's statistics.
 *
 * <p>A {@code BooleanStatisticsImpl} contributes its {@code trueCount}. Any
 * other kind is rejected when this object already holds statistics
 * ({@code isStatsExists()}) with a non-zero true count. In every non-throwing
 * case the superclass merge is then applied.
 *
 * @param other the statistics to fold into this object
 * @throws IllegalArgumentException if {@code other} is not boolean statistics
 *         while this object already has a non-zero true count
 */
@Override
void merge(ColumnStatisticsImpl other) {
  if (other instanceof BooleanStatisticsImpl) {
    trueCount += ((BooleanStatisticsImpl) other).trueCount;
  } else if (isStatsExists() && trueCount != 0) {
    throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
  }
  super.merge(other);
}
/**
 * Deserializes the statistics stored in the footer into an array with one
 * element per footer type, in index order.
 *
 * @return the deserialized column statistics
 */
@Override
public ColumnStatistics[] getStatistics() {
  final int typeCount = footer.getTypesCount();
  ColumnStatistics[] stats = new ColumnStatistics[typeCount];
  int column = 0;
  while (column < typeCount) {
    stats[column] = ColumnStatisticsImpl.deserialize(footer.getStatistics(column));
    ++column;
  }
  return stats;
}
/**
 * Writes one boolean value. The superclass handles the shared bookkeeping
 * first; a non-null value is then extracted through the inspector, recorded
 * in the index statistics and written as 1 (true) or 0 (false).
 *
 * @param obj the value to write; a null is handled by the superclass only
 * @throws IOException if writing fails
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  final boolean flag = ((BooleanObjectInspector) inspector).get(obj);
  indexStatistics.updateBoolean(flag);
  final int encoded;
  if (flag) {
    encoded = 1;
  } else {
    encoded = 0;
  }
  writer.write(encoded);
}
/**
 * Writes one binary value. After the superclass bookkeeping, a non-null
 * value's bytes go to {@code stream}, its length to {@code length}, and the
 * value is recorded in the index statistics; when {@code createBloomFilter}
 * is set the bytes are also added to the bloom filter.
 *
 * @param obj the value to write; a null is handled by the superclass only
 * @throws IOException if writing fails
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  final BytesWritable binary =
      ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
  final byte[] payload = binary.getBytes();
  final int payloadLength = binary.getLength();
  // Only the first payloadLength bytes of the backing array are written.
  stream.write(payload, 0, payloadLength);
  length.write(payloadLength);
  indexStatistics.updateBinary(binary);
  if (createBloomFilter) {
    bloomFilter.addBytes(payload, payloadLength);
  }
}