/**
 * Reports whether the given field currently holds an assigned value.
 *
 * @param field the field to query; must not be null
 * @return true if the field has been assigned a value, false otherwise
 * @throws IllegalArgumentException if {@code field} is null
 * @throws IllegalStateException if {@code field} is not one of the handled constants
 */
public boolean isSet(_Fields field) {
  if (field == null) {
    throw new IllegalArgumentException();
  }

  final boolean assigned;
  switch (field) {
    case MAX:
      assigned = isSetMax();
      break;
    case MIN:
      assigned = isSetMin();
      break;
    case NULL_COUNT:
      assigned = isSetNull_count();
      break;
    case DISTINCT_COUNT:
      assigned = isSetDistinct_count();
      break;
    case MAX_VALUE:
      assigned = isSetMax_value();
      break;
    case MIN_VALUE:
      assigned = isSetMin_value();
      break;
    default:
      // No handler for this constant.
      throw new IllegalStateException();
  }
  return assigned;
}
/**
 * Creates a copy of this instance by delegating to the copy constructor.
 *
 * @return a new {@code Statistics} built from this one
 */
public Statistics deepCopy() {
  final Statistics duplicate = new Statistics(this);
  return duplicate;
}
/**
 * Looks up the current value of the given field through its typed getter.
 *
 * @param field the field whose value is requested
 * @return the value returned by the matching getter
 * @throws IllegalStateException if {@code field} is not one of the handled constants
 */
public Object getFieldValue(_Fields field) {
  final Object current;
  switch (field) {
    case MAX:
      current = getMax();
      break;
    case MIN:
      current = getMin();
      break;
    case NULL_COUNT:
      current = getNull_count();
      break;
    case DISTINCT_COUNT:
      current = getDistinct_count();
      break;
    case MAX_VALUE:
      current = getMax_value();
      break;
    case MIN_VALUE:
      current = getMin_value();
      break;
    default:
      // No getter is mapped to this constant.
      throw new IllegalStateException();
  }
  return current;
}
case MAX: if (value == null) { unsetMax(); } else { setMax((ByteBuffer)value); unsetMin(); } else { setMin((ByteBuffer)value); unsetNull_count(); } else { setNull_count((Long)value); unsetDistinct_count(); } else { setDistinct_count((Long)value); unsetMax_value(); } else { setMax_value((ByteBuffer)value); unsetMin_value(); } else { setMin_value((ByteBuffer)value);
if (formatStats.isSetMin_value() && formatStats.isSetMax_value()) { byte[] min = formatStats.min_value.array(); byte[] max = formatStats.max_value.array(); boolean isSet = formatStats.isSetMax() && formatStats.isSetMin(); boolean maxEqualsMin = isSet ? Arrays.equals(formatStats.getMin(), formatStats.getMax()) : false; boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder; if (formatStats.isSetNull_count()) { statsBuilder.withNumNulls(formatStats.null_count);
public static Statistics toParquetStatistics( org.apache.parquet.column.statistics.Statistics stats) { Statistics formatStats = new Statistics(); // Don't write stats larger than the max size rather than truncating. The // rationale is that some engines may use the minimum value in the page as // the true minimum for aggregations and there is no way to mark that a // value has been truncated and is a lower bound and not in the page. if (!stats.isEmpty() && stats.isSmallerThan(MAX_STATS_SIZE)) { formatStats.setNull_count(stats.getNumNulls()); if (stats.hasNonNullValue()) { byte[] min = stats.getMinBytes(); byte[] max = stats.getMaxBytes(); // Fill the former min-max statistics only if the comparison logic is // signed so the logic of V1 and V2 stats are the same (which is // trivially true for equal min-max values) if (sortOrder(stats.type()) == SortOrder.SIGNED || Arrays.equals(min, max)) { formatStats.setMin(min); formatStats.setMax(max); } if (isMinMaxStatsSupported(stats.type()) || Arrays.equals(min, max)) { formatStats.setMin_value(min); formatStats.setMax_value(max); } } } return formatStats; }
/**
 * Copy constructor: performs a deep copy of <i>other</i>.
 *
 * <p>The isset bitfield and the two counters are copied by value. Each binary field is
 * duplicated with {@code TBaseHelper.copyBinary}, and only when it is set on
 * <i>other</i>.
 *
 * @param other the instance to copy from
 */
public Statistics(Statistics other) {
  // Plain value members first.
  this.__isset_bitfield = other.__isset_bitfield;
  this.null_count = other.null_count;
  this.distinct_count = other.distinct_count;

  // Legacy min/max buffers.
  if (other.isSetMin()) {
    this.min = org.apache.thrift.TBaseHelper.copyBinary(other.min);
  }
  if (other.isSetMax()) {
    this.max = org.apache.thrift.TBaseHelper.copyBinary(other.max);
  }

  // min_value/max_value buffers.
  if (other.isSetMin_value()) {
    this.min_value = org.apache.thrift.TBaseHelper.copyBinary(other.min_value);
  }
  if (other.isSetMax_value()) {
    this.max_value = org.apache.thrift.TBaseHelper.copyBinary(other.max_value);
  }
}
/**
 * Populates {@code struct} from a tuple-protocol stream.
 *
 * <p>Reads, in this order: {@code num_values} as an i32, then three i32 values that are
 * mapped through {@code Encoding.findByValue} into {@code encoding},
 * {@code definition_level_encoding} and {@code repetition_level_encoding}; the matching
 * isset flag is raised after each one. A one-bit set is read next; when bit 0 is set, a
 * new {@code Statistics} is created, read from the same protocol and marked as set.
 *
 * <p>{@code prot} is cast to {@code TTupleProtocol} without a type check.
 *
 * @param prot the protocol to read from
 * @param struct the header to fill in
 * @throws org.apache.thrift.TException declared by the signature
 */
@Override public void read(org.apache.thrift.protocol.TProtocol prot, DataPageHeader struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; struct.num_values = iprot.readI32(); struct.setNum_valuesIsSet(true); struct.encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setEncodingIsSet(true); struct.definition_level_encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setDefinition_level_encodingIsSet(true); struct.repetition_level_encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setRepetition_level_encodingIsSet(true); BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { struct.statistics = new Statistics(); struct.statistics.read(iprot); struct.setStatisticsIsSet(true); } } }
/**
 * Converts Thrift-level statistics into in-memory column statistics for the given type.
 *
 * <p>The result always starts from {@code getStatsBasedOnType(type)}. Min and max are
 * applied only when both legacy fields are set, and the null count only when it is set.
 *
 * @param statistics the Thrift statistics to read; may be null
 * @param type the primitive type that selects the statistics implementation
 * @return the statistics for {@code type}, left at their initial state when
 *     {@code statistics} is null
 */
public static org.apache.parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type) {
  org.apache.parquet.column.statistics.Statistics<?> stats =
      org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType(type);
  if (statistics == null) {
    return stats;
  }

  if (statistics.isSetMax() && statistics.isSetMin()) {
    // ByteBuffer.array() hands back the whole backing array and ignores the buffer's
    // offset and limit. The accessors are used instead: getMax() right-sizes the buffer
    // before exposing it (getMin() presumably mirrors it - confirm).
    stats.setMinMaxFromBytes(statistics.getMin(), statistics.getMax());
  }

  // Copy the null count only when it was actually written; an unset field would
  // otherwise be reported as its primitive default value.
  if (statistics.isSetNull_count()) {
    stats.setNumNulls(statistics.null_count);
  }
  return stats;
}
/**
 * Returns the column's maximum value; min and max are determined by the column's
 * ColumnOrder.
 *
 * <p>Values use PLAIN encoding, except that variable-length byte arrays carry no length
 * prefix.
 *
 * @return the backing array of {@code max_value}, or null when the field is null
 */
public byte[] getMax_value() {
  // Normalize the stored buffer through rightSize before exposing its backing array.
  setMax_value(org.apache.thrift.TBaseHelper.rightSize(max_value));
  if (max_value == null) {
    return null;
  }
  return max_value.array();
}
/**
 * DEPRECATED: returns the legacy maximum value of the column. Use min_value and
 * max_value instead.
 *
 * <p>Values use PLAIN encoding, except that variable-length byte arrays carry no length
 * prefix.
 *
 * <p>The legacy min and max fields hold values determined by signed comparison only. New
 * files should use the correct order for a column's logical type and store the values in
 * the min_value and max_value fields. To support older readers, the legacy fields may
 * still be set when the column order is signed.
 *
 * @return the backing array of {@code max}, or null when the field is null
 */
public byte[] getMax() {
  // Normalize the stored buffer through rightSize before exposing its backing array.
  setMax(org.apache.thrift.TBaseHelper.rightSize(max));
  if (max == null) {
    return null;
  }
  return max.array();
}
case MAX: if (value == null) { unsetMax(); } else { if (value instanceof byte[]) { setMax((byte[])value); } else { setMax((java.nio.ByteBuffer)value); unsetMin(); } else { if (value instanceof byte[]) { setMin((byte[])value); } else { setMin((java.nio.ByteBuffer)value); unsetNull_count(); } else { setNull_count((java.lang.Long)value); unsetDistinct_count(); } else { setDistinct_count((java.lang.Long)value); unsetMax_value(); } else { if (value instanceof byte[]) { setMax_value((byte[])value); } else {
if (formatStats.isSetMin_value() && formatStats.isSetMax_value()) { byte[] min = formatStats.min_value.array(); byte[] max = formatStats.max_value.array(); statsBuilder.withMax(max); if (formatStats.isSetNull_count()) { statsBuilder.withNumNulls(formatStats.null_count); boolean isSet = formatStats.isSetMax() && formatStats.isSetMin(); boolean maxEqualsMin = isSet ? Arrays.equals(formatStats.getMin(), formatStats.getMax()) : false; boolean sortOrdersMatch = SortOrder.SIGNED == typeSortOrder; statsBuilder.withMax(formatStats.max.array()); if (formatStats.isSetNull_count()) { statsBuilder.withNumNulls(formatStats.null_count);
public static Statistics toParquetStatistics( org.apache.parquet.column.statistics.Statistics stats) { Statistics formatStats = new Statistics(); // Don't write stats larger than the max size rather than truncating. The // rationale is that some engines may use the minimum value in the page as // the true minimum for aggregations and there is no way to mark that a // value has been truncated and is a lower bound and not in the page. if (!stats.isEmpty() && stats.isSmallerThan(MAX_STATS_SIZE)) { formatStats.setNull_count(stats.getNumNulls()); if (stats.hasNonNullValue()) { byte[] min = stats.getMinBytes(); byte[] max = stats.getMaxBytes(); // Fill the former min-max statistics only if the comparison logic is // signed so the logic of V1 and V2 stats are the same (which is // trivially true for equal min-max values) if (sortOrder(stats.type()) == SortOrder.SIGNED || Arrays.equals(min, max)) { formatStats.setMin(min); formatStats.setMax(max); } if (isMinMaxStatsSupported(stats.type()) || Arrays.equals(min, max)) { formatStats.setMin_value(min); formatStats.setMax_value(max); } } } return formatStats; }
/**
 * Copy constructor: performs a deep copy of <i>other</i>.
 *
 * <p>The isset bitfield and the two counters are copied by value. Each binary field is
 * duplicated with {@code TBaseHelper.copyBinary}, and only when it is set on
 * <i>other</i>.
 *
 * @param other the instance to copy from
 */
public Statistics(Statistics other) {
  // Plain value members first.
  this.__isset_bitfield = other.__isset_bitfield;
  this.null_count = other.null_count;
  this.distinct_count = other.distinct_count;

  // Legacy min/max buffers.
  if (other.isSetMin()) {
    this.min = org.apache.thrift.TBaseHelper.copyBinary(other.min);
  }
  if (other.isSetMax()) {
    this.max = org.apache.thrift.TBaseHelper.copyBinary(other.max);
  }

  // min_value/max_value buffers.
  if (other.isSetMin_value()) {
    this.min_value = org.apache.thrift.TBaseHelper.copyBinary(other.min_value);
  }
  if (other.isSetMax_value()) {
    this.max_value = org.apache.thrift.TBaseHelper.copyBinary(other.max_value);
  }
}
/**
 * Populates {@code struct} from a tuple-protocol stream.
 *
 * <p>Reads, in this order: {@code num_values} as an i32, then three i32 values that are
 * mapped through {@code Encoding.findByValue} into {@code encoding},
 * {@code definition_level_encoding} and {@code repetition_level_encoding}; the matching
 * isset flag is raised after each one. A one-bit set is read next; when bit 0 is set, a
 * new {@code Statistics} is created, read from the same protocol and marked as set.
 *
 * <p>{@code prot} is cast to {@code TTupleProtocol} without a type check.
 *
 * @param prot the protocol to read from
 * @param struct the header to fill in
 * @throws org.apache.thrift.TException declared by the signature
 */
@Override public void read(org.apache.thrift.protocol.TProtocol prot, DataPageHeader struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TTupleProtocol iprot = (org.apache.thrift.protocol.TTupleProtocol) prot; struct.num_values = iprot.readI32(); struct.setNum_valuesIsSet(true); struct.encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setEncodingIsSet(true); struct.definition_level_encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setDefinition_level_encodingIsSet(true); struct.repetition_level_encoding = org.apache.parquet.format.Encoding.findByValue(iprot.readI32()); struct.setRepetition_level_encodingIsSet(true); java.util.BitSet incoming = iprot.readBitSet(1); if (incoming.get(0)) { struct.statistics = new Statistics(); struct.statistics.read(iprot); struct.setStatisticsIsSet(true); } } }
/**
 * Converts Thrift-level statistics into in-memory column statistics for the given type.
 *
 * <p>The result always starts from {@code getStatsBasedOnType(type)}. Min and max are
 * applied only when both legacy fields are set, and the null count only when it is set.
 *
 * @param statistics the Thrift statistics to read; may be null
 * @param type the primitive type that selects the statistics implementation
 * @return the statistics for {@code type}, left at their initial state when
 *     {@code statistics} is null
 */
public static org.apache.parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type) {
  org.apache.parquet.column.statistics.Statistics<?> stats =
      org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType(type);
  if (statistics == null) {
    return stats;
  }

  if (statistics.isSetMax() && statistics.isSetMin()) {
    // ByteBuffer.array() hands back the whole backing array and ignores the buffer's
    // offset and limit. The accessors are used instead: getMax() right-sizes the buffer
    // before exposing it (getMin() presumably mirrors it - confirm).
    stats.setMinMaxFromBytes(statistics.getMin(), statistics.getMax());
  }

  // Copy the null count only when it was actually written; an unset field would
  // otherwise be reported as its primitive default value.
  if (statistics.isSetNull_count()) {
    stats.setNumNulls(statistics.null_count);
  }
  return stats;
}
/**
 * Returns the column's maximum value; min and max are determined by the column's
 * ColumnOrder.
 *
 * <p>Values use PLAIN encoding, except that variable-length byte arrays carry no length
 * prefix.
 *
 * @return the backing array of {@code max_value}, or null when the field is null
 */
public byte[] getMax_value() {
  // Normalize the stored buffer through rightSize before exposing its backing array.
  setMax_value(org.apache.thrift.TBaseHelper.rightSize(max_value));
  if (max_value == null) {
    return null;
  }
  return max_value.array();
}
/**
 * DEPRECATED: returns the legacy maximum value of the column. Use min_value and
 * max_value instead.
 *
 * <p>Values use PLAIN encoding, except that variable-length byte arrays carry no length
 * prefix.
 *
 * <p>The legacy min and max fields hold values determined by signed comparison only. New
 * files should use the correct order for a column's logical type and store the values in
 * the min_value and max_value fields. To support older readers, the legacy fields may
 * still be set when the column order is signed.
 *
 * @return the backing array of {@code max}, or null when the field is null
 */
public byte[] getMax() {
  // Normalize the stored buffer through rightSize before exposing its backing array.
  setMax(org.apache.thrift.TBaseHelper.rightSize(max));
  if (max == null) {
    return null;
  }
  return max.array();
}
/**
 * Reports whether the given field currently holds an assigned value.
 *
 * @param field the field to query; must not be null
 * @return true if the field has been assigned a value, false otherwise
 * @throws java.lang.IllegalArgumentException if {@code field} is null
 * @throws java.lang.IllegalStateException if {@code field} is not one of the handled constants
 */
public boolean isSet(_Fields field) {
  if (field == null) {
    throw new java.lang.IllegalArgumentException();
  }

  final boolean assigned;
  switch (field) {
    case MAX:
      assigned = isSetMax();
      break;
    case MIN:
      assigned = isSetMin();
      break;
    case NULL_COUNT:
      assigned = isSetNull_count();
      break;
    case DISTINCT_COUNT:
      assigned = isSetDistinct_count();
      break;
    case MAX_VALUE:
      assigned = isSetMax_value();
      break;
    case MIN_VALUE:
      assigned = isSetMin_value();
      break;
    default:
      // No handler for this constant.
      throw new java.lang.IllegalStateException();
  }
  return assigned;
}