parquet.column.statistics.Statistics.getStatsBasedOnType java code examples

/**
 * Converts footer statistics into a typed column statistics object.
 *
 * @param statistics the footer statistics; when {@code null}, the freshly created
 *     statistics object for {@code type} is returned unchanged
 * @param type the primitive type used to select the statistics implementation
 * @return the object produced by {@code getStatsBasedOnType(type)}, with min/max
 *     (only when both are set) and the null count applied if {@code statistics} is present
 */
public static parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type)
{
  parquet.column.statistics.Statistics<?> stats = parquet.column.statistics.Statistics.getStatsBasedOnType(type);
  if (statistics != null) {
    if (statistics.isSetMax() && statistics.isSetMin()) {
      // ByteBuffer.array() returns the whole backing array and ignores position/limit/arrayOffset
      // (and fails for read-only or direct buffers), so copy exactly the readable bytes instead.
      // NOTE(review): assumes statistics.min/max are java.nio.ByteBuffer fields - confirm.
      java.nio.ByteBuffer minBuffer = statistics.min.duplicate();
      byte[] minBytes = new byte[minBuffer.remaining()];
      minBuffer.get(minBytes);
      java.nio.ByteBuffer maxBuffer = statistics.max.duplicate();
      byte[] maxBytes = new byte[maxBuffer.remaining()];
      maxBuffer.get(maxBytes);
      stats.setMinMaxFromBytes(minBytes, maxBytes);
    }
    stats.setNumNulls(statistics.null_count);
  }
  return stats;
}

/**
 * Verifies that the predicate matches when the column statistics' min and max both equal
 * the predicate value and the statistics report one null.
 */
@Test
public void testMatchesWithStatistics()
    throws ParquetCorruptionException
{
  String value = "Test";
  ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
  RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
  TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
  TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));
  Statistics stats = getStatsBasedOnType(column.getType());
  stats.setNumNulls(1L);
  // Encode explicitly as UTF-8 so the statistics bytes agree with utf8Slice(value)
  // regardless of the platform default charset.
  stats.setMinMaxFromBytes(
      value.getBytes(java.nio.charset.StandardCharsets.UTF_8),
      value.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true));
}

/** Replaces the current statistics with a fresh object chosen from this writer's column type. */
private void resetStatistics() {
 statistics = Statistics.getStatsBasedOnType(path.getType());
}

/** Discards the accumulated statistics by installing a new object for the column's type. */
private void resetStatistics() {
 statistics = Statistics.getStatsBasedOnType(path.getType());
}

/**
 * Creates a page writer for a single column chunk.
 *
 * @param path the descriptor of the column being written
 * @param compressor the compressor stored for later use by this writer
 * @param initialSize the size passed to the backing {@code CapacityByteArrayOutputStream}
 */
private ColumnChunkPageWriter(ColumnDescriptor path, BytesCompressor compressor, int initialSize) {
 this.path = path;
 this.compressor = compressor;
 this.buf = new CapacityByteArrayOutputStream(initialSize);
 // The statistics implementation is selected from the column's type.
 this.totalStatistics = Statistics.getStatsBasedOnType(path.getType());
}

/**
 * Creates a page writer for a single column chunk, buffering into a
 * {@code ConcatenatingByteArrayCollector}.
 *
 * @param path the descriptor of the column being written
 * @param compressor the compressor stored for later use by this writer
 * @param pageSize not read by this constructor
 */
private ColumnChunkPageWriter(ColumnDescriptor path, BytesCompressor compressor, int pageSize) {
 this.path = path;
 this.compressor = compressor;
 this.buf = new ConcatenatingByteArrayCollector();
 // The statistics implementation is selected from the column's type.
 this.totalStatistics = getStatsBasedOnType(path.getType());
}

/**
 * Builds typed column statistics from the statistics stored in the file footer.
 *
 * @param statistics the footer statistics; when {@code null}, the new statistics object
 *     for {@code type} is returned as created
 * @param type the primitive type that determines which statistics implementation is used
 * @return the object from {@code getStatsBasedOnType(type)}; min/max are applied only when
 *     both are set, and the null count whenever {@code statistics} is non-null
 */
public static parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type)
{
  parquet.column.statistics.Statistics<?> stats = parquet.column.statistics.Statistics.getStatsBasedOnType(type);
  if (statistics != null) {
    if (statistics.isSetMax() && statistics.isSetMin()) {
      // Copy only the readable region of each buffer: ByteBuffer.array() would hand over the
      // entire backing array (ignoring position/limit/arrayOffset) and throws for read-only
      // or direct buffers.
      // NOTE(review): assumes statistics.min/max are java.nio.ByteBuffer fields - confirm.
      java.nio.ByteBuffer minBuffer = statistics.min.duplicate();
      byte[] minBytes = new byte[minBuffer.remaining()];
      minBuffer.get(minBytes);
      java.nio.ByteBuffer maxBuffer = statistics.max.duplicate();
      byte[] maxBytes = new byte[maxBuffer.remaining()];
      maxBuffer.get(maxBytes);
      stats.setMinMaxFromBytes(minBytes, maxBytes);
    }
    stats.setNumNulls(statistics.null_count);
  }
  return stats;
}

/**
 * Converts footer statistics into a column statistics object for the given type.
 *
 * @param statistics the footer statistics; may be {@code null} when none were written
 * @param type the primitive type used to select the statistics implementation
 * @return the object from {@code getStatsBasedOnType(type)}; left as created when
 *     {@code statistics} is {@code null}, otherwise populated with the null count and,
 *     when both are set, min/max
 */
public static parquet.column.statistics.Statistics fromParquetStatistics(Statistics statistics, PrimitiveTypeName type) {
 // create stats object based on the column type
 parquet.column.statistics.Statistics stats = parquet.column.statistics.Statistics.getStatsBasedOnType(type);
 // Only populate the object when statistics were supplied; otherwise it is returned as created.
 if (statistics != null) {
  if (statistics.isSetMax() && statistics.isSetMin()) {
   // ByteBuffer.array() ignores position/limit/arrayOffset and fails for read-only or
   // direct buffers, so copy exactly the readable bytes of each buffer.
   // NOTE(review): assumes statistics.min/max are java.nio.ByteBuffer fields - confirm.
   java.nio.ByteBuffer minBuffer = statistics.min.duplicate();
   byte[] minBytes = new byte[minBuffer.remaining()];
   minBuffer.get(minBytes);
   java.nio.ByteBuffer maxBuffer = statistics.max.duplicate();
   byte[] maxBytes = new byte[maxBuffer.remaining()];
   maxBuffer.get(maxBytes);
   stats.setMinMaxFromBytes(minBytes, maxBytes);
  }
  stats.setNumNulls(statistics.null_count);
 }
 return stats;
}

/**
 * Starts a column inside a block and resets the per-chunk bookkeeping.
 *
 * @param descriptor the column descriptor; supplies the chunk's path and type
 * @param valueCount the value count in this column
 * @param compressionCodecName the codec recorded as the current chunk's codec
 * @throws IOException if one of the underlying calls made here fails
 */
public void startColumn(ColumnDescriptor descriptor,
            long valueCount,
            CompressionCodecName compressionCodecName) throws IOException {
 state = state.startColumn();
 if (DEBUG) {
  LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
 }

 // Identity of the chunk being started.
 currentEncodings = new HashSet<parquet.column.Encoding>();
 currentChunkPath = ColumnPath.get(descriptor.getPath());
 currentChunkType = descriptor.getType();
 currentChunkCodec = compressionCodecName;
 currentChunkValueCount = valueCount;

 // The first data page begins at the current output position; size counters restart at zero.
 currentChunkFirstDataPage = out.getPos();
 compressedLength = 0;
 uncompressedLength = 0;

 // The statistics implementation depends on the chunk's primitive type.
 currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
}

/**
 * Begins writing a column within the current block.
 *
 * @param descriptor the column descriptor, used for the chunk's path and type
 * @param valueCount the value count in this column
 * @param compressionCodecName the codec stored for the chunk being started
 * @throws IOException if one of the underlying calls made here fails
 */
public void startColumn(ColumnDescriptor descriptor,
            long valueCount,
            CompressionCodecName compressionCodecName) throws IOException {
 state = state.startColumn();
 if (DEBUG) {
  LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
 }

 // Per-chunk metadata taken from the arguments.
 currentEncodings = new HashSet<parquet.column.Encoding>();
 currentChunkPath = ColumnPath.get(descriptor.getPath());
 currentChunkType = descriptor.getType();
 currentChunkCodec = compressionCodecName;
 currentChunkValueCount = valueCount;

 // Record where the chunk starts in the output and zero the length counters.
 currentChunkFirstDataPage = out.getPos();
 compressedLength = 0;
 uncompressedLength = 0;

 // Statistics are typed, so pick the implementation from the chunk type.
 currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
}

/**
 * Checks that the predicate reports a match when the statistics' min and max are both the
 * predicate value and one null has been recorded.
 */
@Test
public void testMatchesWithStatistics()
    throws ParquetCorruptionException
{
  String value = "Test";
  ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
  RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
  TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
  TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));
  Statistics stats = getStatsBasedOnType(column.getType());
  stats.setNumNulls(1L);
  // Use an explicit UTF-8 encoding so the bytes match utf8Slice(value) on every platform,
  // rather than depending on the default charset.
  stats.setMinMaxFromBytes(
      value.getBytes(java.nio.charset.StandardCharsets.UTF_8),
      value.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true));
}

Javadoc

Returns the typed statistics object based on the passed type parameter

Popular methods of Statistics

setMinMaxFromBytes
Abstract method to set min and max values from byte arrays.
setNumNulls
Sets the number of nulls to the parameter value
genericGetMax
genericGetMin
getNumNulls
Returns the null count
isEmpty
Returns a boolean specifying if the Statistics object is empty, i.e., does not contain valid statistics
mergeStatistics
Method to merge this statistics object with the object passed as parameter. Merging keeps the smallest minimum, largest maximum and adds null counts.
getMaxBytes
Abstract method to return the max value as a byte array
getMinBytes
Abstract method to return the min value as a byte array
hasNonNullValue
Returns whether there have been non-null values added to this statistics
incrementNumNulls
Increments the null count by the parameter value
markAsNotEmpty
Sets the page/column as having a valid non-null value (kind of a misnomer here).

Popular in Java

Creating JSON documents from java classes using gson
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending…
addToBackStack (FragmentTransaction)
getSupportFragmentManager (FragmentActivity)
PrintStream (java.io)
Fake signature of an existing Java class.
Runnable (java.lang)
Represents a command that can be executed. Often used to run code in a different Thread.
Charset (java.nio.charset)
A charset is a named mapping between Unicode characters and byte sequences. Every Charset can decode
AtomicInteger (java.util.concurrent.atomic)
An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in the Public Domain.
Loader (org.hibernate.loader)
Abstract superclass of object loading (and querying) strategies. This class implements useful common
Top 12 Jupyter Notebook extensions

How to use the getStatsBasedOnType method in parquet.column.statistics.Statistics

Best Java code snippets using parquet.column.statistics.Statistics.getStatsBasedOnType (Showing top 11 results out of 315)

How to use the getStatsBasedOnType method in parquet.column.statistics.Statistics