@Override
public Map<Integer, ColumnStatistics> finishRowGroup()
{
    checkState(!closed);
    checkState(inRowGroup);
    inRowGroup = false;

    if (directEncoded) {
        return directColumnWriter.finishRowGroup();
    }

    ColumnStatistics statistics = statisticsBuilder.buildColumnStatistics();
    rowGroups.add(new DictionaryRowGroup(values, rowGroupValueCount, statistics));

    // reset the per-row-group state; a fresh IntBigArray drops the old dictionary indexes
    rowGroupValueCount = 0;
    statisticsBuilder = newStringStatisticsBuilder();
    values = new IntBigArray();
    return ImmutableMap.of(column, statistics);
}
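// The snippets on this page share one idiom: IntBigArray (from Presto's array
// utilities) is a growable int array backed by paged segments, with an optional
// initial value that doubles as an "empty" sentinel. A minimal sketch of that
// idiom, using only the calls the snippets themselves make (the names
// sketchBasicUsage and table are illustrative, not from the Presto sources):
void sketchBasicUsage()
{
    int emptySlot = -1;
    IntBigArray table = new IntBigArray(emptySlot); // every slot starts as -1
    table.ensureCapacity(1024);                     // size before get/set, as every snippet here does
    table.set(42, 7);
    assert table.get(42) == 7;
    assert table.get(0) == emptySlot;               // untouched slots keep the sentinel
}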
protected AbstractGroupCollectionAggregationState(PageBuilder pageBuilder)
{
    // per-group linked-list pointers; NULL marks a group with no entries yet
    this.headBlockIndex = new ShortBigArray(NULL);
    this.headPosition = new IntBigArray(NULL);
    this.nextBlockIndex = new ShortBigArray(NULL);
    this.nextPosition = new IntBigArray(NULL);
    this.tailBlockIndex = new ShortBigArray(NULL);
    this.tailPosition = new IntBigArray(NULL);

    this.currentPageBuilder = pageBuilder;
    this.values = new ArrayList<>();
    this.sumPositions = new LongArrayList();
    this.groupEntryCount = new IntBigArray();
    values.add(currentPageBuilder);
    sumPositions.add(0L);
    valueBlocksRetainedSizeInBytes = 0;

    totalPositions = 0;
    capacity = 1024;
    nextBlockIndex.ensureCapacity(capacity);
    nextPosition.ensureCapacity(capacity);
    groupEntryCount.ensureCapacity(capacity);
}
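// The six big arrays above encode one singly linked list per group: head points
// at the group's first (blockIndex, position) entry, tail at its last, and the
// next arrays chain entries together, with NULL meaning "no entry". A hedged
// sketch of how an append would thread those pointers; toEntryIndex is an
// assumed helper mapping an entry to its slot in the next arrays, and the real
// Presto code differs in details such as page rotation:
void sketchAppend(long groupId, short blockIndex, int position)
{
    if (headBlockIndex.get(groupId) == NULL) {
        // first entry for this group
        headBlockIndex.set(groupId, blockIndex);
        headPosition.set(groupId, position);
    }
    else {
        // link the current tail entry to the new one
        long tailEntry = toEntryIndex(tailBlockIndex.get(groupId), tailPosition.get(groupId)); // assumed helper
        nextBlockIndex.set(tailEntry, blockIndex);
        nextPosition.set(tailEntry, position);
    }
    tailBlockIndex.set(groupId, blockIndex);
    tailPosition.set(groupId, position);
    groupEntryCount.set(groupId, groupEntryCount.get(groupId) + 1);
}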
private SingleTypedHistogram(Type type, int expectedSize, int hashCapacity, BlockBuilder values)
{
    this.type = type;
    this.expectedSize = expectedSize;
    this.hashCapacity = hashCapacity;
    this.values = values;

    checkArgument(expectedSize > 0, "expectedSize must be greater than zero");
    maxFill = calculateMaxFill(hashCapacity);
    mask = hashCapacity - 1;
    // -1 marks an empty slot in the open-addressed hash table
    hashPositions = new IntBigArray(-1);
    hashPositions.ensureCapacity(hashCapacity);
    counts = new LongBigArray();
    counts.ensureCapacity(hashCapacity);
}
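// SingleTypedHistogram, ValueStore, BigintGroupByHash, and GroupedTypedHistogram
// below all follow the same open-addressing recipe: a power-of-two capacity, a
// mask of capacity - 1, and an IntBigArray of slots initialized to -1 for
// "empty". A minimal sketch of lookup-or-insert with linear probing, in the
// shape of the rehash() loop further down; everything here is illustrative,
// since the real classes hash typed Block positions rather than plain ints:
static int putIfAbsent(IntBigArray slots, int mask, long hash, int newEntryId)
{
    int slot = (int) (hash & mask);          // valid because capacity is a power of two
    while (slots.get(slot) != -1) {
        // slot occupied: the real classes compare the stored entry to the probe
        // value here and return it on a match
        slot = (slot + 1) & mask;            // linear probe, wrapping via the mask
    }
    slots.set(slot, newEntryId);
    return newEntryId;
}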
public SliceDictionaryColumnWriter(int column, Type type, CompressionKind compression, int bufferSize, OrcEncoding orcEncoding, DataSize stringStatisticsLimit)
{
    checkArgument(column >= 0, "column is negative");
    this.column = column;
    this.type = requireNonNull(type, "type is null");
    this.compression = requireNonNull(compression, "compression is null");
    this.bufferSize = bufferSize;
    this.orcEncoding = requireNonNull(orcEncoding, "orcEncoding is null");
    this.stringStatisticsLimitInBytes = toIntExact(requireNonNull(stringStatisticsLimit, "stringStatisticsLimit is null").toBytes());

    // DWRF uses the V1 integer encoding; standard ORC uses V2
    LongOutputStream result;
    if (orcEncoding == DWRF) {
        result = new LongOutputStreamV1(compression, bufferSize, false, DATA);
    }
    else {
        result = new LongOutputStreamV2(compression, bufferSize, false, DATA);
    }
    this.dataStream = result;
    this.presentStream = new PresentOutputStream(compression, bufferSize);
    this.dictionaryDataStream = new ByteArrayOutputStream(compression, bufferSize, StreamKind.DICTIONARY_DATA);
    this.dictionaryLengthStream = createLengthOutputStream(compression, bufferSize, orcEncoding);
    // dictionary indexes for the current row group
    values = new IntBigArray();
    this.statisticsBuilder = newStringStatisticsBuilder();
}
@VisibleForTesting
public ValueStore(int expectedSize, BlockBuilder values)
{
    bucketCount = computeBucketCount(expectedSize, MAX_FILL_RATIO);
    mask = bucketCount - 1;
    maxFill = calculateMaxFill(bucketCount, MAX_FILL_RATIO);
    this.values = values;
    buckets = new IntBigArray(-1);
    buckets.ensureCapacity(bucketCount);
    valueHashes = new LongBigArray(-1);
    valueHashes.ensureCapacity(bucketCount);
}
public BigintGroupByHash(int hashChannel, boolean outputRawHash, int expectedSize, UpdateMemory updateMemory)
{
    checkArgument(hashChannel >= 0, "hashChannel must be at least zero");
    checkArgument(expectedSize > 0, "expectedSize must be greater than zero");

    this.hashChannel = hashChannel;
    this.outputRawHash = outputRawHash;

    hashCapacity = arraySize(expectedSize, FILL_RATIO);
    maxFill = calculateMaxFill(hashCapacity);
    mask = hashCapacity - 1;

    values = new LongBigArray();
    values.ensureCapacity(hashCapacity);
    groupIds = new IntBigArray(-1);
    groupIds.ensureCapacity(hashCapacity);
    valuesByGroupId = new LongBigArray();
    valuesByGroupId.ensureCapacity(hashCapacity);

    // This interface is used for actively reserving memory (push model) for rehash.
    // The caller can also query memory usage on this object (pull model).
    this.updateMemory = requireNonNull(updateMemory, "updateMemory is null");
}
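// The push model mentioned above matters most when the table grows: doubling is
// a large allocation, so the hash asks the caller to reserve memory first and
// backs off when the reservation fails. A hedged sketch of that handshake,
// assuming UpdateMemory is the boolean-returning callback the comment implies
// (the method name sketchTryRehash is illustrative):
boolean sketchTryRehash()
{
    // push model: reserve memory for the doubled table before allocating it;
    // if the reservation fails, give up for now and let the caller retry
    if (!updateMemory.update()) {
        return false;
    }
    // ... allocate the doubled arrays and re-insert entries, as in the
    // rehash() snippets further down ...
    return true;
}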
public GroupedTypedHistogram(Type type, int expectedCount)
{
    checkArgument(expectedCount > 0, "expectedCount must be greater than zero");
    this.type = type;
    this.bucketId = expectedCount;
    this.bucketCount = computeBucketCount(expectedCount, MAX_FILL_RATIO);
    this.mask = bucketCount - 1;
    this.maxFill = calculateMaxFill(bucketCount, MAX_FILL_RATIO);
    this.values = type.createBlockBuilder(null, computeBucketCount(expectedCount, GroupedTypedHistogram.MAX_FILL_RATIO));

    // buckets and node arrays (a bucket "points" to a node, so the relationship is 1:1)
    buckets = new IntBigArray(-1);
    buckets.ensureCapacity(bucketCount);
    counts = new LongBigArray();
    valuePositions = new IntBigArray();
    valueAndGroupHashes = new LongBigArray();
    nextPointers = new IntBigArray(NULL);
    groupIds = new LongBigArray(-1);
    // here, one bucket is one node in the hash structure (vs. a bucket that may be a chain
    // of nodes in closed hashing with linked lists); i.e., this is open-address hashing
    resizeNodeArrays(bucketCount);
    // end bucket/node-based arrays

    // per-group arrays: size is set by an external call and matches the number of groups
    headPointers = new LongBigArray(NULL);

    // index into counts/valuePositions
    nextNodePointer = 0;
    bucketNodeFactory = this.new BucketNodeFactory();
    valueStore = new ValueStore(expectedCount, values);
}
private IntIterator hashSortedGroupIds()
{
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupByHash.getGroupCount());
    for (int i = 0; i < groupByHash.getGroupCount(); i++) {
        groupIds.set(i, i);
    }

    // sort the identity permutation by each group's raw hash, yielding group ids in hash order
    groupIds.sort(0, groupByHash.getGroupCount(), (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));

    return new AbstractIntIterator()
    {
        private final int totalPositions = groupByHash.getGroupCount();
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < totalPositions;
        }

        @Override
        public int nextInt()
        {
            return groupIds.get(position++);
        }
    };
}
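// The same indirect-sort trick works for any ordering that lives outside the
// array being sorted: fill an IntBigArray with the identity permutation, then
// sort it with a comparator that consults the external key. A small sketch,
// assuming the fastutil-style int comparator the lambda above implies (the
// keys array and method name are illustrative):
static IntBigArray sortedIndexes(long[] keys, int count)
{
    IntBigArray indexes = new IntBigArray();
    indexes.ensureCapacity(count);
    for (int i = 0; i < count; i++) {
        indexes.set(i, i);                    // identity permutation
    }
    // sort positions by their key, leaving the keys themselves untouched
    indexes.sort(0, count, (left, right) -> Long.compare(keys[left], keys[right]));
    return indexes;
}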
private void rehash()
{
    long newCapacityLong = hashCapacity * 2L;
    if (newCapacityLong > Integer.MAX_VALUE) {
        throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries");
    }
    int newCapacity = (int) newCapacityLong;

    int newMask = newCapacity - 1;
    IntBigArray newHashPositions = new IntBigArray(-1);
    newHashPositions.ensureCapacity(newCapacity);

    for (int i = 0; i < values.getPositionCount(); i++) {
        // find an empty slot for the address
        int hashPosition = getBucketId(TypeUtils.hashPosition(type, values, i), newMask);
        while (newHashPositions.get(hashPosition) != -1) {
            hashPosition = (hashPosition + 1) & newMask;
        }

        // record the mapping
        newHashPositions.set(hashPosition, i);
    }

    hashCapacity = newCapacity;
    mask = newMask;
    maxFill = calculateMaxFill(newCapacity);
    hashPositions = newHashPositions;

    this.counts.ensureCapacity(maxFill);
}
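// This rehash() and the one below both grow by doubling, which keeps the
// capacity a power of two; that invariant is what makes `hash & mask` a valid
// substitute for the slower `hash % capacity`. A one-method illustration
// (names and constants are illustrative):
static void maskEqualsModulo()
{
    int capacity = 1 << 20;                  // any power of two
    int mask = capacity - 1;
    long hash = 0x9E3779B97F4A7C15L;
    // for power-of-two capacities, masking and unsigned remainder agree
    assert (int) (hash & mask) == (int) Long.remainderUnsigned(hash, capacity);
}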
private void rehash()
{
    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES,
                "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }

    int newBucketCount = computeBucketCount((int) newBucketCountLong, MAX_FILL_RATIO);
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);

    for (int i = 0; i < nextNodePointer; i++) {
        // compute the node's preferred bucket under the new mask
        int bucketId = getBucketIdForNode(i, newMask);
        int probeCount = 1;
        int originalBucket = bucketId;
        // probe until a free bucket is found
        while (newBuckets.get(bucketId) != -1) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(originalBucket, newMask, probe);
            probeCount++;
        }

        // record the mapping
        newBuckets.set(bucketId, i);
    }

    buckets = newBuckets;
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;
    resizeNodeArrays(newBucketCount);
}