private void open(int numBucketSegments) {
    synchronized (stateLock) {
        if (!closed) {
            throw new IllegalStateException("Table is currently not closed.");
        }
        closed = false;
    }

    allocateBucketSegments(numBucketSegments);

    stagingSegments.add(forcedAllocateSegment());

    reuse = buildSideSerializer.createInstance();
}
private MemorySegment forcedAllocateSegment() {
    MemorySegment segment = allocateSegment();
    if (segment == null) {
        throw new RuntimeException("Bug in InPlaceMutableHashTable: A free segment should have been available.");
    }
    return segment;
}
/**
 * Initializes the hash table.
 */
@Override
public void open() {
    open(calcInitialNumBucketSegments());
}
/**
 * If more than 5% of the record area is wasted space (due to updated records not fitting
 * in their old places), then does a compaction.
 * Otherwise, throws an EOFException to indicate that memory ran out.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
private void compactOrThrow() throws IOException {
    if (holes > (double) recordArea.getTotalSize() * 0.05) {
        rebuild();
    } else {
        throw new EOFException("InPlaceMutableHashTable memory ran out. " + getMemoryConsumptionString());
    }
}
/** Same as rebuild(), but the number of bucket segments of the new table can be specified. */
private void rebuild(long newNumBucketSegments) throws IOException {
    // Get new bucket segments
    releaseBucketSegments();
    allocateBucketSegments((int) newNumBucketSegments);

    T record = buildSideSerializer.createInstance();
    try {
        EntryIterator iter = getEntryIterator();
        recordArea.resetAppendPosition();
        recordArea.setWritePosition(0);
        while ((record = iter.next(record)) != null && !closed) {
            final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
            final int bucket = hashCode & numBucketsMask;
            final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
            final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
            final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
            final long firstPointer = bucketSegment.getLong(bucketOffset);

            long ptrToAppended = recordArea.noSeekAppendPointerAndRecord(firstPointer, record);
            bucketSegment.putLong(bucketOffset, ptrToAppended);
        }
        recordArea.freeSegmentsAfterAppendPosition();
        holes = 0;
    } catch (EOFException ex) {
        throw new RuntimeException("Bug in InPlaceMutableHashTable: we shouldn't get out of memory during a rebuild, " +
            "because we aren't allocating any new memory.", ex);
    }
}
/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care about whether a record with the same key is already present.
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
@Override
public void insert(T record) throws IOException {
    if (closed) {
        return;
    }

    final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
    final int bucket = hashCode & numBucketsMask;
    final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
    final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
    final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
    final long firstPointer = bucketSegment.getLong(bucketOffset);

    try {
        final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
        bucketSegment.putLong(bucketOffset, newFirstPointer);
    } catch (EOFException ex) {
        compactOrThrow();
        insert(record);
        return;
    }

    numElements++;
    resizeTableIfNecessary();
}
@Override
public void abort() {
    LOG.debug("Aborting InPlaceMutableHashTable.");
    close();
}
@Override
public void openTask() throws Exception {
    // open the stub first
    final Configuration stubConfig = config.getStubParameters();
    BatchTask.openUserCode(reducer, stubConfig);

    // instantiate the serializer / comparator
    serializer = config.<T>getInputSerializer(0, userCodeClassLoader).getSerializer();
    comparator = config.<T>getDriverComparator(0, userCodeClassLoader).createComparator();

    MemoryManager memManager = parent.getEnvironment().getMemoryManager();
    final int numMemoryPages = memManager.computeNumberOfPages(config.getRelativeMemoryDriver());
    memory = memManager.allocatePages(parent, numMemoryPages);

    LOG.debug("ChainedReduceCombineDriver object reuse: " + (objectReuseEnabled ? "ENABLED" : "DISABLED") + ".");

    switch (strategy) {
        case SORTED_PARTIAL_REDUCE:
            // instantiate a fixed-length in-place sorter, if possible, otherwise the out-of-place sorter
            if (comparator.supportsSerializationWithKeyNormalization() &&
                    serializer.getLength() > 0 && serializer.getLength() <= THRESHOLD_FOR_IN_PLACE_SORTING) {
                sorter = new FixedLengthRecordSorter<T>(serializer, comparator.duplicate(), memory);
            } else {
                sorter = new NormalizedKeySorter<T>(serializer, comparator.duplicate(), memory);
            }
            break;
        case HASHED_PARTIAL_REDUCE:
            table = new InPlaceMutableHashTable<T>(serializer, comparator, memory);
            table.open();
            reduceFacade = table.new ReduceFacade(reducer, outputCollector, objectReuseEnabled);
            break;
    }
}
private void allocateBucketSegments(int numBucketSegments) {
    if (numBucketSegments < 1) {
        throw new RuntimeException("Bug in InPlaceMutableHashTable: allocateBucketSegments was called with a non-positive segment count.");
    }

    bucketSegments = new MemorySegment[numBucketSegments];
    for (int i = 0; i < bucketSegments.length; i++) {
        bucketSegments[i] = forcedAllocateSegment();
        // Init all pointers in all buckets to END_OF_LIST
        for (int j = 0; j < numBucketsPerSegment; j++) {
            bucketSegments[i].putLong(j << bucketSizeBits, END_OF_LIST);
        }
    }

    numBuckets = numBucketSegments * numBucketsPerSegment;
    numBucketsMask = (1 << MathUtils.log2strict(numBuckets)) - 1;
}