/** * Checks whether there is enough space to insert an additional record in to the sort pointer * array and grows the array if additional space is required. If the required space cannot be * obtained, then the in-memory data will be spilled to disk. */ private void growPointerArrayIfNecessary() throws IOException { assert(inMemSorter != null); if (!inMemSorter.hasSpaceForAnotherRecord()) { long used = inMemSorter.getMemoryUsage(); LongArray array; try { // could trigger spilling array = allocateArray(used / 8 * 2); } catch (OutOfMemoryError e) { // should have trigger spilling if (!inMemSorter.hasSpaceForAnotherRecord()) { logger.error("Unable to grow the pointer array"); throw e; } return; } // check if spilling is triggered or not if (inMemSorter.hasSpaceForAnotherRecord()) { freeArray(array); } else { inMemSorter.expandPointerArray(array); } } }
/**
 * Releases everything this sorter holds: spill files are deleted, memory is freed, and the
 * in-memory sorter (if one is still attached) is freed and detached.
 */
public void cleanupResources() {
  synchronized (this) {
    deleteSpillFiles();
    freeMemory();
    if (inMemSorter == null) {
      // No in-memory sorter attached; nothing further to release.
      return;
    }
    inMemSorter.free();
    inMemSorter = null;
  }
}
if (inMemSorter == null || inMemSorter.numRecords() <= 0) { return 0L; if (inMemSorter.numRecords() > 0) { final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, inMemSorter.numRecords()); spillWriters.add(spillWriter); spillIterator(inMemSorter.getSortedIterator(), spillWriter); inMemSorter.reset();
UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, dataToSort.length, shouldUseRadixSort()); if (!sorter.hasSpaceForAnotherRecord()) { sorter.expandPointerArray( consumer.allocateArray(sorter.getMemoryUsage() / 8 * 2)); final String str = getStringFromDataPage(baseObject, position + 4, recordLength); final int partitionId = hashPartitioner.getPartition(str); sorter.insertRecord(address, partitionId, false); position += 4 + recordLength; final UnsafeSorterIterator iter = sorter.getSortedIterator(); int iterLength = 0; long prevPrefix = -1;
final UnsafeInMemorySorter inMemSorter = new UnsafeInMemorySorter( null, taskMemoryManager, prefixComputer.computePrefix(row); inMemSorter.insertRecord(address, prefix.value, prefix.isNull);
UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort()); sorter.reset(); fail("expected OutOfMmoryError but it seems operation surprisingly succeeded"); } catch (OutOfMemoryError oom) { sorter.free(); sorter.free();
/**
 * A sorter that never received a record must hand back an iterator with no elements.
 */
@Test
public void testSortingEmptyInput() {
  final SparkConf conf = new SparkConf().set("spark.memory.offHeap.enabled", "false");
  final TaskMemoryManager memoryManager =
    new TaskMemoryManager(new TestMemoryManager(conf), 0);
  final TestMemoryConsumer consumer = new TestMemoryConsumer(memoryManager);
  final RecordComparator recordComparator = mock(RecordComparator.class);
  final PrefixComparator prefixComparator = mock(PrefixComparator.class);
  final UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(
    consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort());

  // Nothing was inserted, so the sorted view must be empty.
  Assert.assertFalse(sorter.getSortedIterator().hasNext());
}
released += inMemSorter.getMemoryUsage(); totalSortTimeNanos += inMemSorter.getSortTimeNanos(); inMemSorter.free(); inMemSorter = null; taskContext.taskMetrics().incMemoryBytesSpilled(released);
/** * Write a record to the sorter. */ public void insertRecord( Object recordBase, long recordOffset, int length, long prefix, boolean prefixIsNull) throws IOException { assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { logger.info("Spilling data because number of spilledRecords crossed the threshold " + numElementsForSpillThreshold); spill(); } growPointerArrayIfNecessary(); int uaoSize = UnsafeAlignedOffset.getUaoSize(); // Need 4 bytes to store the record length. final int required = length + uaoSize; acquireNewPageIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); UnsafeAlignedOffset.putSize(base, pageCursor, length); pageCursor += uaoSize; Platform.copyMemory(recordBase, recordOffset, base, pageCursor, length); pageCursor += length; inMemSorter.insertRecord(recordAddress, prefix, prefixIsNull); }
/**
 * Write a key-value record to the sorter. Key and value are stored back to back in the current
 * data page, using the following layout (each length field is {@code getUaoSize()} bytes wide):
 *
 *   record length, key length, key data, value data
 *
 *   record length = key length + value length + size of one length field
 *
 * @param keyBase base object of the key
 * @param keyOffset offset of the key relative to {@code keyBase}
 * @param keyLen number of key bytes to copy
 * @param valueBase base object of the value
 * @param valueOffset offset of the value relative to {@code valueBase}
 * @param valueLen number of value bytes to copy
 * @param prefix sort prefix passed through to the in-memory sorter
 * @param prefixIsNull null flag for the prefix, passed through to the in-memory sorter
 */
public void insertKVRecord(Object keyBase, long keyOffset, int keyLen,
    Object valueBase, long valueOffset, int valueLen, long prefix, boolean prefixIsNull)
  throws IOException {

  growPointerArrayIfNecessary();

  final int lengthFieldSize = UnsafeAlignedOffset.getUaoSize();
  final int payloadLen = keyLen + valueLen;
  // Two length fields (record length and key length) precede the payload.
  acquireNewPageIfNecessary(payloadLen + (2 * lengthFieldSize));

  final Object pageBase = currentPage.getBaseObject();
  // Encode the address before the cursor moves: it must point at the record-length field.
  final long encodedAddress =
    taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor);

  UnsafeAlignedOffset.putSize(pageBase, pageCursor, payloadLen + lengthFieldSize);
  pageCursor += lengthFieldSize;
  UnsafeAlignedOffset.putSize(pageBase, pageCursor, keyLen);
  pageCursor += lengthFieldSize;

  Platform.copyMemory(keyBase, keyOffset, pageBase, pageCursor, keyLen);
  pageCursor += keyLen;
  Platform.copyMemory(valueBase, valueOffset, pageBase, pageCursor, valueLen);
  pageCursor += valueLen;

  assert(inMemSorter != null);
  inMemSorter.insertRecord(encodedAddress, prefix, prefixIsNull);
}
comparator = recordComparatorSupplier.get(); this.inMemSorter = new UnsafeInMemorySorter( this, taskMemoryManager,
/**
 * Returns an iterator which will return the rows in the order in which they were inserted.
 *
 * When nothing has been spilled, the in-memory sorter's iterator is returned directly.
 * Otherwise a reader for every spill file is chained, in spill order, followed by the
 * in-memory sorter's iterator if an in-memory sorter is still present.
 *
 * It is the caller's responsibility to call `cleanupResources()`
 * after consuming this iterator.
 *
 * TODO: support forced spilling
 */
public UnsafeSorterIterator getIterator() throws IOException {
  if (!spillWriters.isEmpty()) {
    final LinkedList<UnsafeSorterIterator> chain = new LinkedList<>();
    for (UnsafeSorterSpillWriter writer : spillWriters) {
      chain.add(writer.getReader(serializerManager));
    }
    // Records still held in memory come after everything that was spilled.
    if (inMemSorter != null) {
      chain.add(inMemSorter.getSortedIterator());
    }
    return new ChainedIterator(chain);
  }

  assert(inMemSorter != null);
  return inMemSorter.getSortedIterator();
}
/**
 * Inserts a record to be sorted. Assumes that the record pointer points to a record length
 * stored as a 4-byte integer, followed by the record's bytes.
 *
 * Each record takes two consecutive slots of the pointer array: the record pointer, then the
 * key prefix. When the array is full, one of twice the current size is requested from the
 * consumer and installed before the record is written.
 *
 * @param recordPointer pointer to a record in a data page, encoded by {@link TaskMemoryManager}.
 * @param keyPrefix a user-defined key prefix
 */
public void insertRecord(long recordPointer, long keyPrefix) {
  final boolean full = !hasSpaceForAnotherRecord();
  if (full) {
    final LongArray doubled = consumer.allocateArray(array.size() * 2);
    expandPointerArray(doubled);
  }

  // First slot: the record pointer.
  array.set(pos, recordPointer);
  pos += 1;
  // Second slot: the key prefix.
  array.set(pos, keyPrefix);
  pos += 1;
}
/**
 * Reports whether the in-memory sorter can take one more record, by delegating to
 * {@code inMemSorter.hasSpaceForAnotherRecord()}. Exposed for tests only.
 *
 * NOTE(review): {@code inMemSorter} is dereferenced without a null check here -- confirm that
 * callers only use this while an in-memory sorter is attached.
 *
 * @return whatever the in-memory sorter reports for its remaining capacity
 */
@VisibleForTesting boolean hasSpaceForAnotherRecord() { return inMemSorter.hasSpaceForAnotherRecord(); }
released += inMemSorter.getMemoryUsage(); inMemSorter.free(); inMemSorter = null; return released;
/**
 * Computes how much memory this sorter currently accounts for: the sizes of all allocated data
 * pages plus the in-memory sorter's own usage (counted as zero when no in-memory sorter is
 * attached).
 *
 * @return total memory usage as the sum described above
 */
private long getMemoryUsage() {
  long pageBytes = 0;
  for (MemoryBlock page : allocatedPages) {
    pageBytes += page.size();
  }

  long sorterBytes = 0;
  if (inMemSorter != null) {
    sorterBytes = inMemSorter.getMemoryUsage();
  }
  return sorterBytes + pageBytes;
}
UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, dataToSort.length, shouldUseRadixSort()); if (!sorter.hasSpaceForAnotherRecord()) { sorter.expandPointerArray( consumer.allocateArray(sorter.getMemoryUsage() / 8 * 2)); final String str = getStringFromDataPage(baseObject, position + 4, recordLength); final int partitionId = hashPartitioner.getPartition(str); sorter.insertRecord(address, partitionId, false); position += 4 + recordLength; final UnsafeSorterIterator iter = sorter.getSortedIterator(); int iterLength = 0; long prevPrefix = -1;
final UnsafeInMemorySorter inMemSorter = new UnsafeInMemorySorter( null, taskMemoryManager, prefixComputer.computePrefix(row); inMemSorter.insertRecord(address, prefix.value, prefix.isNull);
UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort()); sorter.reset(); fail("expected OutOfMmoryError but it seems operation surprisingly succeeded"); } catch (OutOfMemoryError oom) { sorter.free(); sorter.free();
/**
 * A sorter that never received a record must hand back an iterator with no elements.
 */
@Test
public void testSortingEmptyInput() {
  final SparkConf conf = new SparkConf().set("spark.memory.offHeap.enabled", "false");
  final TaskMemoryManager memoryManager =
    new TaskMemoryManager(new TestMemoryManager(conf), 0);
  final TestMemoryConsumer consumer = new TestMemoryConsumer(memoryManager);
  final RecordComparator recordComparator = mock(RecordComparator.class);
  final PrefixComparator prefixComparator = mock(PrefixComparator.class);
  final UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(
    consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort());

  // Nothing was inserted, so the sorted view must be empty.
  Assert.assertFalse(sorter.getSortedIterator().hasNext());
}