/** * Close the sorter, causing any buffered data to be sorted and written out to disk. * * @return metadata for the spill files written by this sorter. If no records were ever inserted * into this sorter, then this will return an empty array. * @throws IOException */ public SpillInfo[] closeAndGetSpills() throws IOException { try { if (inMemSorter != null) { // Do not count the final file towards the spill count. writeSortedFile(true); freeMemory(); inMemSorter.free(); inMemSorter = null; } return spills.toArray(new SpillInfo[spills.size()]); } catch (IOException e) { cleanupResources(); throw e; } }
private long freeMemory() { updatePeakMemoryUsed(); long memoryFreed = 0; for (MemoryBlock block : allocatedPages) { memoryFreed += block.size(); freePage(block); } allocatedPages.clear(); currentPage = null; pageCursor = 0; return memoryFreed; }
private void updatePeakMemoryUsed() { long mem = getMemoryUsage(); if (mem > peakMemoryUsedBytes) { peakMemoryUsedBytes = mem; } }
/** * Close the sorter, causing any buffered data to be sorted and written out to disk. * * @return metadata for the spill files written by this sorter. If no records were ever inserted * into this sorter, then this will return an empty array. * @throws IOException */ public SpillInfo[] closeAndGetSpills() throws IOException { if (inMemSorter != null) { // Do not count the final file towards the spill count. writeSortedFile(true); freeMemory(); inMemSorter.free(); inMemSorter = null; } return spills.toArray(new SpillInfo[spills.size()]); }
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
/** * Write a record to the shuffle sorter. */ public void insertRecord(Object recordBase, long recordOffset, int length, int partitionId) throws IOException { // for tests assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { logger.info("Spilling data because number of spilledRecords crossed the threshold " + numElementsForSpillThreshold); spill(); } growPointerArrayIfNecessary(); // Need 4 bytes to store the record length. final int required = length + 4; acquireNewPageIfNecessary(required); assert(currentPage != null); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); Platform.putInt(base, pageCursor, length); pageCursor += 4; Platform.copyMemory(recordBase, recordOffset, base, pageCursor, length); pageCursor += length; inMemSorter.insertRecord(recordAddress, partitionId); }
try { array = allocateArray(used / 8 * 2); } catch (TooLargePageException e) { spill(); return; } catch (SparkOutOfMemoryError e) { freeArray(array); } else { inMemSorter.expandPointerArray(array);
@Override public void write(scala.collection.Iterator<Product2<K, V>> records) throws IOException { // Keep track of success so we know if we encountered an exception // We do this rather than a standard try/catch/re-throw to handle // generic throwables. boolean success = false; try { while (records.hasNext()) { insertRecordIntoSorter(records.next()); } closeAndWriteOutput(); success = true; } finally { if (sorter != null) { try { sorter.cleanupResources(); } catch (Exception e) { // Only throw this error if we won't be masking another // error. if (success) { throw e; } else { logger.error("In addition to a failure during writing, we failed during " + "cleanup.", e); } } } } }
/** * Checks whether there is enough space to insert an additional record in to the sort pointer * array and grows the array if additional space is required. If the required space cannot be * obtained, then the in-memory data will be spilled to disk. */ private void growPointerArrayIfNecessary() throws IOException { assert(inMemSorter != null); if (!inMemSorter.hasSpaceForAnotherRecord()) { long used = inMemSorter.getMemoryUsage(); LongArray array; try { // could trigger spilling array = allocateArray(used / 8 * 2); } catch (OutOfMemoryError e) { // should have trigger spilling if (!inMemSorter.hasSpaceForAnotherRecord()) { logger.error("Unable to grow the pointer array"); throw e; } return; } // check if spilling is triggered or not if (inMemSorter.hasSpaceForAnotherRecord()) { freeArray(array); } else { inMemSorter.expandPointerArray(array); } } }
private void open() { assert (sorter == null); sorter = new ShuffleExternalSorter( memoryManager, blockManager, taskContext, initialSortBufferSize, partitioner.numPartitions(), sparkConf, writeMetrics); serBuffer = new MyByteArrayOutputStream(DEFAULT_INITIAL_SER_BUFFER_SIZE); serOutputStream = serializer.serializeStream(serBuffer); }
/** * Force all memory and spill files to be deleted; called by shuffle error-handling code. */ public void cleanupResources() { freeMemory(); if (inMemSorter != null) { inMemSorter.free(); inMemSorter = null; } for (SpillInfo spill : spills) { if (spill.file.exists() && !spill.file.delete()) { logger.error("Unable to delete spill file {}", spill.file.getPath()); } } }
/** * Allocates more memory in order to insert an additional record. This will request additional * memory from the memory manager and spill if the requested memory can not be obtained. * * @param required the required space in the data page, in bytes, including space for storing * the record size. This must be less than or equal to the page size (records * that exceed the page size are handled via a different code path which uses * special overflow pages). */ private void acquireNewPageIfNecessary(int required) { if (currentPage == null || pageCursor + required > currentPage.getBaseOffset() + currentPage.size() ) { // TODO: try to find space in previous pages currentPage = allocatePage(required); pageCursor = currentPage.getBaseOffset(); allocatedPages.add(currentPage); } }
@VisibleForTesting void closeAndWriteOutput() throws IOException { assert(sorter != null); updatePeakMemoryUsed(); serBuffer = null; serOutputStream = null; final SpillInfo[] spills = sorter.closeAndGetSpills(); sorter = null; final long[] partitionLengths; final File output = shuffleBlockResolver.getDataFile(shuffleId, mapId); final File tmp = Utils.tempFileWith(output); try { try { partitionLengths = mergeSpills(spills, tmp); } finally { for (SpillInfo spill : spills) { if (spill.file.exists() && ! spill.file.delete()) { logger.error("Error while deleting spill file {}", spill.file.getPath()); } } } shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tmp); } finally { if (tmp.exists() && !tmp.delete()) { logger.error("Error while deleting temp file {}", tmp.getAbsolutePath()); } } mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); }
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
/** * Write a record to the shuffle sorter. */ public void insertRecord(Object recordBase, long recordOffset, int length, int partitionId) throws IOException { // for tests assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { logger.info("Spilling data because number of spilledRecords crossed the threshold " + numElementsForSpillThreshold); spill(); } growPointerArrayIfNecessary(); final int uaoSize = UnsafeAlignedOffset.getUaoSize(); // Need 4 or 8 bytes to store the record length. final int required = length + uaoSize; acquireNewPageIfNecessary(required); assert(currentPage != null); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); UnsafeAlignedOffset.putSize(base, pageCursor, length); pageCursor += uaoSize; Platform.copyMemory(recordBase, recordOffset, base, pageCursor, length); pageCursor += length; inMemSorter.insertRecord(recordAddress, partitionId); }
try { array = allocateArray(used / 8 * 2); } catch (TooLargePageException e) { spill(); return; } catch (SparkOutOfMemoryError e) { freeArray(array); } else { inMemSorter.expandPointerArray(array);
/** * Close the sorter, causing any buffered data to be sorted and written out to disk. * * @return metadata for the spill files written by this sorter. If no records were ever inserted * into this sorter, then this will return an empty array. * @throws IOException */ public SpillInfo[] closeAndGetSpills() throws IOException { if (inMemSorter != null) { // Do not count the final file towards the spill count. writeSortedFile(true); freeMemory(); inMemSorter.free(); inMemSorter = null; } return spills.toArray(new SpillInfo[spills.size()]); }
@Override public void write(scala.collection.Iterator<Product2<K, V>> records) throws IOException { // Keep track of success so we know if we encountered an exception // We do this rather than a standard try/catch/re-throw to handle // generic throwables. boolean success = false; try { while (records.hasNext()) { insertRecordIntoSorter(records.next()); } closeAndWriteOutput(); success = true; } finally { if (sorter != null) { try { sorter.cleanupResources(); } catch (Exception e) { // Only throw this error if we won't be masking another // error. if (success) { throw e; } else { logger.error("In addition to a failure during writing, we failed during " + "cleanup.", e); } } } } }
private void open() { assert (sorter == null); sorter = new ShuffleExternalSorter( memoryManager, blockManager, taskContext, initialSortBufferSize, partitioner.numPartitions(), sparkConf, writeMetrics); serBuffer = new MyByteArrayOutputStream(DEFAULT_INITIAL_SER_BUFFER_SIZE); serOutputStream = serializer.serializeStream(serBuffer); }
/** * Force all memory and spill files to be deleted; called by shuffle error-handling code. */ public void cleanupResources() { freeMemory(); if (inMemSorter != null) { inMemSorter.free(); inMemSorter = null; } for (SpillInfo spill : spills) { if (spill.file.exists() && !spill.file.delete()) { logger.error("Unable to delete spill file {}", spill.file.getPath()); } } }