public static void updateSparkBytesWrittenMetrics(Logger log, FileSystem fs, Path[] commitPaths) { AtomicLong bytesWritten = new AtomicLong(); Arrays.stream(commitPaths).parallel().forEach(path -> { try { bytesWritten.addAndGet(fs.getFileStatus(path).getLen()); } catch (IOException e) { log.debug("Unable to collect stats for file: " + path + " output metrics may be inaccurate", e); } }); if (bytesWritten.get() > 0) { TaskContext.get().taskMetrics().outputMetrics().setBytesWritten(bytesWritten.get()); } } }
public static void updateSparkRecordsWrittenMetrics(long numRows) { TaskContext taskContext = TaskContext.get(); if (taskContext != null && numRows > 0) { taskContext.taskMetrics().outputMetrics().setRecordsWritten(numRows); } }
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
/** * Sort and spill the current records in response to memory pressure. */ @Override public long spill(long size, MemoryConsumer trigger) throws IOException { if (trigger != this || inMemSorter == null || inMemSorter.numRecords() == 0) { return 0L; } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), Utils.bytesToString(getMemoryUsage()), spills.size(), spills.size() > 1 ? " times" : " time"); writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); return spillSize; }
@Override public Option<MapStatus> stop(boolean success) { try { taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes()); if (stopping) { return Option.apply(null); } else { stopping = true; if (success) { if (mapStatus == null) { throw new IllegalStateException("Cannot call stop(true) without having called write()"); } return Option.apply(mapStatus); } else { return Option.apply(null); } } } finally { if (sorter != null) { // If sorter is non-null, then this implies that we called stop() in response to an error, // so we need to clean up memory and spill files created by the sorter sorter.cleanupResources(); } } } }
@Override public Option<MapStatus> stop(boolean success) { try { taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes()); if (stopping) { return Option.apply(null); } else { stopping = true; if (success) { if (mapStatus == null) { throw new IllegalStateException("Cannot call stop(true) without having called write()"); } return Option.apply(mapStatus); } else { return Option.apply(null); } } } finally { if (sorter != null) { // If sorter is non-null, then this implies that we called stop() in response to an error, // so we need to clean up memory and spill files created by the sorter sorter.cleanupResources(); } } } }
spillFilesCreated.clear(); taskContext = mock(TaskContext.class); when(taskContext.taskMetrics()).thenReturn(new TaskMetrics()); when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); when(diskBlockManager.createTempLocalBlock()).thenAnswer(invocationOnMock -> {
@Override public Option<MapStatus> stop(boolean success) { try { taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes()); if (stopping) { return Option.apply(null); } else { stopping = true; if (success) { if (mapStatus == null) { throw new IllegalStateException("Cannot call stop(true) without having called write()"); } return Option.apply(mapStatus); } else { return Option.apply(null); } } } finally { if (sorter != null) { // If sorter is non-null, then this implies that we called stop() in response to an error, // so we need to clean up memory and spill files created by the sorter sorter.cleanupResources(); } } } }
inMemSorter.free(); inMemSorter = null; taskContext.taskMetrics().incMemoryBytesSpilled(released); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); totalSpillBytes += released; return released;
inMemSorter.free(); inMemSorter = null; taskContext.taskMetrics().incMemoryBytesSpilled(released); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); totalSpillBytes += released; return released;
spillFilesCreated.clear(); taskContext = mock(TaskContext.class); when(taskContext.taskMetrics()).thenReturn(new TaskMetrics()); when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); when(diskBlockManager.createTempLocalBlock()).thenAnswer(invocationOnMock -> {
spillFilesCreated.clear(); taskContext = mock(TaskContext.class); when(taskContext.taskMetrics()).thenReturn(new TaskMetrics()); when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); when(diskBlockManager.createTempLocalBlock()).thenAnswer(invocationOnMock -> {
BypassMergeSortShuffleWriter( BlockManager blockManager, IndexShuffleBlockResolver shuffleBlockResolver, BypassMergeSortShuffleHandle<K, V> handle, int mapId, TaskContext taskContext, SparkConf conf) { // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true); this.blockManager = blockManager; final ShuffleDependency<K, V, V> dep = handle.dependency(); this.mapId = mapId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.serializer = dep.serializer(); this.shuffleBlockResolver = shuffleBlockResolver; }
BypassMergeSortShuffleWriter( BlockManager blockManager, IndexShuffleBlockResolver shuffleBlockResolver, BypassMergeSortShuffleHandle<K, V> handle, int mapId, TaskContext taskContext, SparkConf conf) { // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true); this.blockManager = blockManager; final ShuffleDependency<K, V, V> dep = handle.dependency(); this.mapId = mapId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.serializer = dep.serializer(); this.shuffleBlockResolver = shuffleBlockResolver; }
BypassMergeSortShuffleWriter( BlockManager blockManager, IndexShuffleBlockResolver shuffleBlockResolver, BypassMergeSortShuffleHandle<K, V> handle, int mapId, TaskContext taskContext, SparkConf conf) { // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided this.fileBufferSize = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; this.transferToEnabled = conf.getBoolean("spark.file.transferTo", true); this.blockManager = blockManager; final ShuffleDependency<K, V, V> dep = handle.dependency(); this.mapId = mapId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.serializer = dep.serializer(); this.shuffleBlockResolver = shuffleBlockResolver; }
this.serializer = dep.serializer().newInstance(); this.partitioner = dep.partitioner(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.taskContext = taskContext; this.sparkConf = sparkConf;
@Test public void testDiskSpilledBytes() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long[] record = new long[100]; int recordSize = record.length * 8; int n = (int) pageSizeBytes / recordSize * 3; for (int i = 0; i < n; i++) { record[0] = (long) i; sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); } // We will have at-least 2 memory pages allocated because of rounding happening due to // integer division of pageSizeBytes and recordSize. assertTrue(sorter.getNumberOfAllocatedPages() >= 2); assertTrue(taskContext.taskMetrics().diskBytesSpilled() == 0); UnsafeExternalSorter.SpillableIterator iter = (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); assertTrue(iter.spill() > 0); assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); assertEquals(0, iter.spill()); // Even if we did not spill second time, the disk spilled bytes should still be non-zero assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); }
@Test public void testDiskSpilledBytes() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long[] record = new long[100]; int recordSize = record.length * 8; int n = (int) pageSizeBytes / recordSize * 3; for (int i = 0; i < n; i++) { record[0] = (long) i; sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); } // We will have at-least 2 memory pages allocated because of rounding happening due to // integer division of pageSizeBytes and recordSize. assertTrue(sorter.getNumberOfAllocatedPages() >= 2); assertTrue(taskContext.taskMetrics().diskBytesSpilled() == 0); UnsafeExternalSorter.SpillableIterator iter = (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); assertTrue(iter.spill() > 0); assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); assertEquals(0, iter.spill()); // Even if we did not spill second time, the disk spilled bytes should still be non-zero assertTrue(taskContext.taskMetrics().diskBytesSpilled() > 0); sorter.cleanupResources(); assertSpillFilesWereCleanedUp(); }
this.serializer = dep.serializer().newInstance(); this.partitioner = dep.partitioner(); this.writeMetrics = taskContext.taskMetrics().shuffleWriteMetrics(); this.taskContext = taskContext; this.sparkConf = sparkConf;