@Override
public void write(scala.collection.Iterator<Product2<K, V>> records) throws IOException {
  // Keep track of success so we know if we encountered an exception.
  // We do this rather than a standard try/catch/re-throw to handle
  // generic throwables.
  boolean success = false;
  try {
    while (records.hasNext()) {
      insertRecordIntoSorter(records.next());
    }
    closeAndWriteOutput();
    success = true;
  } finally {
    if (sorter != null) {
      try {
        sorter.cleanupResources();
      } catch (Exception e) {
        // Only throw this error if we won't be masking another error.
        if (success) {
          throw e;
        } else {
          logger.error("In addition to a failure during writing, we failed during cleanup.", e);
        }
      }
    }
  }
}
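The success flag exists so that a failure in sorter.cleanupResources() never masks the original write failure. A minimal, self-contained sketch of the same masking-avoidance pattern outside Spark (the Resource interface and all names here are hypothetical, for illustration only):

import java.io.IOException;

final class CleanupWithoutMasking {
  // Hypothetical resource, used only to demonstrate the pattern.
  interface Resource {
    void doWork() throws IOException;
    void cleanup() throws IOException;
  }

  static void run(Resource resource) throws IOException {
    boolean success = false;
    try {
      resource.doWork();
      success = true;
    } finally {
      try {
        resource.cleanup();
      } catch (IOException e) {
        if (success) {
          // Nothing else failed, so the cleanup failure is the real error.
          throw e;
        } else {
          // doWork() already threw; log rather than replace that exception.
          System.err.println("Cleanup also failed: " + e);
        }
      }
    }
  }
}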
@Test
public void writeRecordsThatAreBiggerThanDiskWriteBufferSize() throws Exception {
  final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
  final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<>();
  final byte[] bytes = new byte[(int) (ShuffleExternalSorter.DISK_WRITE_BUFFER_SIZE * 2.5)];
  new Random(42).nextBytes(bytes);
  dataToWrite.add(new Tuple2<>(1, ByteBuffer.wrap(bytes)));
  writer.write(dataToWrite.iterator());
  writer.stop(true);
  assertEquals(
    HashMultiset.create(dataToWrite),
    HashMultiset.create(readRecordsFromFile()));
  assertSpillFilesWereCleanedUp();
}
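The payload above is 2.5x the sorter's fixed disk-write buffer, which forces the record to be copied out in buffer-sized chunks. A hedged sketch of that chunked-copy idea (the constant and names are illustrative, not ShuffleExternalSorter's actual internals):

import java.io.ByteArrayOutputStream;

final class ChunkedCopyDemo {
  // Illustrative fixed buffer size, playing the role of DISK_WRITE_BUFFER_SIZE.
  static final int BUFFER_SIZE = 1024 * 1024;

  // Copy an arbitrarily large record through a fixed-size buffer.
  static void writeRecord(byte[] record, ByteArrayOutputStream out) {
    final byte[] buffer = new byte[BUFFER_SIZE];
    int written = 0;
    while (written < record.length) {
      final int toTransfer = Math.min(BUFFER_SIZE, record.length - written);
      System.arraycopy(record, written, buffer, 0, toTransfer);
      out.write(buffer, 0, toTransfer);
      written += toTransfer;
    }
  }
}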
@Test
public void spillFilesAreDeletedWhenStoppingAfterError() throws IOException {
  final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
  writer.insertRecordIntoSorter(new Tuple2<>(1, 1));
  writer.insertRecordIntoSorter(new Tuple2<>(2, 2));
  writer.forceSorterToSpill();
  writer.insertRecordIntoSorter(new Tuple2<>(2, 2));
  writer.stop(false);
  assertSpillFilesWereCleanedUp();
}
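assertSpillFilesWereCleanedUp is a suite helper. One plausible implementation, assuming the test fixture records every spill file it hands out in a spillFilesCreated list (that field name is an assumption):

private void assertSpillFilesWereCleanedUp() {
  for (File spillFile : spillFilesCreated) {
    assertFalse("Spill file " + spillFile.getPath() + " was not cleaned up",
      spillFile.exists());
  }
}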
@VisibleForTesting
void closeAndWriteOutput() throws IOException {
  assert(sorter != null);
  updatePeakMemoryUsed();
  serBuffer = null;
  serOutputStream = null;
  final SpillInfo[] spills = sorter.closeAndGetSpills();
  sorter = null;
  final long[] partitionLengths;
  final File output = shuffleBlockResolver.getDataFile(shuffleId, mapId);
  final File tmp = Utils.tempFileWith(output);
  try {
    try {
      partitionLengths = mergeSpills(spills, tmp);
    } finally {
      for (SpillInfo spill : spills) {
        if (spill.file.exists() && !spill.file.delete()) {
          logger.error("Error while deleting spill file {}", spill.file.getPath());
        }
      }
    }
    shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tmp);
  } finally {
    if (tmp.exists() && !tmp.delete()) {
      logger.error("Error while deleting temp file {}", tmp.getAbsolutePath());
    }
  }
  mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths);
}
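Merging into a temp file created next to the final data file, then handing it to writeIndexFileAndCommit, is a write-temp-then-commit pattern: readers never observe a partially merged data file. A hedged, standalone sketch of the pattern (these helpers are hypothetical analogues, not Spark's Utils or resolver APIs):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.UUID;

final class TempThenCommit {
  // Create the temp file in the same directory as the target so the final
  // rename stays on one filesystem (hypothetical analogue of Utils.tempFileWith).
  static File tempFileWith(File target) {
    return new File(target.getParentFile(), target.getName() + "." + UUID.randomUUID());
  }

  static void commit(File tmp, File target) throws IOException {
    // Atomic within one filesystem: readers see either the old file or the new one.
    Files.move(tmp.toPath(), target.toPath(), StandardCopyOption.ATOMIC_MOVE);
  }
}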
when(taskMemoryManager.pageSizeBytes()).thenReturn(pageSizeBytes);
final UnsafeShuffleWriter<Object, Object> writer = new UnsafeShuffleWriter<>(
  blockManager,
  shuffleBlockResolver,
  taskMemoryManager,
  new SerializedShuffleHandle<>(0, 1, shuffleDep),
  0, // map id
  taskContext,
  conf);

long previousPeakMemory = writer.getPeakMemoryUsedBytes();
long newPeakMemory;
try {
  for (int i = 0; i < numRecordsPerPage * 10; i++) {
    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(1, 1));
    newPeakMemory = writer.getPeakMemoryUsedBytes();
    if (i % numRecordsPerPage == 0) {
      // A new page is allocated every numRecordsPerPage records, so peak memory
      // grows by exactly one page; otherwise it must stay flat.
      assertEquals(previousPeakMemory + pageSizeBytes, newPeakMemory);
    } else {
      assertEquals(previousPeakMemory, newPeakMemory);
    }
    previousPeakMemory = newPeakMemory;
  }

  // Spilling should not change peak memory.
  writer.forceSorterToSpill();
  newPeakMemory = writer.getPeakMemoryUsedBytes();
  assertEquals(previousPeakMemory, newPeakMemory);

  // Nor should re-inserting up to a page's worth of records after the spill.
  for (int i = 0; i < numRecordsPerPage; i++) {
    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(1, 1));
    newPeakMemory = writer.getPeakMemoryUsedBytes();
    assertEquals(previousPeakMemory, newPeakMemory);
  }

  // Closing the writer should not change peak memory either.
  writer.closeAndWriteOutput();
  newPeakMemory = writer.getPeakMemoryUsedBytes();
  assertEquals(previousPeakMemory, newPeakMemory);
} finally {
  writer.stop(false);
}
for (int i : new int[] { 1, 2, 3, 4, 4, 2 }) {
  dataToWrite.add(new Tuple2<>(i, i));
}
writer.insertRecordIntoSorter(dataToWrite.get(0));
writer.insertRecordIntoSorter(dataToWrite.get(1));
writer.insertRecordIntoSorter(dataToWrite.get(2));
writer.insertRecordIntoSorter(dataToWrite.get(3));
writer.forceSorterToSpill();
writer.insertRecordIntoSorter(dataToWrite.get(4));
writer.insertRecordIntoSorter(dataToWrite.get(5));
writer.closeAndWriteOutput();
final Option<MapStatus> mapStatus = writer.stop(true);
assertTrue(mapStatus.isDefined());
assertTrue(mergedOutputFile.exists());
/**
 * This convenience method should only be called in test code.
 */
@VisibleForTesting
public void write(Iterator<Product2<K, V>> records) throws IOException {
  write(JavaConverters.asScalaIteratorConverter(records).asScala());
}
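The overload lets test code pass a plain java.util.Iterator instead of a Scala iterator. A small usage sketch (the writer construction is assumed to come from the surrounding test fixtures):

final ArrayList<Product2<Object, Object>> records = new ArrayList<>();
records.add(new Tuple2<>(1, 1));
records.add(new Tuple2<>(2, 2));
// The convenience overload converts the Java iterator to a Scala one internally.
writer.write(records.iterator());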
@Override
public Option<MapStatus> stop(boolean success) {
  try {
    taskContext.taskMetrics().incPeakExecutionMemory(getPeakMemoryUsedBytes());
    if (stopping) {
      return Option.apply(null);
    } else {
      stopping = true;
      if (success) {
        if (mapStatus == null) {
          throw new IllegalStateException("Cannot call stop(true) without having called write()");
        }
        return Option.apply(mapStatus);
      } else {
        return Option.apply(null);
      }
    }
  } finally {
    if (sorter != null) {
      // If sorter is non-null, then this implies that we called stop() in response to an error,
      // so we need to clean up memory and spill files created by the sorter
      sorter.cleanupResources();
    }
  }
}
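stop(true) returns the MapStatus produced by write(), while stop(false) only cleans up. A hedged sketch of the caller-side contract (runTask and its shape are illustrative, not Spark's scheduler code):

import java.io.IOException;
import org.apache.spark.scheduler.MapStatus;

final class ShuffleTaskSketch {
  static MapStatus runTask(
      UnsafeShuffleWriter<Object, Object> writer,
      scala.collection.Iterator<scala.Product2<Object, Object>> records) throws IOException {
    boolean success = false;
    try {
      writer.write(records);
      success = true;
      return writer.stop(true).get(); // mapStatus is defined after a successful write()
    } finally {
      if (!success) {
        writer.stop(false); // cleanup-only path; releases sorter memory and spill files
      }
    }
  }
}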
if (fastMergeEnabled && fastMergeIsSupported) {
  // Compression is disabled or the codec supports decompression of concatenated
  // streams, so spills can be merged without interpreting the spilled bytes.
  if (transferToEnabled) {
    logger.debug("Using transferTo-based fast merge");
    partitionLengths = mergeSpillsWithTransferTo(spills, outputFile);
  } else {
    logger.debug("Using fileStream-based fast merge");
    partitionLengths = mergeSpillsWithFileStream(spills, outputFile, null);
  }
} else {
  logger.debug("Using slow merge");
  partitionLengths = mergeSpillsWithFileStream(spills, outputFile, compressionCodec);
}
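Fast merge is possible because every spill file lays out its partitions in the same order, so merging partition i is pure byte concatenation across spills. A hedged, simplified sketch of the stream-based variant (the real code also threads compression, encryption, and I/O metrics through these streams):

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

final class FastMergeSketch {
  // Concatenate partition p of every spill file, in spill order, into outputFile.
  static long[] concatenateSpills(File[] spillFiles, long[][] partitionLengthsPerSpill,
                                  int numPartitions, File outputFile) throws IOException {
    final long[] partitionLengths = new long[numPartitions];
    try (FileOutputStream out = new FileOutputStream(outputFile)) {
      // One open stream per spill; each is consumed front to back across partitions.
      final FileInputStream[] ins = new FileInputStream[spillFiles.length];
      try {
        for (int s = 0; s < spillFiles.length; s++) {
          ins[s] = new FileInputStream(spillFiles[s]);
        }
        final byte[] buffer = new byte[8192];
        for (int p = 0; p < numPartitions; p++) {
          for (int s = 0; s < spillFiles.length; s++) {
            long remaining = partitionLengthsPerSpill[s][p];
            while (remaining > 0) {
              final int read = ins[s].read(buffer, 0, (int) Math.min(buffer.length, remaining));
              if (read == -1) throw new IOException("Spill file truncated");
              out.write(buffer, 0, read);
              remaining -= read;
              partitionLengths[p] += read;
            }
          }
        }
      } finally {
        for (FileInputStream in : ins) {
          if (in != null) in.close();
        }
      }
    }
    return partitionLengths;
  }
}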
private UnsafeShuffleWriter<Object, Object> createWriter(boolean transferToEnabled)
    throws IOException {
  conf.set("spark.file.transferTo", String.valueOf(transferToEnabled));
  return new UnsafeShuffleWriter<>(
    blockManager,
    shuffleBlockResolver,
    taskMemoryManager,
    new SerializedShuffleHandle<>(0, 1, shuffleDep),
    0, // map id
    taskContext,
    conf);
}
this.initialSortBufferSize =
  sparkConf.getInt("spark.shuffle.sort.initialBufferSize", DEFAULT_INITIAL_SORT_BUFFER_SIZE);
open();
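The initial sort-buffer size is a normal Spark configuration entry, so it can be tuned per job. A small hedged example of overriding it (the value is illustrative):

import org.apache.spark.SparkConf;

// Start the in-memory sorter with room for 8192 record pointers instead of the
// default, avoiding early buffer growth for tasks that emit many records.
SparkConf conf = new SparkConf()
  .set("spark.shuffle.sort.initialBufferSize", "8192");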
this.outputBufferSizeInBytes =
  (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024;
open();
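The configured value is in kibibytes, hence the multiplication by 1024 to get bytes. A hedged usage example, assuming this entry is exposed as the spark.shuffle.unsafe.file.output.buffer key:

import org.apache.spark.SparkConf;

// Request a 64 KiB buffer for the shuffle output stream; the writer converts
// the configured KiB value into outputBufferSizeInBytes.
SparkConf conf = new SparkConf()
  .set("spark.shuffle.unsafe.file.output.buffer", "64k");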