/**
 * Compares two file infos by the value of {@code getFileLength()}.
 *
 * @param file1 the first file info
 * @param file2 the second file info
 * @return {@code 0} if both lengths are equal, {@code 1} if the length of {@code file1}
 *         is greater than that of {@code file2}, and {@code -1} otherwise
 */
@Override
public int compare(DataFileInfo<T> file1, DataFileInfo<T> file2) {
    // Equal lengths are handled first so the remaining checks only decide the direction.
    if (file1.getFileLength() == file2.getFileLength()) {
        return 0;
    }
    if (file1.getFileLength() >= file2.getFileLength()) {
        return 1;
    }
    return -1;
}
}
private void mergeIfPossible(List<MemorySegment> mergeReadMemory, ChannelDeleteRegistry<Tuple2<Integer, T>> channelDeleteRegistry, AtomicBoolean aliveFlag) throws IOException { // select merge candidates List<DataFileInfo<SortedDataFile<Tuple2<Integer, T>>>> mergeCandidates = mergePolicy.selectMergeCandidates(mergeReadMemory.size()); while (mergeCandidates != null && aliveFlag.get()) { int maxMergeRound = 0; LinkedList<PartitionedSortedDataFile<T>> toBeMerged = new LinkedList<>(); for (DataFileInfo<SortedDataFile<Tuple2<Integer, T>>> mergeCandidate: mergeCandidates) { maxMergeRound = Math.max(maxMergeRound, mergeCandidate.getMergeRound()); PartitionedSortedDataFile<T> partitionedSortedDataFile = (PartitionedSortedDataFile<T>) mergeCandidate.getDataFile(); toBeMerged.add(partitionedSortedDataFile); } LOG.info("Start merging {} files to one file.", toBeMerged.size()); try { // merge the candidates to one file SortedDataFile<Tuple2<Integer, T>> mergedFile = mergeToOutput( toBeMerged, mergeReadMemory, channelDeleteRegistry, mergeFileIndex--); DataFileInfo<SortedDataFile<Tuple2<Integer, T>>> mergedFileInfo = new DataFileInfo<>( mergedFile.getBytesWritten(), maxMergeRound + 1, numberOfSubpartitions, mergedFile); // notify new file mergePolicy.addNewCandidate(mergedFileInfo); } catch (InterruptedException e) { throw new RuntimeException("Merge was interrupted.", e); } // select new candidates mergeCandidates = mergePolicy.selectMergeCandidates(mergeReadMemory.size()); } }
/**
 * Collects the data files held in the single remaining layer.
 *
 * @return a new list with the data file of every entry in layer 0, in iteration order;
 *         empty when there is no layer at all
 * @throws IllegalStateException (via {@code Preconditions.checkState}) if more than one layer
 *         is still present
 */
@Override
public List<T> getFinalMergeResult() {
    final int layerCount = layeredDataFiles.size();
    Preconditions.checkState(layerCount <= 1, "Illegal merge state: " + layerCount);

    List<T> remainingFiles = new ArrayList<>();
    if (layerCount == 0) {
        // nothing was ever added, so there is nothing to hand out
        return remainingFiles;
    }

    for (DataFileInfo<T> info : layeredDataFiles.get(0)) {
        remainingFiles.add(info.getDataFile());
    }
    return remainingFiles;
}
@Override public void addNewCandidate(DataFileInfo<T> dataFileInfo) { if (isFinalMergeStarted) { // after starting final merger, there should be only one layer of candidate files Preconditions.checkState(layeredDataFiles.size() == 1, "Illegal layer size: " + layeredDataFiles.size()); layeredDataFiles.get(0).addLast(dataFileInfo); } else { int mergeRound = dataFileInfo.getMergeRound(); Preconditions.checkArgument(layeredDataFiles.size() >= mergeRound, "Illegal merge round: (" + mergeRound + " " + layeredDataFiles.size() + ")"); if (layeredDataFiles.size() == mergeRound) { LinkedList<DataFileInfo<T>> dataFiles = new LinkedList<>(); layeredDataFiles.add(dataFiles); } layeredDataFiles.get(mergeRound).addLast(dataFileInfo); } }
/**
 * Accepts a newly produced sorted file, registers it with the merge policy as a round-0
 * candidate and then runs {@code mergeIfPossible}.
 *
 * @param sortedDataFile the new file; must be a {@code PartitionedBufferSortedDataFile}
 * @param writeMemory not used by this method
 * @param mergeReadMemory memory segments forwarded to {@code mergeIfPossible}
 * @param channelDeleteRegistry registry forwarded to {@code mergeIfPossible}
 * @param aliveFlag flag forwarded to {@code mergeIfPossible}
 * @throws IllegalArgumentException if {@code sortedDataFile} is of any other type
 * @throws IOException if {@code mergeIfPossible} fails with an I/O error
 */
@Override
public void notifyNewSortedDataFile(
        SortedDataFile<Tuple2<Integer, T>> sortedDataFile,
        List<MemorySegment> writeMemory,
        List<MemorySegment> mergeReadMemory,
        ChannelDeleteRegistry<Tuple2<Integer, T>> channelDeleteRegistry,
        AtomicBoolean aliveFlag) throws IOException {

    final boolean supportedType = sortedDataFile instanceof PartitionedBufferSortedDataFile;
    if (!supportedType) {
        throw new IllegalArgumentException(
                "Only PartitionedBufferSortedDataFile is supported: " + sortedDataFile.getClass().getName());
    }

    // a file that has not been merged yet starts at merge round 0
    DataFileInfo<SortedDataFile<Tuple2<Integer, T>>> freshCandidate = new DataFileInfo<>(
            sortedDataFile.getBytesWritten(), 0, numberOfSubpartitions, sortedDataFile);
    mergePolicy.addNewCandidate(freshCandidate);

    mergeIfPossible(mergeReadMemory, channelDeleteRegistry, aliveFlag);
}