/** If the current segment has too many points then we spill over to temp files / offline sort. */
private void spillToOffline() throws IOException {

  // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
  offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", 0, singleValuePerDoc);
  tempInput = offlinePointWriter.out;
  PointReader reader = heapPointWriter.getReader(0, pointCount);
  for (int i = 0; i < pointCount; i++) {
    boolean hasNext = reader.next();
    assert hasNext;
    offlinePointWriter.append(reader.packedValue(), i, heapPointWriter.docIDs[i]);
  }
  heapPointWriter = null;
}
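// Illustrative side note, standalone and separate from the code above: the spill-over pattern
// (buffer points on the heap until a threshold is crossed, then move everything buffered so far to a
// temp file and keep appending there) can be sketched on its own. All names below (SpillingBuffer,
// maxInHeap) are hypothetical, and a plain long payload stands in for the packed byte[] value plus
// ord/docID:

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

/** Minimal standalone sketch of the heap-then-spill pattern. */
public class SpillingBuffer {
  private final int maxInHeap;
  private long[] heap;
  private int count;
  private Path tempFile;
  private DataOutputStream out;

  public SpillingBuffer(int maxInHeap) {
    this.maxInHeap = maxInHeap;
    this.heap = new long[maxInHeap];
  }

  public void add(long value) throws IOException {
    if (out == null && count >= maxInHeap) {
      spillToOffline();  // one-time switch from the heap buffer to a temp file
    }
    if (out != null) {
      out.writeLong(value);
    } else {
      heap[count] = value;
    }
    count++;
  }

  /** Append everything buffered so far to a temp file, then free the heap buffer. */
  private void spillToOffline() throws IOException {
    tempFile = Files.createTempFile("spill", ".bin");
    out = new DataOutputStream(Files.newOutputStream(tempFile));
    for (int i = 0; i < count; i++) {
      out.writeLong(heap[i]);
    }
    heap = null;  // same idea as heapPointWriter = null: the heap copy is no longer needed
  }

  public void close() throws IOException {
    if (out != null) {
      out.close();
    }
  }
}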
/** Pull a partition back into heap once the point count is low enough while recursing. */
private PathSlice switchToHeap(PathSlice source, List<Closeable> toCloseHeroically) throws IOException {
  int count = Math.toIntExact(source.count);
  // Not inside the try because we don't want to close it here:
  PointReader reader = source.writer.getSharedReader(source.start, source.count, toCloseHeroically);
  try (PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc)) {
    for (int i = 0; i < count; i++) {
      boolean hasNext = reader.next();
      assert hasNext;
      writer.append(reader.packedValue(), reader.ord(), reader.docID());
    }
    return new PathSlice(writer, 0, count);
  } catch (Throwable t) {
    throw verifyChecksum(t, source.writer);
  }
}
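// Illustrative side note, standalone and separate from the code above: the reverse move (pull an
// offline run back onto the heap once its size fits in an int) follows the same shape. Names below
// are hypothetical; a long[] stands in for the heap writer, and the temp file is assumed to hold raw
// longs as written by the spill sketch above:

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

/** Minimal standalone sketch of draining a file-backed run back into an in-memory array. */
public class SwitchToHeapSketch {
  static long[] switchToHeap(Path tempFile, long count) throws IOException {
    int intCount = Math.toIntExact(count);  // same overflow guard as Math.toIntExact(source.count)
    long[] heap = new long[intCount];
    try (DataInputStream in = new DataInputStream(Files.newInputStream(tempFile))) {
      for (int i = 0; i < intCount; i++) {
        heap[i] = in.readLong();  // stands in for writer.append(reader.packedValue(), reader.ord(), reader.docID())
      }
    }
    return heap;
  }
}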
/** Marks bits for the ords (points) that belong in the right sub tree (those docs that have values >= the splitValue). */
private byte[] markRightTree(long rightCount, int splitDim, PathSlice source, LongBitSet ordBitSet) throws IOException {

  // Now we mark ords that fall into the right half, so we can partition on all other dims that are not the split dim:
  // Read the split value, then mark all ords in the right tree (larger than the split value):
  // TODO: find a way to also checksum this reader? If we changed to markLeftTree, and scanned the final chunk, it could work?
  try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount, rightCount)) {
    boolean result = reader.next();
    assert result : "rightCount=" + rightCount + " source.count=" + source.count + " source.writer=" + source.writer;
    System.arraycopy(reader.packedValue(), splitDim * bytesPerDim, scratch1, 0, bytesPerDim);
    if (numDataDims > 1) {
      assert ordBitSet.get(reader.ord()) == false;
      ordBitSet.set(reader.ord());
      // Subtract 1 from rightCount because we already did the first value above (so we could record the split value):
      reader.markOrds(rightCount - 1, ordBitSet);
    }
  } catch (Throwable t) {
    throw verifyChecksum(t, source.writer);
  }

  return scratch1;
}
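// Illustrative side note, standalone and separate from the code above: what markRightTree records can
// be shown with plain arrays and java.util.BitSet. With the points sorted by the split dimension, the
// split value is the first value of the right half, and the ords of that half are marked so the other
// dimensions (which are sorted differently) can later be partitioned by ord rather than by position.
// All names below are hypothetical:

import java.util.BitSet;

/** Standalone sketch of recording the split value and marking the right-tree ords. */
public class MarkRightTreeSketch {
  public static void main(String[] args) {
    // Points already sorted by the split dimension; ords identify the same point across dims:
    int[] sortedValues = {1, 3, 5, 7, 8, 9};
    int[] sortedOrds   = {3, 1, 4, 2, 5, 0};

    int count = sortedValues.length;
    int rightCount = count / 2;                 // points that belong in the right sub tree
    int splitIndex = count - rightCount;

    int splitValue = sortedValues[splitIndex];  // plays the role of copying into scratch1
    BitSet ordBitSet = new BitSet();
    for (int i = splitIndex; i < count; i++) {
      ordBitSet.set(sortedOrds[i]);             // plays the role of reader.markOrds(rightCount-1, ordBitSet)
    }

    System.out.println("splitValue=" + splitValue + " rightOrds=" + ordBitSet);
    // prints: splitValue=7 rightOrds={0, 2, 5}
  }
}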
/** Splits this reader into left and right partitions. */
public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {

  // Partition this source according to how the splitDim split the values:
  long rightCount = 0;
  for (long i = 0; i < count; i++) {
    boolean result = next();
    assert result;
    byte[] packedValue = packedValue();
    long ord = ord();
    int docID = docID();
    if (rightTree.get(ord)) {
      right.append(packedValue, ord, docID);
      rightCount++;
      if (doClearBits) {
        rightTree.clear(ord);
      }
    } else {
      left.append(packedValue, ord, docID);
    }
  }

  return rightCount;
}
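// Illustrative side note, standalone and separate from the code above: the partitioning split()
// performs (ords set in the bit set go right, everything else goes left) in a self-contained form,
// using java.util.BitSet and plain lists instead of LongBitSet and PointWriter. All names below are
// hypothetical:

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

/** Standalone sketch of partitioning points by an ord bit set. */
public class SplitSketch {
  public static void main(String[] args) {
    int[] ords   = {0, 1, 2, 3, 4, 5};
    int[] values = {9, 3, 7, 1, 5, 8};

    // Pretend the right-tree ords were already marked (here: values >= splitValue, with splitValue = 7):
    BitSet rightTree = new BitSet();
    for (int i = 0; i < ords.length; i++) {
      if (values[i] >= 7) {
        rightTree.set(ords[i]);
      }
    }

    List<Integer> left = new ArrayList<>();
    List<Integer> right = new ArrayList<>();
    long rightCount = 0;

    // Same loop shape as split(): route each point by its ord's bit:
    for (int i = 0; i < ords.length; i++) {
      if (rightTree.get(ords[i])) {
        right.add(values[i]);
        rightCount++;
      } else {
        left.add(values[i]);
      }
    }

    System.out.println("left=" + left + " right=" + right + " rightCount=" + rightCount);
    // prints: left=[3, 1, 5] right=[9, 7, 8] rightCount=3
  }
}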