/**
 * Reports how many primary (on-disk) index entries lie between the index summary entry at
 * {@code index} and the index summary entry that follows it (assuming there is one). When no
 * downsampling has been applied, the result is always the index interval.
 *
 * @param index the position of an index summary entry (between zero and the index entry size)
 *
 * @return the count of partitions after {@code index} up to the next partition that has a summary entry
 */
public int getEffectiveIndexIntervalAfterIndex(int index)
{
    // Delegates entirely to the static helper, supplying this summary's sampling level and min interval.
    final int effectiveInterval =
        Downsampling.getEffectiveIndexIntervalAfterIndex(index, samplingLevel, minIndexInterval);
    return effectiveInterval;
}
/**
 * Computes the effective index interval that follows the entry at {@code index} in an IndexSummary,
 * i.e. the number of partitions in the primary on-disk index that precede the next partition with an
 * index summary entry. When {@code samplingLevel == BASE_SAMPLING_LEVEL} the result equals the index
 * interval.
 *
 * @param index an index into an IndexSummary; must be non-negative (checked by assertion)
 * @param samplingLevel the current sampling level of that IndexSummary
 * @param minIndexInterval the min index interval (effective index interval at full sampling)
 * @return the number of partitions before the next index summary entry, inclusive on one end
 */
public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
{
    assert index >= 0;

    // Only the index modulo the sampling level is used for the lookups below.
    final int position = index % samplingLevel;
    final List<Integer> origins = getOriginalIndexes(samplingLevel);

    // The last entry has no successor in the list, so BASE_SAMPLING_LEVEL stands in for it.
    final int nextOrigin;
    if (position == origins.size() - 1)
        nextOrigin = BASE_SAMPLING_LEVEL;
    else
        nextOrigin = origins.get(position + 1);

    final int currentOrigin = origins.get(position);
    return (nextOrigin - currentOrigin) * minIndexInterval;
}
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) { List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); // calculate starting indexes for sampling rounds int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); int[] startPoints = new int[numRounds]; for (int i = 0; i < numRounds; ++i) { int start = allStartPoints.get(initialRound + i); // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point int adjustment = 0; for (int j = 0; j < initialRound; ++j) { if (allStartPoints.get(j) < start) adjustment++; } startPoints[i] = start - adjustment; } return startPoints; } }
public IndexSummaryBuilder(long expectedKeys, int minIndexInterval, int samplingLevel) { this.samplingLevel = samplingLevel; this.startPoints = Downsampling.getStartPoints(BASE_SAMPLING_LEVEL, samplingLevel); long maxExpectedEntries = expectedKeys / minIndexInterval; if (maxExpectedEntries > Integer.MAX_VALUE) { // that's a _lot_ of keys, and a very low min index interval int effectiveMinInterval = (int) Math.ceil((double) Integer.MAX_VALUE / expectedKeys); maxExpectedEntries = expectedKeys / effectiveMinInterval; assert maxExpectedEntries <= Integer.MAX_VALUE : maxExpectedEntries; logger.warn("min_index_interval of {} is too low for {} expected keys; using interval of {} instead", minIndexInterval, expectedKeys, effectiveMinInterval); this.minIndexInterval = effectiveMinInterval; } else { this.minIndexInterval = minIndexInterval; } // for initializing data structures, adjust our estimates based on the sampling level maxExpectedEntries = Math.max(1, (maxExpectedEntries * samplingLevel) / BASE_SAMPLING_LEVEL); offsets = new SafeMemoryWriter(4 * maxExpectedEntries).order(ByteOrder.nativeOrder()); entries = new SafeMemoryWriter(40 * maxExpectedEntries).order(ByteOrder.nativeOrder()); // the summary will always contain the first index entry (downsampling will never remove it) nextSamplePosition = 0; indexIntervalMatches++; }
public IndexSummaryBuilder(long expectedKeys, int minIndexInterval, int samplingLevel) { this.samplingLevel = samplingLevel; this.startPoints = Downsampling.getStartPoints(BASE_SAMPLING_LEVEL, samplingLevel); long maxExpectedEntries = expectedKeys / minIndexInterval; if (maxExpectedEntries > Integer.MAX_VALUE) { // that's a _lot_ of keys, and a very low min index interval int effectiveMinInterval = (int) Math.ceil((double) Integer.MAX_VALUE / expectedKeys); maxExpectedEntries = expectedKeys / effectiveMinInterval; assert maxExpectedEntries <= Integer.MAX_VALUE : maxExpectedEntries; logger.warn("min_index_interval of {} is too low for {} expected keys; using interval of {} instead", minIndexInterval, expectedKeys, effectiveMinInterval); this.minIndexInterval = effectiveMinInterval; } else { this.minIndexInterval = minIndexInterval; } // for initializing data structures, adjust our estimates based on the sampling level maxExpectedEntries = Math.max(1, (maxExpectedEntries * samplingLevel) / BASE_SAMPLING_LEVEL); offsets = new SafeMemoryWriter(4 * maxExpectedEntries).withByteOrder(ByteOrder.nativeOrder()); entries = new SafeMemoryWriter(40 * maxExpectedEntries).withByteOrder(ByteOrder.nativeOrder()); // the summary will always contain the first index entry (downsampling will never remove it) nextSamplePosition = 0; indexIntervalMatches++; }
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) { List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); // calculate starting indexes for sampling rounds int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); int[] startPoints = new int[numRounds]; for (int i = 0; i < numRounds; ++i) { int start = allStartPoints.get(initialRound + i); // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point int adjustment = 0; for (int j = 0; j < initialRound; ++j) { if (allStartPoints.get(j) < start) adjustment++; } startPoints[i] = start - adjustment; } return startPoints; } }
// Start points for the sampling rounds between BASE_SAMPLING_LEVEL and samplingLevel. With
// BASE_SAMPLING_LEVEL as the current level, getStartPoints applies no earlier-round adjustment.
this.startPoints = Downsampling.getStartPoints(BASE_SAMPLING_LEVEL, samplingLevel);
/**
 * Reports how many primary (on-disk) index entries lie between the index summary entry at
 * {@code index} and the index summary entry that follows it (assuming there is one). When no
 * downsampling has been applied, the result is always the index interval.
 *
 * @param index the position of an index summary entry (between zero and the index entry size)
 *
 * @return the count of partitions after {@code index} up to the next partition that has a summary entry
 */
public int getEffectiveIndexIntervalAfterIndex(int index)
{
    // Delegates entirely to the static helper, supplying this summary's sampling level and min interval.
    final int effectiveInterval =
        Downsampling.getEffectiveIndexIntervalAfterIndex(index, samplingLevel, minIndexInterval);
    return effectiveInterval;
}
/**
 * Computes the effective index interval that follows the entry at {@code index} in an IndexSummary,
 * i.e. the number of partitions in the primary on-disk index that precede the next partition with an
 * index summary entry. When {@code samplingLevel == BASE_SAMPLING_LEVEL} the result equals the index
 * interval.
 *
 * @param index an index into an IndexSummary; must be non-negative (checked by assertion)
 * @param samplingLevel the current sampling level of that IndexSummary
 * @param minIndexInterval the min index interval (effective index interval at full sampling)
 * @return the number of partitions before the next index summary entry, inclusive on one end
 */
public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
{
    assert index >= 0;

    // Only the index modulo the sampling level is used for the lookups below.
    final int position = index % samplingLevel;
    final List<Integer> origins = getOriginalIndexes(samplingLevel);

    // The last entry has no successor in the list, so BASE_SAMPLING_LEVEL stands in for it.
    final int nextOrigin;
    if (position == origins.size() - 1)
        nextOrigin = BASE_SAMPLING_LEVEL;
    else
        nextOrigin = origins.get(position + 1);

    final int currentOrigin = origins.get(position);
    return (nextOrigin - currentOrigin) * minIndexInterval;
}
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) { List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); // calculate starting indexes for sampling rounds int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); int[] startPoints = new int[numRounds]; for (int i = 0; i < numRounds; ++i) { int start = allStartPoints.get(initialRound + i); // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point int adjustment = 0; for (int j = 0; j < initialRound; ++j) { if (allStartPoints.get(j) < start) adjustment++; } startPoints[i] = start - adjustment; } return startPoints; } }
// Start points for the sampling rounds between BASE_SAMPLING_LEVEL and samplingLevel. With
// BASE_SAMPLING_LEVEL as the current level, getStartPoints applies no earlier-round adjustment.
this.startPoints = Downsampling.getStartPoints(BASE_SAMPLING_LEVEL, samplingLevel);
/**
 * Reports how many primary (on-disk) index entries lie between the index summary entry at
 * {@code index} and the index summary entry that follows it (assuming there is one). When no
 * downsampling has been applied, the result is always the index interval.
 *
 * @param index the position of an index summary entry (between zero and the index entry size)
 *
 * @return the count of partitions after {@code index} up to the next partition that has a summary entry
 */
public int getEffectiveIndexIntervalAfterIndex(int index)
{
    // Delegates entirely to the static helper, supplying this summary's sampling level and min interval.
    final int effectiveInterval =
        Downsampling.getEffectiveIndexIntervalAfterIndex(index, samplingLevel, minIndexInterval);
    return effectiveInterval;
}
/**
 * Computes the effective index interval that follows the entry at {@code index} in an IndexSummary,
 * i.e. the number of partitions in the primary on-disk index that precede the next partition with an
 * index summary entry. When {@code samplingLevel == BASE_SAMPLING_LEVEL} the result equals the index
 * interval.
 *
 * @param index an index into an IndexSummary; must be non-negative (checked by assertion)
 * @param samplingLevel the current sampling level of that IndexSummary
 * @param minIndexInterval the min index interval (effective index interval at full sampling)
 * @return the number of partitions before the next index summary entry, inclusive on one end
 */
public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
{
    assert index >= 0;

    // Only the index modulo the sampling level is used for the lookups below.
    final int position = index % samplingLevel;
    final List<Integer> origins = getOriginalIndexes(samplingLevel);

    // The last entry has no successor in the list, so BASE_SAMPLING_LEVEL stands in for it.
    final int nextOrigin;
    if (position == origins.size() - 1)
        nextOrigin = BASE_SAMPLING_LEVEL;
    else
        nextOrigin = origins.get(position + 1);

    final int currentOrigin = origins.get(position);
    return (nextOrigin - currentOrigin) * minIndexInterval;
}
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) { List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); // calculate starting indexes for sampling rounds int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); int[] startPoints = new int[numRounds]; for (int i = 0; i < numRounds; ++i) { int start = allStartPoints.get(initialRound + i); // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point int adjustment = 0; for (int j = 0; j < initialRound; ++j) { if (allStartPoints.get(j) < start) adjustment++; } startPoints[i] = start - adjustment; } return startPoints; } }
// Start points for the sampling rounds between BASE_SAMPLING_LEVEL and samplingLevel. With
// BASE_SAMPLING_LEVEL as the current level, getStartPoints applies no earlier-round adjustment.
this.startPoints = Downsampling.getStartPoints(BASE_SAMPLING_LEVEL, samplingLevel);
/**
 * Reports how many primary (on-disk) index entries lie between the index summary entry at
 * {@code index} and the index summary entry that follows it (assuming there is one). When no
 * downsampling has been applied, the result is always the index interval.
 *
 * @param index the position of an index summary entry (between zero and the index entry size)
 *
 * @return the count of partitions after {@code index} up to the next partition that has a summary entry
 */
public int getEffectiveIndexIntervalAfterIndex(int index)
{
    // Delegates entirely to the static helper, supplying this summary's sampling level and min interval.
    final int effectiveInterval =
        Downsampling.getEffectiveIndexIntervalAfterIndex(index, samplingLevel, minIndexInterval);
    return effectiveInterval;
}
/**
 * Computes the effective index interval that follows the entry at {@code index} in an IndexSummary,
 * i.e. the number of partitions in the primary on-disk index that precede the next partition with an
 * index summary entry. When {@code samplingLevel == BASE_SAMPLING_LEVEL} the result equals the index
 * interval.
 *
 * @param index an index into an IndexSummary; must be non-negative (checked by assertion)
 * @param samplingLevel the current sampling level of that IndexSummary
 * @param minIndexInterval the min index interval (effective index interval at full sampling)
 * @return the number of partitions before the next index summary entry, inclusive on one end
 */
public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
{
    assert index >= 0;

    // Only the index modulo the sampling level is used for the lookups below.
    final int position = index % samplingLevel;
    final List<Integer> origins = getOriginalIndexes(samplingLevel);

    // The last entry has no successor in the list, so BASE_SAMPLING_LEVEL stands in for it.
    final int nextOrigin;
    if (position == origins.size() - 1)
        nextOrigin = BASE_SAMPLING_LEVEL;
    else
        nextOrigin = origins.get(position + 1);

    final int currentOrigin = origins.get(position);
    return (nextOrigin - currentOrigin) * minIndexInterval;
}
public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel) { List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL); // calculate starting indexes for sampling rounds int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel; int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel); int[] startPoints = new int[numRounds]; for (int i = 0; i < numRounds; ++i) { int start = allStartPoints.get(initialRound + i); // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point int adjustment = 0; for (int j = 0; j < initialRound; ++j) { if (allStartPoints.get(j) < start) adjustment++; } startPoints[i] = start - adjustment; } return startPoints; } }
// One adjusted start point per sampling round between currentSamplingLevel and newSamplingLevel
// (the array length is |currentSamplingLevel - newSamplingLevel|).
int[] startPoints = Downsampling.getStartPoints(currentSamplingLevel, newSamplingLevel);
/**
 * Reports how many primary (on-disk) index entries lie between the index summary entry at
 * {@code index} and the index summary entry that follows it (assuming there is one). When no
 * downsampling has been applied, the result is always the index interval.
 *
 * @param index the position of an index summary entry (between zero and the index entry size)
 *
 * @return the count of partitions after {@code index} up to the next partition that has a summary entry
 */
public int getEffectiveIndexIntervalAfterIndex(int index)
{
    // Delegates entirely to the static helper, supplying this summary's sampling level and min interval.
    final int effectiveInterval =
        Downsampling.getEffectiveIndexIntervalAfterIndex(index, samplingLevel, minIndexInterval);
    return effectiveInterval;
}