/**
 * Reads the packed key (word + context offset) stored at a position in the
 * table for one n-gram order.
 *
 * @param offset
 *            position inside the table selected by {@code ngramOrder}
 * @param ngramOrder
 *            index into {@code maps} choosing which table to query
 * @return the value reported by that table's {@code getKey} for {@code offset}
 */
private long getKey(long offset, final int ngramOrder) {
    final HashMap tableForOrder = maps[ngramOrder];
    return tableForOrder.getKey(offset);
}
/**
 * Finds the offset of the entry identified by a word and the offset of its
 * context.
 *
 * @param contextOffset_
 *            offset of the context; negative values are clamped to 0 before
 *            the key is built
 * @param word
 *            word id combined with the context offset to form the key
 * @param ngramOrder
 *            index into {@code maps} choosing which table to search
 * @return whatever the table's {@code getOffset} returns for the combined key
 */
private long getOffsetHelp(final long contextOffset_, final int word, final int ngramOrder) {
    final long clampedContext = Math.max(contextOffset_, 0);
    final long packedKey = combineToKey(word, clampedContext);
    return maps[ngramOrder].getOffset(packedKey);
}
/**
 * Exposes the offsets held by the table for one n-gram order.
 *
 * @param ngramOrder
 *            order passed to {@code getMap}
 * @return the table's {@code keys()}, or an empty list when {@code getMap}
 *         yields {@code null}
 */
public Iterable<Long> getNgramOffsetsForOrder(final int ngramOrder) {
    final HashMap tableForOrder = getMap(ngramOrder);
    if (tableForOrder != null) {
        return tableForOrder.keys();
    }
    return Collections.emptyList();
}
/**
 * Inserts the n-gram {@code ngram[startPos..endPos)} together with its value.
 *
 * The table for the n-gram's order is created on first use, and an
 * {@code ExplicitWordHashMap} whose load factor has reached
 * {@code maxLoadFactor} is rehashed to 3/2 of its capacity before inserting.
 *
 * @param ngram
 *            array holding the n-gram's word ids
 * @param startPos
 *            start of the n-gram within {@code ngram}
 * @param endPos
 *            end of the n-gram within {@code ngram}
 * @param val
 *            value handed to the value container
 * @return the index returned by the table's {@code put}, or {@code -1} when
 *         the key computed for the n-gram is negative
 */
@Override
public long put(final int[] ngram, int startPos, int endPos, final T val) {
    final int ngramOrder = endPos - startPos - 1;

    // Lazily create the table for this order.
    if (maps[ngramOrder] == null) {
        initMap(initCapacities[ngramOrder], ngramOrder);
    }
    HashMap table = maps[ngramOrder];

    // Grow an over-full explicit table; rehash swaps the entries of maps, so re-read it.
    final boolean overloaded = table instanceof ExplicitWordHashMap && table.getLoadFactor() >= maxLoadFactor;
    if (overloaded) {
        rehash(ngramOrder, table.getCapacity() * 3 / 2);
        table = maps[ngramOrder];
    }

    final long key = getKey(ngram, startPos, endPos);
    if (key < 0) {
        return -1L;
    }

    final long sizeBefore = table.size();
    final long index = table.put(key);
    final long suffixIndex = getSuffixOffset(ngram, startPos, endPos);
    // The entry counts as new only if the insert made the table grow.
    final boolean isNewEntry = table.size() > sizeBefore;
    values.add(ngram, startPos, endPos, ngramOrder, index, contextOffsetOf(key), wordOf(key), val, suffixIndex,
        isNewEntry);
    return index;
}
/**
 * Trims the value storage for every order up to the first missing table,
 * logging each table's load factor, then trims the value container as a
 * whole.
 */
@Override
public void trim() {
    int ngramOrder = 0;
    // Stop at the first order that has no table.
    while (ngramOrder < maps.length && maps[ngramOrder] != null) {
        values.trimAfterNgram(ngramOrder, maps[ngramOrder].getCapacity());
        Logger.logss("Load factor for " + (ngramOrder + 1) + ": " + maps[ngramOrder].getLoadFactor());
        ++ngramOrder;
    }
    values.trim();
}
/**
 * Rebuilds the tables with a new capacity for one order.
 *
 * A fresh {@code HashNgramMap} is constructed over fresh values, every
 * non-empty entry of the current tables is re-inserted into it, and then the
 * new tables and values are copied back into this instance.
 *
 * @param changedNgramOrder
 *            the order whose table receives {@code newCapacity}; all other
 *            existing tables keep their current capacity
 * @param newCapacity
 *            capacity requested for the table of {@code changedNgramOrder}
 */
private void rehash(final int changedNgramOrder, final long newCapacity) {
    final ValueContainer<T> newValues = values.createFreshValues();
    // Capacities default to -1; only orders before the first null table are filled in.
    final long[] newCapacities = new long[maps.length];
    Arrays.fill(newCapacities, -1L);
    for (int ngramOrder = 0; ngramOrder < maps.length; ++ngramOrder) {
        if (maps[ngramOrder] == null) break;
        newCapacities[ngramOrder] = ngramOrder == changedNgramOrder ? newCapacity : maps[ngramOrder].getCapacity();
    }
    final HashNgramMap<T> newMap = new HashNgramMap<T>(newValues, opts, newCapacities, reversed);
    // Re-insert lowest order first. NOTE(review): unlike the loop above, this one skips
    // null tables with continue instead of stopping at the first one.
    for (int ngramOrder = 0; ngramOrder < maps.length; ++ngramOrder) {
        final HashMap currMap = maps[ngramOrder];
        if (currMap == null) continue;
        // Scan every slot of the old table, skipping empty ones.
        for (long actualIndex = 0; actualIndex < currMap.getCapacity(); ++actualIndex) {
            final long key = currMap.getKey(actualIndex);
            if (currMap.isEmptyKey(key)) continue;
            // Decode the word ids from the old tables, which are still in place in this.maps.
            final int[] ngram = getNgramFromContextEncoding(AbstractNgramMap.contextOffsetOf(key), ngramOrder - 1, AbstractNgramMap.wordOf(key));
            final T val = values.getScratchValue();
            values.getFromOffset(actualIndex, ngramOrder, val);
            newMap.put(ngram, 0, ngram.length, val);
        }
    }
    // Adopt the rebuilt tables and values, then point the values back at this map.
    System.arraycopy(newMap.maps, 0, maps, 0, newMap.maps.length);
    values.setFromOtherValues(newValues);
    values.setMap(this);
}
/**
 * Inserts a precomputed key into a table and records its value, using a
 * suffix index supplied by the caller.
 *
 * @param map
 *            table receiving the key
 * @param ngram
 *            array holding the n-gram's word ids
 * @param startPos
 *            start of the n-gram within {@code ngram}
 * @param endPos
 *            end of the n-gram within {@code ngram}
 * @param key
 *            packed key to insert
 * @param val
 *            value handed to the value container
 * @param forcedNew
 *            when true the entry is reported to the value container as new
 *            even if the table did not grow
 * @param suffixIndex
 *            suffix index forwarded to the value container
 * @return the index returned by the table's {@code put}, or {@code -1} when
 *         the value container's {@code add} reports failure
 */
private long putHelpWithSuffixIndex(final HashMap map, final int[] ngram, final int startPos, final int endPos,
    final long key, final T val, final boolean forcedNew, final long suffixIndex) {
    final int ngramOrder = endPos - startPos - 1;
    final long sizeBefore = map.size();
    final long index = map.put(key);
    // New if the insert grew the table, or if the caller insists.
    final boolean treatAsNew = map.size() > sizeBefore || forcedNew;
    final boolean stored = values.add(ngram, startPos, endPos, ngramOrder, index, contextOffsetOf(key), wordOf(key),
        val, suffixIndex, treatAsNew);
    return stored ? index : -1;
}
/**
 * Reports how many n-grams are stored for one order.
 *
 * @param ngramOrder
 *            index into {@code maps}
 * @return the size of the table for that order, or 0 when no table has been
 *         created for it yet
 */
@Override
public long getNumNgrams(int ngramOrder) {
    final HashMap table = maps[ngramOrder];
    // A missing table means nothing has been stored for this order.
    return table == null ? 0L : table.size();
}
/**
 * Creates the table for one n-gram order and makes sure the value container
 * has room for it.
 *
 * @param newCapacity
 *            capacity passed to the {@code ExplicitWordHashMap} constructor
 * @param ngramOrder
 *            slot in {@code maps} that receives the new table
 */
private void initMap(long newCapacity, int ngramOrder) {
    final HashMap created = new ExplicitWordHashMap(newCapacity);
    maps[ngramOrder] = created;
    // Size the values by the capacity the table reports, not the requested one.
    values.setSizeAtLeast(created.getCapacity(), ngramOrder);
}
/**
 * Tells whether a word occurs as a context in the bigram table.
 *
 * @param word
 *            word id to look up
 * @return the bigram table's {@code hasContexts} answer, or {@code false}
 *         when there is no bigram table (fewer than two orders, or the table
 *         has not been created)
 */
@Override
public boolean wordHasBigrams(int word) {
    // Without a bigram table no word can have bigrams.
    if (maps.length < 2 || maps[1] == null) {
        return false;
    }
    return maps[1].hasContexts(word);
}
/**
 * Trims the value storage for every order up to the first one without a
 * table, logging each table's load factor, then trims the value container as
 * a whole.
 */
@Override
public void trim() {
    int order = 0;
    HashMap table;
    // Walk orders from lowest up, stopping at the first missing table.
    while (order < getMaxNgramOrder() && (table = getMap(order)) != null) {
        values.trimAfterNgram(order, table.getCapacity());
        Logger.logss("Load factor for " + (order + 1) + ": " + table.getLoadFactor());
        ++order;
    }
    values.trim();
}
/**
 * Reports how many n-grams are stored for one order.
 *
 * @param ngramOrder
 *            order passed to {@code getMap}
 * @return the size of the table for that order, or 0 when {@code getMap}
 *         yields {@code null}
 */
@Override
public long getNumNgrams(final int ngramOrder) {
    final HashMap map = getMap(ngramOrder);
    // A missing table means nothing has been stored for this order.
    return map == null ? 0L : map.size();
}
private HashNgramMap(final ValueContainer<T> values, final ConfigOptions opts, final LongArray[] numNgramsForEachWord, final boolean reversed) { super(values, opts); this.reversed = reversed; this.maxLoadFactor = opts.hashTableLoadFactor; maps = new HashMap[numNgramsForEachWord.length]; initCapacities = null; for (int ngramOrder = 0; ngramOrder < numNgramsForEachWord.length; ++ngramOrder) { maps[ngramOrder] = (ngramOrder == 0) ? new UnigramHashMap(numNgramsForEachWord[ngramOrder].size()) : new ImplicitWordHashMap( numNgramsForEachWord[ngramOrder], maxLoadFactor); values.setSizeAtLeast(maps[ngramOrder].getCapacity(), ngramOrder); } values.setMap(this); }
/**
 * Inserts the n-gram {@code ngram[startPos..endPos)}, growing its table first
 * if needed.
 *
 * Unless {@code forcedNew} is set, an {@code ExplicitWordHashMap} whose load
 * factor has reached {@code maxLoadFactor} is rehashed to 3/2 of its
 * capacity before the insert.
 *
 * @param ngram
 *            array holding the n-gram's word ids
 * @param startPos
 *            start of the n-gram within {@code ngram}
 * @param endPos
 *            end of the n-gram within {@code ngram}
 * @param val
 *            value to store
 * @param forcedNew
 *            suppresses the rehash check and is forwarded to the inner
 *            {@code putHelp}
 * @return the result of the inner {@code putHelp}, or {@code -1} when the key
 *         computed for the n-gram is negative
 */
private long putHelp(final int[] ngram, final int startPos, final int endPos, final T val,
    final boolean forcedNew) {
    final int ngramOrder = endPos - startPos - 1;
    HashMap table = getHashMapForOrder(ngramOrder);

    final boolean needsGrowth = !forcedNew && table instanceof ExplicitWordHashMap
        && table.getLoadFactor() >= maxLoadFactor;
    if (needsGrowth) {
        rehash(ngramOrder, table.getCapacity() * 3 / 2, 1);
        // Rehashing replaces the table, so fetch it again.
        table = getHashMapForOrder(ngramOrder);
    }

    final long key = getKey(ngram, startPos, endPos);
    return key < 0 ? -1L : putHelp(table, ngram, startPos, endPos, key, val, forcedNew);
}
/**
 * Reads the packed key (word + context offset) stored at a position in the
 * table for one n-gram order.
 *
 * @param offset
 *            position inside the table returned by {@code getMap(ngramOrder)}
 * @param ngramOrder
 *            order whose table is queried
 * @return the value reported by that table's {@code getKey} for {@code offset}
 */
private long getKey(final long offset, final int ngramOrder) {
    final HashMap tableForOrder = getMap(ngramOrder);
    return tableForOrder.getKey(offset);
}
/**
 * Iterates over every stored n-gram of one order together with its value.
 *
 * Each entry's value comes from {@code values.getScratchValue()}, populated
 * via {@code getFromOffset}.
 *
 * @param ngramOrder
 *            index into {@code maps}
 * @return a lazily transformed view over the table's keys, or an empty
 *         iterable when no table exists for that order
 */
@Override
public Iterable<Entry<T>> getNgramsForOrder(final int ngramOrder) {
    final HashMap map = maps[ngramOrder];
    // No table yet for this order: nothing to iterate (previously a NullPointerException).
    if (map == null) {
        return java.util.Collections.<Entry<T>> emptyList();
    }
    return Iterators.able(new Iterators.Transform<Long, Entry<T>>(map.keys().iterator()) {
        @Override
        protected Entry<T> transform(Long next) {
            long offset = next;
            final T val = values.getScratchValue();
            values.getFromOffset(offset, ngramOrder, val);
            return new Entry<T>(getNgramForOffset(offset, ngramOrder), val);
        }
    });
}
/**
 * Sums the sizes of the tables for consecutive orders, starting at order 0
 * and stopping at the first order for which {@code getMap} yields
 * {@code null}.
 *
 * @return total number of entries across those tables
 */
public long getTotalSize() {
    long total = 0L;
    for (int order = 0; order < getMaxNgramOrder(); order++) {
        final HashMap table = getMap(order);
        if (table == null) {
            // Higher orders are not inspected once a gap is found.
            return total;
        }
        total += table.size();
    }
    return total;
}
/**
 * Looks up the offset of the n-gram {@code ngram[startPos..endPos)}.
 *
 * @param ngram
 *            array holding the n-gram's word ids
 * @param startPos
 *            start of the n-gram within {@code ngram}
 * @param endPos
 *            end of the n-gram within {@code ngram}
 * @return the offset reported by the table for the n-gram's key, or
 *         {@code -1} when the n-gram contains an out-of-vocabulary word, its
 *         order is outside {@code maps}, no table exists for its order, or
 *         its key is negative
 */
private long getOffsetFromRawNgram(final int[] ngram, final int startPos, final int endPos) {
    if (containsOutOfVocab(ngram, startPos, endPos)) return -1;
    final int ngramOrder = endPos - startPos - 1;
    // Reject empty spans as well as orders beyond the tables we hold.
    if (ngramOrder < 0 || ngramOrder >= maps.length) return -1;
    final HashMap currMap = maps[ngramOrder];
    // A table that was never created cannot contain the n-gram.
    if (currMap == null) return -1;
    final long key = getKey(ngram, startPos, endPos);
    if (key < 0) return -1;
    return currMap.getOffset(key);
}
/**
 * Reconstructs the word ids of the n-gram stored at an offset by following
 * context offsets down through the lower-order tables.
 *
 * @param offset
 *            offset of the entry in the table for {@code ngramOrder}
 * @param ngramOrder
 *            order of the entry; the result has {@code ngramOrder + 1} words
 * @return the word ids; the word decoded first goes in the last slot, or in
 *         the first slot when {@code reversed} is set
 */
public int[] getNgramForOffset(long offset, int ngramOrder) {
    final int length = ngramOrder + 1;
    final int[] words = new int[length];
    long cursor = offset;
    // order walks down through the tables while step counts the words decoded so far.
    for (int order = ngramOrder, step = 0; order >= 0; --order, ++step) {
        final long key = maps[order].getKey(cursor);
        cursor = AbstractNgramMap.contextOffsetOf(key);
        final int slot = reversed ? step : length - 1 - step;
        words[slot] = AbstractNgramMap.wordOf(key);
    }
    return words;
}
/**
 * Iterates over every stored n-gram of one order together with its value.
 *
 * Each entry's value comes from {@code values.getScratchValue()}, populated
 * via {@code getFromOffset}.
 *
 * @param ngramOrder
 *            order passed to {@code getMap}
 * @return a lazily transformed view over the table's keys, or an empty list
 *         when {@code getMap} yields {@code null}
 */
@Override
public Iterable<Entry<T>> getNgramsForOrder(final int ngramOrder) {
    final HashMap tableForOrder = getMap(ngramOrder);
    if (tableForOrder == null) {
        return Collections.emptyList();
    }
    return Iterators.able(new Iterators.Transform<Long, Entry<T>>(tableForOrder.keys().iterator()) {
        @Override
        protected Entry<T> transform(final Long next) {
            final long offset = next.longValue();
            final T scratch = values.getScratchValue();
            values.getFromOffset(offset, ngramOrder, scratch);
            return new Entry<T>(getNgramForOffset(offset, ngramOrder), scratch);
        }
    });
}