/**
 * Builds a new value container configured like this one.
 *
 * @param numNgramsForEachOrder_ not consulted by this implementation
 * @return a new container constructed from this container's {@code separatorWord} and {@code numFeatures}
 */
@Override
public PhraseTableValueContainer createFreshValues(long[] numNgramsForEachOrder_) {
    final PhraseTableValueContainer fresh = new PhraseTableValueContainer(separatorWord, numFeatures);
    return fresh;
}
/**
 * Records the value for an n-gram that is being inserted into the map.
 * <ul>
 * <li>An n-gram without the separator word is registered as a source-side phrase.</li>
 * <li>An n-gram with the separator whose value carries a non-null feature vector gets its features
 * stored and is linked from its source phrase.</li>
 * <li>Any other n-gram is marked with {@code EMPTY_VALUE_INDEX}, but only if it is new.</li>
 * </ul>
 *
 * @param ngram         word array holding the n-gram
 * @param startPos      start of the n-gram within {@code ngram}, as passed to {@code containsSeparator}
 * @param endPos        end of the n-gram within {@code ngram}, as passed to {@code containsSeparator}
 * @param ngramOrder    order index used to select the per-order storage
 * @param offset        offset of the n-gram within its order
 * @param contextOffset offset of the n-gram's context
 * @param word          last word of the n-gram
 * @param val           value to store
 * @param suffixOffset  not used by this implementation
 * @param ngramIsNew    whether the n-gram was newly added to the map
 */
@Override
public void add(int[] ngram, int startPos, int endPos, int ngramOrder, long offset, long contextOffset, int word,
        PhraseTableValues val, long suffixOffset, boolean ngramIsNew) {
    assert !map.isReversed();

    if (!containsSeparator(ngram, startPos, endPos)) {
        // No separator: this is a source-side phrase.
        addNewSrcPhrase(ngramOrder, offset);
        return;
    }

    final boolean carriesFeatures = val instanceof FeaturePhraseTableValues
            && ((FeaturePhraseTableValues) val).features != null;
    if (carriesFeatures) {
        addFeaturesForWholePhrase(ngramOrder, offset, val);
        addPointerToTargetSidePhrase(ngramOrder, offset, contextOffset, word);
        return;
    }

    if (!ngramIsNew) {
        return;
    }
    assert val instanceof TargetTranslationsValues || ((FeaturePhraseTableValues) val).features == null;
    growValueIndexArrayIfNecessary(ngramOrder);
    valueIndexes[ngramOrder].setAndGrowIfNeeded((int) offset, EMPTY_VALUE_INDEX);
}
/**
 * Fills {@code outputVal} with whatever is stored for the n-gram at {@code offset}.
 * <p>
 * Nothing is written when the offset lies beyond the stored value indexes or the stored index is
 * {@code EMPTY_VALUE_INDEX}. A non-negative index is read as the position of the first of
 * {@code numFeatures} feature values; a negative index {@code v} selects entry {@code -v - 1} of the
 * translation-pointer list for this order.
 *
 * @param offset     offset of the n-gram within its order
 * @param ngramOrder order index selecting the per-order storage
 * @param outputVal  output parameter; only the fields matching its runtime type are set
 */
@Override
public void getFromOffset(final long offset, final int ngramOrder, @OutputParameter final PhraseTableValues outputVal) {
    if (offset >= valueIndexes[ngramOrder].size()) {
        return;
    }
    final long storedIndex = valueIndexes[ngramOrder].get(offset);
    if (storedIndex == EMPTY_VALUE_INDEX) {
        return;
    }

    if (storedIndex >= 0) {
        if (outputVal instanceof FeaturePhraseTableValues) {
            // Features are stored as raw float bits; decode them back.
            final float[] decoded = new float[numFeatures];
            int k = 0;
            while (k < numFeatures) {
                decoded[k] = Float.intBitsToFloat((int) features[ngramOrder].get((int) (storedIndex + k)));
                ++k;
            }
            ((FeaturePhraseTableValues) outputVal).features = decoded;
        }
    } else if (outputVal instanceof TargetTranslationsValues) {
        final TargetTranslationsValues out = (TargetTranslationsValues) outputVal;
        final int listIndex = (int) (-storedIndex - 1);
        out.targetTranslationOffsets = readOffsets(targetTranslations[ngramOrder].get(listIndex));
        out.targetTranslationOrders = readOrders(targetTranslations[ngramOrder].get(listIndex));
    }
}
/**
 * Registers a source-side phrase and allocates the array that will hold pointers to its translations.
 * <p>
 * If no value index is recorded yet for {@code offset} (the current value is 0, or the offset is past
 * the end), the value index is set to {@code -(n) - 1}, where {@code n} is the number of pointer
 * arrays already stored for this order; that is the list position of the array appended below.
 *
 * @param ngramOrder order index of the source phrase
 * @param offset     offset of the source phrase within its order
 */
private void addNewSrcPhrase(int ngramOrder, long offset) {
    growValueIndexArrayIfNecessary(ngramOrder);
    if (ngramOrder >= targetTranslations.length) {
        // Growing by 3/2 alone may not reach ngramOrder (e.g. length 0 or 1), so always make room
        // for at least ngramOrder + 1 entries.
        targetTranslations = Arrays.copyOf(targetTranslations,
                Math.max(ngramOrder + 1, targetTranslations.length * 3 / 2));
    }
    if (targetTranslations[ngramOrder] == null) {
        targetTranslations[ngramOrder] = new ArrayList<LongArray>();
    }
    final ArrayList<LongArray> targetTranslationPointersHere = targetTranslations[ngramOrder];
    long currVal = offset >= valueIndexes[ngramOrder].size() ? 0 : valueIndexes[ngramOrder].get((int) (offset));
    if (currVal == 0) {
        valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), (-targetTranslations[ngramOrder].size() - 1));
    }
    // NOTE(review): the array is appended even when the offset already had a non-zero value index; in
    // that case no value index written by this method points at the new array.
    // Constructor arguments are presumably (initial capacity, bit width) -- confirm against CustomWidthArray.
    targetTranslationPointersHere.add(new CustomWidthArray(3, Integer.SIZE + Byte.SIZE));
}
/**
 * Links a full (source + separator + target) phrase from the translation-pointer array of its source
 * phrase.
 * <p>
 * Starting from {@code word} and the phrase's context, the method follows the map's context chain one
 * step at a time until the word read is the separator. The offset and order reached at that point are
 * used to look up the value index of the source phrase, and the combined order/offset of the full
 * phrase is appended to that source phrase's pointer array.
 *
 * @param ngramOrder    order index of the full phrase
 * @param offset        offset of the full phrase within its order
 * @param contextOffset offset of the full phrase's context
 * @param word          last word of the full phrase
 */
private void addPointerToTargetSidePhrase(int ngramOrder, long offset, long contextOffset, int word) {
    int order = ngramOrder - 1;
    long ctx = contextOffset;
    for (int w = word; w != separatorWord; order--) {
        w = map.getNextWord(ctx, order);
        ctx = map.getNextContextOffset(ctx, order);
    }

    // Source phrases store -(listIndex) - 1 as their value index; invert that encoding.
    final long listIndex = -valueIndexes[order].get(ctx) - 1;
    targetTranslations[order].get((int) listIndex).add(combineOrderAndOffset(ngramOrder, offset));
}
/**
 * Looks up the target-side translations recorded for a source phrase.
 *
 * @param src      word array containing the source phrase
 * @param startPos start of the phrase within {@code src}, as passed to the map lookup
 * @param endPos   end of the phrase within {@code src}; {@code endPos - startPos - 1} is used as the order index
 * @return one entry per translation that has features stored; an empty list if the phrase is not in
 *         the map or has no translation pointers recorded
 */
public List<TargetSideTranslation> getTranslations(final int[] src, final int startPos, final int endPos) {
    final long offsetForNgram = map.getOffsetForNgramInModel(src, startPos, endPos);
    if (offsetForNgram < 0) {
        return Collections.emptyList();
    }
    final TargetTranslationsValues scratch = new PhraseTableValueContainer.TargetTranslationsValues();
    map.getValues().getFromOffset(offsetForNgram, endPos - startPos - 1, scratch);
    // The lookup may return without filling the output (e.g. nothing stored for this offset);
    // treat that as "no translations" instead of dereferencing null below.
    if (scratch.targetTranslationOffsets == null || scratch.targetTranslationOrders == null) {
        return Collections.emptyList();
    }
    final List<TargetSideTranslation> ret = new ArrayList<TargetSideTranslation>();
    for (int i = 0; i < scratch.targetTranslationOffsets.length; ++i) {
        final FeaturePhraseTableValues features = new PhraseTableValueContainer.FeaturePhraseTableValues(null);
        final long currOffset = scratch.targetTranslationOffsets[i];
        final int currOrder = scratch.targetTranslationOrders[i];
        map.getValues().getFromOffset(currOffset, currOrder, features);
        if (features.features == null) {
            Logger.warn("Should probably fix");
            continue;
        }
        final TargetSideTranslation tr = new TargetSideTranslation();
        tr.features = Arrays.copyOf(features.features, features.features.length);
        final int[] srcAndTrg = map.getNgramForOffset(currOffset, currOrder);
        // Look the separator up once per translation rather than once per scanned word.
        final int separator = ((PhraseTableValueContainer) map.getValues()).getSeparatorWord();
        int sepIndex = 0;
        while (sepIndex < srcAndTrg.length && srcAndTrg[sepIndex] != separator) {
            ++sepIndex;
        }
        // Everything after the separator is the target side.
        tr.trgWords = Arrays.copyOfRange(srcAndTrg, sepIndex + 1, srcAndTrg.length);
        assert tr.trgWords.length > 0;
        ret.add(tr);
    }
    return ret;
}
/**
 * Fills {@code outputVal} with whatever is stored for the n-gram at {@code offset}.
 * <p>
 * Nothing is written when the offset lies beyond the stored value indexes or the stored index is
 * {@code EMPTY_VALUE_INDEX}. A non-negative index is read as the position of the first of
 * {@code numFeatures} feature values; a negative index {@code v} selects entry {@code -v - 1} of the
 * translation-pointer list for this order.
 *
 * @param offset     offset of the n-gram within its order
 * @param ngramOrder order index selecting the per-order storage
 * @param outputVal  output parameter; only the fields matching its runtime type are set
 */
@Override
public void getFromOffset(final long offset, final int ngramOrder, @OutputParameter final PhraseTableValues outputVal) {
    if (offset >= valueIndexes[ngramOrder].size()) {
        return;
    }
    final long storedIndex = valueIndexes[ngramOrder].get(offset);
    if (storedIndex == EMPTY_VALUE_INDEX) {
        return;
    }

    if (storedIndex >= 0) {
        if (outputVal instanceof FeaturePhraseTableValues) {
            // Features are stored as raw float bits; decode them back.
            final float[] decoded = new float[numFeatures];
            int k = 0;
            while (k < numFeatures) {
                decoded[k] = Float.intBitsToFloat((int) features[ngramOrder].get((int) (storedIndex + k)));
                ++k;
            }
            ((FeaturePhraseTableValues) outputVal).features = decoded;
        }
    } else if (outputVal instanceof TargetTranslationsValues) {
        final TargetTranslationsValues out = (TargetTranslationsValues) outputVal;
        final int listIndex = (int) (-storedIndex - 1);
        out.targetTranslationOffsets = readOffsets(targetTranslations[ngramOrder].get(listIndex));
        out.targetTranslationOrders = readOrders(targetTranslations[ngramOrder].get(listIndex));
    }
}
/**
 * Registers a source-side phrase and allocates the array that will hold pointers to its translations.
 * <p>
 * If no value index is recorded yet for {@code offset} (the current value is 0, or the offset is past
 * the end), the value index is set to {@code -(n) - 1}, where {@code n} is the number of pointer
 * arrays already stored for this order; that is the list position of the array appended below.
 *
 * @param ngramOrder order index of the source phrase
 * @param offset     offset of the source phrase within its order
 */
private void addNewSrcPhrase(final int ngramOrder, final long offset) {
    growValueIndexArrayIfNecessary(ngramOrder);
    if (ngramOrder >= targetTranslations.length) {
        // Growing by 3/2 alone may not reach ngramOrder (e.g. length 0 or 1), so always make room
        // for at least ngramOrder + 1 entries.
        targetTranslations = Arrays.copyOf(targetTranslations,
                Math.max(ngramOrder + 1, targetTranslations.length * 3 / 2));
    }
    if (targetTranslations[ngramOrder] == null) {
        targetTranslations[ngramOrder] = new ArrayList<CustomWidthArray>();
    }
    final ArrayList<CustomWidthArray> targetTranslationPointersHere = targetTranslations[ngramOrder];
    final long currVal = offset >= valueIndexes[ngramOrder].size() ? 0 : valueIndexes[ngramOrder].get((int) (offset));
    if (currVal == 0) {
        valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), (-targetTranslations[ngramOrder].size() - 1));
    }
    // NOTE(review): the array is appended even when the offset already had a non-zero value index; in
    // that case no value index written by this method points at the new array.
    // Constructor arguments are presumably (initial capacity, bit width) -- confirm against CustomWidthArray.
    targetTranslationPointersHere.add(new CustomWidthArray(3, Integer.SIZE + Byte.SIZE));
}
/**
 * Links a full (source + separator + target) phrase from the translation-pointer array of its source
 * phrase.
 * <p>
 * Starting from {@code word} and the phrase's context, the method follows the map's context chain one
 * step at a time until the word read is the separator. The offset and order reached at that point are
 * used to look up the value index of the source phrase, and the combined order/offset of the full
 * phrase is appended to that source phrase's pointer array.
 *
 * @param ngramOrder    order index of the full phrase
 * @param offset        offset of the full phrase within its order
 * @param contextOffset offset of the full phrase's context
 * @param word          last word of the full phrase
 */
private void addPointerToTargetSidePhrase(final int ngramOrder, final long offset, final long contextOffset, final int word) {
    int order = ngramOrder - 1;
    long ctx = contextOffset;
    for (int w = word; w != separatorWord; order--) {
        w = map.getNextWord(ctx, order);
        ctx = map.getNextContextOffset(ctx, order);
    }

    // Source phrases store -(listIndex) - 1 as their value index; invert that encoding.
    final long listIndex = -valueIndexes[order].get(ctx) - 1;
    targetTranslations[order].get((int) listIndex).add(combineOrderAndOffset(ngramOrder, offset));
}
/**
 * Looks up the target-side translations recorded for a source phrase.
 *
 * @param src      word array containing the source phrase
 * @param startPos start of the phrase within {@code src}, as passed to the map lookup
 * @param endPos   end of the phrase within {@code src}; {@code endPos - startPos - 1} is used as the order index
 * @return one entry per translation that has features stored; an empty list if the phrase is not in
 *         the map or has no translation pointers recorded
 */
public List<TargetSideTranslation> getTranslations(int[] src, int startPos, int endPos) {
    long offsetForNgram = map.getOffsetForNgramInModel(src, startPos, endPos);
    if (offsetForNgram < 0) {
        return Collections.emptyList();
    }
    TargetTranslationsValues scratch = new PhraseTableValueContainer.TargetTranslationsValues();
    map.getValues().getFromOffset(offsetForNgram, endPos - startPos - 1, scratch);
    // The lookup may return without filling the output (e.g. nothing stored for this offset);
    // treat that as "no translations" instead of dereferencing null below.
    if (scratch.targetTranslationOffsets == null || scratch.targetTranslationOrders == null) {
        return Collections.emptyList();
    }
    List<TargetSideTranslation> ret = new ArrayList<TargetSideTranslation>();
    for (int i = 0; i < scratch.targetTranslationOffsets.length; ++i) {
        FeaturePhraseTableValues features = new PhraseTableValueContainer.FeaturePhraseTableValues(null);
        final long currOffset = scratch.targetTranslationOffsets[i];
        final int currOrder = scratch.targetTranslationOrders[i];
        map.getValues().getFromOffset(currOffset, currOrder, features);
        if (features.features == null) {
            Logger.warn("Should probably fix");
            continue;
        }
        TargetSideTranslation tr = new TargetSideTranslation();
        tr.features = Arrays.copyOf(features.features, features.features.length);
        int[] srcAndTrg = map.getNgramForOffset(currOffset, currOrder);
        // Look the separator up once per translation rather than once per scanned word.
        final int separator = ((PhraseTableValueContainer) map.getValues()).getSeparatorWord();
        int sepIndex = 0;
        while (sepIndex < srcAndTrg.length && srcAndTrg[sepIndex] != separator) {
            ++sepIndex;
        }
        // Everything after the separator is the target side.
        tr.trgWords = Arrays.copyOfRange(srcAndTrg, sepIndex + 1, srcAndTrg.length);
        assert tr.trgWords.length > 0;
        ret.add(tr);
    }
    return ret;
}
/**
 * Records the value for an n-gram that is being inserted into the map.
 * <ul>
 * <li>An n-gram without the separator word is registered as a source-side phrase.</li>
 * <li>An n-gram with the separator whose value carries a non-null feature vector gets its features
 * stored and is linked from its source phrase.</li>
 * <li>Any other n-gram is marked with {@code EMPTY_VALUE_INDEX}, but only if it is new.</li>
 * </ul>
 *
 * @param ngram         word array holding the n-gram
 * @param startPos      start of the n-gram within {@code ngram}, as passed to {@code containsSeparator}
 * @param endPos        end of the n-gram within {@code ngram}, as passed to {@code containsSeparator}
 * @param ngramOrder    order index used to select the per-order storage
 * @param offset        offset of the n-gram within its order
 * @param contextOffset offset of the n-gram's context
 * @param word          last word of the n-gram
 * @param val           value to store
 * @param suffixOffset  not used by this implementation
 * @param ngramIsNew    whether the n-gram was newly added to the map
 * @return always {@code true}
 */
@Override
public boolean add(final int[] ngram, final int startPos, final int endPos, final int ngramOrder, final long offset,
        final long contextOffset, final int word, final PhraseTableValues val, final long suffixOffset,
        final boolean ngramIsNew) {
    assert !map.isReversed();

    if (!containsSeparator(ngram, startPos, endPos)) {
        // No separator: this is a source-side phrase.
        addNewSrcPhrase(ngramOrder, offset);
        return true;
    }

    final boolean carriesFeatures = val instanceof FeaturePhraseTableValues
            && ((FeaturePhraseTableValues) val).features != null;
    if (carriesFeatures) {
        addFeaturesForWholePhrase(ngramOrder, offset, val);
        addPointerToTargetSidePhrase(ngramOrder, offset, contextOffset, word);
        return true;
    }

    if (ngramIsNew) {
        assert val instanceof TargetTranslationsValues || ((FeaturePhraseTableValues) val).features == null;
        growValueIndexArrayIfNecessary(ngramOrder);
        valueIndexes[ngramOrder].setAndGrowIfNeeded((int) offset, EMPTY_VALUE_INDEX);
    }
    return true;
}
/**
 * Builds a new value container configured like this one.
 *
 * @return a new container constructed from this container's {@code separatorWord} and {@code numFeatures}
 */
@Override
public PhraseTableValueContainer createFreshValues() {
    final PhraseTableValueContainer fresh = new PhraseTableValueContainer(separatorWord, numFeatures);
    return fresh;
}
/**
 * Appends the feature vector of a full phrase to the per-order feature store and records where it
 * starts.
 * <p>
 * The value index for {@code offset} is set to the current size of the feature store for this order,
 * i.e. the position at which the first feature is about to be written. Each feature is stored as its
 * raw float bit pattern.
 *
 * @param ngramOrder order index of the phrase
 * @param offset     offset of the phrase within its order
 * @param val        value holding the features; cast to {@code FeaturePhraseTableValues} when read
 */
private void addFeaturesForWholePhrase(int ngramOrder, long offset, PhraseTableValues val) {
    growValueIndexArrayIfNecessary(ngramOrder);

    // Make sure there is a feature store slot for this order.
    if (features.length <= ngramOrder) {
        final int grownLength = Math.max(ngramOrder + 1, features.length * 3 / 2);
        features = Arrays.copyOf(features, grownLength);
    }
    if (features[ngramOrder] == null) {
        features[ngramOrder] = LongArray.StaticMethods.newLongArray(Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    valueIndexes[ngramOrder].setAndGrowIfNeeded((int) offset, features[ngramOrder].size());

    int idx = 0;
    while (idx < numFeatures) {
        features[ngramOrder].add(Float.floatToIntBits(((FeaturePhraseTableValues) val).features[idx]));
        ++idx;
    }
}
/**
 * Creates a callback that collects phrases into a new hash n-gram map.
 * <p>
 * The separator word {@code MosesPhraseTableReader.SEP_WORD} is indexed (added if absent) through
 * {@code wordIndexer}, and the value container is created for 5 features per phrase.
 *
 * @param wordIndexer indexer used to obtain the separator word's index
 */
public MosesPhraseTableReaderCallback(final WordIndexer<W> wordIndexer) {
    final int separatorIndex = wordIndexer.getOrAddIndexFromString(MosesPhraseTableReader.SEP_WORD);
    final PhraseTableValueContainer container = new PhraseTableValueContainer(separatorIndex, 5);
    phrases = HashNgramMap.createExplicitWordHashNgramMap(container, new ConfigOptions(), 20, false);
}
/**
 * Appends the feature vector of a full phrase to the per-order feature store and records where it
 * starts.
 * <p>
 * The value index for {@code offset} is set to the current size of the feature store for this order,
 * i.e. the position at which the first feature is about to be written. Each feature is stored as its
 * raw float bit pattern.
 *
 * @param ngramOrder order index of the phrase
 * @param offset     offset of the phrase within its order
 * @param val        value holding the features; cast to {@code FeaturePhraseTableValues} when read
 */
private void addFeaturesForWholePhrase(final int ngramOrder, final long offset, final PhraseTableValues val) {
    growValueIndexArrayIfNecessary(ngramOrder);

    // Make sure there is a feature store slot for this order.
    if (features.length <= ngramOrder) {
        final int grownLength = Math.max(ngramOrder + 1, features.length * 3 / 2);
        features = Arrays.copyOf(features, grownLength);
    }
    if (features[ngramOrder] == null) {
        features[ngramOrder] = LongArray.StaticMethods.newLongArray(Integer.MAX_VALUE, Integer.MAX_VALUE);
    }

    valueIndexes[ngramOrder].setAndGrowIfNeeded((int) offset, features[ngramOrder].size());

    int idx = 0;
    while (idx < numFeatures) {
        features[ngramOrder].add(Float.floatToIntBits(((FeaturePhraseTableValues) val).features[idx]));
        ++idx;
    }
}
/**
 * Creates a callback that collects phrases into a new hash n-gram map.
 * <p>
 * The separator word {@code MosesPhraseTableReader.SEP_WORD} is indexed (added if absent) through
 * {@code wordIndexer}, and the value container is created for 5 features per phrase.
 *
 * @param wordIndexer indexer used to obtain the separator word's index
 */
public MosesPhraseTableReaderCallback(WordIndexer<W> wordIndexer) {
    final int separatorIndex = wordIndexer.getOrAddIndexFromString(MosesPhraseTableReader.SEP_WORD);
    final PhraseTableValueContainer container = new PhraseTableValueContainer(separatorIndex, 5);
    phrases = HashNgramMap.createExplicitWordHashNgramMap(container, new ConfigOptions(), 20, false);
}