/**
 * Runs the read by handing the callback straight to {@code readFromFiles}.
 *
 * @param callback receiver passed unchanged to {@code readFromFiles}
 */
@Override
public void parse(final MosesPhraseTableReaderCallback<W> callback) {
    this.readFromFiles(callback);
}
/**
 * Allocates a new {@code FeaturePhraseTableValues} constructed with a {@code null} argument.
 *
 * @return the newly allocated value object; a new instance is created on every call
 */
@Override
public PhraseTableValues getScratchValue() {
    final FeaturePhraseTableValues scratch = new FeaturePhraseTableValues(null);
    return scratch;
}
/**
 * Creates a new {@code PhraseTableValueContainer} configured with this container's
 * {@code separatorWord} and {@code numFeatures}.
 *
 * @param numNgramsForEachOrder_ not read by this implementation
 * @return a newly constructed container
 */
@Override
public PhraseTableValueContainer createFreshValues(long[] numNgramsForEachOrder_) {
    final PhraseTableValueContainer fresh = new PhraseTableValueContainer(separatorWord, numFeatures);
    return fresh;
}
/**
 * Builds a {@code MosesPhraseTable} from the given file.
 *
 * A new {@code StringWordIndexer} is shared by the reader, the callback and the returned table;
 * the table is constructed from the map the callback exposes once parsing has finished.
 *
 * @param file file argument handed to the {@code MosesPhraseTableReader}
 * @return the phrase table built from the callback's map and the word indexer
 */
public static MosesPhraseTable readFromFile(final String file) {
    final StringWordIndexer indexer = new StringWordIndexer();
    final MosesPhraseTableReaderCallback<String> tableBuilder =
            new MosesPhraseTableReaderCallback<String>(indexer);
    final MosesPhraseTableReader<String> reader = new MosesPhraseTableReader<String>(file, indexer);
    reader.parse(tableBuilder);
    return new MosesPhraseTable(tableBuilder.getMap(), indexer);
}
/**
 * Records bookkeeping for an n-gram that was just put into the map.
 *
 * Exactly one of three things happens, checked in this order:
 * <ol>
 * <li>the n-gram contains no separator: it is registered via {@code addNewSrcPhrase};</li>
 * <li>{@code val} is a {@code FeaturePhraseTableValues} with non-null features: the features are
 * stored via {@code addFeaturesForWholePhrase} and a pointer is added via
 * {@code addPointerToTargetSidePhrase};</li>
 * <li>otherwise, and only if {@code ngramIsNew}: its value-index slot is set to
 * {@code EMPTY_VALUE_INDEX}.</li>
 * </ol>
 *
 * @param ngram word array holding the n-gram
 * @param startPos start of the n-gram range, as passed to {@code containsSeparator}
 * @param endPos end of the n-gram range, as passed to {@code containsSeparator}
 * @param ngramOrder order index used to select the per-order arrays
 * @param offset offset of the n-gram within its order
 * @param contextOffset offset forwarded to {@code addPointerToTargetSidePhrase}
 * @param word word forwarded to {@code addPointerToTargetSidePhrase}
 * @param val value associated with the n-gram
 * @param suffixOffset not read by this implementation
 * @param ngramIsNew whether the n-gram was newly added; only consulted in the third case
 */
@Override
public void add(int[] ngram, int startPos, int endPos, int ngramOrder, long offset, long contextOffset, int word,
        PhraseTableValues val, long suffixOffset, boolean ngramIsNew) {
    assert !map.isReversed();

    // No separator in the range: handled as a source-side phrase.
    if (!containsSeparator(ngram, startPos, endPos)) {
        addNewSrcPhrase(ngramOrder, offset);
        return;
    }

    // Separator present and features supplied.
    if (val instanceof FeaturePhraseTableValues && ((FeaturePhraseTableValues) val).features != null) {
        addFeaturesForWholePhrase(ngramOrder, offset, val);
        addPointerToTargetSidePhrase(ngramOrder, offset, contextOffset, word);
        return;
    }

    // Separator present but no features: only a first-time insertion needs a slot.
    if (ngramIsNew) {
        assert val instanceof TargetTranslationsValues || ((FeaturePhraseTableValues) val).features == null;
        growValueIndexArrayIfNecessary(ngramOrder);
        valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), EMPTY_VALUE_INDEX);
    }
}
/**
 * Looks up the translations stored for a source phrase.
 *
 * The source n-gram's value supplies parallel arrays of target offsets and orders. For each
 * entry the stored features are fetched; entries whose features are {@code null} are skipped
 * with a warning. The target words are the part of the stored n-gram after the first separator
 * word.
 *
 * @param src word array containing the source phrase
 * @param startPos start of the phrase range in {@code src}
 * @param endPos end of the phrase range in {@code src}
 * @return the translations found; {@link Collections#emptyList()} when the map reports a
 *         negative offset for the phrase, otherwise a new {@link ArrayList}
 */
public List<TargetSideTranslation> getTranslations(final int[] src, final int startPos, final int endPos) {
    final long srcOffset = map.getOffsetForNgramInModel(src, startPos, endPos);
    if (srcOffset < 0) {
        return Collections.emptyList();
    }

    final TargetTranslationsValues pointers = new PhraseTableValueContainer.TargetTranslationsValues();
    map.getValues().getFromOffset(srcOffset, endPos - startPos - 1, pointers);

    final List<TargetSideTranslation> translations = new ArrayList<TargetSideTranslation>();
    for (int t = 0; t < pointers.targetTranslationOffsets.length; ++t) {
        final FeaturePhraseTableValues stored = new PhraseTableValueContainer.FeaturePhraseTableValues(null);
        final long trgOffset = pointers.targetTranslationOffsets[t];
        final int trgOrder = pointers.targetTranslationOrders[t];
        map.getValues().getFromOffset(trgOffset, trgOrder, stored);
        if (stored.features == null) {
            Logger.warn("Should probably fix");
            continue;
        }

        final TargetSideTranslation translation = new TargetSideTranslation();
        // Copy so the returned object does not share the fetched array.
        translation.features = Arrays.copyOf(stored.features, stored.features.length);

        // Scan for the first separator; everything after it is kept as the target words.
        final int[] srcAndTrg = map.getNgramForOffset(trgOffset, trgOrder);
        int sepIndex = 0;
        while (sepIndex < srcAndTrg.length
                && srcAndTrg[sepIndex] != ((PhraseTableValueContainer) map.getValues()).getSeparatorWord()) {
            ++sepIndex;
        }
        translation.trgWords = Arrays.copyOfRange(srcAndTrg, sepIndex + 1, srcAndTrg.length);
        assert translation.trgWords.length > 0;
        translations.add(translation);
    }
    return translations;
}
/**
 * Stores one phrase-table entry in {@code phrases}.
 *
 * Every prefix of the range, from length 1 up to the full length, is first put with a new
 * {@code TargetTranslationsValues}; the full range is then put again with a
 * {@code FeaturePhraseTableValues} wrapping {@code value.features}.
 *
 * @param ngram word array holding the entry
 * @param startPos start of the entry's range in {@code ngram}
 * @param endPos end of the entry's range in {@code ngram}
 * @param value supplies the features stored for the full range
 * @param words not read by this implementation
 */
@Override
public void call(int[] ngram, int startPos, int endPos, PhraseTableCounts value, String words) {
    final int fullLength = endPos - startPos;
    int prefixLength = 0;
    while (prefixLength < fullLength) {
        prefixLength++;
        phrases.put(ngram, startPos, startPos + prefixLength,
                new PhraseTableValueContainer.TargetTranslationsValues());
    }
    phrases.put(ngram, startPos, endPos, new PhraseTableValueContainer.FeaturePhraseTableValues(value.features));
}
/**
 * Reads all lines of {@code file} and passes them, together with the callback, to
 * {@code countPhrases}.
 *
 * The work is bracketed by {@code Logger.startTrack} / {@code Logger.endTrack}. The closing
 * call sits in a {@code finally} block so the track is closed even when reading fails.
 *
 * @param callback receiver forwarded to {@code countPhrases}
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
private void readFromFiles(final LmReaderCallback<PhraseTableCounts> callback) {
    Logger.startTrack("Reading from file " + file);
    try {
        final Iterable<String> allLinesIterator = Iterators.able(IOUtils.lineIterator(file));
        countPhrases(allLinesIterator, callback);
    } catch (final IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Always balance startTrack, including on the exception path.
        Logger.endTrack();
    }
}
/** Entry point: constructs a {@code MosesPhraseTableTest} and invokes {@code testPhraseTable()} on it; {@code argv} is not read. */
public static void main(final String[] argv) { new MosesPhraseTableTest().testPhraseTable(); } }
/**
 * Appends a pointer to the n-gram identified by {@code ngramOrder} and {@code offset} onto the
 * pointer list of the n-gram reached by walking back to the separator word.
 *
 * Starting from {@code word} and {@code contextOffset} (at order {@code ngramOrder - 1}), the
 * walk repeatedly asks the map for the next word and the next context offset, lowering the order
 * by one each step, until the word equals {@code separatorWord}. The value index stored at the
 * position reached is decoded as {@code -stored - 1} and selects the list that receives
 * {@code combineOrderAndOffset(ngramOrder, offset)}.
 *
 * @param ngramOrder order of the n-gram being pointed to
 * @param offset offset of the n-gram being pointed to
 * @param contextOffset offset at which the walk starts
 * @param word first word compared against the separator
 */
private void addPointerToTargetSidePhrase(int ngramOrder, long offset, long contextOffset, int word) {
    int walkOrder = ngramOrder - 1;
    long walkOffset = contextOffset;
    int lastWord = word;
    while (lastWord != separatorWord) {
        // Both lookups use the position from before this step; order drops afterwards.
        lastWord = map.getNextWord(walkOffset, walkOrder);
        walkOffset = map.getNextContextOffset(walkOffset, walkOrder);
        walkOrder--;
    }
    final long valueIndex = -valueIndexes[walkOrder].get(walkOffset) - 1;
    final LongArray pointersHere = targetTranslations[walkOrder].get((int) valueIndex);
    pointersHere.add(combineOrderAndOffset(ngramOrder, offset));
}
/**
 * Builds a {@code MosesPhraseTable} from the given file.
 *
 * One {@code StringWordIndexer} instance is used for the callback, the reader and the resulting
 * table. After the reader has parsed into the callback, the callback's map becomes the table's
 * backing map.
 *
 * @param file file argument handed to the {@code MosesPhraseTableReader}
 * @return the phrase table built from the callback's map and the word indexer
 */
public static MosesPhraseTable readFromFile(String file) {
    final StringWordIndexer indexer = new StringWordIndexer();
    final MosesPhraseTableReaderCallback<String> collector =
            new MosesPhraseTableReaderCallback<String>(indexer);
    final MosesPhraseTableReader<String> reader = new MosesPhraseTableReader<String>(file, indexer);
    reader.parse(collector);
    return new MosesPhraseTable(collector.getMap(), indexer);
}
/**
 * Records bookkeeping for an n-gram that was just put into the map.
 *
 * The first matching case wins:
 * <ol>
 * <li>no separator in the range: {@code addNewSrcPhrase} is called;</li>
 * <li>{@code val} is a {@code FeaturePhraseTableValues} with non-null features:
 * {@code addFeaturesForWholePhrase} and then {@code addPointerToTargetSidePhrase} are called;</li>
 * <li>{@code ngramIsNew}: the n-gram's value-index slot is set to {@code EMPTY_VALUE_INDEX}.</li>
 * </ol>
 * If none applies, nothing is recorded.
 *
 * @param ngram word array holding the n-gram
 * @param startPos start of the n-gram range, as passed to {@code containsSeparator}
 * @param endPos end of the n-gram range, as passed to {@code containsSeparator}
 * @param ngramOrder order index used to select the per-order arrays
 * @param offset offset of the n-gram within its order
 * @param contextOffset offset forwarded to {@code addPointerToTargetSidePhrase}
 * @param word word forwarded to {@code addPointerToTargetSidePhrase}
 * @param val value associated with the n-gram
 * @param suffixOffset not read by this implementation
 * @param ngramIsNew whether the n-gram was newly added; only consulted in the third case
 * @return always {@code true}
 */
@Override
public boolean add(final int[] ngram, final int startPos, final int endPos, final int ngramOrder,
        final long offset, final long contextOffset, final int word, final PhraseTableValues val,
        final long suffixOffset, final boolean ngramIsNew) {
    assert !map.isReversed();

    if (!containsSeparator(ngram, startPos, endPos)) {
        // Source-side phrase: no separator anywhere in the range.
        addNewSrcPhrase(ngramOrder, offset);
        return true;
    }

    if (val instanceof FeaturePhraseTableValues && ((FeaturePhraseTableValues) val).features != null) {
        addFeaturesForWholePhrase(ngramOrder, offset, val);
        addPointerToTargetSidePhrase(ngramOrder, offset, contextOffset, word);
        return true;
    }

    if (ngramIsNew) {
        assert val instanceof TargetTranslationsValues || ((FeaturePhraseTableValues) val).features == null;
        growValueIndexArrayIfNecessary(ngramOrder);
        valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), EMPTY_VALUE_INDEX);
    }
    return true;
}
/**
 * Looks up the translations stored for a source phrase.
 *
 * The value stored for the source n-gram lists target offsets and orders in parallel arrays.
 * Each listed entry whose stored features are non-null yields one result: a copy of the
 * features plus the words following the first separator word in the stored n-gram. Entries
 * with {@code null} features are logged and skipped.
 *
 * @param src word array containing the source phrase
 * @param startPos start of the phrase range in {@code src}
 * @param endPos end of the phrase range in {@code src}
 * @return the translations found; {@link Collections#emptyList()} when the map reports a
 *         negative offset for the phrase, otherwise a new {@link ArrayList}
 */
public List<TargetSideTranslation> getTranslations(int[] src, int startPos, int endPos) {
    final long phraseOffset = map.getOffsetForNgramInModel(src, startPos, endPos);
    if (phraseOffset < 0) {
        return Collections.emptyList();
    }

    final TargetTranslationsValues targets = new PhraseTableValueContainer.TargetTranslationsValues();
    map.getValues().getFromOffset(phraseOffset, endPos - startPos - 1, targets);

    final List<TargetSideTranslation> result = new ArrayList<TargetSideTranslation>();
    for (int k = 0; k < targets.targetTranslationOffsets.length; ++k) {
        final FeaturePhraseTableValues fetched = new PhraseTableValueContainer.FeaturePhraseTableValues(null);
        final long entryOffset = targets.targetTranslationOffsets[k];
        final int entryOrder = targets.targetTranslationOrders[k];
        map.getValues().getFromOffset(entryOffset, entryOrder, fetched);
        if (fetched.features == null) {
            Logger.warn("Should probably fix");
            continue;
        }

        final TargetSideTranslation entry = new TargetSideTranslation();
        entry.features = Arrays.copyOf(fetched.features, fetched.features.length);

        // Find the first separator; the target words are everything after it.
        final int[] srcAndTrg = map.getNgramForOffset(entryOffset, entryOrder);
        int sepIndex = 0;
        while (sepIndex < srcAndTrg.length
                && srcAndTrg[sepIndex] != ((PhraseTableValueContainer) map.getValues()).getSeparatorWord()) {
            ++sepIndex;
        }
        entry.trgWords = Arrays.copyOfRange(srcAndTrg, sepIndex + 1, srcAndTrg.length);
        assert entry.trgWords.length > 0;
        result.add(entry);
    }
    return result;
}
/**
 * Stores one phrase-table entry in {@code phrases}.
 *
 * Each prefix of the range (lengths 1 through the full length) is put with a new
 * {@code TargetTranslationsValues}. Afterwards the full range is put once more, this time with
 * a {@code FeaturePhraseTableValues} built from {@code value.features}.
 *
 * @param ngram word array holding the entry
 * @param startPos start of the entry's range in {@code ngram}
 * @param endPos end of the entry's range in {@code ngram}
 * @param value supplies the features stored for the full range
 * @param words not read by this implementation
 */
@Override
public void call(final int[] ngram, final int startPos, final int endPos, final PhraseTableCounts value,
        final String words) {
    final int fullLength = endPos - startPos;
    int prefixLength = 0;
    while (prefixLength < fullLength) {
        prefixLength++;
        phrases.put(ngram, startPos, startPos + prefixLength,
                new PhraseTableValueContainer.TargetTranslationsValues());
    }
    phrases.put(ngram, startPos, endPos, new PhraseTableValueContainer.FeaturePhraseTableValues(value.features));
}
/**
 * Creates a new {@code PhraseTableValueContainer} configured with this container's
 * {@code separatorWord} and {@code numFeatures}.
 *
 * @return a newly constructed container
 */
@Override
public PhraseTableValueContainer createFreshValues() {
    final PhraseTableValueContainer fresh = new PhraseTableValueContainer(separatorWord, numFeatures);
    return fresh;
}
/**
 * Performs the read by delegating to {@code readFromFiles} with the same callback.
 *
 * @param callback receiver passed unchanged to {@code readFromFiles}
 */
@Override
public void parse(final MosesPhraseTableReaderCallback<W> callback) {
    this.readFromFiles(callback);
}
/**
 * Returns a new {@code FeaturePhraseTableValues} constructed with a {@code null} argument.
 *
 * @return a newly allocated value object; no instance is reused between calls
 */
@Override
public PhraseTableValues getScratchValue() {
    final FeaturePhraseTableValues scratch = new FeaturePhraseTableValues(null);
    return scratch;
}
/**
 * Reads all lines of {@code file} and passes them, together with the callback, to
 * {@code countPhrases}.
 *
 * {@code Logger.startTrack} opens a track before reading. The matching
 * {@code Logger.endTrack} runs in a {@code finally} block, so a failure while reading no longer
 * leaves the track open.
 *
 * @param callback receiver forwarded to {@code countPhrases}
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
private void readFromFiles(final LmReaderCallback<PhraseTableCounts> callback) {
    Logger.startTrack("Reading from file " + file);
    try {
        final Iterable<String> allLinesIterator = Iterators.able(IOUtils.lineIterator(file));
        countPhrases(allLinesIterator, callback);
    } catch (final IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Close the track on both the normal and the exceptional path.
        Logger.endTrack();
    }
}
/**
 * Creates a callback whose {@code phrases} map is a new explicit-word hash n-gram map backed by
 * a {@code PhraseTableValueContainer}.
 *
 * The separator word ({@code MosesPhraseTableReader.SEP_WORD}) is looked up in, or added to,
 * the given indexer, and its index is given to the value container.
 *
 * @param wordIndexer indexer used to obtain the separator word's index
 */
public MosesPhraseTableReaderCallback(final WordIndexer<W> wordIndexer) {
    final int separatorIndex = wordIndexer.getOrAddIndexFromString(MosesPhraseTableReader.SEP_WORD);
    // NOTE(review): 5 is presumably the feature count per phrase pair - confirm.
    final PhraseTableValueContainer container = new PhraseTableValueContainer(separatorIndex, 5);
    final ConfigOptions options = new ConfigOptions();
    // NOTE(review): meaning of 20 and false is defined by HashNgramMap's factory - confirm there.
    phrases = HashNgramMap.createExplicitWordHashNgramMap(container, options, 20, false);
}
/**
 * Creates a callback whose {@code phrases} map is a new explicit-word hash n-gram map backed by
 * a {@code PhraseTableValueContainer}.
 *
 * The index of {@code MosesPhraseTableReader.SEP_WORD} is obtained from the given indexer
 * (adding the word if needed) and handed to the value container as its separator.
 *
 * @param wordIndexer indexer used to obtain the separator word's index
 */
public MosesPhraseTableReaderCallback(WordIndexer<W> wordIndexer) {
    final int separatorIndex = wordIndexer.getOrAddIndexFromString(MosesPhraseTableReader.SEP_WORD);
    // NOTE(review): 5 is presumably the feature count per phrase pair - confirm.
    final PhraseTableValueContainer container = new PhraseTableValueContainer(separatorIndex, 5);
    final ConfigOptions options = new ConfigOptions();
    // NOTE(review): meaning of 20 and false is defined by HashNgramMap's factory - confirm there.
    phrases = HashNgramMap.createExplicitWordHashNgramMap(container, options, 20, false);
}