/**
 * Computes the average length, in characters, of the tokens of the source label.
 *
 * <p>If the source label is a {@code TokenizedLabel} its own tokens are used;
 * otherwise the label's string form is split on runs of whitespace.
 *
 * @param translation the translation whose source label is measured
 * @return the mean token length, or {@code 0.0} when the source label yields no tokens
 */
public static double aveSrcTkLen(Translation translation) {
    double length = 0.0;
    int n = 0;
    // The check must look at the SOURCE label, because that is the label cast and
    // iterated below. Testing the target label could throw ClassCastException or
    // needlessly fall back to whitespace splitting.
    if (translation.getSourceLabel() instanceof TokenizedLabel) {
        for (String token : ((TokenizedLabel) translation.getSourceLabel()).getTokens()) {
            length += token.length();
            n++;
        }
    } else {
        for (String token : translation.getSourceLabel().asString().split("\\s+")) {
            length += token.length();
            n++;
        }
    }
    // Avoid 0.0 / 0, which would yield NaN and propagate into downstream scores.
    if (n == 0) {
        return 0.0;
    }
    return length / n;
}

/**
 * NOTE(review): judging by the name, this is the score assigned to a word unknown
 * to a Moses language model; its use is not visible here, so confirm against callers.
 */
static public final double MOSES_LM_UNKNOWN_WORD_SCORE = -100;
System.out.println(translation.getTargetLabel().asString()); System.err.println("Score: " + translation.getScore()); for (Feature feature : translation.getFeatures()) { System.err.print(feature.name + " = " + feature.score + " ");
/**
 * Counts the tokens of the target label.
 *
 * <p>A {@code TokenizedLabel} reports the size of its own token collection; any
 * other label is counted by splitting its string form on runs of whitespace.
 *
 * @param translation the translation whose target label is counted
 * @return the number of target tokens
 */
public static double countTksInTrg(Translation translation) {
    // Handle the untokenized case first so the tokenized path reads straight through.
    if (!(translation.getTargetLabel() instanceof TokenizedLabel)) {
        final String[] pieces = translation.getTargetLabel().asString().split("\\s+");
        return pieces.length;
    }
    return ((TokenizedLabel) translation.getTargetLabel()).getTokens().size();
}
if(translation.getFeatures().isEmpty()) { Messages.warning("Empty features for translation"); for (Feature f : translation.getFeatures()) { if (!featureNames.contains(f.name)) { Messages.severe("No initial value for feature " + f.name); changed = jMertTranses.add(new JMertTranslation(features2array(featureNames, translation.getFeatures()), evaluator.score(Collections.singletonList(translation)))) || changed;
/**
 * Counts the tokens of the source label.
 *
 * <p>A {@code TokenizedLabel} reports the size of its own token collection; any
 * other label is counted by splitting its string form on runs of whitespace.
 *
 * @param translation the translation whose source label is counted
 * @return the number of source tokens
 */
public static double countTksInSrc(Translation translation) {
    // Handle the untokenized case first so the tokenized path reads straight through.
    if (!(translation.getSourceLabel() instanceof TokenizedLabel)) {
        final String[] pieces = translation.getSourceLabel().asString().split("\\s+");
        return pieces.length;
    }
    return ((TokenizedLabel) translation.getSourceLabel()).getTokens().size();
}
/**
 * Returns the punctuation count of the target label, as computed by
 * {@code countPunctuation}.
 *
 * @param translation the translation whose target label is inspected
 * @return the value {@code countPunctuation} yields for the target label
 */
public static double countPunctuationInTarget(Translation translation) {
    final double punctuationInTarget = countPunctuation(translation.getTargetLabel());
    return punctuationInTarget;
}

/**
 * Regular expression matching one or more consecutive characters drawn from the
 * ASCII ranges {@code !}-{@code /}, {@code :}-{@code @}, {@code [}-{@code `},
 * {@code {}-{@code ~} and the range U+2000-U+206F.
 */
private static final String puncClass = "[!-/:-@\\[-`\\{-~\u2000-\u206f]+";
/**
 * Returns the punctuation count of the source label, as computed by
 * {@code countPunctuation}.
 *
 * @param translation the translation whose source label is inspected
 * @return the value {@code countPunctuation} yields for the source label
 */
public static double countPunctuationInSource(Translation translation) {
    final double punctuationInSource = countPunctuation(translation.getSourceLabel());
    return punctuationInSource;
}
/**
 * Computes the average number of candidate translations per source token.
 *
 * <p>For every source token, the phrase-table candidates returned by the
 * translation source for the label's language pair are scanned. A candidate is
 * counted when the score of its feature at index 2 is at least
 * {@code Math.log(minProb)}.
 * NOTE(review): index 2 is presumably a log-probability feature; confirm against
 * the phrase-table feature layout.
 *
 * @param translation the translation whose source label is analysed
 * @param minProb     probability threshold; its natural logarithm is compared
 *                    against the candidate feature score
 * @return the counted candidates divided by the number of source tokens, or
 *         {@code 0.0} when no translation source is available or the source
 *         label has no tokens
 */
public double aveTranslationCount(Translation translation, double minProb) {
    final TranslationSource source = getSource(translation.getSourceLabel().getLanguage(),
            translation.getTargetLabel().getLanguage());
    if (source == null) {
        return 0.0;
    }
    final List<String> tokens = getTokens(translation.getSourceLabel());
    // Avoid 0.0 / 0 (NaN); reuse the same neutral value as the "no source" case.
    if (tokens.isEmpty()) {
        return 0.0;
    }
    final double p = Math.log(minProb);
    int transCt = 0;
    for (String token : tokens) {
        final PhraseTable candidates = source.candidates(new ChunkImpl(token));
        for (PhraseTableEntry entry : candidates) {
            if (entry.getFeatures()[2].score >= p) {
                transCt++;
            }
        }
    }
    return (double) transCt / tokens.size();
}
/**
 * Scores the target label's tokens with the language model registered for the
 * target language.
 *
 * @param translation the translation whose target label is scored
 * @return the result of {@code lmProb} for the target tokens, or {@code 0} when
 *         no model is available for the target language
 */
public double targetLMProb(Translation translation) {
    final LanguageModel targetModel = getModel(translation.getTargetLabel().getLanguage());
    if (targetModel != null) {
        return lmProb(targetModel, getTokens(translation.getTargetLabel()));
    }
    // No model for this language: fall back to a neutral score.
    return 0;
}
/**
 * Computes the fraction of source tokens that the source-language model scores
 * with a finite value when each token is scored as a single-element sequence.
 *
 * <p>A token whose score is infinite is treated as absent from the model.
 *
 * @param translation the translation whose source label is analysed
 * @return the fraction of tokens with a finite unigram score, or {@code 1.0}
 *         when no model is available or the source label has no tokens
 */
public double percentUnigramsInLM(Translation translation) {
    final LanguageModel nGramSource = getModel(translation.getSourceLabel().getLanguage());
    if (nGramSource == null) {
        return 1.0;
    }
    final List<String> tokens = getTokens(translation.getSourceLabel());
    // Avoid 0 / 0 (NaN); with no tokens nothing is missing from the model, so
    // return the same value as the "no model" case.
    if (tokens.isEmpty()) {
        return 1.0;
    }
    int count = 0;
    for (String s : tokens) {
        if (Double.isInfinite(nGramSource.score(Arrays.asList(s)))) {
            count++;
        }
    }
    return (double) (tokens.size() - count) / tokens.size();
}
/**
 * Computes the average number of occurrences per distinct token in the target
 * label, i.e. total token count divided by the number of distinct tokens.
 *
 * <p>If the target label is a {@code TokenizedLabel} its own tokens are used;
 * otherwise the label's string form is split on runs of whitespace.
 *
 * @param translation the translation whose target label is analysed
 * @return the mean occurrence count per distinct token, or {@code 0.0} when the
 *         target label yields no tokens
 */
public static double aveOccurencesInTarget(Translation translation) {
    final HashMap<String, Integer> occMap = new HashMap<String, Integer>();
    final Label label = translation.getTargetLabel();
    if (label instanceof TokenizedLabel) {
        for (String token : ((TokenizedLabel) label).getTokens()) {
            incrementOccurrence(occMap, token);
        }
    } else {
        for (String token : label.asString().split("\\s+")) {
            incrementOccurrence(occMap, token);
        }
    }
    // A whitespace-only string splits into zero tokens; avoid 0.0 / 0 (NaN).
    if (occMap.isEmpty()) {
        return 0.0;
    }
    double total = 0.0;
    for (int occurrences : occMap.values()) {
        total += occurrences;
    }
    return total / occMap.size();
}

/** Adds one to the stored count for {@code token}, starting from 1 on first sight. */
private static void incrementOccurrence(HashMap<String, Integer> counts, String token) {
    final Integer previous = counts.get(token);
    counts.put(token, previous == null ? 1 : previous + 1);
}
/**
 * Scores the source label's tokens with the language model registered for the
 * source language.
 *
 * @param translation the translation whose source label is scored
 * @return the result of {@code lmProb} for the source tokens, or {@code 0} when
 *         no model is available for the source language
 */
public double sourceLMProb(Translation translation) {
    final LanguageModel sourceModel = getModel(translation.getSourceLabel().getLanguage());
    if (sourceModel != null) {
        return lmProb(sourceModel, getTokens(translation.getSourceLabel()));
    }
    // No model for this language: fall back to a neutral score.
    return 0;
}
public double[] percentNGramsInTopBotQuartile(Translation translation, int n) { final LanguageModel nGramSource = getModel(translation.getSourceLabel().getLanguage()); if (nGramSource == null) { return new double[]{0.0, 0.0}; final List<String> tokens = getTokens(translation.getSourceLabel()); int botCount = 0; int topCount = 0;