/**
 * Returns the largest number of whitespace-separated tokens found in the
 * foreign side of any entry of this table.
 *
 * <p>The result is cached in {@code longestForeignPhrase}; the table is only
 * scanned while that field is negative. If the scan visits no entries the
 * field is left untouched and its (negative) value is returned.
 *
 * @return the cached or freshly computed longest foreign phrase length
 */
@Override
public int getLongestForeignPhrase() {
    // A non-negative value means the scan has already been done.
    if (longestForeignPhrase >= 0) {
        return longestForeignPhrase;
    }
    for (PhraseTableEntry entry : this) {
        final String foreignText = entry.getForeign().asString();
        final int tokenCount = foreignText.split("\\s+").length;
        if (tokenCount > longestForeignPhrase) {
            longestForeignPhrase = tokenCount;
        }
    }
    return longestForeignPhrase;
}
/**
 * Returns the largest number of whitespace-separated tokens found in the
 * foreign side of any entry of this table.
 *
 * <p>The result is cached in {@code longestForeignPhrase}; the table is only
 * scanned while that field is negative. If the scan visits no entries the
 * field is left untouched and its (negative) value is returned.
 *
 * @return the cached or freshly computed longest foreign phrase length
 */
@Override
public int getLongestForeignPhrase() {
    // A non-negative value means the scan has already been done.
    if (longestForeignPhrase >= 0) {
        return longestForeignPhrase;
    }
    for (PhraseTableEntry entry : this) {
        final String foreignText = entry.getForeign().asString();
        final int tokenCount = foreignText.split("\\s+").length;
        if (tokenCount > longestForeignPhrase) {
            longestForeignPhrase = tokenCount;
        }
    }
    return longestForeignPhrase;
}
/**
 * Returns the tokens of the given label.
 *
 * <p>A {@code TokenizedLabel} supplies its own token list; any other label is
 * split on runs of whitespace after conversion with {@code asString()}.
 *
 * @param label the label to tokenize
 * @return the label's tokens
 */
private List<String> getTokens(Label label) {
    if (!(label instanceof TokenizedLabel)) {
        // No pre-computed tokens available: fall back to whitespace splitting.
        final String[] pieces = label.asString().split("\\s+");
        return Arrays.asList(pieces);
    }
    final TokenizedLabel tokenized = (TokenizedLabel) label;
    return tokenized.getTokens();
}
/**
 * Returns the tokens of the given label.
 *
 * <p>A {@code TokenizedLabel} supplies its own token list; any other label is
 * split on runs of whitespace after conversion with {@code asString()}.
 *
 * @param label the label to tokenize
 * @return the label's tokens
 */
private static List<String> getTokens(Label label) {
    if (label instanceof TokenizedLabel) {
        return ((TokenizedLabel) label).getTokens();
    }
    // No pre-computed tokens available: fall back to whitespace splitting.
    final String text = label.asString();
    return Arrays.asList(text.split("\\s+"));
}
/**
 * Returns the largest number of whitespace-separated tokens found in the
 * foreign side of any entry of this table.
 *
 * <p>The result is cached in {@code longestForeignPhrase}; the table is only
 * scanned while that field is negative. If the scan visits no entries the
 * field is left untouched and its (negative) value is returned.
 *
 * @return the cached or freshly computed longest foreign phrase length
 */
@Override
public int getLongestForeignPhrase() {
    // A non-negative value means the scan has already been done.
    if (longestForeignPhrase >= 0) {
        return longestForeignPhrase;
    }
    for (PhraseTableEntry entry : this) {
        final String foreignText = entry.getForeign().asString();
        final int tokenCount = foreignText.split("\\s+").length;
        if (tokenCount > longestForeignPhrase) {
            longestForeignPhrase = tokenCount;
        }
    }
    return longestForeignPhrase;
}
/**
 * Returns the largest number of whitespace-separated tokens found in the
 * foreign side of any entry of this table.
 *
 * <p>The result is cached in {@code longestForeignPhrase}; the table is only
 * scanned while that field is negative. If the scan visits no entries the
 * field is left untouched and its (negative) value is returned.
 *
 * @return the cached or freshly computed longest foreign phrase length
 */
@Override
public int getLongestForeignPhrase() {
    // A non-negative value means the scan has already been done.
    if (longestForeignPhrase >= 0) {
        return longestForeignPhrase;
    }
    for (PhraseTableEntry entry : this) {
        final String foreignText = entry.getForeign().asString();
        final int tokenCount = foreignText.split("\\s+").length;
        if (tokenCount > longestForeignPhrase) {
            longestForeignPhrase = tokenCount;
        }
    }
    return longestForeignPhrase;
}
/**
 * Returns the largest number of whitespace-separated tokens found in the
 * foreign side of any entry of this table.
 *
 * <p>The result is cached in {@code longestForeignPhrase}; the table is only
 * scanned while that field is negative. If the scan visits no entries the
 * field is left untouched and its (negative) value is returned.
 *
 * @return the cached or freshly computed longest foreign phrase length
 */
@Override
public int getLongestForeignPhrase() {
    // A non-negative value means the scan has already been done.
    if (longestForeignPhrase >= 0) {
        return longestForeignPhrase;
    }
    for (PhraseTableEntry entry : this) {
        final String foreignText = entry.getForeign().asString();
        final int tokenCount = foreignText.split("\\s+").length;
        if (tokenCount > longestForeignPhrase) {
            longestForeignPhrase = tokenCount;
        }
    }
    return longestForeignPhrase;
}
/**
 * Counts the tokens of a label that match the {@code puncClass} pattern.
 *
 * <p>For a {@code TokenizedLabel} the label's own tokens are tested as they
 * are. For any other label the string form is split at regex word boundaries
 * ({@code \b}) and each piece is trimmed before being tested.
 *
 * @param label the label to inspect
 * @return the number of matching tokens, widened to {@code double}
 */
private static double countPunctuation(Label label) {
    int matches = 0;
    if (!(label instanceof TokenizedLabel)) {
        final String[] pieces = label.asString().split("\\b");
        for (int i = 0; i < pieces.length; i++) {
            // Pieces between word boundaries may carry surrounding whitespace.
            if (pieces[i].trim().matches(puncClass)) {
                matches++;
            }
        }
        return matches;
    }
    for (String token : ((TokenizedLabel) label).getTokens()) {
        if (token.matches(puncClass)) {
            matches++;
        }
    }
    return matches;
}
/**
 * Scores a phrase table entry against the known labels of an entity.
 *
 * <p>Returns 1 only when the entry's foreign text contains one of the
 * entity's source label candidates and its translation text contains one of
 * the entity's target label candidates. Returns 0 in every other case,
 * including when the entity has no source labels registered.
 *
 * @param entry  the phrase table entry to score
 * @param entity the entity whose labels are looked up
 * @return 1 if both sides contain a candidate label, otherwise 0
 */
@Override
public double score(PhraseTableEntry entry, Entity entity) {
    if (!srcLabels.containsKey(entity)) {
        return 0;
    }
    assert (trgLabels.containsKey(entity));

    final String foreignText = entry.getForeign().asString();
    final String translationText = entry.getTranslation().asString();

    // The source side must contain at least one candidate before the
    // target side is considered at all.
    boolean sourceMatched = false;
    for (String srcCandidate : srcLabels.get(entity)) {
        if (foreignText.contains(srcCandidate)) {
            sourceMatched = true;
            break;
        }
    }
    if (!sourceMatched) {
        return 0;
    }

    for (String trgCandidate : trgLabels.get(entity)) {
        if (translationText.contains(trgCandidate)) {
            return 1;
        }
    }
    return 0;
}
public void setPhraseTable(eu.monnetproject.translation.PhraseTable pt, List<String> featureNames) { ptMap = new HashMap<String, List<PhraseTableEntry>>(); for(PhraseTableEntry pte : pt) { final String key = pte.getForeign().asString(); if(!ptMap.containsKey(key)) { ptMap.put(key, new ArrayList<PhraseTableEntry>()); } ptMap.get(key).add(pte); } ptName = pt.getName(); ptLongestPhrase = pt.getLongestForeignPhrase(); scoreNames = featureNames; //log.info("New lfp " + ptLongestPhrase); }
/**
 * Computes the average number of occurrences per distinct token in the
 * target label of a translation.
 *
 * <p>Tokens come from the label itself when it is a {@code TokenizedLabel},
 * otherwise from splitting its string form on runs of whitespace. The result
 * is the sum of all per-token counts divided by the number of distinct
 * tokens; with no tokens at all the division is 0.0 / 0 and yields NaN.
 *
 * @param translation the translation whose target label is analysed
 * @return total token occurrences divided by the number of distinct tokens
 */
public static double aveOccurencesInTarget(Translation translation) {
    final HashMap<String, Integer> counts = new HashMap<String, Integer>();
    final Label target = translation.getTargetLabel();
    if (target instanceof TokenizedLabel) {
        for (String token : ((TokenizedLabel) target).getTokens()) {
            tallyToken(counts, token);
        }
    } else {
        for (String token : target.asString().split("\\s+")) {
            tallyToken(counts, token);
        }
    }
    double total = 0.0;
    for (int occurrences : counts.values()) {
        total += occurrences;
    }
    return total / counts.size();
}

/** Increments the occurrence count stored for {@code token}, starting at 1. */
private static void tallyToken(HashMap<String, Integer> counts, String token) {
    final Integer seen = counts.get(token);
    if (seen == null) {
        counts.put(token, 1);
    } else {
        counts.put(token, 1 + seen);
    }
}
/**
 * Counts the tokens in the source label of a translation.
 *
 * <p>A {@code TokenizedLabel} reports the size of its token list; any other
 * label is split on runs of whitespace and the number of pieces is returned.
 *
 * @param translation the translation whose source label is measured
 * @return the token count, widened to {@code double}
 */
public static double countTksInSrc(Translation translation) {
    if (!(translation.getSourceLabel() instanceof TokenizedLabel)) {
        final String[] pieces = translation.getSourceLabel().asString().split("\\s+");
        return pieces.length;
    }
    final TokenizedLabel tokenized = (TokenizedLabel) translation.getSourceLabel();
    return tokenized.getTokens().size();
}
/**
 * Counts the tokens in the target label of a translation.
 *
 * <p>A {@code TokenizedLabel} reports the size of its token list; any other
 * label is split on runs of whitespace and the number of pieces is returned.
 *
 * @param translation the translation whose target label is measured
 * @return the token count, widened to {@code double}
 */
public static double countTksInTrg(Translation translation) {
    if (!(translation.getTargetLabel() instanceof TokenizedLabel)) {
        final String[] pieces = translation.getTargetLabel().asString().split("\\s+");
        return pieces.length;
    }
    final TokenizedLabel tokenized = (TokenizedLabel) translation.getTargetLabel();
    return tokenized.getTokens().size();
}
/**
 * Computes the average character length of the tokens in the source label
 * of a translation.
 *
 * <p>Tokens come from the source label itself when it is a
 * {@code TokenizedLabel}, otherwise from splitting its string form on runs
 * of whitespace. When there are no tokens the division is 0.0 / 0 and the
 * result is NaN.
 *
 * @param translation the translation whose source label is measured
 * @return the summed token length divided by the number of tokens
 */
public static double aveSrcTkLen(Translation translation) {
    double length = 0.0;
    int n = 0;
    // The type test must inspect the SOURCE label, because that is the label
    // cast below. Testing the target label here threw ClassCastException
    // whenever only the target was tokenized.
    if (translation.getSourceLabel() instanceof TokenizedLabel) {
        for (String token : ((TokenizedLabel) translation.getSourceLabel()).getTokens()) {
            length += token.length();
            n++;
        }
    } else {
        for (String token : translation.getSourceLabel().asString().split("\\s+")) {
            length += token.length();
            n++;
        }
    }
    return length / n;
}

// NOTE(review): judging by the name, this is the score assigned to words
// unknown to a Moses language model; confirm against its users.
static public final double MOSES_LM_UNKNOWN_WORD_SCORE = -100;
/**
 * Scores the translation side of a phrase table entry against a source-side
 * context using {@code clesa}.
 *
 * <p>The context string is chosen by a sequence of overrides, applied in this
 * order so that later ones win: {@code ontoDoc} (when non-null), the entity's
 * string form (when {@code nearByTermsAsContext}), the entry's foreign text
 * (when {@code onlyChunkAsContext}), and the entity's string form again (when
 * {@code sourceLabelAsContext}). If the chosen context equals
 * {@code ontoDoc} ignoring case, the candidate is scored against
 * {@code ontoVector}; otherwise it is scored against the context string.
 *
 * <p>NOTE(review): if {@code ontoDoc} is null and none of the flags is set,
 * the context stays null and the comparison below throws
 * NullPointerException.
 *
 * @param entry  the phrase table entry whose translation is scored
 * @param entity the entity providing context
 * @return the score returned by {@code clesa}
 */
@Override
public double score(PhraseTableEntry entry, Entity entity) {
    final String candidate = entry.getTranslation().asString();

    // Sequential overrides: each later flag replaces the earlier choice.
    String context = null;
    if (ontoDoc != null) {
        context = ontoDoc;
    }
    if (nearByTermsAsContext) {
        context = entity.toString();
    }
    if (onlyChunkAsContext) {
        context = entry.getForeign().asString();
    }
    if (sourceLabelAsContext) {
        context = entity.toString();
    }

    if (!context.equalsIgnoreCase(ontoDoc)) {
        return clesa.score(
                new Pair<String, Language>(candidate, trgLang),
                new Pair<String, Language>(context, srcLang));
    }
    return clesa.scoreAgainstVector(new Pair<String, Language>(candidate, trgLang), ontoVector);
}
/**
 * Fires a {@code MessageType.SUCCESS} message for an entity.
 *
 * <p>The message carries the entity URI, the string form of the target label
 * and the value returned by {@code currentJob()}.
 *
 * @param entity      the entity that was translated
 * @param targetLabel the resulting target label
 */
public static void translationSuccess(URI entity, Label targetLabel) {
    fire(new Message(MessageType.SUCCESS, entity, targetLabel.asString(), currentJob()));
}
private PhraseTableEntry concatPhraseTableEntries(PhraseTableEntry e1, PhraseTableEntry e2, String srcLabel) { // sum values of features double[] newScores = new double[e1.getFeatures().length]; Feature[] f1 = e1.getFeatures(); Feature[] f2 = e2.getFeatures(); for (int i = 0; i < e1.getFeatures().length; i++) { newScores[i] = f1[i].score + f2[i].score; } Feature[] newFeatures = toFeatures(newScores); // combine foreign and translation labels //Label newForeign = new StringLabel(e1.getForeign().asString() + "" + e2.getForeign().asString(), srcLang); Label newTranslation = new StringLabel(e1.getTranslation().asString() + " " + e2.getTranslation().asString(), trgLang); return new PhraseTableEntryImpl(new StringLabel(srcLabel, srcLang), newTranslation, newFeatures, null); }
final Phrase trg;// = convertPhrase(FairlyGoodTokenizer.split(pte.getTranslation().asString()), trgDict); src = convertPhrase(FairlyGoodTokenizer.split(pte.getForeign().asString()), srcWordMap); if (maxSize > 0) { if (!approxScores.containsKey(src)) { trg = convertPhrase(FairlyGoodTokenizer.split(pte.getTranslation().asString()), trgDict); final double[] wts = convertWeights(pte.getFeatures(), featureNames); final PhraseTranslation translation = new PhraseTranslation(trg.p, wts);