/**
 * Gathers the distinct output labels found at every position whose input
 * token text equals the given word.
 *
 * @param word the token text to match (compared with {@code equals})
 * @return the distinct {@code toString()} forms of the output elements at the
 *         matching positions; empty when nothing matches. The values come from
 *         a HashSet, so their order is unspecified.
 */
public String[] fieldNamesForWord (String word)
{
    HashSet matchedFields = new HashSet();
    // Input and output are read position-by-position, so they must be aligned.
    assert input.size() == output.size();
    int pos = 0;
    while (pos < input.size()) {
        String tokenText = input.getToken(pos).getText();
        if (tokenText.equals(word)) {
            matchedFields.add(output.get(pos).toString());
        }
        pos++;
    }
    String[] result = new String[matchedFields.size()];
    return (String[]) matchedFields.toArray(result);
}
/**
 * Creates a sequence holding one feature vector per token, each built from
 * that token's features.
 *
 * @param dict         alphabet handed to every vector constructor
 * @param tokens       source tokens; element i supplies the features of vector i
 * @param binary       passed through to every vector constructor
 * @param augmentable  when true each element is an AugmentableFeatureVector,
 *                     otherwise a plain FeatureVector
 * @param growAlphabet passed through to every vector constructor
 */
public FeatureVectorSequence (Alphabet dict, TokenSequence tokens, boolean binary,
                              boolean augmentable, boolean growAlphabet)
{
    this.sequence = new FeatureVector[tokens.size()];
    for (int pos = 0; pos < tokens.size(); pos++) {
        if (augmentable) {
            sequence[pos] = new AugmentableFeatureVector (dict, tokens.getToken(pos).getFeatures(),
                                                          binary, growAlphabet);
        } else {
            sequence[pos] = new FeatureVector (dict, tokens.getToken(pos).getFeatures(),
                                               binary, growAlphabet);
        }
    }
}
/**
 * Passes every token of the given sequence, in index order, to {@code add}.
 *
 * @param ts the sequence whose tokens are added; its size is re-read on every
 *           iteration
 */
public void addAll(TokenSequence ts)
{
    int idx = 0;
    while (idx < ts.size()) {
        add(ts.getToken(idx));
        idx++;
    }
}
/**
 * Adds a character-prefix feature to each token of the carrier's data.
 *
 * For every token whose text is strictly longer than {@code prefixLength}, a
 * feature named {@code prefix} followed by the first {@code prefixLength}
 * characters of the text is set to 1.0. Tokens whose text is that long or
 * shorter are left untouched.
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    for (int pos = 0; pos < tokens.size(); pos++) {
        Token token = tokens.getToken(pos);
        String text = token.getText();
        if (text.length() <= prefixLength) {
            continue;
        }
        String featureName = (prefix + text.substring (0, prefixLength)).intern();
        token.setFeatureValue (featureName, 1.0);
    }
    return carrier;
}
/**
 * Adds a character-suffix feature to each token of the carrier's data.
 *
 * For every token whose text is strictly longer than {@code suffixLength}, a
 * feature named {@code prefix} followed by the last {@code suffixLength}
 * characters of the text is set to 1.0. Tokens whose text is that long or
 * shorter are left untouched.
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    for (int pos = 0; pos < tokens.size(); pos++) {
        Token token = tokens.getToken(pos);
        String text = token.getText();
        int textLength = text.length();
        if (textLength <= suffixLength) {
            continue;
        }
        // substring(begin) runs to the end of the string, i.e. the trailing suffixLength chars.
        String featureName = (prefix + text.substring (textLength - suffixLength)).intern();
        token.setFeatureValue (featureName, 1.0);
    }
    return carrier;
}
/**
 * Sets {@code feature} to 1.0 on every token whose entire text matches
 * {@code regex}.
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with matching tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    int pos = 0;
    while (pos < tokens.size()) {
        Token token = tokens.getToken(pos);
        boolean wholeTextMatches = regex.matcher (token.getText()).matches ();
        if (wholeTextMatches) {
            token.setFeatureValue (feature, 1.0);
        }
        pos++;
    }
    return carrier;
}
public Instance pipe (Instance carrier) { TokenSequence ts = (TokenSequence) carrier.getData(); for (int i = 0; i < ts.size(); i++) { Token t = ts.getToken(i); String s = t.getText(); if (distinguishBorders) s = startBorderChar + s + endBorderChar; int slen = s.length(); for (int j = 0; j < gramSizes.length; j++) { int size = gramSizes[j]; for (int k = 0; k < slen - size; k++) t.setFeatureValue (s.substring (k, k+size), 1.0);//original was substring(k, size), changed by Fuchun } } return carrier; }
/**
 * Replaces the text of every token in the carrier's data with its
 * lower-cased form (via {@code String.toLowerCase()}).
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    int pos = 0;
    while (pos < tokens.size()) {
        Token token = tokens.getToken(pos);
        String lowered = token.getText().toLowerCase();
        token.setText(lowered);
        pos++;
    }
    return carrier;
}
/**
 * Sets a feature of value 1.0 on each token, named after the token's own text.
 *
 * The feature name is the interned text itself when {@code prefix} is null,
 * otherwise the interned concatenation of {@code prefix} and the text.
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    for (int pos = 0; pos < tokens.size(); pos++) {
        Token token = tokens.getToken(pos);
        String featureName;
        if (prefix == null) {
            featureName = token.getText().intern();
        } else {
            featureName = (prefix + token.getText()).intern();
        }
        token.setFeatureValue (featureName, 1.0);
    }
    return carrier;
}
/**
 * Scans the sequence left to right and sets the feature {@code name} to 1.0 on
 * every token covered by a span reported by {@code endOfWord}.
 *
 * At each position, {@code endOfWord(ts, pos)} is asked for an end index.
 * A result of -1 means no span starts here and the scan moves on by one token.
 * Otherwise all tokens from the current position through that end index
 * (inclusive) receive the feature, and the scan resumes just past the span.
 *
 * @param ts the token sequence to annotate in place
 */
public void addFeatures(TokenSequence ts)
{
    int pos = 0;
    while (pos < ts.size()) {
        int last = endOfWord(ts, pos);
        if (last == -1) {
            pos++;
            continue;
        }
        while (pos <= last) {
            ts.getToken(pos).setFeatureValue(name, 1.0);
            pos++;
        }
    }
}
public Instance pipe (Instance carrier) { //Object in = carrier.getData(); Object target = carrier.getTarget(); if (target instanceof FeatureSequence) ; // Nothing to do else if (target instanceof TokenSequence) { TokenSequence ts = (TokenSequence) target; FeatureSequence fs = new FeatureSequence (getTargetAlphabet(), ts.size()); for (int i = 0; i < ts.size(); i++) fs.add (ts.getToken(i).getText()); carrier.setTarget(fs); } else { throw new IllegalArgumentException ("Unrecognized target type."); } return carrier; }
/**
 * Sets the feature {@code name} to 1.0 on every token whose text is contained
 * in {@code lexicon}.
 *
 * When {@code ignoreCase} is set, the lower-cased text is looked up instead of
 * the original text; the token's own text is not modified.
 *
 * @param carrier instance whose data is cast to TokenSequence
 * @return the same carrier, with matching tokens modified in place
 */
public Instance pipe (Instance carrier)
{
    TokenSequence tokens = (TokenSequence) carrier.getData();
    for (int pos = 0; pos < tokens.size(); pos++) {
        Token token = tokens.getToken(pos);
        String key = token.getText();
        if (ignoreCase) {
            key = key.toLowerCase();
        }
        if (lexicon.contains (key)) {
            token.setFeatureValue (name, 1.0);
        }
    }
    return carrier;
}
/**
 * Walks the sequences in windows of up to LENGTH positions and emits, via
 * {@code error2html}, every window in which predicted and target do not fully
 * match.
 *
 * Consecutive windows start LENGTH - 1 positions apart, so each window shares
 * one position with the next.
 *
 * @param out  writer handed to {@code error2html}
 * @param info holds the input, target and predicted sequences, which are
 *             asserted to have equal sizes
 */
static void lattice2html (PrintWriter out, ExtorInfo info)
{
    assert (info.target.size() == info.predicted.size());
    assert (info.input.size() == info.predicted.size());

    int total = info.target.size();
    int windowStart = 0;
    while (windowStart < total) {
        int windowEnd = Math.min (total, windowStart + LENGTH);
        boolean agrees = allSeqMatches (info.predicted, info.target, windowStart, windowEnd);
        if (!agrees) {
            error2html (out, info, windowStart, windowEnd);
        }
        windowStart += LENGTH - 1;
    }
}
/**
 * Compares the predictions of two extractors window by window and, for every
 * window where they differ, emits both via {@code error2html} (first
 * {@code info1}, then {@code info2}).
 *
 * Windows span up to LENGTH positions and start LENGTH - 1 positions apart.
 * The iteration range is taken from {@code info1}.
 *
 * @param out   writer handed to {@code error2html}
 * @param desc  not used by this method
 * @param info1 first extractor's sequences (sizes asserted equal)
 * @param info2 second extractor's sequences (sizes asserted equal)
 */
public static void dualLattice2html (PrintWriter out, String desc, ExtorInfo info1, ExtorInfo info2)
{
    assert (info1.predicted.size() == info1.target.size());
    assert (info1.input.size() == info1.predicted.size());
    assert (info2.input.size() == info2.predicted.size());
    assert (info2.predicted.size() == info2.target.size());

    int total = info1.target.size();
    int windowStart = 0;
    while (windowStart < total) {
        int windowEnd = Math.min (info1.predicted.size(), windowStart + LENGTH);
        boolean samePrediction =
            allSeqMatches (info1.predicted, info2.predicted, windowStart, windowEnd);
        if (!samePrediction) {
            error2html (out, info1, windowStart, windowEnd);
            error2html (out, info2, windowStart, windowEnd);
        }
        windowStart += LENGTH - 1;
    }
}
public String viterbiCRFInstance(Instance instance, boolean sgml ) { assert(crf != null); viterbiP = crf.viterbiPath((Sequence)instance.getData());// regular viterbi viterbiSequence = viterbiP.output(); // confidence = Math.exp(viterbiP.getCost()/viterbiSequence.size()); // confidence = viterbiP.getCost()/viterbiSequence.size(); instance_accuracy= InstanceAccuracy(viterbiSequence, (Sequence)instance.getTarget(), instance); tokenSequence = (TokenSequence)instance.getSource(); assert(viterbiSequence.size() == tokenSequence.size()); return printResultInFormat(sgml); }
/**
 * Replaces the carrier's data with a TokenSequence built by {@code ngramify}.
 *
 * CharSequence data is passed to {@code ngramify} as a whole and the result is
 * wrapped in a new TokenSequence. For TokenSequence data, the text of each
 * token is passed to {@code ngramify} and each result is added to a fresh
 * TokenSequence.
 *
 * @param carrier instance whose data is replaced
 * @return the same carrier
 * @throws IllegalArgumentException if the data is neither a CharSequence nor a
 *         TokenSequence
 */
public Instance pipe (Instance carrier)
{
    Object data = carrier.getData();
    if (data instanceof CharSequence) {
        carrier.setData(new TokenSequence (ngramify ((CharSequence) data)));
        return carrier;
    }
    if (data instanceof TokenSequence) {
        TokenSequence original = (TokenSequence) data;
        TokenSequence expanded = new TokenSequence ();
        int pos = 0;
        while (pos < original.size()) {
            expanded.add (ngramify (original.getToken(pos).getText()));
            pos++;
        }
        carrier.setData(expanded);
        return carrier;
    }
    throw new IllegalArgumentException ("Unhandled type "+data.getClass());
}
public String viterbiCRFInstance(Instance instance, boolean sgml ) { assert(crf != null); viterbiP = crf.viterbiPath((Sequence)instance.getData());// regular viterbi viterbiSequence = viterbiP.output(); // confidence = Math.exp(viterbiP.getCost()/viterbiSequence.size()); // confidence = viterbiP.getCost()/viterbiSequence.size(); // viterbiSequence = crf.viterbiPath((Sequence)instance.getData()).output(); instance_accuracy= InstanceAccuracy(viterbiSequence, (Sequence)instance.getTarget()); tokenSequence = (TokenSequence)instance.getSource(); assert(viterbiSequence.size() == tokenSequence.size()); return printResultInFormat(sgml); }
public String viterbiCRFString(String line, boolean sgml) { Instance lineCarrier = new Instance(line, null, null, null, pipe); assert(pipe != null); Instance featureCarrier = pipe.pipe(lineCarrier, 0); assert(crf != null); viterbiP = crf.viterbiPath((Sequence)featureCarrier.getData()); viterbiSequence = viterbiP.output(); //confidence = Math.exp(-viterbiP.getCost()/viterbiSequence.size()); confidence = viterbiP.getCost(); tokenSequence = (TokenSequence)featureCarrier.getSource(); assert(viterbiSequence.size() == tokenSequence.size()); return printResultInFormat(sgml); }
public String viterbiCRFString(String line, boolean sgml) { Instance lineCarrier = new Instance(line, null, null, null, pipe); assert(pipe != null); Instance featureCarrier = pipe.pipe(lineCarrier, 0); assert(crf != null); viterbiP = crf.viterbiPath((Sequence)featureCarrier.getData()); viterbiSequence = viterbiP.output(); //confidence = Math.exp(-viterbiP.getCost()/viterbiSequence.size()); confidence = viterbiP.getCost(); tokenSequence = (TokenSequence)featureCarrier.getSource(); assert(viterbiSequence.size() == tokenSequence.size()); return printResultInFormat(sgml); }