public FeatureSequence (Alphabet dict, int[] features, int len) { this(dict, len); for (int i = 0; i < len; i++) add(features[i]); }
/**
 * Adds four distinct entries to a fresh sequence, checks its size and the
 * weight accumulated for index 1, then appends a repeated entry and checks
 * the resulting index sequence element by element.
 */
public void testNewPutSizeFreeze ()
{
	Alphabet dict = new Alphabet ();
	FeatureSequence fs = new FeatureSequence (dict, 10);

	// Four distinct entries, added in this order.
	String[] initialEntries = { "apple", "bear", "car", "door" };
	for (int i = 0; i < initialEntries.length; i++) {
		fs.add (dict.lookupIndex (initialEntries[i]));
	}
	assertTrue (fs.size() == 4);

	double[] weights = new double[4];
	fs.addFeatureWeightsTo (weights);
	assertTrue (weights[1] == 1.0);

	// Append an entry that is already in the dictionary.
	fs.add (dict.lookupIndex ("bear"));

	int[] feats = fs.toFeatureIndexSequence();
	int[] expected = { 0, 1, 2, 3, 1 };
	for (int k = 0; k < expected.length; k++) {
		assertTrue (feats[k] == expected[k]);
	}
}
/** * Creates a FeatureSequence given all of the objects in the * sequence. * * @param dict A dictionary that maps objects in the sequence * to numeric indices. * @param features An array where features[i] gives the index * in dict of the ith element of the sequence. */ public FeatureSequence (Alphabet dict, int[] features) { this(dict, features.length); for (int i = 0; i < features.length; i++) add(features[i]); }
public void add (Object key) { int fi = dictionary.lookupIndex (key); if (fi >= 0) // This will happen if the dictionary is frozen, // and key is not already in the dictionary. add (fi); // xxx Should we raise an exception if the appending doesn't happen? }
/**
 * Builds a FeatureSequence by appending the result of randomIndex(r)
 * once per requested element.
 *
 * @param r      Source of randomness passed to randomIndex.
 * @param length Number of indices to draw; also passed as the second
 *               constructor argument of the new sequence.
 * @return The newly built sequence, backed by this object's dictionary.
 * @throws UnsupportedOperationException if the dictionary is not an Alphabet.
 */
public FeatureSequence randomFeatureSequence (Random r, int length)
{
	if (dictionary instanceof Alphabet) {
		FeatureSequence sampled = new FeatureSequence ((Alphabet) dictionary, length);
		for (int drawn = 0; drawn < length; drawn++) {
			sampled.add (randomIndex (r));
		}
		return sampled;
	}
	throw new UnsupportedOperationException ("Multinomial's dictionary must be a Alphabet");
}
/**
 * Converts the tokens held by this object into a FeatureSequence by
 * looking up each token's text in the supplied dictionary.
 *
 * @param dict Dictionary used both for the lookups and as the dictionary
 *             of the returned sequence.
 * @return A new FeatureSequence with one lookup result appended per token,
 *         in token order.
 */
public FeatureSequence toFeatureSequence(Alphabet dict)
{
	int tokenCount = tokens.size();
	FeatureSequence result = new FeatureSequence (dict, tokenCount);
	for (int position = 0; position < tokenCount; position++) {
		Token token = (Token) tokens.get (position);
		// NOTE(review): the looked-up index is appended as-is, with no check
		// for a negative value -- confirm whether dict can return one here.
		int index = dict.lookupIndex (token.getText());
		result.add (index);
	}
	return result;
}
protected void setUp () { dict = new Alphabet (); fs = new FeatureSequence (dict, 2); fs.add (dict.lookupIndex ("a")); fs.add (dict.lookupIndex ("n")); fs.add (dict.lookupIndex ("d")); fs.add (dict.lookupIndex ("r")); fs.add (dict.lookupIndex ("e")); fs.add (dict.lookupIndex ("w")); fs.add (dict.lookupIndex ("m")); fs.add (dict.lookupIndex ("c")); fs.add (dict.lookupIndex ("c")); fs.add (dict.lookupIndex ("a")); fs.add (dict.lookupIndex ("l")); fs.add (dict.lookupIndex ("l")); fs.add (dict.lookupIndex ("u")); fs.add (dict.lookupIndex ("m")); //System.out.println (fs.toString()); fv = new FeatureVector (fs); //System.out.println (fs.toString()); //System.out.println (fv.toString()); }
public void testEstimating () { Alphabet dict = new Alphabet (); Multinomial.Estimator e = new Multinomial.LaplaceEstimator (dict); FeatureSequence fs = new FeatureSequence (dict); fs.add (dict.lookupIndex ("a")); fs.add (dict.lookupIndex ("n")); fs.add (dict.lookupIndex ("d")); fs.add (dict.lookupIndex ("r")); fs.add (dict.lookupIndex ("e")); fs.add (dict.lookupIndex ("w")); fs.add (dict.lookupIndex ("m")); fs.add (dict.lookupIndex ("c")); fs.add (dict.lookupIndex ("c")); fs.add (dict.lookupIndex ("a")); fs.add (dict.lookupIndex ("l")); fs.add (dict.lookupIndex ("l")); fs.add (dict.lookupIndex ("u")); fs.add (dict.lookupIndex ("m")); //System.out.println (fs.toString()); e.increment (fs); assertTrue (e.size() == 10); Multinomial m = e.estimate (); assertTrue (m.size() == 10); assertTrue (m.probability (dict.lookupIndex ("a")) == (2.0+1)/(14.0+10)); assertTrue (m.probability ("w") == (1.0+1)/(14.0+10)); Multinomial.Logged ml = new Multinomial.Logged (m); assertTrue (m.logProbability ("w") == ml.logProbability ("w")); }
public Instance pipe (Instance carrier) { //Object in = carrier.getData(); Object target = carrier.getTarget(); if (target instanceof FeatureSequence) ; // Nothing to do else if (target instanceof TokenSequence) { TokenSequence ts = (TokenSequence) target; FeatureSequence fs = new FeatureSequence (getTargetAlphabet(), ts.size()); for (int i = 0; i < ts.size(); i++) fs.add (ts.getToken(i).getText()); carrier.setTarget(fs); } else { throw new IllegalArgumentException ("Unrecognized target type."); } return carrier; }
/**
 * Strips single-space tokens from the carrier's TokenSequence data and
 * labels every remaining token.
 * A kept token is labelled "start" when it is the first token seen or
 * directly follows a space token, and "notstart" otherwise. The carrier's
 * data becomes the kept tokens and its source becomes their concatenated
 * text; the label sequence becomes the target only when
 * isTargetProcessing() is true.
 *
 * @param carrier Instance whose data is a TokenSequence.
 * @return The same carrier object, modified in place.
 */
public Instance pipe(Instance carrier)
{
	TokenSequence input = (TokenSequence) carrier.getData();
	TokenSequence kept = new TokenSequence();
	FeatureSequence labels = new FeatureSequence(getTargetAlphabet());
	boolean afterSpace = true;
	StringBuffer joined = new StringBuffer();

	for (int i = 0; i < input.size(); i++) {
		Token token = input.getToken(i);
		if (token.getText().equals(" ")) {
			// Space tokens are dropped; they only mark the next token as a start.
			afterSpace = true;
			continue;
		}
		joined.append(token.getText());
		kept.add(token);
		if (afterSpace) {
			labels.add("start");
		} else {
			labels.add("notstart");
		}
		afterSpace = false;
	}

	if (isTargetProcessing()) {
		carrier.setTarget(labels);
	}
	carrier.setData(kept);
	carrier.setSource(joined.toString());
	return carrier;
}
/**
 * Removes tokens whose text is exactly " " from the carrier's data and
 * produces one label per surviving token: "start" for a token that opens
 * the sequence or follows a removed space, "notstart" for any other.
 * Afterwards the carrier holds the surviving tokens as data and their
 * concatenated text as source. The labels are installed as the target
 * only if isTargetProcessing() returns true.
 *
 * @param carrier Instance carrying a TokenSequence as its data.
 * @return The carrier that was passed in.
 */
public Instance pipe(Instance carrier)
{
	TokenSequence original = (TokenSequence) carrier.getData();
	TokenSequence survivors = new TokenSequence();
	FeatureSequence labelSeq = new FeatureSequence(getTargetAlphabet());
	boolean previousWasSpace = true;
	StringBuffer text = new StringBuffer();

	int index = 0;
	while (index < original.size()) {
		Token current = original.getToken(index);
		index++;
		if (!current.getText().equals(" ")) {
			text.append(current.getText());
			survivors.add(current);
			labelSeq.add(previousWasSpace ? "start" : "notstart");
			previousWasSpace = false;
		} else {
			previousWasSpace = true;
		}
	}

	if (isTargetProcessing()) {
		carrier.setTarget(labelSeq);
	}
	carrier.setData(survivors);
	carrier.setSource(text.toString());
	return carrier;
}
/**
 * Rewrites the carrier so that space tokens are gone and each remaining
 * token has a boundary label.
 * Tokens whose text equals " " are skipped. Every other token is kept,
 * its text is appended to the new source string, and it is labelled
 * "start" if no non-space token has been seen since the beginning or
 * since the last space, else "notstart".
 *
 * @param carrier Instance whose data is cast to TokenSequence.
 * @return The same carrier, with data and source replaced, and with the
 *         target replaced when isTargetProcessing() is true.
 */
public Instance pipe (Instance carrier)
{
	TokenSequence ts = (TokenSequence) carrier.getData();
	TokenSequence filtered = new TokenSequence();
	FeatureSequence boundaryLabels = new FeatureSequence(getTargetAlphabet());
	boolean startsSegment = true;
	StringBuffer source = new StringBuffer();

	for (int pos = 0; pos < ts.size(); pos++) {
		Token tok = ts.getToken(pos);
		boolean isSpace = tok.getText().equals(" ");
		if (isSpace) {
			startsSegment = true;
		} else {
			source.append (tok.getText());
			filtered.add (tok);
			String label = startsSegment ? "start" : "notstart";
			boundaryLabels.add (label);
			startsSegment = false;
		}
	}

	if (isTargetProcessing()) {
		carrier.setTarget(boundaryLabels);
	}
	carrier.setData(filtered);
	carrier.setSource(source.toString());
	return carrier;
}
/**
 * Filters out " " tokens and tags the rest as "start" or "notstart".
 * The first kept token, and any kept token that comes right after a
 * " " token, is tagged "start"; all others are tagged "notstart". The
 * carrier's data is set to the kept tokens and its source to the
 * concatenation of their text. The tag sequence is stored as the target
 * only when isTargetProcessing() holds.
 *
 * @param carrier Instance whose data must be a TokenSequence.
 * @return The carrier passed in, after modification.
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tokens = (TokenSequence) carrier.getData();
	TokenSequence nonSpaceTokens = new TokenSequence();
	FeatureSequence tags = new FeatureSequence(getTargetAlphabet());
	boolean spaceJustSeen = true;
	StringBuffer buffer = new StringBuffer();

	for (int n = 0; n < tokens.size(); n++) {
		Token t = tokens.getToken(n);
		if (t.getText().equals(" ")) {
			spaceJustSeen = true;
		} else {
			buffer.append (t.getText());
			nonSpaceTokens.add (t);
			// Pick the tag before clearing the flag for the next token.
			if (spaceJustSeen) {
				tags.add ("start");
				spaceJustSeen = false;
			} else {
				tags.add ("notstart");
			}
		}
	}

	if (isTargetProcessing()) {
		carrier.setTarget(tags);
	}
	carrier.setData(nonSpaceTokens);
	carrier.setSource(buffer.toString());
	return carrier;
}
/**
 * Replaces the carrier's TokenSequence data with a FeatureSequence over
 * the data alphabet, adding each token's text in order.
 *
 * @param carrier Instance whose data is cast to TokenSequence.
 * @return The same carrier, now holding the FeatureSequence as its data.
 */
public Instance pipe (Instance carrier)
{
	TokenSequence tokens = (TokenSequence) carrier.getData();
	FeatureSequence features =
		new FeatureSequence ((Alphabet) getDataAlphabet(), tokens.size());

	int position = 0;
	while (position < tokens.size()) {
		features.add (tokens.getToken(position).getText());
		position++;
	}

	carrier.setData(features);
	return carrier;
}
/**
 * Builds the sequence from a TokenSequence, recording a bigram index
 * alongside each token.
 * Each token's text is added through the superclass. For position i,
 * biFeatures[i] is the bigram dictionary's index for
 * previousText + "_" + currentText when a previous token exists and its
 * deletionMark property is null; otherwise it is -1.
 *
 * @param dict             Dictionary passed to the superclass constructor.
 * @param bigramDictionary Dictionary in which bigram strings are looked up.
 * @param ts               Tokens to add; its size determines the length of
 *                         biFeatures.
 */
public FeatureSequenceWithBigrams (Alphabet dict, Alphabet bigramDictionary, TokenSequence ts)
{
	super (dict, ts.size());
	final int tokenCount = ts.size();
	this.biDictionary = bigramDictionary;
	this.biFeatures = new int[tokenCount];

	Token previous = null;
	for (int i = 0; i < tokenCount; i++) {
		Token current = ts.getToken(i);
		super.add(current.getText());

		// No bigram for the first token or when the previous token
		// carries a deletion mark.
		boolean noBigram = previous == null || previous.getProperty(deletionMark) != null;
		biFeatures[i] = noBigram
			? -1
			: biDictionary.lookupIndex(previous.getText()+"_"+current.getText(), true);

		previous = current;
	}
}