/**
 * Builds a record from a table of strings, one row per field.
 *
 * <p>For each row, element 0 is looked up in {@code fieldAlph} to obtain the
 * field index; every remaining element is looked up in {@code valueAlph} and
 * added with value 1.0 to a fresh feature vector, which is then stored in
 * {@code field2values} under the field index.
 *
 * @param fieldAlph alphabet used to look up the first element of each row
 * @param valueAlph alphabet used to look up the remaining elements of each row
 * @param vals      rows of strings; each row must have at least one element
 */
public Record (Alphabet fieldAlph, Alphabet valueAlph, String[][] vals)
{
	this(fieldAlph, valueAlph);
	for (String[] row : vals) {
		AugmentableFeatureVector rowValues = new AugmentableFeatureVector(valueAlph, false);
		// Column 0 names the field, so the values start at column 1.
		for (int col = 1; col < row.length; col++) {
			rowValues.add(valueAlph.lookupIndex(row[col]), 1.0);
		}
		int fieldIndex = fieldAlph.lookupIndex(row[0]);
		field2values.put(fieldIndex, rowValues.toFeatureVector());
	}
}
/**
 * Convenience overload that forwards to the two-argument
 * {@code addTo(accumulator, 1.0)}.
 *
 * @param accumulator array handed unchanged to the two-argument overload
 */
public final void addTo (double[] accumulator)
{
	final double unit = 1.0;
	addTo (accumulator, unit);
}
public void addTo (AugmentableFeatureVector fv, double value, FeatureSelection fs) { // xxx This could be simplified for the special case of a FeatureConjunction with only one conjunct if (this.satisfiedBy (fv)) { index = fv.getAlphabet().lookupIndex (name); // Make sure that this feature is selected if (fs != null) fs.add (index); if (index >= 0 && fv.value(index) > 0) // Don't add features that are already there return; assert (index != -1); fv.add (index, value); } }
/**
 * Overwrites the value stored for {@code index}.
 *
 * <p>When an index array is present and {@code maxSortedIndex} shows the
 * entries are not fully sorted, they are sorted first. With an index array
 * the slot is found through {@code location(index)}; without one,
 * {@code index} addresses {@code values} directly.
 *
 * @param index feature index whose value is replaced
 * @param value new value to store
 */
public final void setValue (int index, double value)
{
	boolean needsSort = indices != null && maxSortedIndex != size - 1;
	if (needsSort) {
		sortIndices();
	}
	assert (values != null);

	if (indices != null) {
		// Index array present: translate the feature index into a storage location.
		values[location(index)] = value;
		return;
	}

	// No index array: the feature index is the storage location.
	assert (index < size);
	values[index] = value;
}
public final double dotProduct (AugmentableFeatureVector v) { if (indices != null && size-1 != maxSortedIndex) sortIndices(); if (v.indices != null && v.size-1 != v.maxSortedIndex) v.sortIndices(); double ret = 0; int vl = 0; int index = v.indexAtLocation(i); if (index < size) ret += v.valueAtLocation(i) * values[index];
private FeatureVector combineFv (String word, FeatureVector fv1, FeatureVector fv2) { // System.out.println("combineFv:"); // System.out.println("FV1 values "+fv1.getValues()+" indices "+fv1.getIndices()); // System.out.println("FV1: "+fv1.toString (true)); // System.out.println("FV2 values "+fv2.getValues()+" indices "+fv2.getIndices()); // System.out.println("FV2:"+fv2.toString (true)); Alphabet dict = fv1.getAlphabet (); AugmentableFeatureVector afv = new AugmentableFeatureVector (dict, true); if (wordFeaturesOnly) { int idx = dict.lookupIndex (word); afv.add (idx, 1.0); } else if (distinguishEndpts) { afv.add (fv1, "S:"); afv.add (fv2, "E:"); } else { afv.add (fv1); afv.add (fv2); } // System.out.println("AFV: "+afv.toString (true)); return afv; }
AugmentableFeatureVector afv = new AugmentableFeatureVector(new Alphabet(), 10000, false); for (int di = 0; di < topics.length; di++) { FeatureSequenceWithBigrams fs = (FeatureSequenceWithBigrams) ilist.get(di).getData(); while (grams[di][si] == 1 && --si >= 0) gramString = uniAlphabet.lookupObject(fs.getIndexAtPosition(si)).toString() + "_" + gramString; afv.add(gramString, 1.0); int numNgrams = afv.numLocations(); wp[loc] = new WordProb (afv.indexAtLocation(loc), afv.valueAtLocation(loc)); ngramSum += wp[loc].p; +" phrases "+Math.round(afv.oneNorm())+"/"+numNgrams); for (int i = 0; i < Math.min(numNgrams,numWords); i++) System.out.println (afv.getAlphabet().lookupObject(wp[i].wi).toString() + " " + wp[i].p/ngramSum); } else { System.out.print (" (unigrams "+numUnitypeTokens+"/"+numUnitypeTypes+" bigrams "+numBitypeTokens+"/"+numBitypeTypes +" phrases "+Math.round(afv.oneNorm())+"/"+numNgrams+")\n "); System.out.print (afv.getAlphabet().lookupObject(wp[i].wi).toString() + " "); System.out.println();
PrintStream pout = new PrintStream (bout); AugmentableFeatureVector titles = new AugmentableFeatureVector (new Alphabet()); word++; if (word < 20) // consider top 20 individual words as candidate titles titles.add(alphabet.lookupObject(info.getID()), info.getWeight()); titles.add(alph.lookupObject(fi), 100*values[fi]); // prefer phrases with a factor of 100 rfv = new RankedFeatureVector (titles.getAlphabet(), titles); int numTitles = 10; for (int ri = 0; ri < numTitles && ri < rfv.numLocations(); ri++) {
LabelVector lv = c.getLabelVector(); AugmentableFeatureVector afv1 = (AugmentableFeatureVector) inst.getData(); int[] indices = afv1.getIndices(); AugmentableFeatureVector afv2 = new AugmentableFeatureVector(m_dataAlphabet, indices, afv1.getValues(), indices.length + m_predRanks2add.length); afv2.add(idx, 1);
/**
 * Returns a new {@code AugmentableFeatureVector} built from this vector's
 * dictionary, index array, value array and size.
 *
 * <p>The capacity passed to the constructor is the length of the index
 * array when there is one. A vector without an index array
 * ({@code indices == null}) previously failed here with a
 * {@code NullPointerException}; its capacity is now taken from the value
 * array, or from {@code size} if that is absent too.
 *
 * @return the newly constructed vector
 */
public ConstantMatrix cloneMatrix ()
{
	final int capacity;
	if (indices != null) {
		capacity = indices.length;
	} else if (values != null) {
		capacity = values.length;
	} else {
		capacity = size;
	}
	// NOTE(review): the three trailing flags are presumably
	// copy / checkIndicesSorted / removeDuplicates -- confirm against the
	// constructor declaration.
	return new AugmentableFeatureVector ((Alphabet)dictionary,
			indices, values, capacity, size,
			true, false, false);
}
AugmentableFeatureVector afv = new AugmentableFeatureVector (dict, binary); afv.add (index, iter.getNumericValue()); SparseVector sv = afv.toSparseVector();
/**
 * Checks {@code add(FeatureVector, String)} when the target vector has its
 * own alphabet: the source alphabet must keep its four entries, the target
 * alphabet must gain exactly two, and the target must print the prefixed
 * names.
 */
public void testAddWithPrefix ()
{
	Alphabet source = new Alphabet ();
	for (String entry : new String[] { "ZERO", "ONE", "TWO", "THREE" }) {
		source.lookupIndex (entry);
	}
	FeatureVector selected = new FeatureVector (source, new int[] { 1, 3 });

	AugmentableFeatureVector prefixed = new AugmentableFeatureVector (new Alphabet (), true);
	prefixed.add (selected, "O:");

	assertEquals (4, source.size());
	assertEquals (2, prefixed.getAlphabet ().size());
	assertEquals ("O:ONE\nO:THREE\n", prefixed.toString ());
}
/**
 * Adds {@code index} by delegating to {@code add(index, 1.0)}.
 *
 * @param index feature index to add; asserted to be non-negative
 * @throws IllegalArgumentException if this vector has a value array
 *         ({@code values != null})
 */
public void add (int index)
{
	boolean realValued = (values != null);
	if (realValued) {
		throw new IllegalArgumentException ("Trying to add binary feature to real-valued vector");
	}
	assert (index >= 0);
	add (index, 1.0);
}
/**
 * Replaces the instance's data, which must be an {@code Alignment}, with a
 * feature vector.
 *
 * <p>A {@code SyllStructure} is built from the alignment, each entry of
 * {@code features} emits into a fresh vector over {@code alpha}, and the
 * result of {@code toFeatureVector()} becomes the instance's new data.
 *
 * @param inst instance whose data is an {@code Alignment}
 * @return the same instance, with its data replaced
 */
@Override
public Instance pipe(Instance inst) {
	final Alignment alignment = (Alignment) inst.getData();
	final SyllStructure syllables = new SyllStructure(alignment);
	final AugmentableFeatureVector collected = new AugmentableFeatureVector(alpha, true);

	for (StressFeature extractor : features) {
		extractor.emit(collected, alignment, syllables);
	}

	inst.setData(collected.toFeatureVector());
	return inst;
}
} // closes the enclosing class, whose header is outside this excerpt
/** * Adds all indices that are present in some other feature vector * with value 1.0. * Beware that this may have unintended effects if * <tt>fv.dictionary != this.dictionary</tt> */ public void add (FeatureVector fv) { for (int loc = 0; loc < fv.numLocations (); loc++) { int index = fv.indexAtLocation (loc); // mdredze@cs.jhu.edu 3/5/10 // use values, instead of assuming fv is binary double value = fv.valueAtLocation(loc); if (location (index) == -1) { //add (index, 1.0); add(index,value); } } }
public final double dotProduct (SparseVector v) { if (v instanceof AugmentableFeatureVector) return dotProduct((AugmentableFeatureVector)v); if (indices != null && size-1 != maxSortedIndex) sortIndices(); double ret = 0; int vl = 0;
int classIndex = labeling.getBestIndex(); if (!perClassFeaturesAlreadyThere[classIndex].contains (name)) { afv.add (name, 1.0); perClassNewFeatureSelection[classIndex].add (name); boolean featurePresent = afv.value (featureIndex) != 0; if (child0 != null && !featurePresent) child0.induceFeatures (afv, featuresAlreadyThere, perClassFeaturesAlreadyThere,
/**
 * Checks the dot product of a vector from {@code makeAfv({1, 3}, true)}
 * with the vector from {@code makeSparseVectorToN(5)}; the expected result
 * is 4.0.
 */
public void testDotProductBinaryToSV ()
{
	SparseVector other = makeSparseVectorToN (5);
	AugmentableFeatureVector binary = makeAfv (new int[] { 1, 3 }, true);

	double product = binary.dotProduct (other);
	assertEquals (4.0, product, 1e-5);

	// The result of this construction is discarded.
	// NOTE(review): looks like leftover code -- confirm it can be removed.
	new AugmentableFeatureVector (new Alphabet(), true);
}
/**
 * Checks dot products of two vectors built with {@code makeAfv(..., false)}
 * against the vector from {@code makeSparseVectorToN(7)}: indices
 * {1, 3} must give 4.0 and indices {2, 5} must give 7.0.
 */
public void testDotProductSparseASVToSV ()
{
	SparseVector other = makeSparseVectorToN (7);

	AugmentableFeatureVector first = makeAfv (new int[] { 1, 3 }, false);
	double firstProduct = first.dotProduct (other);
	assertEquals (4.0, firstProduct, 1e-5);

	AugmentableFeatureVector second = makeAfv (new int[] { 2, 5 }, false);
	double secondProduct = second.dotProduct (other);
	assertEquals (7.0, secondProduct, 1e-5);
}
AugmentableFeatureVector afv = new AugmentableFeatureVector(new Alphabet(), 10000, false); for (int di = 0; di < topics.length; di++) { FeatureSequenceWithBigrams fs = (FeatureSequenceWithBigrams) ilist.get(di).getData(); while (grams[di][si] == 1 && --si >= 0) gramString = uniAlphabet.lookupObject(fs.getIndexAtPosition(si)).toString() + "_" + gramString; afv.add(gramString, 1.0); int numNgrams = afv.numLocations(); wp[loc] = new WordProb (afv.indexAtLocation(loc), afv.valueAtLocation(loc)); ngramSum += wp[loc].p; +" phrases "+Math.round(afv.oneNorm())+"/"+numNgrams); for (int i = 0; i < Math.min(numNgrams,numWords); i++) System.out.println (afv.getAlphabet().lookupObject(wp[i].wi).toString() + " " + wp[i].p/ngramSum); } else { System.out.print (" (unigrams "+numUnitypeTokens+"/"+numUnitypeTypes+" bigrams "+numBitypeTokens+"/"+numBitypeTypes +" phrases "+Math.round(afv.oneNorm())+"/"+numNgrams+")\n "); System.out.print (afv.getAlphabet().lookupObject(wp[i].wi).toString() + " "); System.out.println();