/**
 * Returns the feature index stored at the given location.
 * Falls through to the location itself for dense vectors (no indices
 * array), and lazily re-sorts the indices when they are not fully sorted.
 */
public final int indexAtLocation (int location)
{
  // Dense representation: the location is the index.
  if (indices == null)
    return location;
  // Not everything is known-sorted yet; sort before delegating.
  if (maxSortedIndex != size - 1)
    sortIndices ();
  assert (location < size);
  return super.indexAtLocation (location);
}
/** * Promotes (by {@link #alpha alpha}) the weights * responsible for the incorrect guess * @param lpos index of incorrectly guessed label * @param fv feature vector */ private void promote(int lpos, FeatureVector fv){ int fvisize = fv.numLocations(); // learner predicted 0, correct is 1 -> promotion for(int fvi=0; fvi < fvisize; fvi++){ int fi = fv.indexAtLocation(fvi); this.weights[lpos][fi] *= this.alpha; } }
/**
 * Demotes (divides by {@link #beta beta}) the weights
 * responsible for the incorrect guess.
 * @param lpos index of incorrectly guessed label
 * @param fv feature vector
 */
private void demote(int lpos, FeatureVector fv){
  int fvisize = fv.numLocations();
  // learner predicted 1, correct is 0 -> demotion
  for(int fvi=0; fvi < fvisize; fvi++){
    int fi = fv.indexAtLocation(fvi);
    this.weights[lpos][fi] /= this.beta;
  }
}
}
/**
 * Records the confidence and correctness of an extracted entity and
 * reconstructs the entity's text from the "W=" (word) features of the
 * tokens spanning [start, end], inclusive.
 *
 * @param conf  confidence score for this entity
 * @param corr  whether the extraction was correct
 * @param input sequence of FeatureVectors, one per token (may be null)
 * @param start index of the entity's first token, inclusive
 * @param end   index of the entity's last token, inclusive
 */
public EntityConfidence (double conf, boolean corr, Sequence input, int start, int end)
{
  this.confidence = conf;
  this.correct = corr;
  // StringBuilder: this buffer is local, so StringBuffer's synchronization is wasted.
  StringBuilder buff = new StringBuilder();
  if (input != null) {
    for (int j = start; j <= end; j++) {
      FeatureVector fv = (FeatureVector) input.get(j);
      for (int k = 0; k < fv.numLocations(); k++) {
        String featureName = fv.getAlphabet().lookupObject (fv.indexAtLocation (k)).toString();
        // Word features look like "W=token"; skip conjunction features containing '@'.
        if (featureName.startsWith ("W=") && featureName.indexOf('@') == -1) {
          buff.append (featureName.substring (featureName.indexOf ('=') + 1)).append (' ');
        }
      }
    }
  }
  this.entity = buff.toString();
}

/** Returns the confidence score for this entity. */
public double confidence () { return confidence; }
/**
 * Returns true if this feature conjunction is satisfied by the given
 * feature vector: every unnegated feature of the conjunction must be
 * present in {@code fv} with a non-zero value, and every negated
 * feature must be absent (or zero).
 *
 * NOTE(review): the single forward-moving cursor {@code fvl} assumes
 * both {@code features} and {@code fv}'s locations are sorted in
 * ascending index order — confirm against the callers/constructors.
 *
 * @param fv feature vector to test; must share this conjunction's alphabet
 * @throws IllegalArgumentException if the alphabets differ
 */
public boolean satisfiedBy (FeatureVector fv)
{
  if (fv.getAlphabet() != dictionary)
    throw new IllegalArgumentException ("Vocabularies do not match.");
  int fvsize = fv.numLocations();
  int fvl = 0;  // cursor into fv's locations; never moves backward
  for (int fcl = 0; fcl < features.length; fcl++) {
    int fcli = features[fcl];
    // Advance the cursor to the first location at or past feature fcli.
    while (fvl < fvsize && fv.indexAtLocation(fvl) < fcli)
      fvl++;
    if (fvl < fvsize && fv.indexAtLocation(fvl) == fcli && fv.valueAtLocation(fvl) != 0) {
      // The fcli'th Feature of the FeatureConjunction is present in the FeatureVector
      if (negations != null && negations[fcl] == false)
        // but this Feature was negated in the FeatureConjunction, so not satisfied
        return false;
    } else if (negations == null || negations[fcl] == true)
      // The fcli'th Feature of the FeatureConjunction is not present in the FeatureVector
      // and this Feature was unnegated in the FeatureConjunction, so not satisfied
      return false;
  }
  return true;
}
/**
 * Adds {@code scale} to this counter for every feature location present
 * in {@code fv}.
 *
 * @param fv    feature vector whose features are incremented; must share
 *              this object's alphabet
 * @param scale amount added per feature
 * @throws IllegalArgumentException if the alphabets differ
 */
public void increment (FeatureVector fv, double scale)
{
  if (fv.getAlphabet() != dictionary)
    throw new IllegalArgumentException ("Vocabularies don't match.");
  // Hoist the loop-invariant numLocations() call out of the loop condition.
  int numLocs = fv.numLocations();
  for (int fvi = 0; fvi < numLocs; fvi++)
    increment (fv.indexAtLocation(fvi), scale);
}
/**
 * Folds {@code count} into the CRF's gathered statistics for this
 * transition's single weights index: records which weights are present
 * when gathering structure, otherwise adds {@code count} to either the
 * constraints or the expectations (plus the corresponding default value).
 *
 * NOTE(review): {@code gatheringWeightsPresent}/{@code gatheringConstraints}
 * are referenced unqualified here but as {@code crf.gatheringConstraints}
 * below — presumably they resolve to the same flags (fields of an inner
 * class or of crf); TODO confirm against the enclosing class.
 */
public void incrementCount (double count)
{
  //System.out.println ("incrementCount "+(gatheringConstraints?"constraints":"expectations")+" dest#="+source.index+" count="+count);
  assert (crf.trainable || gatheringWeightsPresent);
  int weightsIndex = source.weightsIndices[index];
  if (gatheringWeightsPresent) {
    if (gatheringConstraints || count >= 0.5)  // xxx This 0.5 is somewhat arbitrary!
      // When doing this without the true output labels, don't include everything
      for (int i = 0; i < input.numLocations(); i++)
        crf.weightsPresent[weightsIndex].set (input.indexAtLocation(i));
  } else if (crf.gatheringConstraints) {
    crf.constraints[weightsIndex].plusEqualsSparse (input, count);
    crf.defaultConstraints[weightsIndex] += count;
  } else {
    crf.expectations[weightsIndex].plusEqualsSparse (input, count);
    crf.defaultExpectations[weightsIndex] += count;
  }
}
/**
 * Emits one HTML table row listing, for each mismatched position in
 * [start, end), the features of that position's feature vector
 * (feature value shown only when it differs from 1.0).  Positions
 * where the sequences match get an empty cell.
 */
private static void outputFeatures (PrintWriter out, FeatureVectorSequence fvs, Sequence in, Sequence output, int start, int end)
{
  out.println (" <tr class=\"features\">\n<td class=\"label\">Features</td>");
  for (int pos = start; pos < end; pos++) {
    if (seqMatches (in, output, pos)) {
      out.println ("<td></td>");
      continue;
    }
    out.print ("<td>");
    FeatureVector fv = fvs.getFeatureVector (pos);
    int numLocs = fv.numLocations ();
    for (int loc = 0; loc < numLocs; loc++) {
      out.print (fv.getAlphabet ().lookupObject (fv.indexAtLocation (loc)));
      double value = fv.valueAtLocation (loc);
      if (value != 1.0) {
        out.print (" "+value);
      }
      out.println ("<br />");
    }
    out.println ("</td>");
  }
  out.println (" </tr>");
}
protected static void printSequence (PrintStream outf, String name, Sequence input, Sequence trueOutput, Sequence predOutput) { outf.println (name); // Print the sentence, one line per token, with the true/pred labels, the features, and the word for (int j = 0; j < input.size(); j++) { FeatureVector fv = (FeatureVector) input.get(j); char errIndicator = trueOutput.get(j).equals(predOutput.get(j)) ? ':' : '*'; outf.print (trueOutput.get(j).toString() + "/" + predOutput.get(j).toString() + errIndicator + ' ' ); String word = null; for (int k = 0; k < fv.numLocations(); k++) { String featureName = fv.getAlphabet().lookupObject(fv.indexAtLocation(k)).toString(); if (Pattern.matches ("^W=[^@]+$", featureName)) { word = featureName; } } if (word != null) outf.print ('['+word.substring(word.indexOf('=')+1)+']'+errIndicator+" "); for (int k = 0; k < fv.numLocations(); k++) { String featureName = fv.getAlphabet().lookupObject(fv.indexAtLocation(k)).toString(); outf.print (featureName + " "); } outf.println (""); } outf.println(); }
/**
 * Adds, with value 1.0, every index present in another feature vector
 * that this vector does not already contain.
 * Beware that this may have unintended effects if
 * <tt>fv.dictionary != this.dictionary</tt>
 */
public void add (FeatureVector fv)
{
  int n = fv.numLocations ();
  for (int i = 0; i < n; i++) {
    int idx = fv.indexAtLocation (i);
    // Only add indices we do not already have.
    if (location (idx) == -1)
      add (idx, 1.0);
  }
}
/**
 * Adds all features from some other feature vector with weight 1.0.
 * Each added feature is named by prepending {@code prefix} to its name
 * in the original vector, so the two vectors need not share a dictionary.
 *
 * @param fv A feature vector to add from. Its feature names must be Strings.
 * @param prefix String prepended when generating new feature names
 */
public void add (FeatureVector fv, String prefix)
{
  Alphabet otherDict = fv.getAlphabet ();
  int n = fv.numLocations ();
  for (int i = 0; i < n; i++) {
    String otherName = (String) otherDict.lookupObject (fv.indexAtLocation (i));
    add (prefix + otherName, 1.0);
  }
}
/**
 * Builds an inverted index over {@code ilist}: for every feature with a
 * non-zero value in an instance, records that instance under the feature.
 *
 * @throws IllegalArgumentException if any instance's data is not a FeatureVector
 */
public InvertedIndex (InstanceList ilist)
{
  ii = new ArrayList[ilist.getDataAlphabet().size()];
  for (int i = 0; i < ilist.size(); i++) {
    Instance inst = ilist.getInstance(i);
    Object data = inst.getData();
    if (!(data instanceof FeatureVector))
      throw new IllegalArgumentException (this.getClass().getName() + " currently only handles FeatureVector data");
    FeatureVector fv = (FeatureVector) data;
    int n = fv.numLocations();
    for (int loc = 0; loc < n; loc++) {
      // Skip explicitly-zero entries.
      if (fv.valueAtLocation(loc) != 0)
        addEntry (fv.indexAtLocation(loc), inst);
    }
  }
}
/**
 * Folds {@code count} into the CRF's gathered statistics for every weight
 * array tied to this transition: records which weights are present when
 * gathering structure, otherwise adds {@code count} to the constraints or
 * the expectations (plus the corresponding default value).
 *
 * NOTE(review): the inner {@code int index} shadows the outer {@code index}
 * used for {@code source.weightsIndices[index]}; harmless here because
 * {@code weightsIndex} is computed before the loop, but easy to misread.
 */
public void incrementCount (double count)
{
  //System.out.println ("incrementCount "+(gatheringConstraints?"constraints":"expectations")+" dest#="+source.index+" count="+count);
  assert (crf.trainable || crf.gatheringWeightsPresent);
  // Parameter tying: multiple weight arrays may back this transition.
  int nwi = source.weightsIndices[index].length;
  for (int wi = 0; wi < nwi; wi++) {
    int weightsIndex = source.weightsIndices[index][wi];
    if (crf.gatheringWeightsPresent) {
      if (crf.gatheringConstraints || count >= 0.2)  // xxx This 0.2 is somewhat arbitrary!
        // When doing this without the true output labels, don't include everything
        for (int i = 0; i < input.numLocations(); i++) {
          int index = input.indexAtLocation(i);  // shadows the outer 'index'
          // Honor both the global and per-weights feature selections, if any.
          if ((crf.globalFeatureSelection == null || crf.globalFeatureSelection.contains(index))
              && (crf.featureSelections == null
                  || crf.featureSelections[weightsIndex] == null
                  || crf.featureSelections[weightsIndex].contains(index)))
            crf.weightsPresent[weightsIndex].set (index);
        }
    } else if (crf.gatheringConstraints) {
      crf.constraints[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultConstraints[weightsIndex] += count;
    } else {
      crf.expectations[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultExpectations[weightsIndex] += count;
    }
  }
}
/**
 * Folds {@code count} into the CRF's gathered statistics for every weight
 * array tied to this transition: records which weights are present when
 * gathering structure, otherwise adds {@code count} to the constraints or
 * the expectations (plus the corresponding default value).
 *
 * NOTE(review): the inner {@code int index} shadows the outer {@code index}
 * used for {@code source.weightsIndices[index]}; harmless here because
 * {@code weightsIndex} is computed before the loop, but easy to misread.
 */
public void incrementCount (double count)
{
  //System.out.println ("incrementCount "+(gatheringConstraints?"constraints":"expectations")+" dest#="+source.index+" count="+count);
  assert (crf.trainable || crf.gatheringWeightsPresent);
  // Because of parameter tying there may be multiple "weight arrays" associated with a single transition.
  int nwi = source.weightsIndices[index].length;
  for (int wi = 0; wi < nwi; wi++) {
    int weightsIndex = source.weightsIndices[index][wi];
    if (crf.gatheringWeightsPresent) {
      if (crf.gatheringConstraints || count >= 0.2)  // xxx This 0.2 is somewhat arbitrary!
        // When doing this without the true output labels, don't include everything
        for (int i = 0; i < input.numLocations(); i++) {
          int index = input.indexAtLocation(i);  // shadows the outer 'index'
          // Honor both the global and per-weights feature selections, if any.
          if ((crf.globalFeatureSelection == null || crf.globalFeatureSelection.contains(index))
              && (crf.featureSelections == null
                  || crf.featureSelections[weightsIndex] == null
                  || crf.featureSelections[weightsIndex].contains(index)))
            crf.weightsPresent[weightsIndex].set (index);
        }
    } else if (crf.gatheringConstraints) {
      crf.constraints[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultConstraints[weightsIndex] += count;
    } else {
      crf.expectations[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultExpectations[weightsIndex] += count;
    }
  }
}
/**
 * Folds {@code count} into the CRF's gathered statistics for every weight
 * array tied to this transition: records which weights are present when
 * gathering structure, otherwise adds {@code count} to the constraints or
 * the expectations (plus the corresponding default value).
 *
 * NOTE(review): the inner {@code int index} shadows the outer {@code index}
 * used for {@code source.weightsIndices[index]}; harmless here because
 * {@code weightsIndex} is computed before the loop, but easy to misread.
 */
public void incrementCount (double count)
{
  //System.out.println ("incrementCount "+(gatheringConstraints?"constraints":"expectations")+" dest#="+source.index+" count="+count);
  assert (crf.trainable || crf.gatheringWeightsPresent);
  // Parameter tying: multiple weight arrays may back this transition.
  int nwi = source.weightsIndices[index].length;
  for (int wi = 0; wi < nwi; wi++) {
    int weightsIndex = source.weightsIndices[index][wi];
    if (crf.gatheringWeightsPresent) {
      if (crf.gatheringConstraints || count >= 0.2)  // xxx This 0.2 is somewhat arbitrary!
        // When doing this without the true output labels, don't include everything
        for (int i = 0; i < input.numLocations(); i++) {
          int index = input.indexAtLocation(i);  // shadows the outer 'index'
          // Honor both the global and per-weights feature selections, if any.
          if ((crf.globalFeatureSelection == null || crf.globalFeatureSelection.contains(index))
              && (crf.featureSelections == null
                  || crf.featureSelections[weightsIndex] == null
                  || crf.featureSelections[weightsIndex].contains(index)))
            crf.weightsPresent[weightsIndex].set (index);
        }
    } else if (crf.gatheringConstraints) {
      crf.constraints[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultConstraints[weightsIndex] += count;
    } else {
      crf.expectations[weightsIndex].plusEqualsSparse (input, count);
      crf.defaultExpectations[weightsIndex] += count;
    }
  }
}
/**
 * Sums, over all instances with non-zero weight, either the number of
 * instances containing each feature (when {@code countInstances} is set)
 * or the total feature value.
 *
 * @param ilist instances to count over; data must be FeatureVectors
 * @return per-feature counts, indexed by feature index
 * @throws IllegalArgumentException if any instance's data is not a FeatureVector
 */
private static double[] calcFeatureCounts (InstanceList ilist)
{
  // Removed unused locals (numInstances, numClasses, count) from the original.
  double[] counts = new double[ilist.getDataAlphabet().size()];
  for (int i = 0; i < ilist.size(); i++) {
    Instance inst = ilist.getInstance(i);
    if (!(inst.getData() instanceof FeatureVector))
      throw new IllegalArgumentException ("Currently only handles FeatureVector data");
    FeatureVector fv = (FeatureVector) inst.getData ();
    // Zero-weight instances contribute nothing.
    if (ilist.getInstanceWeight(i) == 0)
      continue;
    for (int j = 0; j < fv.numLocations(); j++) {
      int fi = fv.indexAtLocation(j);  // look up the index once
      counts[fi] += countInstances ? 1 : fv.valueAtLocation(j);
    }
  }
  return counts;
}
/**
 * Adds all features from some other feature vector, prefixing their names.
 * When {@code binary} is true the features are added with weight 1.0;
 * otherwise each feature keeps its value from {@code fv}.  The vectors
 * need not share a dictionary.
 *
 * @param fv A feature vector to add from. Its feature names must be Strings.
 * @param prefix String prepended when generating new feature names
 * @param binary true if <tt>fv</tt> is binary
 */
public void add (FeatureVector fv, String prefix, boolean binary)
{
  if (binary) {
    add (fv, prefix);
    return;
  }
  Alphabet otherDict = fv.getAlphabet ();
  int n = fv.numLocations ();
  for (int i = 0; i < n; i++) {
    String otherName = (String) otherDict.lookupObject (fv.indexAtLocation (i));
    add (prefix + otherName, fv.valueAtLocation (i));
  }
}
private static double[][] calcFeatureCounts (InstanceList ilist) { int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[][] featureCounts = new double[numClasses][numFeatures]; // Count features across all classes for (int i = 0; i < ilist.size(); i++) { Instance inst = ilist.getInstance(i); if (!(inst.getData() instanceof FeatureVector)) throw new IllegalArgumentException ("Currently only handles FeatureVector data"); FeatureVector fv = (FeatureVector) inst.getData (); // xxx Note that this ignores uncertain-labels. int labelIndex = inst.getLabeling ().getBestIndex(); int fli; for (int fl = 0; fl < fv.numLocations(); fl++) { fli = fv.indexAtLocation(fl); if (countInstances) featureCounts[labelIndex][fli]++; else featureCounts[labelIndex][fli] += fv.valueAtLocation(fl); } } return featureCounts; }
/** * Classifies an instance using Winnow's weights * @param instance an instance to be classified * @return an object containing the classifier's guess */ public Classification classify (Instance instance){ int numClasses = getLabelAlphabet().size(); double[] scores = new double[numClasses]; FeatureVector fv = (FeatureVector) instance.getData (this.instancePipe); // Make sure the feature vector's feature dictionary matches // what we are expecting from our data pipe (and thus our notion // of feature probabilities. assert (instancePipe == null || fv.getAlphabet () == this.instancePipe.getDataAlphabet ()); int fvisize = fv.numLocations(); // Set the scores by summing wi*xi for (int fvi = 0; fvi < fvisize; fvi++) { int fi = fv.indexAtLocation (fvi); for (int ci = 0; ci < numClasses; ci++) scores[ci] += this.weights[ci][fi]; } // Create and return a Classification object return new Classification (instance, this, new LabelVector (getLabelAlphabet(), scores)); } }
double labelWeightDiff = Math.abs(labelWeight - classifications[i].value(li)); for (int fl = 0; fl < fv.numLocations(); fl++) { fli = fv.indexAtLocation(fl); gradientgains[fli] += fv.valueAtLocation(fl) * labelWeightDiff * instanceWeight;