/**
 * Extracts the text stored for element {@code j} of {@code input}.
 *
 * <p>The element is cast to a {@code FeatureVector}; each of its indices is looked up in the
 * vector's alphabet. The first entry whose string form starts with {@code "text="} and does
 * not match the pattern {@code ^.+?\/[-\+]\d$} (a trailing slash, a sign and one digit)
 * supplies the result.
 *
 * @param input sequence whose {@code j}-th element is a {@code FeatureVector}
 * @param j     position within {@code input}
 * @return the entry's string with the leading {@code "text="} removed, or the empty string
 *         when no entry qualifies
 */
protected static String getText(@SuppressWarnings("rawtypes") Sequence input, int j) {
    FeatureVector vector = (FeatureVector) input.get(j);
    for (int featureIndex : vector.getIndices()) {
        String featureName = vector.getAlphabet().lookupObject(featureIndex).toString();
        if (!featureName.startsWith("text=")) {
            continue;
        }
        if (featureName.matches("^.+?\\/[-\\+]\\d$")) {
            continue;
        }
        // 5 == "text=".length()
        return featureName.substring(5);
    }
    return "";
}
}
/**
 * Builds a new {@code FeatureVector} over the same dictionary whose value array has the
 * same length as this vector's but is freshly allocated (and therefore all zeros).
 *
 * <p>When this vector has an index array, a copy of it is handed to the new vector, so the
 * array object itself is not shared. When {@code indices} is {@code null}, only the zeroed
 * value array is passed.
 *
 * @return the newly constructed vector
 */
public ConstantMatrix cloneMatrixZeroed () {
    assert (values != null);
    final double[] zeroedValues = new double[values.length];
    if (indices != null) {
        final int[] copiedIndices = indices.clone();
        return new FeatureVector (dictionary, copiedIndices, zeroedValues,
                                  values.length, values.length, false, false, false);
    }
    return new FeatureVector (dictionary, zeroedValues);
}
/**
 * Adds the contents of {@code fv} into {@code counts}, using each location's feature index
 * as the position in {@code counts}.
 *
 * <p>If {@code countInstances} is true every location contributes 1; otherwise it
 * contributes the value stored at that location.
 *
 * @param counts accumulator indexed by feature index; modified in place
 * @param fv     vector whose locations are accumulated
 */
private static void countVector(double[] counts, FeatureVector fv) {
    for (int loc = 0; loc < fv.numLocations(); loc++) {
        final int featureIndex = fv.indexAtLocation(loc);
        final double increment = countInstances ? 1 : fv.valueAtLocation(loc);
        counts[featureIndex] += increment;
    }
}
/**
 * Reports whether {@code entry} is present in this vector with a non-zero value.
 *
 * @param entry the object to look for
 * @return {@code true} only if {@code location(entry)} is non-negative and the value at
 *         that location is not zero
 */
public boolean contains (Object entry) {
    final int position = location(entry);
    if (position < 0) {
        return false;
    }
    return valueAtLocation(position) != 0;
}
/**
 * Sets, in {@code wp}, the bit for every feature index that occupies a location in
 * {@code fv}.
 *
 * @param wp bit set to update in place
 * @param fv vector whose feature indices are recorded
 */
private void addPresentFeatures (BitSet wp, FeatureVector fv) {
    final int numLocations = fv.numLocations ();
    for (int loc = 0; loc < numLocations; loc++) {
        wp.set (fv.indexAtLocation (loc));
    }
}
public void increment (FeatureVector fv, double scale) { if (fv.getAlphabet() != dictionary) throw new IllegalArgumentException ("Vocabularies don't match."); for (int fvi = 0; fvi < fv.numLocations(); fvi++) // Originally, the value of the feature was not being taken into account here, // so words were only counted once per document! - gdruck // increment (fv.indexAtLocation(fvi), scale); increment(fv.indexAtLocation(fvi), scale * fv.valueAtLocation(fvi)); }
buf.append (" d.p. = "+f.format (w.dotProduct (input))+"<br />\n"); double[] vals = new double[input.numLocations ()]; double[] absVals = new double[input.numLocations ()]; for (int k = 0; k < vals.length; k++) { int index = input.indexAtLocation (k); vals[k] = w.value (index) * input.value (index); absVals[k] = Math.abs (vals[k]); RankedFeatureVector rfv = new RankedFeatureVector (crf.inputAlphabet, input.getIndices (), absVals); for (int rank = 0; rank < absVals.length; rank++) { int fidx = rfv.getIndexAtRank (rank); Object fname = crf.inputAlphabet.lookupObject (input.indexAtLocation (fidx)); if (absVals[fidx] < cutoff) break; // Break looping over features if (vals[fidx] != 0) {
for (double count : multinomialValues.getValues()) { totalLength += count; Dirichlet.digamma(sumScores); for (int loc = 0; loc < features.numLocations(); loc++) { int index = features.indexAtLocation(loc); double value = features.valueAtLocation(loc); for (int labelLoc = 0; labelLoc <multinomialValues.numLocations(); labelLoc++) { int label = multinomialValues.indexAtLocation(labelLoc); double count = multinomialValues.valueAtLocation(labelLoc); for(int labelLoc = 0; labelLoc <multinomialValues.numLocations(); labelLoc++) { int label = multinomialValues.indexAtLocation(labelLoc); double count = multinomialValues.valueAtLocation(labelLoc);
for (int i = 0; i < multinomialValues.numLocations(); i++) { int label = multinomialValues.indexAtLocation(i); double count = multinomialValues.valueAtLocation(i); value += (Dirichlet.logGammaStirling(scores[label] + count) - Dirichlet.logGammaStirling(scores[label])); "has NaN value."); for (int label: multinomialValues.getIndices()) { logger.fine ("log(scores)= " + Math.log(scores[label]) + " scores = " + scores[label]);
/**
 * Serializes a single object without metadata.
 *
 * <p>A type tag character is written first, followed by the payload:
 * <ul>
 *   <li>{@code FeatureVector}: {@code TYPE_FEATURE_VECTOR}, then its index array, then its
 *       value array;</li>
 *   <li>{@code Label}: {@code TYPE_LABEL}, then the label's {@code toString()} text;</li>
 *   <li>anything else: {@code TYPE_OBJECT}, then the object itself.</li>
 * </ul>
 *
 * @param out stream to write to
 * @param obj object to serialize
 * @throws IOException if writing to {@code out} fails
 */
private void serializeObject (ObjectOutputStream out, Object obj) throws IOException {
    if (obj instanceof FeatureVector) {
        FeatureVector vector = (FeatureVector) obj;
        out.writeChar (TYPE_FEATURE_VECTOR);
        out.writeObject (vector.getIndices ());
        out.writeObject (vector.getValues ());
        return;
    }
    if (obj instanceof Label) {
        out.writeChar (TYPE_LABEL);
        out.writeObject (obj.toString ());
        return;
    }
    out.writeChar (TYPE_OBJECT);
    out.writeObject (obj);
}
/**
 * Creates a sample for position {@code index} of a sequence {@code instance}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>if the instance data is a {@code FeatureVectorSequence}, the feature vector at
 *       {@code index} is wrapped and stored under {@code INPUT};</li>
 *   <li>the instance target (cast to {@code LabelSequence}) is wrapped and stored under
 *       {@code Sample.TARGET};</li>
 *   <li>again only for {@code FeatureVectorSequence} data, the vector's entries are scanned
 *       from the last location to the first, and every alphabet entry that starts with
 *       {@code "W="} and does not end in a digit is passed (minus the prefix) to
 *       {@code setLabel}. Because each hit calls {@code setLabel} again, the hit at the
 *       lowest location is the one applied last.</li>
 * </ol>
 *
 * @param instance the instance providing data and target
 * @param index    position within the instance's sequences
 */
public Instance2Sample(Instance instance, int index) {
    this.instance = instance;
    this.index = index;

    final Object data = instance.getData();
    final FeatureVectorSequence vectors =
            (data instanceof FeatureVectorSequence) ? (FeatureVectorSequence) data : null;

    if (vectors != null) {
        put(INPUT, new MalletInputMatrix(vectors.getFeatureVector(index)));
    }

    LabelSequence labelSequence = (LabelSequence) instance.getTarget();
    put(Sample.TARGET, new MalletOutputMatrix(labelSequence, index));

    if (vectors == null) {
        return;
    }

    FeatureVector fv = vectors.getFeatureVector(index);
    for (int loc = fv.numLocations() - 1; loc != -1; loc--) {
        String word = (String) fv.getAlphabet().lookupObject(fv.getIndices()[loc]);
        boolean isWordFeature = word.startsWith("W=");
        if (isWordFeature && !word.matches(".*[0-9]$")) {
            // strip the two-character "W=" prefix
            setLabel(word.substring(2));
        }
    }
}
if (valsj != null) { comparisons++; for (int ii = 0; ii < valsi.numLocations(); ii++) { if (valsj.contains(valueAlph.lookupObject(valsi .indexAtLocation(ii)))) { matches++; break;
public void preProcess(FeatureVector fv) { cache.resetQuick(); int fi; // cache constrained input features for (int loc = 0; loc < fv.numLocations(); loc++) { fi = fv.indexAtLocation(loc); if (constraints.containsKey(fi)) { cache.add(fi); } } if (constraints.containsKey(fv.getAlphabet().size())) { cache.add(fv.getAlphabet().size()); } }
FeatureSequence topicSequence) { int[] possibleTopics = labels.getIndices(); int numLabels = labels.numLocations();
/**
 * Replaces the carrier's data with a new {@code FeatureVector} constructed from the
 * existing one and that vector's own alphabet.
 *
 * <p>The two trailing constructor arguments are passed as {@code null}.
 * NOTE(review): presumably these are optional feature selections; confirm against the
 * {@code FeatureVector} constructor.
 *
 * @param carrier instance whose data is a {@code FeatureVector}
 * @return the same {@code carrier}, with its data replaced
 */
public Instance pipe (Instance carrier) {
    final FeatureVector original = (FeatureVector) carrier.getData();
    final FeatureVector replacement =
            new FeatureVector (original, original.getAlphabet(), null, null);
    carrier.setData(replacement);
    return carrier;
}
public FeatureVectorSequence (FeatureVector[] featureVectors) { this.sequence = featureVectors; this.alphabet = featureVectors[0].getAlphabet(); }
int pos = 0; int[] indices = fv.getIndices(); int lastIdx = -1; for (int idx = 0; idx < size; idx++) { double v = fv.valueAtLocation(pos); if (v == 0.0) { throw new IllegalStateException("supposed to be sparse");
FeatureVector fvi = (FeatureVector)list.get(mergedIndices[i]).getData(); FeatureVector fvj = (FeatureVector)list.get(mergedIndices[j]).getData(); if (!(fvi.contains("feature0") && fvj.contains("feature0"))) { match = false; break; pl = PropertyList.add("NoMatch", 1.0, pl); FeatureVector fv = new FeatureVector ((Alphabet)getDataAlphabet(), pl, true); carrier.setData(fv);
/**
 * Converts a list of features into an {@code Instance} holding a {@code FeatureVector}.
 *
 * <p>The features are encoded with {@code featuresEncoder}; encoded entries whose name is
 * not contained in {@code alphabet} are removed from the encoded list. The remaining names
 * and numeric values are turned into a feature vector over {@code alphabet}.
 *
 * @param features the features to encode
 * @return an instance whose data is the resulting vector; the other three constructor
 *         arguments are {@code null}
 * @throws CleartkEncoderException declared by this method; presumably raised by the
 *         encoder (confirm against {@code encodeAll})
 */
public Instance toInstance(List<Feature> features) throws CleartkEncoderException {
    List<NameNumber> encoded = featuresEncoder.encodeAll(features);

    // discard entries the alphabet does not know about
    for (Iterator<NameNumber> it = encoded.iterator(); it.hasNext();) {
        if (!alphabet.contains(it.next().name)) {
            it.remove();
        }
    }

    final int count = encoded.size();
    String[] keys = new String[count];
    double[] values = new double[count];
    int slot = 0;
    for (NameNumber entry : encoded) {
        keys[slot] = entry.name;
        values[slot] = entry.number.doubleValue();
        slot++;
    }

    int[] keyIndices = FeatureVector.getObjectIndices(keys, alphabet, true);
    return new Instance(new FeatureVector(alphabet, keyIndices, values), null, null, null);
}
}
/**
 * Returns 1.0 if {@code column} is one of the indices of the wrapped feature vector,
 * otherwise 0.0. The {@code row} argument is not used.
 *
 * <p>Columns outside the non-negative {@code int} range yield 0.0. Previously such a value
 * was narrowed with a plain {@code (int)} cast, which wraps around and could match an
 * unrelated feature index.
 *
 * <p>NOTE(review): {@code Arrays.binarySearch} requires the index array to be sorted in
 * ascending order; confirm that {@code getIndices()} guarantees this.
 *
 * @param row    ignored
 * @param column feature index to test
 * @return 1.0 when the index is present, 0.0 otherwise
 */
public double getDouble(long row, long column) {
    if (column < 0 || column > Integer.MAX_VALUE) {
        // cannot be a valid int index; avoid wrap-around from the narrowing cast
        return 0.0;
    }
    final int[] presentIndices = featureVector.getIndices();
    return Arrays.binarySearch(presentIndices, (int) column) >= 0 ? 1.0 : 0.0;
}