/**
 * Draws a random sequence of feature indices from this multinomial.
 *
 * @param r      source of randomness
 * @param length number of positions to sample
 * @return a FeatureSequence of {@code length} independently sampled indices
 * @throws UnsupportedOperationException if this multinomial's dictionary is not an Alphabet
 */
public FeatureSequence randomFeatureSequence (Random r, int length)
{
	if (! (dictionary instanceof Alphabet))
		// Fixed grammar in the message ("a Alphabet" -> "an Alphabet").
		throw new UnsupportedOperationException ("Multinomial's dictionary must be an Alphabet");
	FeatureSequence fs = new FeatureSequence ((Alphabet)dictionary, length);
	while (length-- > 0)
		fs.add (randomIndex (r));
	return fs;
}
/**
 * Builds an (optionally binary) augmentable feature vector by counting
 * occurrences of each feature index in the given sequence.
 *
 * @param fs     sequence whose alphabet and feature occurrences are used
 * @param binary whether the resulting vector is binary
 */
public AugmentableFeatureVector (FeatureSequence fs, boolean binary)
{
	this (fs.getAlphabet(), binary);
	// Walk the sequence back-to-front, adding weight 1.0 per occurrence.
	for (int pos = fs.size() - 1; pos >= 0; pos--)
		add (fs.getIndexAtPosition (pos), 1.0);
}
// NOTE(review): fragment — `si`, `oneDocTokens`, and `oneDocTopics` are declared in an
// enclosing scope not visible here; presumably the body of a per-token Gibbs-sampling
// loop over one document — confirm against the full method.
int type, oldTopic, newTopic;
double topicWeightsSum;
int docLen = oneDocTokens.getLength();
double tw;
// The word type at position si and the topic currently assigned to it.
type = oneDocTokens.getIndexAtPosition(si);
oldTopic = oneDocTopics[si];
/**
 * Builds a (possibly binary) FeatureVector from the sorted feature-index
 * sequence of {@code fs}, sharing its alphabet.
 *
 * @param fs     source of feature indices and the dictionary
 * @param binary whether the resulting vector is binary
 */
public FeatureVector (FeatureSequence fs, boolean binary)
{
	// super(...) must be the first statement; duplicates are removed by the
	// sorted-index conversion.
	super (fs.toSortedFeatureIndexSequence(), false, false, true, binary);
	dictionary = (Alphabet) fs.getAlphabet();
}
/**
 * Checks that a FeatureSequence tracks additions correctly: size after four
 * distinct words, per-feature weights, and that a repeated word reuses its
 * original alphabet index.
 */
public void testNewPutSizeFreeze ()
{
	Alphabet dict = new Alphabet ();
	FeatureSequence fs = new FeatureSequence (dict, 10);
	String[] words = { "apple", "bear", "car", "door" };
	for (int w = 0; w < words.length; w++)
		fs.add (dict.lookupIndex (words[w]));
	assertTrue (fs.size() == 4);
	double[] weights = new double[4];
	fs.addFeatureWeightsTo (weights);
	assertTrue (weights[1] == 1.0);
	// Adding "bear" again must reuse index 1, not allocate a new entry.
	fs.add (dict.lookupIndex ("bear"));
	int[] feats = fs.toFeatureIndexSequence();
	int[] expected = { 0, 1, 2, 3, 1 };
	for (int i = 0; i < expected.length; i++)
		assertTrue (feats[i] == expected[i]);
}
/**
 * Scans the target label sequences of the training set and records which
 * label-to-label transitions actually occur.
 *
 * @param trainingSet instances whose targets are FeatureSequences of labels
 * @return matrix where [i][j] is true iff label i is ever immediately followed by label j
 */
private boolean[][] labelConnectionsIn (InstanceList trainingSet)
{
	int numLabels = outputAlphabet.size();
	boolean[][] connections = new boolean[numLabels][numLabels];
	for (int inst = 0; inst < trainingSet.size(); inst++) {
		FeatureSequence labels = (FeatureSequence) trainingSet.getInstance(inst).getTarget();
		// Start at 1 so each position is paired with its predecessor.
		for (int pos = 1; pos < labels.size(); pos++) {
			int from = outputAlphabet.lookupIndex (labels.get(pos - 1));
			int to = outputAlphabet.lookupIndex (labels.get(pos));
			assert (from >= 0 && to >= 0);
			connections[from][to] = true;
		}
	}
	return connections;
}
// NOTE(review): fragment — the opening of the FeatureVector[] initializer and the
// declarations of `fvs` and `crf` lie outside this view.
		new FeatureVector((Alphabet) crf.getInputAlphabet(), new int[]{1, 2, 3}, new double[]{1, 1, 1}),
	});
// Build a 4-step label sequence over the CRF's output alphabet and add a single
// instance pairing it with `fvs` (the null argument is presumably the Pipe —
// confirm against the InstanceList API).
FeatureSequence ss = new FeatureSequence(crf.getOutputAlphabet(), new int[]{0, 1, 2, 3});
InstanceList ilist = new InstanceList(null);
ilist.add(fvs, ss, null, null);
public FeatureSequence (Alphabet dict, int[] features, int len) { this(dict, len); for (int i = 0; i < len; i++) add(features[i]); }
/**
 * Samples a random TokenSequence of the given length by drawing a random
 * FeatureSequence and copying each position's object into a TokenSequence.
 *
 * @param r      source of randomness
 * @param length number of tokens to sample
 * @return the sampled token sequence
 */
public TokenSequence randomTokenSequence (Random r, int length)
{
	FeatureSequence drawn = randomFeatureSequence (r, length);
	TokenSequence result = new TokenSequence (length);
	for (int pos = 0; pos < length; pos++)
		result.add (drawn.getObjectAtPosition (pos));
	return result;
}
/**
 * Iterator over the transitions leaving {@code source} at the given input position.
 * Each transition's cost is the negative log-probability of (emission of the current
 * input symbol) plus (transition to the destination); transitions whose label does
 * not match the requested {@code output} are blocked with INFINITE_COST.
 */
public TransitionIterator (State source, FeatureSequence inputSeq, int inputPosition, String output, HMM hmm)
{
	this.source = source;
	this.hmm = hmm;
	this.input = inputSeq;
	this.inputPos = inputPosition;
	this.costs = new double[source.destinations.length];
	for (int transIndex = 0; transIndex < source.destinations.length; transIndex++) {
		// A null output means "any label is allowed".
		if (output == null || output.equals(source.labels[transIndex])) {
			costs[transIndex] = 0;
			// xxx should this be emission of the _next_ observation?
			// double logEmissionProb = hmm.emissionMultinomial[source.getIndex()].logProbability (inputSeq.get (inputPosition));
			// NOTE(review): emissionMultinomial is indexed by transIndex (the slot in
			// source.destinations), while the commented-out line indexes it by a state
			// index — if emissionMultinomial is per-state, this looks wrong; confirm
			// which indexing is intended.
			double logEmissionProb = hmm.emissionMultinomial[transIndex].logProbability (inputSeq.get (inputPosition));
			double logTransitionProb = hmm.transitionMultinomial[source.getIndex()].logProbability (source.destinationNames[transIndex]);
			// cost = -logProbability
			costs[transIndex] -= (logEmissionProb + logTransitionProb);
			assert (!Double.isNaN(costs[transIndex]));
		}
		else
			costs[transIndex] = INFINITE_COST;
	}
	// Position nextIndex on the first transition that is not blocked.
	nextIndex = 0;
	while (nextIndex < source.destinations.length && costs[nextIndex] == INFINITE_COST)
		nextIndex++;
}
/**
 * Custom deserialization of the sampler state. The read order must exactly mirror
 * the corresponding writeObject; do not reorder these reads.
 *
 * @throws IOException            on stream failure
 * @throws ClassNotFoundException if a serialized class is missing
 */
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException
{
	int featuresLength; // NOTE(review): declared but never used in this method
	int version = in.readInt (); // serialization version — read but not validated here
	ilist = (InstanceList) in.readObject ();
	numTopics = in.readInt();
	alpha = in.readDouble();
	beta = in.readDouble();
	tAlpha = in.readDouble();
	vBeta = in.readDouble();
	int numDocs = ilist.size();
	// Per-document, per-position topic assignments.
	topics = new int[numDocs][];
	for (int di = 0; di < ilist.size(); di++) {
		int docLen = ((FeatureSequence)ilist.getInstance(di).getData()).getLength();
		topics[di] = new int[docLen];
		for (int si = 0; si < docLen; si++)
			topics[di][si] = in.readInt();
	}
	// Document x topic count matrix.
	docTopicCounts = new int[numDocs][numTopics];
	for (int di = 0; di < ilist.size(); di++)
		for (int ti = 0; ti < numTopics; ti++)
			docTopicCounts[di][ti] = in.readInt();
	// Word-type x topic count matrix.
	int numTypes = ilist.getDataAlphabet().size();
	typeTopicCounts = new int[numTypes][numTopics];
	for (int fi = 0; fi < numTypes; fi++)
		for (int ti = 0; ti < numTopics; ti++)
			typeTopicCounts[fi][ti] = in.readInt();
	// Total number of tokens currently assigned to each topic.
	tokensPerTopic = new int[numTopics];
	for (int ti = 0; ti < numTopics; ti++)
		tokensPerTopic[ti] = in.readInt();
}
/**
 * Appends a feature index to the sequence, growing the backing array if needed.
 *
 * @param featureIndex index into this sequence's dictionary;
 *                     must be in [0, dictionary.size())
 */
public void add (int featureIndex)
{
	growIfNecessary ();
	// Also reject negative indices, not only too-large ones.
	assert (featureIndex >= 0 && featureIndex < dictionary.size());
	features[length++] = featureIndex;
}
/**
 * Writes the sampler state, one line per token:
 * document index, position, word-type index, word type, and assigned topic.
 *
 * @param pw destination writer; not closed by this method
 */
public void printState (PrintWriter pw)
{
	Alphabet alphabet = ilist.getDataAlphabet();
	pw.println ("#doc pos typeindex type topic");
	for (int di = 0; di < topics.length; di++) {
		FeatureSequence fs = (FeatureSequence) ilist.getInstance(di).getData();
		for (int si = 0; si < topics[di].length; si++) {
			int type = fs.getIndexAtPosition(si);
			// Single space-separated line per token (same output as print-by-field).
			pw.println (di + " " + si + " " + type + " "
					+ alphabet.lookupObject(type) + " " + topics[di][si]);
		}
	}
}
/**
 * Records which label bigrams (consecutive label pairs) occur in the target
 * sequences of the training set.
 *
 * @param trainingSet instances whose targets are FeatureSequences of labels
 * @return matrix where [i][j] is true iff label i is ever immediately followed by label j
 */
private boolean[][] labelConnectionsIn (InstanceList trainingSet)
{
	int n = outputAlphabet.size();
	boolean[][] seen = new boolean[n][n];
	for (int ii = 0; ii < trainingSet.size(); ii++) {
		Instance inst = trainingSet.getInstance(ii);
		FeatureSequence target = (FeatureSequence) inst.getTarget();
		for (int t = 1; t < target.size(); t++) {
			int prev = outputAlphabet.lookupIndex (target.get(t - 1));
			int curr = outputAlphabet.lookupIndex (target.get(t));
			assert (prev >= 0 && curr >= 0);
			seen[prev][curr] = true;
		}
	}
	return seen;
}
// NOTE(review): fragment — the opening of the FeatureVector[] initializer and the
// declarations of `fvs` and `crf` are outside this view.
		new FeatureVector ((Alphabet) crf.getInputAlphabet(), new int[] {1,2,3}, new double[] {1,1,1}),
	});
// Build a 4-step label sequence over the CRF's output alphabet and add a single
// instance pairing it with `fvs` (the null argument is presumably the Pipe —
// confirm against the InstanceList API).
FeatureSequence ss = new FeatureSequence (crf.getOutputAlphabet(), new int[] {0,1,2,3});
InstanceList ilist = new InstanceList(null);
ilist.add (fvs, ss, null, null);
/**
 * Creates a FeatureSequence from a complete array of feature indices.
 *
 * @param dict     a dictionary that maps objects in the sequence to numeric indices
 * @param features an array where features[i] gives the index in dict of the
 *                 i-th element of the sequence
 */
public FeatureSequence (Alphabet dict, int[] features)
{
	this (dict, features.length);
	// Append every entry of the array in order.
	for (int fi = 0; fi < features.length; fi++)
		add (features[fi]);
}
	// NOTE(review): fragment — the enclosing method and the loop providing `sb`,
	// `i`, and `carrier` are outside this view.
		sb.append (' ');
	}
	// If the instance's target is a FeatureSequence, append the string form of the
	// object at position i, followed by a space.
	if (carrier.getTarget() instanceof FeatureSequence) {
		sb.append (((FeatureSequence)carrier.getTarget()).getObjectAtPosition(i).toString());
		sb.append (' ');
/**
 * Increments the estimators for this transition: the emission of the current
 * input symbol and the source-to-destination transition count.
 * NOTE(review): the {@code count} parameter is ignored — both estimators are
 * incremented by a fixed 1.0; confirm whether weighted counts were intended.
 */
public void incrementCount (double count)
{
	// xxx ?? want way to increment observation count and transition count separately
	// if (inputPos == 0) {
	// System.err.println ("Initial increment for " + source.destinationNames[index]);
	// hmm.initialEstimator.increment (source.destinationNames[index], 1.0);
	// }
	// else {
	//System.err.println ("Incrementing count for emission " + input.get (inputPos) + " from state " + source.getName() + " -> " + source.destinationNames[index]);
	// hmm.emissionEstimator[source.getIndex()].increment (hmm.inputAlphabet.lookupIndex (input.get (inputPos), false), 1.0);
	// NOTE(review): emissionEstimator is indexed by `index` (the destination slot),
	// while the commented-out alternative indexes by source state — verify which is intended.
	hmm.emissionEstimator[index].increment (hmm.inputAlphabet.lookupIndex (input.get (inputPos), false), 1.0);
	hmm.transitionEstimator[source.getIndex()].increment (source.destinationNames[index], 1.0);
	// }
}
// NOTE(review): this fragment appears corrupted or mis-merged: `docLen` is declared
// twice in the same scope, and the first inner `for (int si ...)` has a bare local
// declaration as its only "statement", which is not legal Java. The apparent intent
// was two parallel per-document loops filling topics[di] and grams[di]; reconstruct
// this region from version control before trusting it.
grams = new int[numDocs][];
for (int di = 0; di < ilist.size(); di++) {
	int docLen = ((FeatureSequence)ilist.getInstance(di).getData()).getLength();
	topics[di] = new int[docLen];
	for (int si = 0; si < docLen; si++)
		int docLen = ((FeatureSequence)ilist.getInstance(di).getData()).getLength();
	grams[di] = new int[docLen];
	for (int si = 0; si < docLen; si++)
/**
 * Converts this token sequence into a FeatureSequence over {@code dict},
 * one index per token's text (looked up via {@code dict.lookupIndex}).
 *
 * @param dict dictionary used to map token text to indices
 * @return the resulting feature sequence
 */
public FeatureSequence toFeatureSequence (Alphabet dict)
{
	int n = tokens.size();
	FeatureSequence fs = new FeatureSequence (dict, n);
	for (int i = 0; i < n; i++) {
		Token tok = (Token) tokens.get(i);
		fs.add (dict.lookupIndex (tok.getText()));
	}
	return fs;
}