/**
 * Draws {@code length} feature indices from this multinomial and packs them
 * into a new FeatureSequence backed by this multinomial's dictionary.
 *
 * @param r      source of randomness used by {@code randomIndex}
 * @param length number of samples to draw
 * @return a FeatureSequence of exactly {@code length} sampled indices
 * @throws UnsupportedOperationException if the dictionary is not an Alphabet
 */
public FeatureSequence randomFeatureSequence (Randoms r, int length) {
	if (!(dictionary instanceof Alphabet))
		throw new UnsupportedOperationException ("Multinomial's dictionary must be a Alphabet");
	FeatureSequence fs = new FeatureSequence ((Alphabet) dictionary, length);
	// Draw one index per position; sampling is delegated to randomIndex.
	for (int remaining = length; remaining > 0; remaining--)
		fs.add (randomIndex (r));
	return fs;
}
/**
 * Builds the observed label-transition matrix for a training set:
 * cell [from][to] is true iff some instance's target sequence contains
 * label {@code from} immediately followed by label {@code to}.
 *
 * @param trainingSet instances whose targets are FeatureSequences of labels
 * @return a numLabels x numLabels adjacency matrix of observed transitions
 */
private boolean[][] labelConnectionsIn(InstanceList trainingSet) {
	int numLabels = outputAlphabet.size();
	boolean[][] seen = new boolean[numLabels][numLabels];
	for (Instance inst : trainingSet) {
		FeatureSequence labels = (FeatureSequence) inst.getTarget();
		// Walk consecutive label pairs (pos-1, pos).
		for (int pos = 1; pos < labels.size(); pos++) {
			int from = outputAlphabet.lookupIndex(labels.get(pos - 1));
			int to = outputAlphabet.lookupIndex(labels.get(pos));
			assert (from >= 0 && to >= 0);
			seen[from][to] = true;
		}
	}
	return seen;
}
public AugmentableFeatureVector (FeatureSequence fs, boolean binary) { this (fs.getAlphabet(), binary); for (int i = fs.size()-1; i >= 0; i--) add (fs.getIndexAtPosition(i), 1.0); }
/**
 * Builds a FeatureVector from a FeatureSequence via its sorted feature-index
 * form, then adopts the sequence's alphabet as this vector's dictionary.
 * NOTE(review): the three boolean flags passed to super are positional
 * (false, false, true) — presumably copy/sorted/duplicate-handling options
 * of the superclass constructor; confirm against the superclass signature.
 *
 * @param fs     source sequence of feature indices
 * @param binary whether the resulting vector is binary-valued
 */
public FeatureVector (FeatureSequence fs, boolean binary) { super (fs.toSortedFeatureIndexSequence(), false, false, true, binary); this.dictionary = (Alphabet) fs.getAlphabet(); }
new FeatureSequence ((Alphabet)getDataAlphabet()); int i,j, curLen; curLen=fseq.getLength(); ret.add(fseq.getObjectAtPosition(i)); for(i = 0; i < curLen-1; i++) { for(j = i + 1; j < curLen; j++) { pre = fseq.getIndexAtPosition(i); cur = fseq.getIndexAtPosition(j); coO = pre + "_" + cur; ret.add(coO);
/**
 * Exercises FeatureSequence growth: adds four distinct words, checks size
 * and per-feature weights, then re-adds a word and verifies the raw
 * feature-index sequence records every occurrence in order.
 */
public void testNewPutSizeFreeze () {
	Alphabet dict = new Alphabet ();
	FeatureSequence fs = new FeatureSequence (dict, 10);
	String[] words = { "apple", "bear", "car", "door" };
	for (String word : words)
		fs.add (dict.lookupIndex (word));
	assertTrue (fs.size() == 4);
	double[] weights = new double[4];
	fs.addFeatureWeightsTo (weights);
	// "bear" occurred once so far.
	assertTrue (weights[1] == 1.0);
	// A repeated word appends a new position rather than being deduplicated.
	fs.add (dict.lookupIndex ("bear"));
	int[] feats = fs.toFeatureIndexSequence();
	int[] expected = { 0, 1, 2, 3, 1 };
	for (int i = 0; i < expected.length; i++)
		assertTrue (feats[i] == expected[i]);
}
FeatureSequence topicSequence = (FeatureSequence) document.topicSequence; int[] topics = topicSequence.getFeatures(); for (int position = 0; position < tokens.size(); position++) { int type = tokens.getIndexAtPosition(position);
/**
 * Tallies per-type word counts over all instances, then builds a cumulative
 * sampling distribution proportional to count^0.75 (the standard negative-
 * sampling exponent) and fills a lookup table mapping table slots to words.
 *
 * @param instances corpus whose data are FeatureSequences of word indices
 */
public void countWords(InstanceList instances) {
	for (Instance instance : instances) {
		FeatureSequence tokens = (FeatureSequence) instance.getData();
		int length = tokens.getLength();
		for (int position = 0; position < length; position++) {
			int type = tokens.getIndexAtPosition(position);
			wordCounts[type]++;
		}
		totalWords += length;
	}
	// BUG FIX: was "1.0f / totalWords", which performed the division in
	// float precision before widening to double; use a double literal so
	// the normalizer keeps full precision.
	double normalizer = 1.0 / totalWords;
	samplingDistribution[0] = Math.pow(normalizer * wordCounts[0], 0.75);
	// Cumulative sums of count^0.75 give an unnormalized CDF over words.
	for (int word = 1; word < numWords; word++) {
		samplingDistribution[word] = samplingDistribution[word-1] + Math.pow(normalizer * wordCounts[word], 0.75);
	}
	samplingSum = samplingDistribution[numWords-1];
	// Invert the CDF into a fixed-size table for O(1) sampling: slot i maps
	// to the first word whose cumulative mass covers fraction i/tableSize.
	int word = 0;
	for (int i = 0; i < samplingTableSize; i++) {
		while (samplingSum * i / samplingTableSize > samplingDistribution[word]) {
			word++;
		}
		samplingTable[i] = word;
	}
	System.out.println("done counting");
}
FeatureSequence fs = (FeatureSequence) instance.getData(); fs.addFeatureWeightsTo(counts); FeatureSequence fs = (FeatureSequence) instance.getData(); fs.prune(newAlphabet); for (int i = 0; i < indices.length; i++) indices[i] = ((Integer)seq.get(i)).intValue(); FeatureSequence fs = new FeatureSequence (alpha, indices); instance.unLock(); for (int i = 0; i < indices.length; i++) indices[i] = ((Integer)seq.get(i)).intValue(); FeatureSequence fs = new FeatureSequence (alpha, indices); instance.unLock();
protected void sampleTopicsForOneDoc (FeatureSequence tokenSequence, FeatureSequence topicSequence) { int[] oneDocTopics = topicSequence.getFeatures(); int docLength = tokenSequence.getLength(); type = tokenSequence.getIndexAtPosition(position); oldTopic = oneDocTopics[position];
/**
 * Counts every feature occurrence of the instance's FeatureSequence data
 * into {@code counter} and passes the instance through unchanged.
 *
 * @param instance instance whose data must be a FeatureSequence
 * @return the same instance, unmodified
 * @throws IllegalArgumentException if the data is not a FeatureSequence
 */
public Instance pipe(Instance instance) {
	Object data = instance.getData();
	// Guard clause: anything but a FeatureSequence is a caller error.
	if (!(data instanceof FeatureSequence)) {
		throw new IllegalArgumentException("Looking for a FeatureSequence, found a " + instance.getData().getClass());
	}
	FeatureSequence features = (FeatureSequence) data;
	int size = features.size();
	for (int pos = 0; pos < size; pos++) {
		counter.increment(features.getIndexAtPosition(pos));
	}
	return instance;
}
this.inputSequence = inputSeq; this.inputFeature = new Integer(inputSequence .getIndexAtPosition(inputPosition)); this.inputPos = inputPosition; this.weights = new double[source.destinations.length]; .logProbability(inputSeq.get(inputPosition)); double logTransitionProb = hmm.transitionMultinomial[source .getIndex()]
LabelAlphabet dict = (LabelAlphabet) getTargetAlphabet (); FeatureSequence fs = (FeatureSequence) target; Label[] lbls = new Label[fs.size()]; for (int i = 0; i < fs.size (); i++) { lbls[i] = dict.lookupLabel (fs.getObjectAtPosition (i));
public FeatureSequence (Alphabet dict, int[] features, int len) { this(dict, len); for (int i = 0; i < len; i++) add(features[i]); }
/**
 * Samples a random FeatureSequence and converts it to a TokenSequence by
 * stringifying the object at each position.
 *
 * @param r      source of randomness
 * @param length number of tokens to generate
 * @return a TokenSequence of {@code length} sampled tokens
 */
public TokenSequence randomTokenSequence (Randoms r, int length) {
	FeatureSequence sampled = randomFeatureSequence (r, length);
	TokenSequence tokens = new TokenSequence (length);
	for (int pos = 0; pos < length; pos++) {
		tokens.add (sampled.getObjectAtPosition(pos).toString());
	}
	return tokens;
}
/**
 * Returns this object's single alphabet wrapped in a one-element array,
 * as required by the multi-alphabet accessor convention.
 *
 * @return a fresh one-element array containing {@code getAlphabet()}
 */
public Alphabet[] getAlphabets() {
	Alphabet[] alphabets = { getAlphabet() };
	return alphabets;
}
int[] features = ((FeatureSequence) data).getFeatures(); insertStatement.setBytes(5, intArrayToByteArray(features)); insertStatement.setInt(6, FEATURE_SEQUENCE);
/**
 * Writes the sampler state, one token per line, in the format
 * "doc position typeIndex typeObject topic" after a header row.
 *
 * @param pw destination writer (not closed by this method)
 */
public void printState (PrintWriter pw) {
	Alphabet alphabet = ilist.getDataAlphabet();
	pw.println ("#doc pos typeindex type topic");
	for (int doc = 0; doc < topics.length; doc++) {
		FeatureSequence tokens = (FeatureSequence) ilist.get(doc).getData();
		for (int pos = 0; pos < topics[doc].length; pos++) {
			int type = tokens.getIndexAtPosition(pos);
			// Single println of the concatenated row — same bytes as the
			// original sequence of print(' ') calls.
			pw.println (doc + " " + pos + " " + type + " " + alphabet.lookupObject(type) + " " + topics[doc][pos]);
		}
	}
}
/**
 * Appends a feature index to the sequence, growing the backing array if
 * needed.
 *
 * @param featureIndex index into the dictionary; must be in
 *                     [0, dictionary.size())
 */
public void add (int featureIndex) {
	growIfNecessary ();
	// BUG FIX: the original assert checked only the upper bound, so a
	// negative index passed the assertion and was stored silently.
	assert (featureIndex >= 0 && featureIndex < dictionary.size());
	features[length++] = featureIndex;
}
/**
 * Gather statistics on the size of documents
 * and create histograms for use in Dirichlet hyperparameter
 * optimization.
 */
private void initializeHistograms() {
	int maxTokens = 0;
	totalTokens = 0;
	// One pass over all documents: track the longest and the total.
	for (int doc = 0; doc < data.size(); doc++) {
		FeatureSequence fs = (FeatureSequence) data.get(doc).instance.getData();
		int docLen = fs.getLength();
		maxTokens = Math.max(maxTokens, docLen);
		totalTokens += docLen;
	}
	logger.info("max tokens: " + maxTokens);
	logger.info("total tokens: " + totalTokens);
	// Histogram bins are indexed by document length, hence maxTokens + 1.
	docLengthCounts = new int[maxTokens + 1];
	topicDocCounts = new int[numTopics][maxTokens + 1];
}