public BitSet preProcess(InstanceList data) { // count BitSet bitSet = new BitSet(data.size()); int ii = 0; for (Instance instance : data) { FeatureVectorSequence fvs = (FeatureVectorSequence)instance.getData(); for (int ip = 1; ip < fvs.size(); ip++) { for (int fi : constraintsMap.keys()) { // binary constraint features if (fvs.get(ip).location(fi) >= 0) { constraintsList.get(constraintsMap.get(fi)).count += 1; bitSet.set(ii); } } } ii++; } return bitSet; }
private THashMultiMap constructFvByWord (FeatureVectorSequence fvs) { THashMultiMap fvByWord = new THashMultiMap (fvs.size ()); int N = fvs.size (); for (int t = 0; t < N; t++) { FeatureVector fv = fvs.getFeatureVector (t); String wordFeature = binner.computeBin (fv); if (wordFeature != null) { // could happen if the current word has been excluded fvByWord.put (wordFeature, new TokenInfo (wordFeature, fv, t)); } } return fvByWord; }
/**
 * Returns the alphabet of this object wrapped in an array.
 *
 * @return a newly allocated one-element array whose only entry is the value of
 *         {@code getAlphabet()}
 */
public Alphabet[] getAlphabets() {
  Alphabet[] alphabets = new Alphabet[1];
  alphabets[0] = getAlphabet();
  return alphabets;
}
/** Returns the maximum time, which is the size of {@code fvs}. */
int getMaxTime ()
{
  final int sequenceLength = fvs.size();
  return sequenceLength;
}

/** Returns the number of factors, which is the length of {@code outputAlphabets}. */
int getNumFactors ()
{
  final int factorCount = outputAlphabets.length;
  return factorCount;
}
/**
 * Writes one HTML table row ({@code <tr class="features">}) describing the features at the
 * positions {@code start} (inclusive) to {@code end} (exclusive).
 *
 * <p>For a position where {@code seqMatches (in, output, i)} is false, the cell lists every
 * feature stored in the feature vector at that position, one per line, followed by its
 * value when the value is not 1.0. For a position where it is true, an empty cell is written.
 *
 * <p>Feature names are HTML-escaped before being written, so that names containing
 * {@code &}, {@code <} or {@code >} are shown literally instead of corrupting the markup.
 *
 * @param out    destination of the generated HTML
 * @param fvs    feature vectors, indexed by position
 * @param in     first sequence handed to {@code seqMatches}
 * @param output second sequence handed to {@code seqMatches}
 * @param start  first position to write (inclusive)
 * @param end    position at which to stop (exclusive)
 */
private static void outputFeatures (PrintWriter out, FeatureVectorSequence fvs, Sequence in, Sequence output, int start, int end)
{
  out.println (" <tr class=\"features\">\n<td class=\"label\">Features</td>");
  for (int i = start; i < end; i++) {
    if (seqMatches (in, output, i)) {
      out.println ("<td></td>");
      continue;
    }

    out.print ("<td>");
    FeatureVector fv = fvs.getFeatureVector (i);
    for (int k = 0; k < fv.numLocations (); k++) {
      // String.valueOf mirrors what PrintWriter.print(Object) would have printed.
      String featureName = String.valueOf (fv.getAlphabet ().lookupObject (fv.indexAtLocation (k)));
      // '&' must be replaced first so the entities produced below are not escaped again.
      out.print (featureName.replace ("&", "&amp;").replace ("<", "&lt;").replace (">", "&gt;"));
      double value = fv.valueAtLocation (k);
      if (value != 1.0) {
        out.print (" " + value);
      }
      out.println ("<br />");
    }
    out.println ("</td>");
  }
  out.println (" </tr>");
}
// Tail of a method whose header is not visible here.
// With assertions enabled, checks that fvs and ilist have the same size. Then allocates a
// FeatureVector array of that size, wraps it in a new FeatureVectorSequence, stores that as
// the carrier's data and returns the carrier.
// NOTE(review): within this fragment no element of fva is ever assigned, so the sequence is
// built from an array of nulls unless population code was elided — TODO confirm.
assert (fvs.size() == ilist.size()); FeatureVector[] fva = new FeatureVector[fvs.size()]; carrier.setData(new FeatureVectorSequence(fva)); return carrier;
/**
 * Replaces the {@code PartialTagging} carried by an instance with a
 * {@code FeatureVectorSequence} holding one feature vector per prediction index.
 *
 * <p>For each prediction index, {@code processPipes} is run and then {@code makeVector}
 * produces the vector for that index. After all vectors are built, {@code updateTarget}
 * is called and the instance data is replaced.
 *
 * @param inst instance whose data is a {@code PartialTagging}
 * @return the same instance, with its data replaced by the feature vector sequence
 * @throws IllegalArgumentException if the tagging yields no prediction indexes
 */
@Override
public Instance pipe(Instance inst) {
  final PartialTagging tagging = (PartialTagging) inst.getData();
  final List<FeatureVector> perIndexVectors = Lists.newArrayList();

  for (Integer predictionIndex : tagging.getPredictionIndexes()) {
    processPipes(tagging, predictionIndex);
    perIndexVectors.add(makeVector(tagging, predictionIndex));
  }

  if (perIndexVectors.isEmpty()) {
    throw new IllegalArgumentException("Cant seqvow a word with no vowels");
  }

  updateTarget(tagging, inst);
  FeatureVector[] asArray = perIndexVectors.toArray(new FeatureVector[perIndexVectors.size()]);
  inst.setData(new FeatureVectorSequence(asArray));
  return inst;
}
/*
 * next(): returns one Instance per element of the underlying sequences.
 *
 * When the current data sub-iterator is exhausted, the next super-instance is fetched from
 * superIterator, and fresh data/target sub-iterators are created from its data (cast to
 * FeatureVectorSequence) and its target (cast to LabelSequence). The returned Instance is
 * constructed from the next data element, the next target element, a third argument made of
 * the super-instance's source followed by " tokensequence:" and a running counter (count is
 * post-incremented on every call), and null as the fourth argument.
 *
 * The hasNext() checks are assertions only, so they are active only when assertions are
 * enabled. The code assumes sequences are never empty.
 *
 * The hasNext() definition that begins at the end of this line continues beyond the
 * visible text and is not described here.
 */
public Instance next () { if (!dataSubiterator.hasNext()) { assert (superIterator.hasNext()); superInstance = superIterator.next(); dataSubiterator = ((FeatureVectorSequence)superInstance.getData()).iterator(); targetSubiterator = ((LabelSequence)superInstance.getTarget()).iterator(); } // We are assuming sequences don't have zero length assert (dataSubiterator.hasNext()); assert (targetSubiterator.hasNext()); return new Instance (dataSubiterator.next(), targetSubiterator.next(), superInstance.getSource()+" tokensequence:"+count++, null); } public boolean hasNext () {
/**
 * Creates a transition iterator for one position of an input sequence.
 *
 * <p>This is a convenience overload: it looks up the element of {@code inputSeq} at
 * {@code inputPosition} and delegates to the constructor that takes that element directly.
 *
 * @param source        passed through unchanged to the delegate constructor
 * @param inputSeq      sequence from which the element at {@code inputPosition} is taken
 * @param inputPosition index into {@code inputSeq}
 * @param output        passed through unchanged to the delegate constructor
 * @param crf           passed through unchanged to the delegate constructor
 */
public TransitionIterator (State source, FeatureVectorSequence inputSeq, int inputPosition, String output, CRF crf) { this (source, inputSeq.get(inputPosition), output, crf); }
/**
 * Describes this object as the superclass description followed, on a new line, by the
 * values of {@code string1Length} and {@code string2Length}.
 *
 * @return the combined description
 */
public String toString ()
{
  String inherited = super.toString();
  String lengths = "String 1: " + string1Length + " String 2: " + string2Length;
  return inherited + '\n' + lengths;
}
/** Returns the maximum time, which is the size of {@code fvs}. */
int getMaxTime ()
{
  final int sequenceLength = fvs.size();
  return sequenceLength;
}

/** Returns the number of factors, which is the length of {@code outputAlphabets}. */
int getNumFactors ()
{
  final int factorCount = outputAlphabets.length;
  return factorCount;
}
/**
 * Writes one HTML table row ({@code <tr class="features">}) describing the features at the
 * positions {@code start} (inclusive) to {@code end} (exclusive).
 *
 * <p>For a position where {@code seqMatches (in, output, i)} is false, the cell lists every
 * feature stored in the feature vector at that position, one per line, followed by its
 * value when the value is not 1.0. For a position where it is true, an empty cell is written.
 *
 * <p>Feature names are HTML-escaped before being written, so that names containing
 * {@code &}, {@code <} or {@code >} are shown literally instead of corrupting the markup.
 *
 * @param out    destination of the generated HTML
 * @param fvs    feature vectors, indexed by position
 * @param in     first sequence handed to {@code seqMatches}
 * @param output second sequence handed to {@code seqMatches}
 * @param start  first position to write (inclusive)
 * @param end    position at which to stop (exclusive)
 */
private static void outputFeatures (PrintWriter out, FeatureVectorSequence fvs, Sequence in, Sequence output, int start, int end)
{
  out.println (" <tr class=\"features\">\n<td class=\"label\">Features</td>");
  for (int i = start; i < end; i++) {
    if (seqMatches (in, output, i)) {
      out.println ("<td></td>");
      continue;
    }

    out.print ("<td>");
    FeatureVector fv = fvs.getFeatureVector (i);
    for (int k = 0; k < fv.numLocations (); k++) {
      // String.valueOf mirrors what PrintWriter.print(Object) would have printed.
      String featureName = String.valueOf (fv.getAlphabet ().lookupObject (fv.indexAtLocation (k)));
      // '&' must be replaced first so the entities produced below are not escaped again.
      out.print (featureName.replace ("&", "&amp;").replace ("<", "&lt;").replace (">", "&gt;"));
      double value = fv.valueAtLocation (k);
      if (value != 1.0) {
        out.print (" " + value);
      }
      out.println ("<br />");
    }
    out.println ("</td>");
  }
  out.println (" </tr>");
}
// Tail of a method whose header is not visible here.
// With assertions enabled, checks that fvs and ilist have the same size. Then allocates a
// FeatureVector array of that size, wraps it in a new FeatureVectorSequence, stores that as
// the carrier's data and returns the carrier.
// NOTE(review): within this fragment no element of fva is ever assigned, so the sequence is
// built from an array of nulls unless population code was elided — TODO confirm.
assert (fvs.size() == ilist.size()); FeatureVector[] fva = new FeatureVector[fvs.size()]; carrier.setData(new FeatureVectorSequence(fva)); return carrier;
/**
 * Replaces the {@code TokenSequence} carried by an instance with a
 * {@code FeatureVectorSequence} built from it.
 *
 * <p>The sequence is constructed from this pipe's data alphabet, the carrier's current
 * data, and the {@code binary}, {@code augmentable} and {@code growAlphabet} settings.
 *
 * @param carrier instance whose data is a {@code TokenSequence}
 * @return the same instance, now carrying the feature vector sequence as its data
 */
public Instance pipe (Instance carrier)
{
  Alphabet dataAlphabet = (Alphabet) getDataAlphabet();
  TokenSequence tokens = (TokenSequence) carrier.getData();
  FeatureVectorSequence vectors =
      new FeatureVectorSequence (dataAlphabet, tokens, binary, augmentable, growAlphabet);
  carrier.setData(vectors);
  return carrier;
}
/*
 * next(): returns one Instance per element of the underlying sequences.
 *
 * When the current data sub-iterator is exhausted, the next super-instance is fetched from
 * superIterator, and fresh data/target sub-iterators are created from its data (cast to
 * FeatureVectorSequence) and its target (cast to LabelSequence). The returned Instance is
 * constructed from the next data element, the next target element, a third argument made of
 * the super-instance's source followed by " tokensequence:" and a running counter (count is
 * post-incremented on every call), and null as the fourth argument.
 *
 * The hasNext() checks are assertions only, so they are active only when assertions are
 * enabled. The code assumes sequences are never empty.
 *
 * The hasNext() definition that begins at the end of this line continues beyond the
 * visible text and is not described here.
 */
public Instance next () { if (!dataSubiterator.hasNext()) { assert (superIterator.hasNext()); superInstance = superIterator.next(); dataSubiterator = ((FeatureVectorSequence)superInstance.getData()).iterator(); targetSubiterator = ((LabelSequence)superInstance.getTarget()).iterator(); } // We are assuming sequences don't have zero length assert (dataSubiterator.hasNext()); assert (targetSubiterator.hasNext()); return new Instance (dataSubiterator.next(), targetSubiterator.next(), superInstance.getSource()+" tokensequence:"+count++, null); } public boolean hasNext () {
/**
 * Creates a transition iterator for one position of an input sequence.
 *
 * <p>This is a convenience overload: it looks up the element of {@code inputSeq} at
 * {@code inputPosition} and delegates to the constructor that takes that element directly.
 *
 * @param source        passed through unchanged to the delegate constructor
 * @param inputSeq      sequence from which the element at {@code inputPosition} is taken
 * @param inputPosition index into {@code inputSeq}
 * @param output        passed through unchanged to the delegate constructor
 * @param crf           passed through unchanged to the delegate constructor
 */
public TransitionIterator (State source, FeatureVectorSequence inputSeq, int inputPosition, String output, CRF crf) { this (source, inputSeq.get(inputPosition), output, crf); }
/**
 * Describes this object as the superclass description followed, on a new line, by the
 * values of {@code string1Length} and {@code string2Length}.
 *
 * @return the combined description
 */
public String toString ()
{
  String inherited = super.toString();
  String lengths = "String 1: " + string1Length + " String 2: " + string2Length;
  return inherited + '\n' + lengths;
}
public BitSet preProcess(InstanceList data) { // count BitSet bitSet = new BitSet(data.size()); int ii = 0; for (Instance instance : data) { FeatureVectorSequence fvs = (FeatureVectorSequence)instance.getData(); for (int ip = 1; ip < fvs.size(); ip++) { for (int fi : constraintsMap.keys()) { // binary constraint features if (fvs.get(ip).location(fi) >= 0) { constraintsList.get(constraintsMap.get(fi)).count += 1; bitSet.set(ii); } } } ii++; } return bitSet; }
private THashMultiMap constructFvByWord (FeatureVectorSequence fvs) { THashMultiMap fvByWord = new THashMultiMap (fvs.size ()); int N = fvs.size (); for (int t = 0; t < N; t++) { FeatureVector fv = fvs.getFeatureVector (t); String wordFeature = binner.computeBin (fv); if (wordFeature != null) { // could happen if the current word has been excluded fvByWord.put (wordFeature, new TokenInfo (wordFeature, fv, t)); } } return fvByWord; }
/** Returns the maximum time, which is the size of {@code fvs}. */
int getMaxTime ()
{
  final int sequenceLength = fvs.size();
  return sequenceLength;
}

/** Returns the number of factors, which is the length of {@code outputAlphabets}. */
int getNumFactors ()
{
  final int factorCount = outputAlphabets.length;
  return factorCount;
}