/** Return the cached Segmentation for this instance, keyed by its name. */
public Segmentation getSegmentation (Instance inst) {
    Object key = inst.getName();
    return (Segmentation) inst2segmentation.get(key);
}
/** Return the cluster (an InstanceList) this instance belongs to, keyed by its name. */
public InstanceList getCluster (Instance inst) {
    Object key = inst.getName();
    return (InstanceList) inst2cluster.get(key);
}
/**
 * Debugging pipe: print the instance's name, data, and target (with an
 * optional prefix), then pass the instance through unchanged.
 */
public Instance pipe (Instance carrier) {
    if (prefix != null) {
        System.out.print (prefix);
    }
    Object target = carrier.getTarget();
    String targetString = (target == null) ? "<null>" : target.toString();
    System.out.println ("name: " + carrier.getName()
                        + "\ninput: " + carrier.getData().toString()
                        + "\ntarget: " + targetString);
    return carrier;
}
/**
 * Classify an instance, optionally substituting the out-of-fold classifier
 * when the instance's name appears in the training-data table.
 *
 * @param instance the instance to classify
 * @param useOutOfFold whether to check the instance name and use the out-of-fold classifier
 *        if the instance name matches one in the training data
 * @return the token classifier's output
 */
public Classification classify(Instance instance, boolean useOutOfFold) {
    Object instName = instance.getName();
    if (useOutOfFold && m_table.containsKey(instName)) {
        // This instance was seen in training; use the classifier trained
        // on the folds that exclude it.
        Classifier foldClassifier = (Classifier) m_table.get(instName);
        return foldClassifier.classify(instance);
    }
    return m_tokenClassifier.classify(instance);
}
/**
 * Check that PatternMatchIterator extracts each &lt;p&gt;…&lt;/p&gt; span
 * (DOTALL lets '.' match across newlines) with the expected contents.
 */
public void testOne () {
    Pattern paragraph = Pattern.compile("<p>(.+?)</p>", Pattern.DOTALL);
    Iterator iter = new PatternMatchIterator(data, paragraph);
    int count = 0;
    while (iter.hasNext()) {
        Instance inst = (Instance) iter.next();
        System.out.println( inst.getName() + " : " + inst.getData() );
        String expected = (count == 0) ? "Inside inside inside" : "inside\ninside";
        assertTrue (inst.getData().equals(expected));
        count++;
    }
}
/**
 * Print every cluster and its member instances. Each top-level instance in
 * {@code clusters} carries a whole cluster (an InstanceList) as its data.
 */
private static void printClusterList (InstanceList clusters) {
    for (InstanceList.Iterator it = clusters.iterator(); it.hasNext();) {
        Instance clusterInstance = (Instance) it.next();
        InstanceList members = (InstanceList) clusterInstance.getData();
        System.out.println("\n\nCLUSTER *** " + clusterInstance.getName());
        for (InstanceList.Iterator memberIt = members.iterator(); memberIt.hasNext();) {
            Instance member = (Instance) memberIt.next();
            Object target = member.getTarget();
            String targetString = (target == null) ? "<null>" : target.toString();
            System.out.println("name: " + member.getName()
                               + "\ninput: " + member.getData().toString()
                               + "\ntarget: " + targetString);
        }
    }
}
/**
 * Decode each test instance with the transducer and write its name, input,
 * true labels, and predicted labels to "correct<num>.txt" (all tokens right)
 * or "failures<num>.txt" (at least one token wrong).
 *
 * @param crf the trained transducer used to predict label sequences
 * @param testing instances whose data/target are parallel Sequences
 * @param num suffix used to name the two output files
 * @throws IOException if either output file cannot be opened
 */
protected static void writeOutput (Transducer crf, InstanceList testing, String num) throws IOException {
    File resultsFile = new File (outputPrefix.value, "correct"+num+".txt");
    File failuresFile = new File (outputPrefix.value, "failures"+num+".txt");
    PrintStream out = new PrintStream (new FileOutputStream (resultsFile));
    PrintStream outf = new PrintStream (new FileOutputStream (failuresFile));
    try {
        for (int i = 0; i < testing.size(); i++) {
            Instance instance = testing.getInstance(i);
            Sequence input = (Sequence) instance.getData();
            Sequence trueOutput = (Sequence) instance.getTarget();
            assert (input.size() == trueOutput.size());
            Sequence predOutput = crf.viterbiPath(input).output();
            assert (predOutput.size() == trueOutput.size());
            boolean hadError = false;
            for (int j = 0; j < input.size(); j++) {
                // Compare label strings by value: the original used '!=' on
                // freshly built Strings, which flagged nearly every sequence
                // as a failure regardless of the actual labels.
                if (! predOutput.get(j).toString().equals (trueOutput.get(j).toString())) {
                    hadError = true;
                    break;
                }
            }
            if (hadError) {
                printSequence (outf, instance.getName().toString(), input, trueOutput, predOutput);
            } else {
                printSequence (out, instance.getName().toString(), input, trueOutput, predOutput);
            }
            out.println();
        }
    } finally {
        // Close BOTH streams; the original leaked 'outf'.
        out.close ();
        outf.close ();
    }
}
/**
 * Print one line per classified instance in the trial: its name, true
 * target, and every label:score pair in rank order.
 */
private static void printTrialClassification(Trial trial) {
    int n = trial.toArrayList().size();
    for (int i = 0; i < n; i++) {
        Classification classification = trial.getClassification(i);
        Instance instance = classification.getInstance();
        StringBuilder line = new StringBuilder();
        line.append(instance.getName()).append(" ").append(instance.getTarget()).append(" ");
        Labeling labeling = classification.getLabeling();
        for (int rank = 0; rank < labeling.numLocations(); rank++) {
            line.append(labeling.getLabelAtRank(rank).toString())
                .append(":")
                .append(labeling.getValueAtRank(rank))
                .append(" ");
        }
        System.out.println(line.toString());
    }
}
/**
 * Add a TAGGED_AS_&lt;field&gt; feature to each token whose text was labeled
 * with &lt;field&gt; in any OTHER instance of this record's cluster.
 * Tokens matching EXCLUDE are skipped.
 */
public Instance pipe (Instance carrier) {
    TokenSequence tokens = (TokenSequence) carrier.getData();
    InstanceList cluster = segmentation.getCluster(carrier);
    for (InstanceList.Iterator it = cluster.iterator(); it.hasNext();) {
        Instance other = (Instance) it.next();
        if (other.getName().equals(carrier.getName())) {
            continue;  // never compare the carrier against itself
        }
        Segmentation otherSeg = segmentation.getSegmentation(other);
        for (int ti = 0; ti < tokens.size(); ti++) {
            Token token = tokens.getToken(ti);
            String word = token.getText();
            if (EXCLUDE.matcher(word).matches()) {
                continue;
            }
            String[] fields = otherSeg.fieldNamesForWord(word);
            for (int fi = 0; fi < fields.length; fi++) {
                // intern() so repeated feature names share one String object
                String featureName = ("TAGGED_AS_" + fields[fi]).intern();
                token.setFeatureValue(featureName, 1.0);
            }
        }
    }
    return carrier;
}
/**
 * Add a TAGGED_AS_&lt;field&gt; feature to each token whose text was labeled
 * with &lt;field&gt; in any OTHER instance of this record's cluster. A feature
 * is only set if the token does not already carry it (value 0.0); tokens
 * matching EXCLUDE are skipped.
 */
public Instance pipe (Instance carrier) {
    TokenSequence tokens = (TokenSequence) carrier.getData();
    InstanceList cluster = segmentation.getCluster(carrier);
    for (InstanceList.Iterator it = cluster.iterator(); it.hasNext();) {
        Instance other = (Instance) it.next();
        if (other.getName().equals(carrier.getName())) {
            continue;  // never compare the carrier against itself
        }
        Segmentation otherSeg = segmentation.getSegmentation(other);
        for (int ti = 0; ti < tokens.size(); ti++) {
            Token token = tokens.getToken(ti);
            String word = token.getText();
            if (EXCLUDE.matcher(word).matches()) {
                continue;
            }
            String[] fields = otherSeg.fieldNamesForWord(word);
            for (int fi = 0; fi < fields.length; fi++) {
                String featureName = ("TAGGED_AS_" + fields[fi]).intern();
                // Set at most once per token/feature pair.
                if (token.getFeatureValue(featureName) == 0.0) {
                    token.setFeatureValue(featureName, 1.0);
                }
            }
        }
    }
    return carrier;
}
/**
 * Make an AllClusterSegmentation from the true segmentation of an
 * InstanceList, recording each instance's gold Segmentation and the
 * cluster it belongs to (stored as the "CLUSTER" instance property).
 *
 * @param clusterlist the clustered instances to flatten
 * @param pipe the pipe used to process each instance
 */
public AllClusterSegmentation (InstanceList clusterlist, Pipe pipe) {
    InstanceList instances = new InstanceList(pipe);
    instances.add(new ClusterListIterator(clusterlist));
    for (InstanceList.Iterator it = instances.iterator(); it.hasNext();) {
        Instance instance = (Instance) it.next();
        Object name = instance.getName();
        TokenSequence input = (TokenSequence) instance.getData();
        Sequence trueLabels = (Sequence) instance.getTarget();
        // Index the gold segmentation and the owning cluster by instance name.
        inst2segmentation.put(name, new Segmentation(input, trueLabels));
        inst2cluster.put(name, (InstanceList) instance.getProperty("CLUSTER"));
    }
}
/**
 * Add cluster-tagging features to each token: "I_AM_TAGGED_AS_&lt;field&gt;"
 * when the label comes from this instance's own segmentation, and
 * "TAGGED_AS_&lt;field&gt;" when it comes from another instance in the same
 * cluster. Features are set at most once; tokens matching EXCLUDE are skipped.
 */
public Instance pipe (Instance carrier) {
    TokenSequence tokens = (TokenSequence) carrier.getData();
    InstanceList cluster = segmentation.getCluster(carrier);
    for (InstanceList.Iterator it = cluster.iterator(); it.hasNext();) {
        Instance other = (Instance) it.next();
        boolean isSelf = other.getName().equals(carrier.getName());
        String featurePrefix = isSelf ? "I_AM_TAGGED_AS_" : "TAGGED_AS_";
        Segmentation otherSeg = segmentation.getSegmentation(other);
        for (int ti = 0; ti < tokens.size(); ti++) {
            Token token = tokens.getToken(ti);
            String word = token.getText();
            if (EXCLUDE.matcher(word).matches()) {
                continue;
            }
            String[] fields = otherSeg.fieldNamesForWord(word);
            for (int fi = 0; fi < fields.length; fi++) {
                String featureName = (featurePrefix + fields[fi]).intern();
                // Set at most once per token/feature pair.
                if (token.getFeatureValue(featureName) == 0.0) {
                    token.setFeatureValue(featureName, 1.0);
                }
            }
        }
    }
    return carrier;
}
// Fragment of a printing pipe (the enclosing method's boundaries are not
// visible in this chunk): append an optional prefix and the instance name,
// then iterate the token sequence; presumably each token is printed in the
// loop body below — TODO confirm against the full method.
if (prefix != null)
    sb.append (prefix);
sb.append ("name: "+carrier.getName()+"\n");
for (int i = 0; i < ts.size(); i++) {
    if (source != null) {
/**
 * Return a shallow copy of the underlying iterator's next instance,
 * carrying its property list along so downstream pipes can read it.
 */
public Instance nextInstance () {
    Instance original = iter.nextInstance ();
    Instance copy = new Instance (original.getData(), original.getTarget(),
                                  original.getName(), original.getSource());
    copy.setPropertyList (original.getPropertyList ());
    return copy;
}
// Record the out-of-fold classifier for this training instance, keyed by
// its name (presumably looked up later when classifying with
// useOutOfFold=true — confirm against classify()).
m_table.put(inst.getName(), foldClassifier);
/**
 * Convert an instance whose data is a FeatureVectorSequence into an
 * InstanceList containing one instance per sequence position, each with a
 * single AugmentableFeatureVector as its data.
 *
 * @param inst input instance, with FeatureVectorSequence as data
 * @param alphabetsPipe a Noop pipe containing the data and target alphabets for
 *        the resulting InstanceList and AugmentableFeatureVectors
 * @return list of instances, each with one AugmentableFeatureVector as data
 */
public static InstanceList convert(Instance inst, Noop alphabetsPipe) {
    InstanceList result = new InstanceList(alphabetsPipe);
    Object data = inst.getData();
    assert(data instanceof FeatureVectorSequence);
    FeatureVectorSequence fvs = (FeatureVectorSequence) data;
    LabelSequence labels = (LabelSequence) inst.getTarget();
    assert(fvs.size() == labels.size());
    Object baseName = (inst.getName() == null) ? "NONAME" : inst.getName();
    Object source = inst.getSource();
    for (int pos = 0; pos < fvs.size(); pos++) {
        FeatureVector fv = fvs.getFeatureVector(pos);
        int[] indices = fv.getIndices();
        // Re-express each position's vector over the pipe's data alphabet.
        FeatureVector afv = new AugmentableFeatureVector(
            alphabetsPipe.getDataAlphabet(), indices, fv.getValues(), indices.length);
        Labeling target = labels.getLabelAtPosition(pos);
        // Positions are 1-based in the generated names.
        String name = baseName.toString() + "_@_POS_" + (pos + 1);
        result.add(new Instance(afv, target, name, source, alphabetsPipe));
    }
    return result;
}
// Fragment of a diagnostics printer (the enclosing method is not visible in
// this chunk): print the instance's name and target, then — when the format
// string starts with 'a' — loop over every feature index (loop body below).
System.out.print(inst.getName() + " " + inst.getTarget());
if (formatString.charAt(0) == 'a'){ // all features
    for (int fvi=0; fvi<numFeatures; fvi++){
// Fragment (the enclosing method is not visible in this chunk): feed each of
// the trial's classifications, along with its instance's name and source,
// into the training confidence-prediction accumulator.
for (int i = 0; i < t.size(); i++) {
    Classification classification = t.getClassification(i);
    confidencePredictionTraining.add (classification, null,
        classification.getInstance().getName(),
        classification.getInstance().getSource());
public static PipeOutputAccumulator iteratePipe (Pipe iteratedPipe, PipeOutputAccumulator accumulator, Instance carrier) { PipeInputIterator iter = (PipeInputIterator) carrier.getData(); iter.setParentInstance (carrier); while (iter.hasNext()) { // Make sure that instance.pipe field gets set when piping instance. Instance subInstance = iter.nextInstance(); Instance pipedInstance = new Instance (subInstance.getData (), subInstance.getTarget (), subInstance.getName (), subInstance.getSource (), iteratedPipe); accumulator.pipeOutputAccumulate (pipedInstance, iteratedPipe); } return accumulator; }
public Extraction extract (PipeInputIterator source) { Extraction extraction = new Extraction (this, getTargetAlphabet ()); // Put all the instances through both pipes, then get viterbi path InstanceList tokedList = new InstanceList (tokenizationPipe); tokedList.add (source); InstanceList pipedList = new InstanceList (getFeaturePipe ()); pipedList.add (new InstanceListIterator (tokedList)); InstanceList.Iterator it1 = tokedList.iterator (); InstanceList.Iterator it2 = pipedList.iterator (); while (it1.hasNext()) { Instance toked = it1.nextInstance(); Instance piped = it2.nextInstance (); Tokenization tok = (Tokenization) toked.getData(); String name = piped.getName().toString(); Sequence input = (Sequence) piped.getData (); Sequence target = (Sequence) piped.getTarget (); Sequence output = crf.transduce (input); DocumentExtraction docseq = new DocumentExtraction (name, getTargetAlphabet (), tok, output, target, backgroundTag, filter); extraction.addDocumentExtraction (docseq); } return extraction; }