public InstanceList pipeInstances (Iterator<Instance> source) { // I think that pipes should be associated neither with InstanceLists, nor // with Instances. -cas InstanceList toked = new InstanceList (tokenizationPipe); toked.addThruPipe (source); InstanceList piped = new InstanceList (getFeaturePipe ()); piped.addThruPipe (toked.iterator()); return piped; }
/**
 * Returns a list of the instances that carry a particular label.
 * Position {@code n} of {@code instances} is included when {@code labels[n]}
 * equals the requested label.
 *
 * @param label the label value to select
 * @return a new list, sharing the pipe of {@code instances}, with the matching instances
 */
public InstanceList getCluster(int label)
{
  InstanceList members = new InstanceList(instances.getPipe());
  for (int idx = 0; idx < instances.size(); idx++) {
    if (labels[idx] != label) {
      continue;
    }
    members.add(instances.get(idx));
  }
  return members;
}
/**
 * Returns the data alphabet and the target alphabet as a two-element array.
 *
 * @return a new array with the data alphabet at index 0 and the target alphabet at index 1
 */
public Alphabet[] getAlphabets ()
{
  Alphabet[] pair = new Alphabet[2];
  pair[0] = getDataAlphabet();
  pair[1] = getTargetAlphabet();
  return pair;
}
/**
 * Copies the instances at positions {@code start} (inclusive) through
 * {@code end} (exclusive) into a new list obtained from {@code cloneEmpty()}.
 *
 * @param start index of the first instance to copy
 * @param end index one past the last instance to copy
 * @return the new list; it has no instances when {@code start >= end}
 */
public InstanceList subList (int start, int end)
{
  InstanceList slice = this.cloneEmpty();
  int pos = start;
  while (pos < end) {
    slice.add (get (pos));
    pos++;
  }
  return slice;
}
/**
 * Pipes {@code sampleFixedData} through a {@code GenericAcrfData2TokenSequence}
 * constructed with argument 2 and checks that exactly one instance results,
 * whose target is a {@code LabelsSequence} of size 4.
 */
public void testFixedNumLabels () throws IOException, ClassNotFoundException
{
  InstanceList training = new InstanceList (new GenericAcrfData2TokenSequence (2));
  // Groups of lines are delimited by lines matching the empty-line pattern.
  LineGroupIterator groups =
      new LineGroupIterator (new StringReader (sampleFixedData), Pattern.compile ("^$"), true);
  training.addThruPipe (groups);

  assertEquals (1, training.size ());

  LabelsSequence targetLabels = (LabelsSequence) training.get (0).getTarget ();
  assertEquals (4, targetLabels.size ());
}
/**
 * Pipes the {@code data} array through the pipe from {@code createPipe()}
 * and checks that three instances end up in the list.
 */
public void testOne ()
{
  InstanceList ilist = new InstanceList (createPipe());
  ilist.addThruPipe(new StringArrayIterator(data));

  int loaded = ilist.size();
  assertTrue (loaded == 3);
}
/**
 * Builds a training list by wrapping each word in an {@code Instance}
 * (data only; target, name and source are null) and adding it through the
 * pipe from {@code makePipe()}.
 *
 * @param inputs the words to convert
 * @return the list that the words were added to
 */
private InstanceList makeExamplesFromAligns(Collection<SWord> inputs) {
  InstanceList examples = new InstanceList(makePipe());
  // Counts words submitted to the pipe, not the resulting list size.
  int submitted = 0;
  for (SWord input : inputs) {
    examples.addThruPipe(new Instance(input, null, null, null));
    submitted++;
  }
  log.info("Read {} instances of training data for syll phone tag", submitted);
  return examples;
}
/**
 * Classifies the document text of the given JCas and records the best label
 * as a {@code Metadata} entry keyed by {@code metadataKey}.
 *
 * @param jCas the CAS whose document text is classified
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // Pipe the document text so the classifier receives a processed instance.
  InstanceList piped = new InstanceList(classifierModel.getInstancePipe());
  Instance document = new Instance(jCas.getDocumentText(), "", "from jcas", null);
  piped.addThruPipe(document);

  Classification result = classifierModel.classify(piped.get(0));

  Metadata md = new Metadata(jCas);
  md.setKey(metadataKey);
  String bestLabel = result.getLabeling().getBestLabel().toString();
  md.setValue(bestLabel);
  addToJCasIndex(md);
}
/**
 * Returns a randomly chosen sublist holding the given fraction of this list.
 * <p>
 * The copy returned by {@code clone()} is shuffled with a newly created
 * {@code java.util.Random}, and its leading instances are copied into a
 * list obtained from {@code cloneEmpty()}. The number of instances copied
 * is the smallest integer that is at least {@code proportion * size()}; a
 * proportion of zero or less yields a list with no instances.
 * <p>
 * The previous implementation appended to the full clone, so the result
 * contained every instance plus duplicates rather than a subset.
 *
 * @param proportion fraction of the instances to keep, at most 1.0
 * @return a new list with the randomly selected instances
 * @throws IllegalArgumentException if {@code proportion} is greater than 1.0
 */
public InstanceList subList (double proportion)
{
  if (proportion > 1.0)
    throw new IllegalArgumentException ("proportion must by <= 1.0");

  // Shuffle a copy so the selection is random.
  InstanceList shuffled = (InstanceList) clone();
  shuffled.shuffle(new java.util.Random());

  // Start from an empty list; taking from the shuffled copy makes this a true subset.
  InstanceList other = this.cloneEmpty();
  double limit = proportion * shuffled.size();
  for (int i = 0; i < limit; i++)
    other.add (shuffled.get(i));
  return other;
}
/**
 * Adds a single input object to a temporary list built on the pipe from
 * {@code getFeaturePipe()} and returns the data of the first instance in
 * that list, cast to a {@code Sequence}.
 *
 * @param input the object to add; target, name and source are passed as null
 * @return the data of the list's first instance
 */
public Sequence pipeInput (Object input)
{
  InstanceList holder = new InstanceList (getFeaturePipe ());
  holder.add (input, null, null, null);
  Object data = holder.get (0).getData();
  return (Sequence) data;
}
}
/**
 * Draws {@code numSamples} instances from this list, each at an index chosen
 * by {@code r.nextInt(size())}, so the same instance may be drawn more than once.
 *
 * @param r the random number generator used to pick indices
 * @param numSamples how many instances to draw
 * @return a new list, obtained from {@code cloneEmpty()}, holding the drawn instances
 */
public InstanceList sampleWithReplacement (java.util.Random r, int numSamples)
{
  InstanceList sample = this.cloneEmpty();
  int drawn = 0;
  while (drawn < numSamples) {
    int pick = r.nextInt(this.size());
    sample.add (this.get(pick));
    drawn++;
  }
  return sample;
}
/**
 * Accumulates the targets of all instances, each weighted by its instance
 * weight, into a vector over the target alphabet.
 *
 * @return the accumulated label vector, or {@code null} when this list is empty
 * @throws IllegalStateException if the first instance's target is not a {@code Labeling}
 */
public LabelVector targetLabelDistribution ()
{
  if (this.size() == 0) {
    return null;
  }

  // Only the first target is type-checked; later targets are cast directly.
  Object firstTarget = get(0).getTarget();
  if (!(firstTarget instanceof Labeling)) {
    throw new IllegalStateException ("Target is not a labeling.");
  }

  double[] mass = new double[getTargetAlphabet().size()];
  for (int idx = 0; idx < this.size(); idx++) {
    Labeling labeling = (Labeling) get(idx).getTarget();
    labeling.addTo (mass, getInstanceWeight(idx));
  }

  LabelAlphabet targets = (LabelAlphabet) getTargetAlphabet();
  return new LabelVector (targets, mass);
}
/**
 * Builds a two-element list from the given instances. The list's pipe is a
 * {@code Noop} constructed from the data and target alphabets of {@code i};
 * the alphabets of {@code j} are not consulted.
 *
 * @param i the first instance, which also supplies the alphabets
 * @param j the second instance
 * @return A new {@link InstanceList} containing the two argument {@link Instance}s.
 */
public static InstanceList makeList (Instance i, Instance j)
{
  Noop passThrough = new Noop(i.getDataAlphabet(), i.getTargetAlphabet());
  InstanceList pair = new InstanceList(passThrough);
  pair.add(i);
  pair.add(j);
  return pair;
}
/**
 * Stores the per-label feature selections after checking that each one
 * refers to the very same alphabet object as this list's data alphabet.
 *
 * @param selectedFeatures the selections to store; {@code null} is stored without checks
 * @throws IllegalArgumentException if any selection's alphabet is not this list's data alphabet
 */
public void setPerLabelFeatureSelection (FeatureSelection[] selectedFeatures)
{
  if (selectedFeatures != null) {
    for (FeatureSelection selection : selectedFeatures) {
      // Identity comparison: the alphabets must be the same object.
      if (selection.getAlphabet() != getDataAlphabet()) {
        throw new IllegalArgumentException ("Vocabularies do not match");
      }
    }
  }
  perLabelFeatureSelection = selectedFeatures;
}
/**
 * Replaces the <code>Instance</code> at position <code>index</code>
 * with a new one. When assertions are enabled, the new instance's data and
 * target alphabets must equal those of this list.
 *
 * @param index position of the instance to replace
 * @param instance the replacement instance
 */
public void setInstance (int index, Instance instance)
{
  // Kept as assert statements so nothing is evaluated when assertions are off.
  assert this.getDataAlphabet().equals(instance.getDataAlphabet());
  assert this.getTargetAlphabet().equals(instance.getTargetAlphabet());
  set(index, instance);
}
/**
 * Trains {@code numBags} classifiers, each on a sample drawn with replacement
 * from the training list and of the same size as that list, and combines them
 * into a {@code BaggingClassifier}.
 *
 * @param trainingList the instances to sample from
 * @return the bagging classifier, which is also stored in {@code this.classifier}
 */
public BaggingClassifier train (InstanceList trainingList)
{
  Classifier[] ensemble = new Classifier[numBags];
  java.util.Random rng = new java.util.Random ();

  int bagIndex = 0;
  while (bagIndex < numBags) {
    InstanceList bootstrap = trainingList.sampleWithReplacement (rng, trainingList.size());
    // Each bag gets its own trainer from the underlying trainer.
    ensemble[bagIndex] = underlyingTrainer.newClassifierTrainer().train (bootstrap);
    bagIndex++;
  }

  this.classifier = new BaggingClassifier (trainingList.getPipe(), ensemble);
  return classifier;
}
/**
 * Adds the input instance to this list, after passing it through the
 * InstanceList's pipe.
 * <p>
 * If several instances are to be added then accumulate them in a
 * {@code List<Instance>} and use {@code addThruPipe(Iterator<Instance>)} instead.
 *
 * @param inst the instance to pipe and add
 */
public void addThruPipe(Instance inst)
{
  // Wrap the instance in an iterator and delegate to the iterator overload.
  SingleInstanceIterator single = new SingleInstanceIterator(inst);
  addThruPipe(single);
}
/**
 * Creates the object from a training list and immediately runs
 * {@code doTraining} on that list.
 *
 * @param trainer stored in {@code m_trainer}
 * @param trainList training instances; its pipe is passed to the superclass constructor
 * @param randSeed stored in {@code m_randSeed}
 * @param numCV stored in {@code m_numCV}
 */
public TokenClassifiers(ClassifierTrainer trainer, InstanceList trainList, int randSeed, int numCV)
{
  super(trainList.getPipe());

  // All fields are initialised before training starts.
  this.m_table = new HashMap();
  this.m_numCV = numCV;
  this.m_randSeed = randSeed;
  this.m_trainer = trainer;

  doTraining(trainList);
}
/**
 * Builds a random instance list (seed 1, a dictionary from {@code dictOfSize(6)},
 * three class names, final argument 20) and runs the shared get/set-parameters
 * check on the optimizable obtained from a {@code MaxEntTrainer}.
 */
public void testSetGetParameters ()
{
  MaxEntTrainer trainer = new MaxEntTrainer();

  Alphabet fd = dictOfSize (6);
  String[] classNames = {"class0", "class1", "class2"};
  InstanceList ilist = new InstanceList (new Randoms(1), fd, classNames, 20);

  // Typed local kept so the same overload of the check is selected.
  Optimizable.ByGradientValue maxable = trainer.getOptimizable (ilist);
  TestOptimizable.testGetSetParameters (maxable);
}