/**
 * Renders this instance as multi-line text: a header line of the form
 * {@code <jcasId>-<sequenceId> - <sequencePosition>}, followed by one line per feature and
 * finally the outcomes joined by {@code "-"} (no trailing newline).
 *
 * @return the textual representation of this instance
 */
@Override
public String toString()
{
    StringBuilder text = new StringBuilder();

    // Header line carrying the three ids.
    text.append(jcasId).append("-").append(sequenceId).append(" - ").append(sequencePosition)
            .append("\n");

    // One feature per line.
    for (Feature f : getFeatures()) {
        text.append(f).append("\n");
    }

    // The joined outcomes close the text.
    return text.append(StringUtils.join(outcomes, "-")).toString();
}
// Assemble a fresh Instance for aTarget: the instance-id feature (guarded by addInstanceId), the
// features produced by featExt, the outcomes, the JCas id and the sequence id. The sequence
// position is taken from aTarget.getId().
// NOTE(review): the closing brace of the addInstanceId check is not visible here - confirm that
// only the id feature is meant to be conditional.
Instance instance = new Instance(); if (addInstanceId) { instance.addFeature(InstanceIdFeature.retrieve(jcas, aTarget, sequenceId)); instance.addFeatures(((FeatureExtractor) featExt).extract(jcas, aTarget)); instance.setOutcomes(getOutcomes(jcas, aTarget)); instance.setJcasId(jcasId); instance.setSequenceId(sequenceId); instance.setSequencePosition(aTarget.getId());
/**
 * Builds a textual key for an instance from its JCas id and its sequence id.
 *
 * @param i
 *            the instance to identify
 * @return the key in the form {@code <jcasId>_<sequenceId>}
 */
private String getId(Instance i)
{
    int casPart = i.getJcasId();
    int sequencePart = i.getSequenceId();
    return new StringBuilder().append(casPart).append("_").append(sequencePart).toString();
}
/**
 * Restricts the features of the given instances to those listed in {@code featureMeta}.
 * <p>
 * When {@code isTesting} is not set, the input list is returned untouched. Otherwise the
 * feature list of every instance is replaced (the instance object itself is modified) by the
 * features whose name is contained in {@code featureMeta.getFeatureNames()}, and the instances
 * are collected, in their original order, into a new list.
 *
 * @param instances
 *            the instances to filter
 * @return the input list itself when {@code isTesting} is not set, otherwise a new list
 *         holding the same, now filtered, instance objects
 */
private List<Instance> enforceMatchingFeatures(List<Instance> instances)
{
    if (!isTesting) {
        return instances;
    }

    List<Instance> filtered = new ArrayList<>();
    for (Instance current : instances) {
        List<Feature> kept = new ArrayList<>();
        for (Feature candidate : current.getFeatures()) {
            boolean known = featureMeta.getFeatureNames().contains(candidate.getName());
            if (known) {
                kept.add(candidate);
            }
        }
        current.setFeatures(kept);
        filtered.add(current);
    }
    return filtered;
}
@Override public Instance getInstance(int i) { this.addingAnotherInstancesAllowed = false; List<Feature> features = new ArrayList<>(); // feature values of the required instance (mapping feature mame: featureValue) Map<String, Object> instanceFeatureValues = instanceList.get(i); for (Map.Entry<String, Object> entry : instanceFeatureValues.entrySet()) { Feature feature = new Feature(entry.getKey(), entry.getValue()); features.add(feature); } Instance result = new Instance(features, outcomeList.get(i)); result.setWeight(weightList.getDouble(i)); result.setJcasId(casIds.getInt(i)); result.setSequenceId(sequenceIds.getInt(i)); result.setSequencePosition(sequencePositions.getInt(i)); return result; }
// Write the substituted outcome label and a tab, then every feature as "name=value". A tab
// separator is appended while idx + 1 is still below the number of features.
// NOTE(review): idx is maintained outside the visible code - presumably the position of the
// current feature, so that no tab follows the last one; confirm.
sb.append(LabelSubstitutor.labelReplacement(i.getOutcome())); sb.append("\t"); for (Feature f : i.getFeatures()) { sb.append(f.getName() + "=" + f.getValue()); if (idx + 1 < i.getFeatures().size()) { sb.append("\t");
/**
 * Populates the given instance from the single {@link TextClassificationTarget} found in the
 * JCas.
 * <p>
 * The instance receives, in this order: the instance-id feature (only when
 * {@code addInstanceId} is set), the features of every configured feature extractor, and then
 * the outcomes, the weight and the JCas id.
 * <p>
 * Outcomes, weight and JCas id are assigned exactly once after the extractor loop. They do not
 * depend on the extractor, so they are also set when no feature extractor is configured.
 *
 * @param anInstance
 *            the instance to fill; it is modified and returned
 * @param aJCas
 *            the JCas holding the {@code JCasId} and {@code TextClassificationTarget}
 *            annotations, each looked up with {@code JCasUtil.selectSingle}
 * @param supportsSparseFeature
 *            {@code true} to collect features via {@code getSparse}, {@code false} to use
 *            {@code getDense}
 * @return the same instance that was passed in
 * @throws Exception
 *             if annotation lookup, feature extraction, or outcome/weight retrieval fails
 */
private Instance getSingleInstanceUnit(Instance anInstance, JCas aJCas,
        boolean supportsSparseFeature)
    throws Exception
{
    int jcasId = JCasUtil.selectSingle(aJCas, JCasId.class).getId();
    TextClassificationTarget unit = JCasUtil.selectSingle(aJCas,
            TextClassificationTarget.class);

    if (addInstanceId) {
        anInstance.addFeature(InstanceIdFeature.retrieve(aJCas, unit));
    }

    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (supportsSparseFeature) {
            anInstance.addFeatures(getSparse(aJCas, unit, featExt));
        }
        else {
            anInstance.addFeatures(getDense(aJCas, unit, featExt));
        }
    }

    // Extractor-independent attributes: set once, outside the loop.
    anInstance.setOutcomes(getOutcomes(aJCas, unit));
    anInstance.setWeight(getWeight(aJCas, unit));
    anInstance.setJcasId(jcasId);

    return anInstance;
}
// Read name and value of each feature of the instance. The instance's outcomes (as a String[]),
// weight, JCas id, sequence id and sequence position are appended to their respective lists.
// NOTE(review): the loop's closing brace is not visible here - confirm that these list appends
// run once per instance and not once per feature.
for (Feature feature : instance.getFeatures()) { String name = feature.getName(); Object value = feature.getValue(); this.outcomeList.add(instance.getOutcomes().toArray(new String[0])); this.weightList.add(instance.getWeight()); this.casIds.add(instance.getJcasId()); this.sequenceIds.add(instance.getSequenceId()); this.sequencePositions.add(instance.getSequencePosition());
// Walk over the instances. A newline is written when the instance's sequence id differs from
// prevSeqId and prevSeqId is not -1; features are written as a blank followed by "name:" and
// further content that is not visible here.
// NOTE(review): prevSeqId == -1 presumably means "no previous instance yet" - confirm.
for (Instance instance : instances) { if (instance.getSequenceId() != prevSeqId && prevSeqId != -1) { bw.write("\n"); for (Feature f : instance.getFeatures()) { bw.write(" "); bw.write(f.getName() + ":"
// Read the outcome of the instance; for regression it is parsed as a double and set as the class
// value of wekaInstance. The instance weight is copied to wekaInstance when applyWeighting is set.
// NOTE(review): the closing brace of the isRegression branch is not visible here - confirm the
// weighting step is not meant to be limited to regression.
String outcome = inst.getOutcome(); if (isRegression) { wekaInstance.setClassValue(Double.parseDouble(outcome)); Double instanceWeight = inst.getWeight(); if (applyWeighting) { wekaInstance.setWeight(instanceWeight);
// Deserialize the JSON line into an Instance[]. For each instance, the Integer list stored in
// outcomeLineMap under its outcome is looked up; a new empty list is created and registered when
// none exists yet. Further down, the line is written out when an instance's outcome equals
// minOutcome; after lineId is incremented, the line is written when the list stored for the
// instance's outcome contains lineId.
// NOTE(review): the control flow between these statements is not fully visible here - confirm
// how the two passes over ins are separated.
Instance[] ins = gson.fromJson(line, Instance[].class); for (Instance i : ins) { List<Integer> list = outcomeLineMap.get(i.getOutcome()); if (list == null) { list = new ArrayList<>(); outcomeLineMap.put(i.getOutcome(), list); for (Instance i : ins) { if (minOutcome.equals(i.getOutcome())) { writer.write(line + "\n"); lineId++; boolean write = outcomeLineMap.get(i.getOutcome()).contains(lineId); if (write) { writer.write(line + "\n");
/**
 * Produces the {@code qid} field for the given instance.
 * <p>
 * The emitted sequence id must continuously increase; TC's id is Cas-relative and restarts at
 * zero for every new Cas. The JCas id of the instance is therefore compared with the id
 * remembered in {@code lastId}, and {@code currSeqId} is advanced whenever the two differ.
 * <p>
 * {@code lastId} is updated to the new JCas id when a change is detected. Without that update
 * it would keep the id of the very first instance, and every later instance from any other
 * JCas would advance {@code currSeqId} again.
 *
 * @param instance
 *            the instance that is about to be written
 * @return {@code "qid:" + currSeqId} followed by a tab
 */
@Override
protected String injectSequenceId(Instance instance)
{
    int jcasId = instance.getJcasId();

    // A negative lastId means that no id has been recorded yet.
    if (lastId < 0) {
        lastId = jcasId;
    }

    if (lastId > -1 && lastId != jcasId) {
        currSeqId++;
        // Remember the id just seen so the counter advances once per change only.
        lastId = jcasId;
    }

    return "qid:" + currSeqId + "\t";
}
/**
 * Orders two instances by ascending sequence id.
 *
 * @param o1
 *            the first instance
 * @param o2
 *            the second instance
 * @return the result of {@link Integer#compare(int, int)} applied to the two sequence ids
 */
@Override
public int compare(Instance o1, Instance o2)
{
    int firstSequence = o1.getSequenceId();
    int secondSequence = o2.getSequenceId();
    return Integer.compare(firstSequence, secondSequence);
}
});
/**
 * Creates a new instance and fills it according to the active mode.
 * <p>
 * The modes are checked in the order document, pair, unit; the first one that applies decides
 * which helper fills the instance. If none applies, the empty instance is returned as is.
 *
 * @param aJCas
 *            the JCas the instance is built from
 * @param supportSparseFeatures
 *            forwarded to the document- and unit-mode helpers; not used in pair mode
 * @return the instance returned by the selected helper, or the empty instance when no mode
 *         applies
 * @throws Exception
 *             if the selected helper fails
 */
public Instance getSingleInstance(JCas aJCas, boolean supportSparseFeatures) throws Exception
{
    Instance fresh = new Instance();

    if (isDocumentMode()) {
        return getSingleInstanceDocument(fresh, aJCas, supportSparseFeatures);
    }
    if (isPairMode()) {
        return getSingleInstancePair(fresh, aJCas);
    }
    if (isUnitMode()) {
        return getSingleInstanceUnit(fresh, aJCas, supportSparseFeatures);
    }
    return fresh;
}
// Write the substituted outcome label and a tab, then every feature as "name=value". A tab
// separator is appended while idx + 1 is still below the number of features.
// NOTE(review): idx is maintained outside the visible code - presumably the position of the
// current feature, so that no tab follows the last one; confirm.
sb.append(LabelSubstitutor.labelReplacement(i.getOutcome())); sb.append("\t"); for (Feature f : i.getFeatures()) { sb.append(f.getName() + "=" + f.getValue()); if (idx + 1 < i.getFeatures().size()) { sb.append("\t");
/**
 * Populates the given instance from the single {@link TextClassificationTarget} found in the
 * JCas.
 * <p>
 * The instance receives, in this order: the instance-id feature (only when
 * {@code addInstanceId} is set), the features of every configured feature extractor, and then
 * the outcomes, the weight and the JCas id.
 * <p>
 * Outcomes, weight and JCas id are assigned exactly once after the extractor loop. They do not
 * depend on the extractor, so they are also set when no feature extractor is configured.
 *
 * @param anInstance
 *            the instance to fill; it is modified and returned
 * @param aJCas
 *            the JCas holding the {@code JCasId} and {@code TextClassificationTarget}
 *            annotations, each looked up with {@code JCasUtil.selectSingle}
 * @param supportsSparseFeature
 *            {@code true} to collect features via {@code getSparse}, {@code false} to use
 *            {@code getDense}
 * @return the same instance that was passed in
 * @throws Exception
 *             if annotation lookup, feature extraction, or outcome/weight retrieval fails
 */
private Instance getSingleInstanceUnit(Instance anInstance, JCas aJCas,
        boolean supportsSparseFeature)
    throws Exception
{
    int jcasId = JCasUtil.selectSingle(aJCas, JCasId.class).getId();
    TextClassificationTarget unit = JCasUtil.selectSingle(aJCas,
            TextClassificationTarget.class);

    if (addInstanceId) {
        anInstance.addFeature(InstanceIdFeature.retrieve(aJCas, unit));
    }

    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (supportsSparseFeature) {
            anInstance.addFeatures(getSparse(aJCas, unit, featExt));
        }
        else {
            anInstance.addFeatures(getDense(aJCas, unit, featExt));
        }
    }

    // Extractor-independent attributes: set once, outside the loop.
    anInstance.setOutcomes(getOutcomes(aJCas, unit));
    anInstance.setWeight(getWeight(aJCas, unit));
    anInstance.setJcasId(jcasId);

    return anInstance;
}
/**
 * Restricts the features of the given instances to those listed in {@code featureMeta}.
 * <p>
 * When {@code isTesting} is not set, the input list is returned untouched. Otherwise the
 * feature list of every instance is replaced (the instance object itself is modified) by the
 * features whose name is contained in {@code featureMeta.getFeatureNames()}, and the instances
 * are collected, in their original order, into a new list.
 *
 * @param instances
 *            the instances to filter
 * @return the input list itself when {@code isTesting} is not set, otherwise a new list
 *         holding the same, now filtered, instance objects
 */
private List<Instance> enforceMatchingFeatures(List<Instance> instances)
{
    if (!isTesting) {
        return instances;
    }

    List<Instance> result = new ArrayList<>();
    for (Instance inst : instances) {
        List<Feature> retained = new ArrayList<>();
        for (Feature f : inst.getFeatures()) {
            String featureName = f.getName();
            if (featureMeta.getFeatureNames().contains(featureName)) {
                retained.add(f);
            }
        }
        inst.setFeatures(retained);
        result.add(inst);
    }
    return result;
}
// Read the outcome of the instance; for regression it is parsed as a double and set as the class
// value of wekaInstance. The instance weight is copied to wekaInstance when applyWeighting is set.
// NOTE(review): the closing brace of the isRegression branch is not visible here - confirm the
// weighting step is not meant to be limited to regression.
String outcome = inst.getOutcome(); if (isRegression) { wekaInstance.setClassValue(Double.parseDouble(outcome)); Double instanceWeight = inst.getWeight(); if (applyWeighting) { wekaInstance.setWeight(instanceWeight);
// Visit every instance of the store by index and read its outcome; the branch that follows
// checks whether outcome2instanceOffset already holds an entry for that outcome.
for (int i=0; i<store.getNumberOfInstances(); i++) { Instance instance = store.getInstance(i); String outcome = instance.getOutcome(); List<Integer> offsets; if (outcome2instanceOffset.containsKey(outcome)) {
/**
 * Produces the {@code qid} field for the given instance.
 * <p>
 * The emitted sequence id must continuously increase; TC's id is Cas-relative and restarts at
 * zero for every new Cas. The JCas id of the instance is therefore compared with the id
 * remembered in {@code lastId}, and {@code currSeqId} is advanced whenever the two differ.
 * <p>
 * {@code lastId} is updated to the new JCas id when a change is detected. Without that update
 * it would keep the id of the very first instance, and every later instance from any other
 * JCas would advance {@code currSeqId} again.
 *
 * @param instance
 *            the instance that is about to be written
 * @return {@code TAB} followed by {@code "qid:" + currSeqId}
 */
@Override
protected String injectSequenceId(Instance instance)
{
    int jcasId = instance.getJcasId();

    // A negative lastId means that no id has been recorded yet.
    if (lastId < 0) {
        lastId = jcasId;
    }

    if (lastId > -1 && lastId != jcasId) {
        currSeqId++;
        // Remember the id just seen so the counter advances once per change only.
        lastId = jcasId;
    }

    return TAB + "qid:" + currSeqId;
}