@Override public Object analyze(String q) { log.debug("String question: " + q); String[] split = q.split("\\s+"); int indexOfValue = attribute.indexOfValue(split[0]); if (indexOfValue < 0) { // catch the auxiliary verbs and commands as one nominal dimension if (split[0].matches(AuxVerb)) { indexOfValue = attribute.indexOfValue(AuxVerb); } else if (split[0].matches(Commands)) { indexOfValue = attribute.indexOfValue(Commands); } else { indexOfValue = attribute.indexOfValue("Misc"); } } return attribute.value(indexOfValue); }
/**
 * Gives a new name to one value of a nominal (or string) attribute. Only this
 * dataset is affected by the change.
 *
 * @param att the attribute whose value is renamed
 * @param val the current name of the value
 * @param name the new name for the value
 * @throws IllegalArgumentException if {@code att.indexOfValue(val)} reports
 *           {@code -1} for {@code val}
 */
public void renameAttributeValue(Attribute att, String val, String name) {
  final int valueIndex = att.indexOfValue(val);
  if (valueIndex != -1) {
    // delegate to the index-based overload
    renameAttributeValue(att.index(), valueIndex, name);
    return;
  }
  throw new IllegalArgumentException(val + " not found");
}
/**
 * Renames a single value of a nominal (or string) attribute, looked up by its
 * current name. The change is local to this dataset.
 *
 * @param att the attribute holding the value
 * @param val the value's current name
 * @param name the name the value should carry afterwards
 * @throws IllegalArgumentException if {@code att.indexOfValue(val)} returns
 *           {@code -1}
 */
public void renameAttributeValue(Attribute att, String val, String name) {
  final int position = att.indexOfValue(val);
  final boolean missing = (position == -1);
  if (missing) {
    throw new IllegalArgumentException(val + " not found");
  }
  // hand over to the (attribute index, value index) variant
  final int attributeIndex = att.index();
  renameAttributeValue(attributeIndex, position, name);
}
if (classAtt == null || classAtt.indexOfValue(m_classLabel) < 0) { throw new Exception( "[ScoreDistribution] class attribute not set or class value " m_classLabelIndex = classAtt.indexOfValue(m_classLabel);
// Builds an Instance laid out like trainHeader: every attribute except the last
// is filled from the caller's source values, and the last slot (presumably the
// class attribute -- confirm against trainHeader) is left missing.
int numAttributes = trainHeader.numAttributes();
double[] vals = new double[numAttributes];

for (int i = 0; i < numAttributes - 1; i++) {
    Attribute attribute = trainHeader.attribute(i);

    // Exactly one of the two sources applies per attribute; the original
    // declared "double value" twice in the same scope, which does not compile.
    double value;
    if (attribute.isNominal() || attribute.isString()) {
        // nominal/string cells store the index of the label
        value = attribute.indexOfValue(myStrVal); // get myStrVal from your source
    } else {
        value = myNumericVal; // get myNumericVal from your source
    }

    vals[i] = value;
}

// Valid indices are 0..numAttributes-1; vals[numAttributes] was one past the
// end of the array and always threw ArrayIndexOutOfBoundsException.
vals[numAttributes - 1] = Instance.missingValue();

Instance instance = new Instance(1.0, vals);
instance.setDataset(trainHeader);
return instance;
/**
 * Applies this field's missing value treatment to an incoming value.
 * <p>
 * The value is returned unchanged when the treatment method is
 * {@code Missing.ASIS} or the value is not missing. Otherwise the nominal
 * replacement (as the index of that label in the mining schema attribute) is
 * returned if one is set, and the numeric replacement if not.
 *
 * @param value the incoming value to apply the treatment to
 * @return the value after applying the missing value treatment (if any)
 * @throws Exception if the nominal replacement label is not a value of the
 *           mining schema attribute
 */
public double applyMissingValueTreatment(double value) throws Exception {
  final boolean leaveAsIs = m_missingValueTreatmentMethod == Missing.ASIS
    || !Utils.isMissingValue(value);
  if (leaveAsIs) {
    return value;
  }

  if (m_missingValueReplacementNominal == null) {
    return m_missingValueReplacementNumeric;
  }

  final Attribute schemaAtt = m_miningSchemaI.attribute(m_index);
  final int replacementIndex =
    schemaAtt.indexOfValue(m_missingValueReplacementNominal);
  if (replacementIndex < 0) {
    throw new Exception("[MiningSchema] Nominal missing value replacement value doesn't "
      + "exist in the mining schema Instances!");
  }
  return replacementIndex;
}
/**
 * Runs the configured missing value treatment for this field on one value.
 * <p>
 * Nothing happens unless the treatment method differs from
 * {@code Missing.ASIS} and the value is missing. In that case the result is the
 * index of the nominal replacement label within the mining schema attribute
 * when a nominal replacement is configured, or the numeric replacement
 * otherwise.
 *
 * @param value the incoming value to apply the treatment to
 * @return the value after applying the missing value treatment (if any)
 * @throws Exception if the nominal replacement label cannot be found in the
 *           mining schema attribute
 */
public double applyMissingValueTreatment(double value) throws Exception {
  if (m_missingValueTreatmentMethod == Missing.ASIS) {
    return value;
  }
  if (!Utils.isMissingValue(value)) {
    return value;
  }

  double treated;
  if (m_missingValueReplacementNominal == null) {
    treated = m_missingValueReplacementNumeric;
  } else {
    int labelIndex = m_miningSchemaI.attribute(m_index)
      .indexOfValue(m_missingValueReplacementNominal);
    if (labelIndex < 0) {
      throw new Exception("[MiningSchema] Nominal missing value replacement value doesn't "
        + "exist in the mining schema Instances!");
    }
    treated = labelIndex;
  }
  return treated;
}
/** * Creates a Weka Instance object * @param values feature vector * @param dataset example weka dataset for feature counts/types * @return */ public static Instance makeCompatible(Vector<String> values, Instances dataset) { Instance retInstance=new Instance(values.size()+1); // dataset knows about possible nominal values retInstance.setDataset(dataset); for (int i = 0; i < values.size(); i++) { // check for existence of nominal values, otherwise convert to "missing": unknown vocabulary as features if (values.get(i)==null) {retInstance.setMissing(i);} else if (dataset.attribute(i).indexOfValue(values.get(i))>=0) {// ((possValues[i]!=null)&&(possValues[i].containsKey(values[i]))) { retInstance.setValue(i, values.get(i)); } else {retInstance.setMissing(i);} } return retInstance; }
preds[classAtt.indexOfValue(m_scoreString)] = m_confidence; } else { preds[classAtt.indexOfValue(m_scoreString)] = 1.0;
/**
 * Returns the content of one column of the wrapped tuple as a double.
 * <p>
 * Numeric columns are parsed from the string form of the cell, string columns
 * yield the index of the cell's text within the matching attribute, and date
 * columns yield the date's {@code getTime()} value. Any other column type
 * yields {@code -1}.
 *
 * @param attIndex index of the column / attribute to read
 * @return the cell encoded as a double, or {@code -1} for unsupported types
 */
@Override
public double value(int attIndex) {
  final Class<?> columnType = tuple.getSchema().getColumnType(attIndex);

  if (ObviousWekaUtils.isNumeric(columnType)) {
    return Double.parseDouble(tuple.get(attIndex).toString());
  }
  if (ObviousWekaUtils.isString(columnType)) {
    return attribute(attIndex).indexOfValue(tuple.getString(attIndex));
  }
  if (ObviousWekaUtils.isDate(columnType)) {
    return tuple.getDate(attIndex).getTime();
  }
  return -1;
}
/** * Generates a attribute evaluator. Has to initialize all fields of the * evaluator that are not being set via options. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ @Override public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); if (m_IRClassValS != null && m_IRClassValS.length() > 0) { // try to parse as a number first try { m_IRClassVal = Integer.parseInt(m_IRClassValS); // make zero-based m_IRClassVal--; } catch (NumberFormatException e) { // now try as a named class label m_IRClassVal = m_trainInstances.classAttribute().indexOfValue(m_IRClassValS); } } }
/** * Generates a attribute evaluator. Has to initialize all fields of the * evaluator that are not being set via options. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ @Override public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; m_classIndex = m_trainInstances.classIndex(); m_numAttribs = m_trainInstances.numAttributes(); if (m_IRClassValS != null && m_IRClassValS.length() > 0) { // try to parse as a number first try { m_IRClassVal = Integer.parseInt(m_IRClassValS); // make zero-based m_IRClassVal--; } catch (NumberFormatException e) { // now try as a named class label m_IRClassVal = m_trainInstances.classAttribute().indexOfValue(m_IRClassValS); } } }
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { double[] confidences = new double[numLabels]; boolean[] bipartition = new boolean[numLabels]; Instance newInstance = pt6Trans.transformInstance(instance); //calculate confidences //debug(instance.toString()); for (int i = 0; i < numLabels; i++) { newInstance.setDataset(transformed); newInstance.setValue(newInstance.numAttributes() - 2, instance.dataset().attribute(labelIndices[i]).name()); //debug(newInstance.toString()); double[] temp = baseClassifier.distributionForInstance(newInstance); //debug(temp.toString()); confidences[i] = temp[transformed.classAttribute().indexOfValue("1")]; //debug("" + confidences[i]); bipartition[i] = temp[transformed.classAttribute().indexOfValue("1")] >= temp[transformed.classAttribute().indexOfValue("0")] ? true : false; //debug("" + bipartition[i]); } MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences); return mlo; } }
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception { boolean[] bipartition = new boolean[numLabels]; double[] confidences = new double[numLabels]; Instance tempInstance = DataUtils.createInstance(instance, instance.weight(), instance.toDoubleArray()); for (int counter = 0; counter < numLabels; counter++) { double distribution[]; try { distribution = ensemble[counter].distributionForInstance(tempInstance); } catch (Exception e) { System.out.println(e); return null; } int maxIndex = (distribution[0] > distribution[1]) ? 0 : 1; // Ensure correct predictions both for class values {0,1} and {1,0} Attribute classAttribute = ensemble[counter].getFilter().getOutputFormat().classAttribute(); bipartition[chain[counter]] = (classAttribute.value(maxIndex).equals("1")) ? true : false; // The confidence of the label being equal to 1 confidences[chain[counter]] = distribution[classAttribute.indexOfValue("1")]; tempInstance.setValue(labelIndices[chain[counter]], maxIndex); } MultiLabelOutput mlo = new MultiLabelOutput(bipartition, confidences); return mlo; } }
/**
 * Generates one example of the dataset.
 * <p>
 * Each of the {@code getNumAttributes()} leading values is a Gaussian draw
 * scaled by {@code stdDev} and shifted by the matching entry of
 * {@code center}. When the class flag is set, one extra slot is allocated and
 * the index of {@code cName} within the class attribute is stored at the
 * format's class index.
 *
 * @param format the dataset format
 * @param randomG the random number generator
 * @param stdDev the standard deviation to use
 * @param center the centers
 * @param cName the class value
 * @return the generated instance, attached to {@code format}
 */
private Instance generateInstance(Instances format, Random randomG,
  double stdDev, double[] center, String cName) {
  final int featureCount = getNumAttributes();
  final boolean withClass = getClassFlag();

  final double[] values = new double[withClass ? featureCount + 1 : featureCount];
  for (int att = 0; att < featureCount; att++) {
    values[att] = randomG.nextGaussian() * stdDev + center[att];
  }
  if (withClass) {
    values[format.classIndex()] = format.classAttribute().indexOfValue(cName);
  }

  final Instance generated = new DenseInstance(1.0, values);
  generated.setDataset(format);
  return generated;
}
/**
 * Produces a single example for the dataset.
 * <p>
 * The first {@code getNumAttributes()} values are computed as
 * {@code nextGaussian() * stdDev + center[i]}. If the class flag is set the
 * value array is one element longer and the slot at the format's class index
 * receives the index of {@code cName} in the class attribute.
 *
 * @param format the dataset format
 * @param randomG the random number generator
 * @param stdDev the standard deviation to use
 * @param center the centers
 * @param cName the class value
 * @return the generated instance, with {@code format} set as its dataset
 */
private Instance generateInstance(Instances format, Random randomG,
  double stdDev, double[] center, String cName) {
  final int numFeatures = getNumAttributes();
  final boolean classWanted = getClassFlag();

  int length = numFeatures;
  if (classWanted) {
    length += 1;
  }
  final double[] row = new double[length];

  int i = 0;
  while (i < numFeatures) {
    row[i] = randomG.nextGaussian() * stdDev + center[i];
    i++;
  }

  if (classWanted) {
    final int classValue = format.classAttribute().indexOfValue(cName);
    row[format.classIndex()] = classValue;
  }

  final Instance result = new DenseInstance(1.0, row);
  result.setDataset(format);
  return result;
}
@Override public double[] distributionForInstance(Instance x) throws Exception { //return mt.distributionForInstance(x); int L = x.classIndex(); double y[] = new double[L*2]; // Convert (x,y) to (x_,y_) int L_ = m_InstancesTemplate.classIndex(); // == L-NUM Instance x_ = MLUtils.setTemplate(x,f.getTemplate(),m_InstancesTemplate); // Get a classification y_ = h(x_) double y_[] = null; try { y_ = ((ProblemTransformationMethod)m_Classifier).distributionForInstance(x_); } catch(Exception e) { System.err.println("EXCEPTION !!! setting to "+Arrays.toString(y_)); return y; //e.printStackTrace(); //System.exit(1); } // For each super node ... for(int j = 0; j < L_; j++) { int idxs[] = SuperNodeFilter.decodeClasses(m_InstancesTemplate.attribute(j).name()); // 3,4 (partition) String vals[] = SuperNodeFilter.decodeValue(m_InstancesTemplate.attribute(j).value((int)Math.round(y_[j]))); // 1,0 (clases) for(int i = 0; i < idxs.length; i++) { y[idxs[i]] = x.dataset().attribute(idxs[i]).indexOfValue(vals[i]); // y_j = v y[idxs[i]+L] = y_[j+L_]; // P(Y_j = v), hence, MUST be a multi-target classifier } } return y; }
@Override public double[] distributionForInstance(Instance x) throws Exception { //return mt.distributionForInstance(x); int L = x.classIndex(); double y[] = new double[L*2]; // Convert (x,y) to (x_,y_) int L_ = m_InstancesTemplate.classIndex(); // == L-NUM Instance x_ = MLUtils.setTemplate(x,f.getTemplate(),m_InstancesTemplate); // Get a classification y_ = h(x_) double y_[] = null; try { y_ = ((ProblemTransformationMethod)m_Classifier).distributionForInstance(x_); } catch(Exception e) { System.err.println("EXCEPTION !!! setting to "+Arrays.toString(y_)); return y; //e.printStackTrace(); //System.exit(1); } // For each super node ... for(int j = 0; j < L_; j++) { int idxs[] = SuperNodeFilter.decodeClasses(m_InstancesTemplate.attribute(j).name()); // 3,4 (partition) String vals[] = SuperNodeFilter.decodeValue(m_InstancesTemplate.attribute(j).value((int)Math.round(y_[j]))); // 1,0 (clases) for(int i = 0; i < idxs.length; i++) { y[idxs[i]] = x.dataset().attribute(idxs[i]).indexOfValue(vals[i]); // y_j = v y[idxs[i]+L] = y_[j+L_]; // P(Y_j = v), hence, MUST be a multi-target classifier } } return y; }
/** * Input an instance for filtering. The instance is processed and made * available for output immediately. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input format has been set. */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } Attribute att = outputFormatPeek().attribute(m_AttIndex.getIndex()); Instance newInstance = (Instance) instance.copy(); if (!instance.isMissing(m_AttIndex.getIndex())) { int index = att.indexOfValue(instance.stringValue(m_AttIndex.getIndex())); if (index == -1) { newInstance.setValue(m_AttIndex.getIndex(), att.indexOfValue(m_Label)); } else { newInstance.setValue(m_AttIndex.getIndex(), index); } } push(newInstance, false); // No need to copy instance return true; }
/**
 * Re-creates a source instance in the layout of the output dataset.
 * <p>
 * All output values start as missing. Every non-missing source value is then
 * copied to the output attribute with the same name: numeric values are copied
 * as they are, string and relational values are added to the output attribute
 * and stored as the value returned by that call, and nominal values are stored
 * as the index of the label in the output attribute.
 *
 * @param output the dataset defining the output structure
 * @param source the instance to convert
 * @return a new dense instance with the source's weight, attached to
 *         {@code output}
 */
private Instance makeOutputInstance(Instances output, Instance source) {
  final double[] converted = new double[output.numAttributes()];
  for (int slot = 0; slot < converted.length; slot++) {
    converted[slot] = Utils.missingValue();
  }

  final int sourceCount = source.numAttributes();
  for (int i = 0; i < sourceCount; i++) {
    if (source.isMissing(i)) {
      continue; // stays missing in the output as well
    }

    final Attribute srcAtt = source.attribute(i);
    // attributes are matched by name, not by position
    final int target = output.attribute(srcAtt.name()).index();
    final Attribute outAtt = output.attribute(target);

    if (srcAtt.isNumeric()) {
      converted[target] = source.value(srcAtt);
    } else if (srcAtt.isString()) {
      converted[target] = outAtt.addStringValue(source.stringValue(srcAtt));
    } else if (srcAtt.isRelationValued()) {
      converted[target] = outAtt.addRelation(source.relationalValue(srcAtt));
    } else if (srcAtt.isNominal()) {
      converted[target] = outAtt.indexOfValue(source.stringValue(srcAtt));
    }
  }

  final Instance result = new DenseInstance(source.weight(), converted);
  result.setDataset(output);
  return result;
}