public String toStringArff() { StringBuilder text = new StringBuilder(); text.append(ARFF_RELATION).append(" ") .append(Utils.quote(getRelationName())).append("\n\n"); for (int i = 0; i < numAttributes(); i++) { text.append(attribute(i).toString()).append("\n"); } text.append("\n").append(ARFF_DATA).append("\n"); text.append(toString()); return text.toString(); } }
/** * Completes the hashset with attributes indices. */ private void computeAttributesIndices() { this.hsAttributesIndices = new HashMap<String, Integer>(); // iterates through all existing attributes // and sets an unique identifier for each one of them for (int i = 0; i < this.numAttributes(); i++) { hsAttributesIndices.put(this.attribute(i).name(), i); } }
/** * Returns the dataset as a string in ARFF format. Strings are quoted if * they contain whitespace characters, or if they are a question mark. * * @return the dataset in ARFF format as a string */ public String toString() { StringBuffer text = new StringBuffer(); text.append(ARFF_RELATION).append(" "). append(Utils.quote(this.instanceInformation.getRelationName())).append("\n\n"); for (int i = 0; i < numAttributes(); i++) { text.append(attribute(i).toString()).append("\n"); } text.append("\n").append(ARFF_DATA).append("\n"); text.append(stringWithoutHeader()); return text.toString(); }
continue; result.append(m_headerInfo.attribute(w).name()).append("\t"); for (int c = 0; c < m_numClasses; c++) { double value = m_wordTotalForClass[c].getValue(w);
/** * Computes the utility of a single child with respect to this node * * @param child the child for which to compute the utility * @return the utility of the child with respect to this node * @throws Exception if something goes wrong */ protected double categoryUtilityChild(CNode child) {//throws Exception { double sum = 0; for (int i = 0; i < m_numAttributes; i++) { if (m_clusterInstances.attribute(i).isNominal()) { for (int j = 0; j < m_clusterInstances.attribute(i).numValues(); j++) { double x = child.getProbability(i, j); double y = getProbability(i, j); sum += (x * x) - (y * y); } } else { // numeric attribute sum += ((m_normal / child.getStandardDev(i)) - (m_normal / getStandardDev(i))); } } return (child.m_totalInstances / m_totalInstances) * sum; }
/** * Weka instances information. * * @param instances the instances * @return the weka.core. instances */ public weka.core.Instances wekaInstancesInformation(Instances instances) { weka.core.Instances wekaInstances; ArrayList<weka.core.Attribute> attInfo = new ArrayList<weka.core.Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { attInfo.add(wekaAttribute(i, instances.attribute(i))); } wekaInstances = new weka.core.Instances(instances.getRelationName(), attInfo, 0); if (instances.instanceInformation.numOutputAttributes() == 1){ wekaInstances.setClassIndex(instances.classIndex()); } else { //Assign a classIndex to a MultiLabel instance for compatibility reasons wekaInstances.setClassIndex(instances.instanceInformation.numOutputAttributes()-1); //instances.numAttributes()-1); //Last } //System.out.println(attInfo.get(3).name()); //System.out.println(attInfo.get(3).isNominal()); //System.out.println(wekaInstances.attribute(3).name()); //System.out.println(wekaInstances.attribute(3).isNominal()); return wekaInstances; }
public void initHeader(Instances dataset) { int numLabels = this.numOldLabelsOption.getValue(); Attribute target = dataset.classAttribute(); List<String> possibleValues = new ArrayList<String>(); int n = target.numValues(); for (int i = 0; i < n; i++) { possibleValues.add(target.value(i)); } ArrayList<Attribute> attrs = new ArrayList<Attribute>(numLabels + dataset.numAttributes()); for (int i = 0; i < numLabels; i++) { attrs.add(new Attribute(target.name() + "_" + i, possibleValues)); } for (int i = 0; i < dataset.numAttributes(); i++) { Attribute attr = dataset.attribute(i); Attribute newAttribute = null; if (attr.isNominal() == true) { newAttribute = new Attribute(attr.name(), attr.getAttributeValues()); } if (attr.isNumeric() == true) { newAttribute = new Attribute(attr.name()); } if (newAttribute != null) { attrs.add(newAttribute); } } this.header = new Instances("extended_" + dataset.getRelationName(), attrs, 0); this.header.setClassIndex(numLabels + dataset.classIndex()); }
for (int i = 0; i < m_numAttributes; i++) { m_attStats[i] = new AttributeStats(); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts = new int[m_clusterInstances.attribute(i).numValues()]; } else { m_attStats[i].numericStats = new Stats(); if (!updateInstance.isMissing(i)) { double value = updateInstance.value(i); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts[(int) value] += (delete) ? (-1.0 * updateInstance.weight())
if (m_Data.attribute(index).isNominal() == true){ if (isMissingValue(val1) || isMissingValue(val2) ||