/**
 * Translates a Weka label index into the resorted label index.
 *
 * <p>Dl4j sorts nominal labels during training time; the mapping from the original Weka
 * label index to the resorted index is read from {@code labelSortIndex}.
 *
 * @param j Original index
 * @param insts Test dataset
 * @return {@code labelSortIndex[j]} if the class attribute of {@code insts} is nominal,
 *     otherwise {@code j} unchanged
 */
protected int fixLabelIndexIfNominal(int j, Instances insts) {
  final boolean nominalClass = insts.classAttribute().isNominal();
  return nominalClass ? labelSortIndex[j] : j;
}
/** * Constructor. * * @param att the attribute that backs this item. * @param valueIndex the index of the value for this item. * @throws Exception if the backing attribute is not binary or unary. */ public BinaryItem(Attribute att, int valueIndex) throws Exception { super(att, valueIndex); if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) { throw new Exception("BinaryItem must be constructed using a nominal attribute" + " with at most 2 values!"); } }
/**
 * Widens the stored per-attribute minimum and maximum so that they include the values of
 * the given instance.
 *
 * <p>The class attribute and all nominal attributes are skipped. Only {@code m_MinArray}
 * and {@code m_MaxArray} are written.
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
  final int attCount = m_Train.numAttributes();
  for (int att = 0; att < attCount; att++) {
    final boolean skipped =
        m_Train.classIndex() == att || m_Train.attribute(att).isNominal();
    if (!skipped) {
      final double current = instance.value(att);
      if (current < m_MinArray[att]) {
        m_MinArray[att] = current;
      }
      if (current > m_MaxArray[att]) {
        m_MaxArray[att] = current;
      }
    }
  }
}
/**
 * Returns the minsAndMaxs of the index.th subset.
 *
 * <p>Every row of {@code minsAndMaxs} is copied into a fresh array. For the split attribute
 * ({@code m_attIndex}) the copy is then adjusted: if that attribute is nominal, entry
 * {@code [1]} is set to 1; otherwise entry {@code [1 - index]} is set to
 * {@code m_splitPoint}.
 *
 * @param data the dataset whose attribute count and attribute types are consulted
 * @param minsAndMaxs the current bounds, one two-element row per attribute
 *     (presumably {@code [0]} is the minimum and {@code [1]} the maximum)
 * @param index the subset index
 * @return a newly allocated array holding the adjusted bounds
 */
public final double[][] minsAndMaxs(Instances data, double[][] minsAndMaxs, int index) {
  final int attCount = data.numAttributes();
  final double[][] adjusted = new double[attCount][2];

  for (int att = 0; att < attCount; att++) {
    adjusted[att][0] = minsAndMaxs[att][0];
    adjusted[att][1] = minsAndMaxs[att][1];

    if (att != m_attIndex) {
      continue;
    }

    // Only the split attribute gets its bounds rewritten.
    if (data.attribute(att).isNominal()) {
      adjusted[att][1] = 1;
    } else {
      adjusted[att][1 - index] = m_splitPoint;
    }
  }

  return adjusted;
}
/**
 * Adds more colours to the colour list.
 *
 * <p>Does nothing unless the colouring attribute ({@code m_cIndex}) is nominal. Otherwise
 * colours are appended until the list holds one entry per attribute value. Each new colour
 * starts from {@code m_DefaultColors[i % 10]} and is brightened {@code (i / 10) * 2} times;
 * when a background colour is set it is additionally passed through
 * {@code Plot2D.checkAgainstBackground}.
 */
private void extendColourMap() {
  if (!m_plotInstances.attribute(m_cIndex).isNominal()) {
    return;
  }

  final int wanted = m_plotInstances.attribute(m_cIndex).numValues();
  int next = m_colorList.size();
  while (next < wanted) {
    Color shade = m_DefaultColors[next % 10];

    final int brightenSteps = (next / 10) * 2;
    for (int step = 0; step < brightenSteps; step++) {
      shade = shade.brighter();
    }

    if (m_backgroundColor != null) {
      shade = Plot2D.checkAgainstBackground(shade, m_backgroundColor);
    }

    m_colorList.add(shade);
    next++;
  }
}
@Override public MTask call() { for (int l = m_start; l < m_end; l++) { Instance in = m_inst.instance(l); for (int i = 0; i < m_num_clusters; i++) { for (int j = 0; j < m_num_attribs; j++) { if (m_inst.attribute(j).isNominal()) { m_taskModel[i][j].addValue(in.value(j), in.weight() * m_weights[l][i]); } else { m_taskModelNormal[i][j][0] += (in.value(j) * in.weight() * m_weights[l][i]); m_taskModelNormal[i][j][2] += in.weight() * m_weights[l][i]; m_taskModelNormal[i][j][1] += (in.value(j) * in.value(j) * in.weight() * m_weights[l][i]); } } } } // completedMTask(this, true); return this; } }
/**
 * Call this function to set what this end unit represents.
 *
 * <p>The link is reset to 0 when {@code val} is negative, when an input unit is given a
 * value greater than the number of attributes, or when an output unit for a nominal class
 * is given a value greater than the number of class values. In every other case the link is
 * set to {@code val}.
 *
 * @param input True if this unit is used for entering an attribute, False
 *          if it's used for determining a class value.
 * @param val The attribute number or class type that this unit represents.
 *          (for nominal attributes).
 * @throws Exception declared by the signature; nothing in this body throws it explicitly
 */
public void setLink(boolean input, int val) throws Exception {
  m_input = input;
  m_type = input ? PURE_INPUT : PURE_OUTPUT;

  final boolean rejected;
  if (val < 0) {
    rejected = true;
  } else if (input) {
    rejected = val > m_instances.numAttributes();
  } else {
    rejected = m_instances.classAttribute().isNominal()
        && val > m_instances.classAttribute().numValues();
  }

  m_link = rejected ? 0 : val;
}
/** * Make sure that the filter binarizes the index we specify. */ public void testSpecificIndex() { int att1 = m_Instances.attribute("NumericAtt1").index(); int att2 = m_Instances.attribute("NumericAtt2").index(); // Set the attribute index to point to NumericAtt1, so we expect that only this // attribute will be binarized. ((NumericToBinary)m_Filter).setAttributeIndices( String.valueOf(att1+1) ); Instances result = useFilter(); assertTrue("NumericAtt1 should be nominal", result.attribute(att1).isNominal()); assertTrue("NumericAtt2 should be numeric", result.attribute(att2).isNumeric()); }
/**
 * Returns the labels of the model's class attribute, building the list on first use.
 *
 * <p>The list is only created when the class attribute of {@code m_modelHeader} is nominal
 * and no list has been stored yet.
 *
 * @return {@code null} if there is no model header; otherwise the stored label list, which
 *     is still {@code null} if it was never built
 */
@Override
public List<String> getPredictionLabels() {
  if (m_modelHeader == null) {
    return null;
  }

  final boolean mustBuild =
      m_modelHeader.classAttribute().isNominal() && m_predictionLabels == null;
  if (mustBuild) {
    m_predictionLabels = new ArrayList<String>();
    final int labelCount = m_modelHeader.classAttribute().numValues();
    for (int k = 0; k < labelCount; k++) {
      m_predictionLabels.add(m_modelHeader.classAttribute().value(k));
    }
  }

  return m_predictionLabels;
}
/**
 * Returns the value stored at the given row and column.
 *
 * <ul>
 *   <li>date attributes: the value formatted with {@code Attribute.formatDate}</li>
 *   <li>other numeric attributes: the raw value, boxed as a {@code Double}</li>
 *   <li>nominal and string attributes: the string value</li>
 * </ul>
 *
 * @param rowId index of the instance (row)
 * @param col index of the attribute (column)
 * @return the cell value, or {@code null} if {@code isValueValid(rowId, col)} is false or
 *     the attribute type is none of the above
 */
@Override
public Object getValue(int rowId, int col) {
  if (!isValueValid(rowId, col)) {
    return null;
  }

  Attribute att = instances.attribute(col);

  // The date test must come first: Weka's Attribute.isNumeric() is also true for date
  // attributes, so testing isNumeric() first made the date branch unreachable and date
  // cells came back as raw doubles.
  if (att.isDate()) {
    double dateValue = instances.instance(rowId).value(col);
    return att.formatDate(dateValue);
  }
  if (att.isNumeric()) {
    // Read the single cell instead of materialising the whole column as an array.
    return instances.instance(rowId).value(col);
  }
  if (att.isNominal() || att.isString()) {
    return instances.instance(rowId).stringValue(col);
  }
  return null;
}
/**
 * Determines the Java class used to represent the values of a Weka attribute.
 *
 * @param att the attribute to inspect
 * @return {@code Date.class} for date attributes, {@code Number.class} for other numeric
 *     attributes, and {@code String.class} for everything else (including nominal and
 *     string attributes)
 */
private Class<?> checkClass(Attribute att) {
  if (att.isDate()) {
    return Date.class;
  }
  if (att.isNumeric()) {
    return Number.class;
  }
  // Nominal, string and any remaining attribute type all map to String.
  return String.class;
}
/**
 * IsMT - see if dataset D is multi-dimensional (else only multi-label).
 *
 * <p>Inspects the attributes in front of the class index. A nominal attribute with more
 * than two values makes the dataset multi-dimensional; a non-nominal attribute only
 * triggers a diagnostic line on {@code System.err}.
 *
 * @param D data
 * @return true iff one of the inspected attributes is nominal with more than two values
 */
public static boolean isMT(Instances D) {
  final int L = D.classIndex();
  for (int j = 0; j < L; j++) {
    if (!D.attribute(j).isNominal()) {
      System.err.println("wtf?");
      continue;
    }
    if (D.attribute(j).numValues() > 2) {
      return true;
    }
  }
  return false;
}
/** Need to remove non-nominal attributes, set class index */ protected void setUp() throws Exception { super.setUp(); // class index m_Instances.setClassIndex(1); // only nominal attributes int i = 0; while (i < m_Instances.numAttributes()) { if (!m_Instances.attribute(i).isNominal()) m_Instances.deleteAttributeAt(i); else i++; } m_Comparator = new InstanceComparator(true); }
/**
 * New probability estimators for an iteration.
 *
 * <p>For every cluster/attribute pair the current model is saved as the previous model and
 * then reset: nominal attributes get a fresh {@code DiscreteEstimator} sized to the
 * attribute's number of values, all other attributes have their three statistics copied to
 * {@code m_modelNormalPrev} and then zeroed.
 */
private void new_estimators() {
  for (int cluster = 0; cluster < m_num_clusters; cluster++) {
    for (int att = 0; att < m_num_attribs; att++) {
      if (m_theInstances.attribute(att).isNominal()) {
        m_modelPrev[cluster][att] = m_model[cluster][att];
        m_model[cluster][att] =
            new DiscreteEstimator(m_theInstances.attribute(att).numValues(), true);
        continue;
      }

      // Save the three statistics first, then clear them.
      for (int slot = 0; slot < 3; slot++) {
        m_modelNormalPrev[cluster][att][slot] = m_modelNormal[cluster][att][slot];
      }
      for (int slot = 0; slot < 3; slot++) {
        m_modelNormal[cluster][att][slot] = 0.0;
      }
    }
  }
}
/** * Sorts the instances based on an attribute, using a stable sort. For numeric attributes, * instances are sorted in ascending order. For nominal attributes, instances * are sorted based on the attribute label ordering specified in the header. * Instances with missing values for the attribute are placed at the end of * the dataset. * * @param attIndex the attribute's index (index starts with 0) */ public void stableSort(int attIndex) { if (!attribute(attIndex).isNominal()) { // Use quicksort from Utils class for sorting double[] vals = new double[numInstances()]; Instance[] backup = new Instance[vals.length]; for (int i = 0; i < vals.length; i++) { Instance inst = instance(i); backup[i] = inst; vals[i] = inst.value(attIndex); } int[] sortOrder = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { m_Instances.set(i, backup[sortOrder[i]]); } } else { sortBasedOnNominalAttribute(attIndex); } }
/**
 * Prints a classification.
 *
 * <p>For a nominal class the label of the most probable class (the index returned by
 * {@code Utils.maxIndex}) is printed; otherwise the first element of the distribution is
 * printed. The result always ends with a newline.
 *
 * @param dist the class distribution
 * @return the classification as a string
 * @throws Exception if the classification can't be printed
 */
protected String printClass(double[] dist) throws Exception {
  final String rendered;
  if (m_Instances.classAttribute().isNominal()) {
    rendered = String.valueOf(m_Instances.classAttribute().value(Utils.maxIndex(dist)));
  } else {
    rendered = String.valueOf(dist[0]);
  }
  return rendered + "\n";
}
/**
 * Call this function to set what this end unit represents.
 *
 * <p>The requested value is replaced by 0 when it is negative, when an input unit is given
 * a value greater than the number of attributes, or when an output unit for a nominal class
 * is given a value greater than the number of class values.
 *
 * @param input True if this unit is used for entering an attribute, False
 *          if it's used for determining a class value.
 * @param val The attribute number or class type that this unit represents.
 *          (for nominal attributes).
 * @throws Exception declared by the signature; nothing in this body throws it explicitly
 */
public void setLink(boolean input, int val) throws Exception {
  m_input = input;
  if (!input) {
    m_type = PURE_OUTPUT;
  } else {
    m_type = PURE_INPUT;
  }

  // Work on a local so the field is written exactly once, after all checks have run.
  int link = val;
  if (val < 0) {
    link = 0;
  } else if (input) {
    if (val > m_instances.numAttributes()) {
      link = 0;
    }
  } else if (m_instances.classAttribute().isNominal()
      && val > m_instances.classAttribute().numValues()) {
    link = 0;
  }
  m_link = link;
}
/** * Make sure that the filter binarizes the index we specify. */ public void testSpecificIndex() { int att1 = m_Instances.attribute("NumericAtt1").index(); int att2 = m_Instances.attribute("NumericAtt2").index(); // Set the attribute index to point to NumericAtt1, so we expect that only this // attribute will be binarized. ((NumericToBinary)m_Filter).setAttributeIndices( String.valueOf(att1+1) ); Instances result = useFilter(); assertTrue("NumericAtt1 should be nominal", result.attribute(att1).isNominal()); assertTrue("NumericAtt2 should be numeric", result.attribute(att2).isNumeric()); }