/** * generates the XML structure for the header */ protected void headerToXML() { Element root; Element node; Element child; int i; root = m_Document.getDocumentElement(); root.setAttribute(ATT_NAME, validContent(m_Instances.relationName())); root.setAttribute(ATT_VERSION, Version.VERSION); // create "header" node node = m_Document.createElement(TAG_HEADER); root.appendChild(node); // add all attributes child = m_Document.createElement(TAG_ATTRIBUTES); node.appendChild(child); for (i = 0; i < m_Instances.numAttributes(); i++) { addAttribute(child, m_Instances.attribute(i)); } }
/**
 * Creates an empty SAMOA dataset header that mirrors a WEKA dataset.
 * <p>
 * Every WEKA attribute is converted through {@code samoaAttribute}; the
 * relation name and the class index are carried over unchanged.
 *
 * @param instances the WEKA instances whose structure is converted
 * @return the SAMOA instances built from the converted attributes
 */
public Instances samoaInstancesInformation(weka.core.Instances instances) {
  final List<Attribute> converted = new ArrayList<Attribute>();
  for (int attIndex = 0; attIndex < instances.numAttributes(); attIndex++) {
    converted.add(samoaAttribute(attIndex, instances.attribute(attIndex)));
  }

  final Instances header = new Instances(instances.relationName(), converted, 0);
  header.setClassIndex(instances.classIndex());
  return header;
}
/**
 * Stores the given dataset and refreshes the summary labels (relation name,
 * number of instances, number of attributes, sum of weights).
 * <p>
 * When {@code m_showZeroInstancesAsUnknown} is set and the dataset holds no
 * instances, the instance count and the sum of weights are shown as "?".
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {
  m_Instances = inst;

  final String relation = m_Instances.relationName();
  m_RelationNameLab.setText(relation);
  m_RelationNameLab.setToolTipText(relation);

  // an empty dataset may be displayed as "unknown" rather than as zero
  final boolean showAsUnknown =
    m_showZeroInstancesAsUnknown && m_Instances.numInstances() == 0;

  final String countText;
  if (showAsUnknown) {
    countText = "?";
  } else {
    countText = String.valueOf(m_Instances.numInstances());
  }
  m_NumInstancesLab.setText(countText);

  m_NumAttributesLab.setText(String.valueOf(m_Instances.numAttributes()));

  final String weightText;
  if (showAsUnknown) {
    weightText = "?";
  } else {
    weightText = Utils.doubleToString(m_Instances.sumOfWeights(), 3);
  }
  m_sumOfWeightsLab.setText(weightText);
}
/**
 * Sets up the structure for the plot instances: a copy of every attribute of
 * the source data followed by a nominal "Cluster" attribute whose labels are
 * "cluster0", "cluster1", ... (one per cluster reported by the evaluation).
 */
@Override
protected void determineFormat() {
  // nominal labels are zero-based
  final int clusterCount = m_Evaluation.getNumClusters();
  final ArrayList<String> clusterLabels = new ArrayList<String>();
  for (int c = 0; c < clusterCount; c++) {
    clusterLabels.add("cluster" + c);
  }
  final Attribute clusterAttribute = new Attribute("Cluster", clusterLabels);

  // copy the original attributes, then append the cluster attribute last
  final ArrayList<Attribute> structure = new ArrayList<Attribute>();
  for (int a = 0; a < m_Instances.numAttributes(); a++) {
    structure.add((Attribute) m_Instances.attribute(a).copy());
  }
  structure.add(clusterAttribute);

  final String plotName = m_Instances.relationName() + "_clustered";
  m_PlotInstances = new Instances(plotName, structure, m_Instances.numInstances());
}
/** * Applies a filter to reduce the dimension of attributes and reorders them to be used within * Meka * * @param trainData * @param removeFilter * @return a dataset to be used with Meka * @throws Exception */ public static Instances applyAttributeSelectionFilter(Instances trainData, Remove removeFilter) throws Exception { Instances filtered = Filter.useFilter(trainData, removeFilter); filtered.setClassIndex(trainData.classIndex()); // swap attributes to fit MEKA MekaClassAttributes attFilter = new MekaClassAttributes(); attFilter.setAttributeIndices(filtered.numAttributes() - trainData.classIndex() + 1 + "-last"); attFilter.setInputFormat(filtered); filtered = Filter.useFilter(filtered, attFilter); int newClassindex = filtered.classIndex(); filtered.setRelationName(filtered.relationName().replaceAll("\\-C\\s[\\d]+", "-C " + newClassindex)); return filtered; } }
// Cross-validates the classifier currently configured in the editor on
// the dataset, logs the setup, and stores the result in the history.
// Failures are reported through the exception handler, the busy message
// and an error dialog.
@Override
public void run() {
  startBusy("Cross-validating...");
  try {
    final MultiLabelClassifier learner = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
    log(OptionUtils.toCommandLine(learner));
    log("Dataset: " + data.relationName());
    log("Class-index: " + data.classIndex());

    final Result outcome = Evaluation.cvModel(learner, data, m_Folds, m_TOP, m_VOP);

    // payload kept with the history entry: the classifier plus
    // new Instances(data, 0) (presumably a header-only copy - TODO confirm)
    final Object[] payload = new Object[]{learner, new Instances(data, 0)};
    final String shortName = learner.getClass().getName().replace("meka.classifiers.", "");
    addResultToHistory(outcome, payload, shortName);
    finishBusy();
  } catch (Exception e) {
    handleException("Evaluation failed:", e);
    finishBusy("Evaluation failed: " + e);
    JOptionPane.showMessageDialog(
      ClassifyTab.this,
      "Evaluation failed (CV):\n" + e,
      "Error",
      JOptionPane.ERROR_MESSAGE);
  }
}
};
private Instances makeDataSetProbabilities(Instances insts, Instances format, weka.classifiers.Classifier classifier, String relationNameModifier) throws Exception { // adjust structure for InputMappedClassifier (if necessary) if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { format = ((weka.classifiers.misc.InputMappedClassifier) classifier) .getModelHeader(new Instances(format, 0)); } String classifierName = classifier.getClass().getName(); classifierName = classifierName.substring(classifierName.lastIndexOf('.') + 1, classifierName.length()); Instances newInstances = new Instances(insts); for (int i = 0; i < format.classAttribute().numValues(); i++) { weka.filters.unsupervised.attribute.Add addF = new weka.filters.unsupervised.attribute.Add(); addF.setAttributeIndex("last"); addF.setAttributeName(classifierName + "_prob_" + format.classAttribute().value(i)); addF.setInputFormat(newInstances); newInstances = weka.filters.Filter.useFilter(newInstances, addF); } newInstances.setRelationName(insts.relationName() + relationNameModifier); return newInstances; }
/**
 * Adds an "Instance_number" attribute as the first attribute of the plottable
 * instances and fills it with each row's zero-based position.
 * <p>
 * The class index is set to the previous index plus one and the original
 * relation name is restored afterwards. Any exception is printed to the
 * standard error stream and otherwise ignored.
 */
public void addInstanceNumberAttribute() {
  final String savedRelationName = m_plotInstances.relationName();
  final int savedClassIndex = m_plotInstances.classIndex();

  try {
    final Add insertFilter = new Add();
    insertFilter.setAttributeName("Instance_number");
    insertFilter.setAttributeIndex("first");
    insertFilter.setInputFormat(m_plotInstances);
    m_plotInstances = Filter.useFilter(m_plotInstances, insertFilter);

    // the new attribute sits at position 0, so the class moves one to the right
    // NOTE(review): if no class was set (index -1) this selects the new
    // attribute as class - confirm that this is intended
    m_plotInstances.setClassIndex(savedClassIndex + 1);

    int row = 0;
    while (row < m_plotInstances.numInstances()) {
      m_plotInstances.instance(row).setValue(0, row);
      row++;
    }

    m_plotInstances.setRelationName(savedRelationName);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Gets the index of the instance with the closest threshold value to the
 * desired target.
 * <p>
 * The values searched are those of the last attribute of {@code tcurve}
 * (presumably the threshold column - confirm against the generator).
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance that has threshold closest to the target,
 *         or -1 if this could not be found (i.e. wrong relation name, no
 *         data, or a threshold outside [0, 1])
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {
  // only curves produced under the expected relation name are accepted
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }

  final int count = tcurve.numInstances();
  if (count == 0 || threshold < 0 || threshold > 1.0) {
    return -1;
  }
  if (count == 1) {
    // nothing to search: the single instance is the closest one
    return 0;
  }

  final double[] values = tcurve.attributeToDoubleArray(tcurve.numAttributes() - 1);
  final int[] order = Utils.sort(values);
  return binarySearch(order, values, threshold);
}
/** * Calculates the area under the precision-recall curve (AUPRC). * * @param tcurve a previously extracted threshold curve Instances. * @return the PRC area, or Double.NaN if you don't pass in a ThresholdCurve * generated Instances. */ public static double getPRCArea(Instances tcurve) { final int n = tcurve.numInstances(); if (!RELATION_NAME.equals(tcurve.relationName()) || (n == 0)) { return Double.NaN; } final int pInd = tcurve.attribute(PRECISION_NAME).index(); final int rInd = tcurve.attribute(RECALL_NAME).index(); final double[] pVals = tcurve.attributeToDoubleArray(pInd); final double[] rVals = tcurve.attributeToDoubleArray(rInd); double area = 0; double xlast = rVals[n - 1]; // start from the first real p/r pair (not the artificial zero point) for (int i = n - 2; i >= 0; i--) { double recallDelta = rVals[i] - xlast; area += (pVals[i] * recallDelta); xlast = rVals[i]; } if (area == 0) { return Utils.missingValue(); } return area; }
/**
 * Gets the current settings of the C45Saver object.
 * <p>
 * The result always starts with {@code -o}, {@code -i} and {@code -c}, each
 * followed by its value (an empty string when no file or no instances are
 * available), followed by the options of the superclass.
 *
 * @return an array of strings suitable for passing to setOptions
 */
@Override
public String[] getOptions() {
  final Vector<String> result = new Vector<String>();

  // output file
  result.add("-o");
  result.add(retrieveFile() != null ? "" + retrieveFile() : "");

  // relation name and class index, blank when no data is loaded
  String relation = "";
  String classIndex = "";
  if (getInstances() != null) {
    relation = "" + getInstances().relationName();
    classIndex = "" + getInstances().classIndex();
  }
  result.add("-i");
  result.add(relation);
  result.add("-c");
  result.add(classIndex);

  for (String inherited : super.getOptions()) {
    result.add(inherited);
  }

  return result.toArray(new String[result.size()]);
}
/**
 * Sets up the structure for the plot instances: a copy of every attribute of
 * the source data followed by a nominal "Cluster" attribute whose labels are
 * "cluster0", "cluster1", ... (one per cluster reported by the evaluation).
 */
@Override
protected void determineFormat() {
  // nominal labels are zero-based
  final int clusterCount = m_Evaluation.getNumClusters();
  final ArrayList<String> clusterLabels = new ArrayList<String>();
  for (int c = 0; c < clusterCount; c++) {
    clusterLabels.add("cluster" + c);
  }
  final Attribute clusterAttribute = new Attribute("Cluster", clusterLabels);

  // copy the original attributes, then append the cluster attribute last
  final ArrayList<Attribute> structure = new ArrayList<Attribute>();
  for (int a = 0; a < m_Instances.numAttributes(); a++) {
    structure.add((Attribute) m_Instances.attribute(a).copy());
  }
  structure.add(clusterAttribute);

  final String plotName = m_Instances.relationName() + "_clustered";
  m_PlotInstances = new Instances(plotName, structure, m_Instances.numInstances());
}
/** * displays some properties of the instances */ public void showProperties() { ArffPanel panel; ListSelectorDialog dialog; Vector<String> props; Instances inst; panel = getCurrentPanel(); if (panel == null) { return; } inst = panel.getInstances(); if (inst == null) { return; } if (inst.classIndex() < 0) { inst.setClassIndex(inst.numAttributes() - 1); } // get some data props = new Vector<String>(); props.add("Filename: " + panel.getFilename()); props.add("Relation name: " + inst.relationName()); props.add("# of instances: " + inst.numInstances()); props.add("# of attributes: " + inst.numAttributes()); props.add("Class attribute: " + inst.classAttribute().name()); props.add("# of class labels: " + inst.numClasses()); dialog = new ListSelectorDialog(getParentFrame(), new JList(props)); dialog.showDialog(); }
/** * generates the XML structure for the header */ protected void headerToXML() { Element root; Element node; Element child; int i; root = m_Document.getDocumentElement(); root.setAttribute(ATT_NAME, validContent(m_Instances.relationName())); root.setAttribute(ATT_VERSION, Version.VERSION); // create "header" node node = m_Document.createElement(TAG_HEADER); root.appendChild(node); // add all attributes child = m_Document.createElement(TAG_ATTRIBUTES); node.appendChild(child); for (i = 0; i < m_Instances.numAttributes(); i++) { addAttribute(child, m_Instances.attribute(i)); } }
// Evaluates the classifier currently configured in the editor
// incrementally (batch-window evaluation) on the dataset, logs the setup,
// and stores the result in the history. Failures are reported through the
// exception handler, the busy message and an error dialog.
@Override
public void run() {
  startBusy("Incremental...");
  try {
    final MultiLabelClassifier learner = (MultiLabelClassifier) m_GenericObjectEditor.getValue();
    log(OptionUtils.toCommandLine(learner));
    log("Dataset: " + data.relationName());
    log("Class-index: " + data.classIndex());

    final Result outcome =
      IncrementalEvaluation.evaluateModelBatchWindow(learner, data, m_Samples, 1., m_TOP, m_VOP);

    // payload kept with the history entry: the classifier plus
    // new Instances(data, 0) (presumably a header-only copy - TODO confirm)
    final Object[] payload = new Object[]{learner, new Instances(data, 0)};
    final String shortName = learner.getClass().getName().replace("meka.classifiers.", "");
    addResultToHistory(outcome, payload, shortName);
    finishBusy();
  } catch (Exception e) {
    handleException("Evaluation failed (incremental splits):", e);
    finishBusy("Evaluation failed: " + e);
    JOptionPane.showMessageDialog(
      ClassifyTab.this,
      "Evaluation failed:\n" + e,
      "Error",
      JOptionPane.ERROR_MESSAGE);
  }
}
};
append("<html>\n"); append("<head>\n"); append("<title>Predictions for dataset " + sanitize(m_Header.relationName()) + "</title>\n"); append("</head>\n"); append("<body>\n"); append("<div align=\"center\">\n"); append("<h3>Predictions for dataset " + sanitize(m_Header.relationName()) + "</h3>\n"); append("<table border=\"1\">\n"); append("<tr>\n"); append("<td>"); boolean first = true; for (int i = 0; i < m_Header.numAttributes(); i++) { if (i == m_Header.classIndex()) continue; if (!first) append("</td><td>"); append(sanitize(m_Header.attribute(i).name())); first = false;
/**
 * Stores the given dataset and refreshes the summary labels (relation name,
 * number of instances, number of attributes, sum of weights).
 * <p>
 * When {@code m_showZeroInstancesAsUnknown} is set and the dataset holds no
 * instances, the instance count and the sum of weights are shown as "?".
 *
 * @param inst a set of Instances
 */
public void setInstances(Instances inst) {
  m_Instances = inst;

  final String relation = m_Instances.relationName();
  m_RelationNameLab.setText(relation);
  m_RelationNameLab.setToolTipText(relation);

  // an empty dataset may be displayed as "unknown" rather than as zero
  final boolean showAsUnknown =
    m_showZeroInstancesAsUnknown && m_Instances.numInstances() == 0;

  final String countText;
  if (showAsUnknown) {
    countText = "?";
  } else {
    countText = String.valueOf(m_Instances.numInstances());
  }
  m_NumInstancesLab.setText(countText);

  m_NumAttributesLab.setText(String.valueOf(m_Instances.numAttributes()));

  final String weightText;
  if (showAsUnknown) {
    weightText = "?";
  } else {
    weightText = Utils.doubleToString(m_Instances.sumOfWeights(), 3);
  }
  m_sumOfWeightsLab.setText(weightText);
}
private Instances makeDataSetProbabilities(Instances insts, Instances format, weka.classifiers.Classifier classifier, String relationNameModifier) throws Exception { // adjust structure for InputMappedClassifier (if necessary) if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) { format = ((weka.classifiers.misc.InputMappedClassifier) classifier) .getModelHeader(new Instances(format, 0)); } String classifierName = classifier.getClass().getName(); classifierName = classifierName.substring(classifierName.lastIndexOf('.') + 1, classifierName.length()); Instances newInstances = new Instances(insts); for (int i = 0; i < format.classAttribute().numValues(); i++) { weka.filters.unsupervised.attribute.Add addF = new weka.filters.unsupervised.attribute.Add(); addF.setAttributeIndex("last"); addF.setAttributeName(classifierName + "_prob_" + format.classAttribute().value(i)); addF.setInputFormat(newInstances); newInstances = weka.filters.Filter.useFilter(newInstances, addF); } newInstances.setRelationName(insts.relationName() + relationNameModifier); return newInstances; }
/**
 * Adds an "Instance_number" attribute as the first attribute of the plottable
 * instances and fills it with each row's zero-based position.
 * <p>
 * The class index is set to the previous index plus one and the original
 * relation name is restored afterwards. Any exception is printed to the
 * standard error stream and otherwise ignored.
 */
public void addInstanceNumberAttribute() {
  final String savedRelationName = m_plotInstances.relationName();
  final int savedClassIndex = m_plotInstances.classIndex();

  try {
    final Add insertFilter = new Add();
    insertFilter.setAttributeName("Instance_number");
    insertFilter.setAttributeIndex("first");
    insertFilter.setInputFormat(m_plotInstances);
    m_plotInstances = Filter.useFilter(m_plotInstances, insertFilter);

    // the new attribute sits at position 0, so the class moves one to the right
    // NOTE(review): if no class was set (index -1) this selects the new
    // attribute as class - confirm that this is intended
    m_plotInstances.setClassIndex(savedClassIndex + 1);

    int row = 0;
    while (row < m_plotInstances.numInstances()) {
      m_plotInstances.instance(row).setValue(0, row);
      row++;
    }

    m_plotInstances.setRelationName(savedRelationName);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Gets the index of the instance with the closest threshold value to the
 * desired target.
 * <p>
 * The values searched are those of the last attribute of {@code tcurve}
 * (presumably the threshold column - confirm against the generator).
 *
 * @param tcurve a set of instances that have been generated by this class
 * @param threshold the target threshold
 * @return the index of the instance that has threshold closest to the target,
 *         or -1 if this could not be found (i.e. wrong relation name, no
 *         data, or a threshold outside [0, 1])
 */
public static int getThresholdInstance(Instances tcurve, double threshold) {
  // only curves produced under the expected relation name are accepted
  if (!RELATION_NAME.equals(tcurve.relationName())) {
    return -1;
  }

  final int count = tcurve.numInstances();
  if (count == 0 || threshold < 0 || threshold > 1.0) {
    return -1;
  }
  if (count == 1) {
    // nothing to search: the single instance is the closest one
    return 0;
  }

  final double[] values = tcurve.attributeToDoubleArray(tcurve.numAttributes() - 1);
  final int[] order = Utils.sort(values);
  return binarySearch(order, values, threshold);
}