/**
 * Builds a bootstrap-style sample of this dataset: instances are drawn at
 * random, with replacement, until the sample holds as many instances as this
 * dataset does.
 *
 * @param random source of the random indices used to pick instances
 * @return the sampled dataset, containing {@code numInstances()} instances
 */
public Instances resample(Random random) {
  final int target = numInstances();
  Instances sample = new Instances(this, target);
  // Keep drawing until the sample has reached the size of this dataset.
  while (sample.numInstances() < target) {
    int pick = random.nextInt(target);
    sample.add(instance(pick));
  }
  return sample;
}
/**
 * Appends every instance of the supplied dataset, in index order, to the
 * training header.
 *
 * @param toAdd the instances to add
 */
public void addToTrainingHeader(Instances toAdd) {
  int index = 0;
  // The bound is re-read on every pass, exactly as a plain indexed loop would.
  while (index < toAdd.numInstances()) {
    m_trainingHeader.add(toAdd.instance(index));
    index++;
  }
}
/**
 * Resets the per-attribute minimum/maximum arrays and then feeds every
 * instance of the dataset through {@code updateMinMax}.
 *
 * @param data the dataset whose instances are used to update the ranges
 */
protected void initMinMax(Instances data) {
  final int attributeCount = data.numAttributes();
  m_Min = new double[attributeCount];
  m_Max = new double[attributeCount];

  // NaN marks "no value seen yet" for both bounds of every attribute.
  for (int att = 0; att < attributeCount; att++) {
    m_Max[att] = Double.NaN;
    m_Min[att] = Double.NaN;
  }

  int row = 0;
  while (row < data.numInstances()) {
    updateMinMax(data.instance(row));
    row++;
  }
}
/**
 * jPMF - Joint PMF.
 * Each instance contributes {@code 1/N} (N = number of instances in D) to the
 * cell indexed by its rounded values of attributes j and k.
 *
 * @param D the dataset
 * @param j index of the first attribute (row dimension of the result)
 * @param k index of the second attribute (column dimension of the result)
 * @return the joint PMF of the j-th and k-th labels in D.
 */
public static double[][] jPMF(Instances D, int j, int k) {
  final double[][] joint =
      new double[D.attribute(j).numValues()][D.attribute(k).numValues()];
  final int total = D.numInstances();
  // Probability mass carried by a single instance.
  final double mass = 1.0 / (double) total;
  for (int row = 0; row < total; row++) {
    final int valueJ = (int) Math.round(D.instance(row).value(j));
    final int valueK = (int) Math.round(D.instance(row).value(k));
    joint[valueJ][valueK] += mass;
  }
  return joint;
}
/**
 * Checks the expression {@code floor(a6+a3/5)}: for every filtered instance
 * the last attribute must equal the floor computed from the attribute values
 * at indices 5 and 2, within {@code EXPR_DELTA}.
 */
public void testFloor() {
  m_Filter = getFilter("floor(a6+a3/5)");
  Instances result = useFilter();
  final int count = result.numInstances();
  for (int row = 0; row < count; row++) {
    Instance inst = result.instance(row);
    double expected = Math.floor(inst.value(5) + inst.value(2) / 5);
    double actual = inst.value(inst.numAttributes() - 1);
    assertEquals("Instance " + (row + 1), expected, actual, EXPR_DELTA);
  }
}
/** * Tests default setup. */ public void testTypical() { Instances result = useFilter(); // Number of attributes and instances shouldn't change assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); Attribute mergedAtt = result.attribute(4); // All values should be merged for this attribute assertTrue("Attribute 5 has all values merged in result", mergedAtt .value(0).equals("a_or_b_or_c_or_d")); }
/**
 * Plain holder used to hand back the result of loading a classifier: the
 * classifier itself together with a header. Both fields start out as
 * {@code null}.
 */
private static class LoadedClassifier {
  /** The loaded classifier; {@code null} until assigned. */
  private AbstractClassifier newClassifier;

  /** The header associated with the loaded classifier; {@code null} until assigned. */
  private Instances newHeader;
}
/**
 * Convenience overload: shuffles a copy of D, splits it 60/40 into train and
 * test portions, evaluates a BR model wrapping the given base classifier on
 * that split, and delegates to {@code LEAD(Instances, Result, String)} with
 * the test portion and the evaluation result.
 *
 * @param D      the dataset (left untouched; a copy is shuffled)
 * @param h      the base classifier handed to BR
 * @param r      random number generator used for shuffling
 * @param MDType passed through unchanged to the delegate
 * @return whatever the delegate {@code LEAD} overload returns
 * @throws Exception if evaluation or the delegate fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
  // Work on a shuffled copy so the caller's dataset keeps its order.
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);

  // First 60% (integer arithmetic) for training, the remainder for testing.
  final int total = shuffled.numInstances();
  Instances trainSplit = new Instances(shuffled, 0, total * 60 / 100);
  final int testStart = trainSplit.numInstances();
  Instances testSplit = new Instances(shuffled, testStart, total - testStart);

  BR binaryRelevance = new BR();
  binaryRelevance.setClassifier(h);
  Result outcome = Evaluation.evaluateModel(
      (MultiLabelClassifier) binaryRelevance, trainSplit, testSplit, "PCut1", "1");

  return LEAD(testSplit, outcome, MDType);
}
/**
 * Performs a typical test: after filtering, the number of attributes in the
 * result must equal the number of instances in the (copied) input plus one.
 */
public void testTypical() {
  // Snapshot of the input taken before the filter is created and applied.
  Instances icopy = new Instances(m_Instances);
  m_Filter = getFilter();
  Instances result = useFilter();

  int attributesAfter = result.numAttributes();
  int instancesBeforePlusOne = icopy.numInstances() + 1;
  assertEquals(attributesAfter, instancesBeforePlusOne);
}
public void testPruneMinFreq() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(1); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // min freq of 1 should keep all terms assertEquals(15, consolidated.size()); }
/**
 * Produces a one-line summary of a dataset: the instance count, the class
 * index {@code L}, and the names of the attributes at indices
 * {@code 0 .. L-1}, each followed by a space, wrapped in braces.
 *
 * @param D the dataset to describe
 * @return the summary, e.g. {@code "D=10 L=2 {a b }"}
 */
public static final String toDebugString(Instances D) {
  final int L = D.classIndex();
  StringBuilder out = new StringBuilder();
  out.append("D=").append(D.numInstances());
  out.append(" L=").append(L).append(" {");
  int j = 0;
  while (j < L) {
    out.append(D.attribute(j).name()).append(" ");
    j++;
  }
  out.append("}");
  return out.toString();
}
/**
 * jPMF - Joint PMF.
 * The result has one row per value of attribute j and one column per value of
 * attribute k; every instance adds {@code 1/N} to the cell selected by its
 * rounded values for those two attributes.
 *
 * @param D the dataset
 * @param j index of the attribute that selects the row
 * @param k index of the attribute that selects the column
 * @return the joint PMF of the j-th and k-th labels in D.
 */
public static double[][] jPMF(Instances D, int j, int k) {
  final int rows = D.attribute(j).numValues();
  final int cols = D.attribute(k).numValues();
  final double[][] pmf = new double[rows][cols];

  final int count = D.numInstances();
  // Share of the total probability contributed by one instance.
  final double share = 1.0 / (double) count;

  int idx = 0;
  while (idx < count) {
    int rowValue = (int) Math.round(D.instance(idx).value(j));
    int colValue = (int) Math.round(D.instance(idx).value(k));
    pmf[rowValue][colValue] += share;
    idx++;
  }
  return pmf;
}
/**
 * Produces a dataset of the same size as this one by repeatedly picking a
 * random instance of this dataset (with replacement) and adding it to the
 * new dataset.
 *
 * @param random a random number generator supplying the instance indices
 * @return the new, randomly sampled dataset
 */
public Instances resample(Random random) {
  final int size = numInstances();
  Instances drawn = new Instances(this, size);
  // Stop once the new dataset has caught up with the size of this one.
  while (drawn.numInstances() < size) {
    int index = random.nextInt(size);
    drawn.add(instance(index));
  }
  return drawn;
}
/**
 * Checks the expression {@code exp(a6-a3)}: for every filtered instance the
 * last attribute must equal {@code Math.exp} of the difference between the
 * attribute values at indices 5 and 2, within {@code EXPR_DELTA}.
 */
public void testExp() {
  m_Filter = getFilter("exp(a6-a3)");
  Instances result = useFilter();
  final int count = result.numInstances();
  for (int row = 0; row < count; row++) {
    Instance inst = result.instance(row);
    double expected = Math.exp(inst.value(5) - inst.value(2));
    double actual = inst.value(inst.numAttributes() - 1);
    assertEquals("Instance " + (row + 1), expected, actual, EXPR_DELTA);
  }
}
/**
 * Allocates fresh minimum/maximum arrays (one slot per attribute, all set to
 * NaN) and then passes each instance of the dataset to {@code updateMinMax}.
 *
 * @param data the dataset used to size the arrays and to update the ranges
 */
protected void initMinMax(Instances data) {
  final int width = data.numAttributes();
  m_Min = new double[width];
  m_Max = new double[width];

  // Every bound starts as NaN, i.e. "not yet observed".
  int col = 0;
  while (col < width) {
    m_Max[col] = Double.NaN;
    m_Min[col] = Double.NaN;
    col++;
  }

  for (int row = 0; row < data.numInstances(); row++) {
    updateMinMax(data.instance(row));
  }
}
/** * Test merging all labels. */ public void testMergeAll() { ((MergeManyValues)m_Filter).setMergeValueRange("first-last"); Instances result = useFilter(); // Number of attributes and instances shouldn't change assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); assertEquals(1, result.attribute(1).numValues()); }
/**
 * Bag class that carries the two pieces produced when a classifier is loaded:
 * the classifier and a header. Neither field is set on construction.
 */
private static class LoadedClassifier {
  /** Classifier obtained from loading; remains {@code null} until assigned. */
  private AbstractClassifier newClassifier;

  /** Header obtained from loading; remains {@code null} until assigned. */
  private Instances newHeader;
}
/**
 * Tests the partial copy of a dataset: once over the full range, and once for
 * a 10-instance window starting at index 5.
 *
 * @see Instances#Instances(Instances, int, int)
 */
public void testPartialCopy() {
  // Copying the whole range must reproduce the original instance count.
  final int fullSize = m_Instances.numInstances();
  Instances fullCopy = new Instances(m_Instances, 0, fullSize);
  assertEquals("# of instances differ", m_Instances.numInstances(), fullCopy.numInstances());

  // Copying a window must yield exactly the requested number of instances.
  Instances window = new Instances(m_Instances, 5, 10);
  assertEquals("# of instances differ", 10, window.numInstances());
}
/**
 * Performs a typical test: the filtered result must have one more attribute
 * than the input copy has instances.
 */
public void testTypical() {
  // Copy the input first so the comparison uses the pre-filter instance count.
  Instances icopy = new Instances(m_Instances);
  m_Filter = getFilter();
  Instances result = useFilter();

  int resultAttributes = result.numAttributes();
  int inputInstancesPlusOne = icopy.numInstances() + 1;
  assertEquals(resultAttributes, inputInstancesPlusOne);
}
public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }