/**
 * Command-line entry point used to exercise this filter.
 *
 * @param argv the arguments handed on to the filter; use -h for help
 */
public static void main(String[] argv) {
  ReservoirSample filter = new ReservoirSample();
  runFilter(filter, argv);
}
}
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { // Do the subsample, and clear the input instances. createSubsample(); } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/**
 * Sets the seed by delegating to {@code setRandomSeed(int)}.
 *
 * @param seed the seed value to pass on
 */
@ProgrammaticProperty
public void setSeed(int seed) {
  this.setRandomSeed(seed);
}
/** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isFirstBatchDone()) { push(instance); return true; } else { // bufferInput(instance); if (!m_containsStringAtts) { copyValues(instance, false); } processInstance(instance); return false; } }
"Reservoir sampling requested, but no sample size set."); m_reservoir = new ReservoirSample(); m_reservoir.setSampleSize(getReservoirSampleSize()); m_reservoir.setRandomSeed(seed); try { m_reservoir.setInputFormat(m_trainingHeader); } catch (Exception e) { throw new DistributedWekaException(e);
setRandomSeed(Integer.parseInt(tmpStr)); } else { setRandomSeed(1); setSampleSize(Integer.parseInt(tmpStr)); } else { setSampleSize(100);
/**
 * Creates a default ReservoirSample.
 *
 * @return a freshly constructed ReservoirSample filter
 */
public Filter getFilter() {
  return new ReservoirSample();
}
public void testSubSample() { m_Filter = getFilter(); ((ReservoirSample)m_Filter).setSampleSize(10); Instances result = useFilter(); assertEquals(result.numInstances(), 10); // instances should be different from the first 10 instances in // the original data boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Comparator.compare( m_Instances.instance(i), result.instance(i)) != 0) { equal = false; break; } } if (equal) { fail("Result should be different than the first 10 instances"); } }
m_reservoir.batchFinished(); while (m_reservoir.numPendingOutput() > 0) { m_trainingHeader.add(m_reservoir.output());
/**
 * Reports the filter's current configuration as command-line options:
 * "-S" followed by the random seed, then "-Z" followed by the sample size.
 *
 * @return an array of strings suitable for passing to setOptions
 */
@Override
public String[] getOptions() {
  // Flag/value pairs, in the same order they were previously collected.
  return new String[] {
    "-S", "" + getRandomSeed(),
    "-Z", "" + getSampleSize()
  };
}
if (m_containsStringAtts) { sb = new StringBuilder(); sb.append(getInputFormat().stringFreeStructure()).append("\n"); if (!m_containsStringAtts) { Instance copy = (Instance) ((Instance) m_subSample[i]).copy(); push(copy, false); // No need to copy instance } else { sb.append(m_subSample[i].toString()).append("\n"); Instances stringSample = new Instances(new StringReader(sb.toString())); for (int i = 0; i < stringSample.numInstances(); i++) { push(stringSample.instance(i), false);
/**
 * Gets the seed by delegating to {@code getRandomSeed()}.
 *
 * @return the value reported by getRandomSeed()
 */
@ProgrammaticProperty
public int getSeed() {
  return this.getRandomSeed();
}
/** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined */ @Override public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isFirstBatchDone()) { push(instance); return true; } else { // bufferInput(instance); if (!m_containsStringAtts) { copyValues(instance, false); } processInstance(instance); return false; } }
setRandomSeed(Integer.parseInt(tmpStr)); } else { setRandomSeed(1); setSampleSize(Integer.parseInt(tmpStr)); } else { setSampleSize(100);
/**
 * Creates a default ReservoirSample.
 *
 * @return a freshly constructed ReservoirSample filter
 */
public Filter getFilter() {
  return new ReservoirSample();
}
public void testSubSample() { m_Filter = getFilter(); ((ReservoirSample)m_Filter).setSampleSize(10); Instances result = useFilter(); assertEquals(result.numInstances(), 10); // instances should be different from the first 10 instances in // the original data boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Comparator.compare( m_Instances.instance(i), result.instance(i)) != 0) { equal = false; break; } } if (equal) { fail("Result should be different than the first 10 instances"); } }
/**
 * Reports the filter's current configuration as command-line options:
 * "-S" followed by the random seed, then "-Z" followed by the sample size.
 *
 * @return an array of strings suitable for passing to setOptions
 */
@Override
public String[] getOptions() {
  // Flag/value pairs, in the same order they were previously collected.
  return new String[] {
    "-S", "" + getRandomSeed(),
    "-Z", "" + getSampleSize()
  };
}
if (m_containsStringAtts) { sb = new StringBuilder(); sb.append(getInputFormat().stringFreeStructure()).append("\n"); if (!m_containsStringAtts) { Instance copy = (Instance) ((Instance) m_subSample[i]).copy(); push(copy, false); // No need to copy instance } else { sb.append(m_subSample[i].toString()).append("\n"); Instances stringSample = new Instances(new StringReader(sb.toString())); for (int i = 0; i < stringSample.numInstances(); i++) { push(stringSample.instance(i), false);
/**
 * Gets the seed by delegating to {@code getRandomSeed()}.
 *
 * @return the value reported by getRandomSeed()
 */
@ProgrammaticProperty
public int getSeed() {
  return this.getRandomSeed();
}
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { // Do the subsample, and clear the input instances. createSubsample(); } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }