/**
 * Adds every tuple produced by the given iterable to this bag.
 *
 * <p>The whole traversal runs while holding the monitor of {@code mContents};
 * each tuple is inserted through {@code add(Tuple)}.
 *
 * @param iterable a Collection or DataBag whose tuples are added
 */
public void addAll(Iterable<Tuple> iterable) {
    synchronized (mContents) {
        final Iterator<Tuple> source = iterable.iterator();
        while (source.hasNext()) {
            add(source.next());
        }
    }
}
/**
 * Adds the contents of another bag by delegating to the
 * {@code addAll(Iterable<Tuple>)} overload.
 *
 * @param b bag whose tuples are added
 */
@Override
public void addAll(DataBag b) {
    // Typing the argument as Iterable selects the Iterable overload.
    final Iterable<Tuple> tuples = (Iterable<Tuple>) b;
    addAll(tuples);
}
/**
 * Reports equality in terms of {@code compareTo}: the result is {@code true}
 * exactly when {@code compareTo(other)} yields zero.
 *
 * @param other object to compare against
 * @return {@code true} if the comparison result is zero
 */
@Override
public boolean equals(Object other) {
    final int comparison = compareTo(other);
    return comparison == 0;
}
if (other instanceof DataBag) { DataBag bOther = (DataBag) other; if (this.size() != bOther.size()) { if (this.size() > bOther.size()) return 1; else return -1; BagFactory factory = BagFactory.getInstance(); if (this.isSorted() || this.isDistinct()) { thisClone = this; } else { thisClone = factory.newSortedBag(null); Iterator<Tuple> i = iterator(); while (i.hasNext()) thisClone.add(i.next());
/**
 * Shorthand for {@code incSpillCount(counter, 1)}.
 *
 * @param counter counter passed through to the two-argument overload
 */
@SuppressWarnings("rawtypes")
protected void incSpillCount(Enum counter) {
    incSpillCount(counter, 1);
}
/**
 * Adds a single tuple to the bag.
 *
 * <p>The size counter and the backing contents are updated together while
 * holding the monitor of {@code mContents}; afterwards, outside that lock,
 * {@code markSpillableIfNecessary()} is invoked.
 *
 * @param t tuple to add
 */
@Override
public void add(Tuple t) {
    synchronized (mContents) {
        // Count first, then store, as a single locked step.
        mSize += 1;
        mContents.add(t);
    }
    markSpillableIfNecessary();
}
@Override public int hashCode() { int hash = 1; Iterator<Tuple> i = iterator(); while (i.hasNext()) { // Use 37 because we want a prime, and tuple uses 31. hash = 37 * hash + i.next().hashCode(); } return hash; }
private long totalSizeFromAvgTupleSize(long avgTupleSize, int numInMem) { long used = avgTupleSize * numInMem; long mFields_size = roundToEight(4 + numInMem*4); /* mContents fixed + per entry */ // in java hotspot 32bit vm, there seems to be a minimum bag size of 188 bytes // some of the extra bytes is probably from a minimum size of this array list mFields_size = Math.max(40, mFields_size); // the fixed overhead for this object and other object variables = 84 bytes // 8 - object header // 4 + 8 + 8 - sampled + aggSampleTupleSize + mSize // 8 + 8 - mContents ref + mSpillFiles ref // 4 - spillableRegistered +4 instead of 1 to round it to eight // 36 - mContents fixed used += 84 + mFields_size; // add up overhead for mSpillFiles ArrayList, Object[] inside ArrayList, // object variable inside ArrayList and references to spill files if (mSpillFiles != null) { used += roundToEight(36 /* mSpillFiles fixed overhead*/ + mSpillFiles.size()*4); if(mSpillFiles.size() > 0){ //a rough estimate of memory used by each file entry // the auto generated files are likely to have same length long approx_per_entry_size = roundToEight(mSpillFiles.get(0).toString().length() * 2 + 38); used += mSpillFiles.size() * approx_per_entry_size; } } return used; }
/**
 * Registers this bag with the {@code SpillableMemoryManager} once its
 * estimated memory size reaches {@code SPILL_REGISTER_THRESHOLD}.
 *
 * <p>All bag implementations that can get big enough to be spilled should
 * call this method every time they add an element. Once the bag has been
 * registered, later calls return immediately.
 */
protected void markSpillableIfNecessary() {
    if (spillableRegistered) {
        return;
    }
    final long estimate = getMemorySize();
    if (SPILL_REGISTER_THRESHOLD <= estimate) {
        SpillableMemoryManager.getInstance().registerSpillable(this);
        spillableRegistered = true;
    }
}
/**
 * Reads a bag from disk.
 *
 * <p>The input starts with a {@code long} element count, followed by that
 * many datums. Each datum is read through {@code sedes}, cast to
 * {@code Tuple} and appended with {@code add(Tuple)}.
 *
 * @param in DataInput to read data from
 * @throws IOException passed on unchanged from the underlying calls
 */
@Override
public void readFields(DataInput in) throws IOException {
    final long size = in.readLong();
    for (long i = 0; i < size; i++) {
        // No try/catch needed: exceptions thrown while reading a datum
        // already propagate to the caller as-is.
        add((Tuple) sedes.readDatum(in));
    }
}
/**
 * Adds the contents of a collection by delegating to the
 * {@code addAll(Iterable<Tuple>)} overload.
 *
 * @param c collection whose tuples are added
 */
public void addAll(Collection<Tuple> c) {
    // Typing the argument as Iterable selects the Iterable overload.
    final Iterable<Tuple> tuples = c;
    addAll(tuples);
}