/**
 * Adds to {@code outputBag} one two-field tuple for every pair
 * {@code (input[i], input[j])} where {@code start <= i < j <= end} and
 * {@code j <= i + lookahead_steps}.
 *
 * <p>The bag is spilled, and the local counter reset, whenever the number of
 * tuples added since the previous spill has reached {@code SPILL_THRESHOLD}.
 *
 * @param input     tuples to pair up
 * @param start     index of the first element that may open a pair
 * @param end       index of the last element that may close a pair (inclusive)
 * @param outputBag bag receiving the generated pairs
 * @throws ExecException declared for callers; not raised directly in this body
 */
private void generatePairs(ArrayList<Tuple> input, int start, int end, DataBag outputBag) throws ExecException
{
  int addedSinceSpill = 0;

  for (int first = start; first + 1 <= end; first++) {
    Tuple left = input.get(first);

    // Walk the lookahead window, clipped to the inclusive upper bound 'end'.
    for (int second = first + 1; second <= first + lookahead_steps && second <= end; second++) {
      Tuple right = input.get(second);

      // Spill before adding once a full batch has accumulated.
      if (addedSinceSpill >= SPILL_THRESHOLD) {
        outputBag.spill();
        addedSinceSpill = 0;
      }

      Tuple pair = tupleFactory.newTuple(Arrays.asList(left, right));
      outputBag.add(pair);
      addedSinceSpill++;
    }
  }
}
// Spill the output bag and reset the cnt counter to zero.
// NOTE(review): cnt presumably counts tuples added since the last spill — confirm against the enclosing loop.
outputBag.spill(); cnt = 0;
/**
 * Consumes one batch of input: for every tuple in the bag held in field 0 of
 * {@code arg0}, builds a new tuple from that tuple's fields, appends the
 * running index {@code i}, and adds the result to {@code outputBag}.
 *
 * <p>The output bag is spilled after every 1,000,000 tuples added. The
 * previous check ({@code count % 1000000 == 0} evaluated before the
 * increment) fired while {@code count} was still 0, forcing a spill after the
 * very first tuple.
 *
 * @param arg0 tuple whose first field is the {@link DataBag} to enumerate
 * @throws IOException if reading the input tuple fails
 */
@Override
public void accumulate(Tuple arg0) throws IOException
{
  // Number of tuples to buffer between spills.
  final int spillEvery = 1000000;

  DataBag inputBag = (DataBag) arg0.get(0);
  for (Tuple t : inputBag) {
    Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll());
    t1.append(i);
    outputBag.add(t1);

    i++;
    count++;

    // Spill only once a full batch has been added since the last spill.
    if (count >= spillEvery) {
      outputBag.spill();
      count = 0;
    }
  }
}
/**
 * Returns a new bag in which every tuple of {@code inputBag} is rebuilt from
 * its fields with a descending index appended. The first tuple receives
 * {@code inputBag.size() - 1 + start} and each following tuple one less.
 *
 * <p>The output bag is spilled after every 1,000,000 tuples added. The
 * previous check ({@code count % 1000000 == 0} evaluated before the
 * increment) fired while {@code count} was still 0, so every call spilled
 * after its first tuple regardless of bag size.
 *
 * @param inputBag bag whose tuples are to be enumerated in reverse
 * @return a new bag containing the indexed tuples
 * @throws IOException if building or adding a tuple fails
 */
public DataBag call(DataBag inputBag) throws IOException
{
  // Number of tuples to buffer between spills.
  final int spillEvery = 1000000;

  DataBag outputBag = BagFactory.getInstance().newDefaultBag();
  long i = inputBag.size() - 1 + start;
  long count = 0;

  for (Tuple t : inputBag) {
    Tuple t1 = TupleFactory.getInstance().newTuple(t.getAll());
    t1.append(i);
    outputBag.add(t1);

    i--;
    count++;

    // Spill only once a full batch has been added since the last spill.
    if (count >= spillEvery) {
      outputBag.spill();
      count = 0;
    }
  }

  return outputBag;
}