/**
 * Entry point of the function: hands the input tuple straight to
 * {@link #getDistinct(Tuple)} and returns whatever that produces.
 *
 * @param input the tuple passed in by the caller; forwarded unchanged
 * @return the bag built by {@code getDistinct}
 * @throws IOException if {@code getDistinct} fails
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final DataBag distinct = getDistinct(input);
    return distinct;
}
/**
 * Copies every tuple of the bag held in field 0 of {@code input} into a
 * bag obtained from {@code createDataBag()}.
 *
 * <p>NOTE(review): the de-duplication itself presumably comes from the kind
 * of bag {@code createDataBag()} returns; that helper is not visible here,
 * so confirm before relying on it.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the populated bag, or the freshly created (untouched) bag when
 *         field 0 is {@code null}
 * @throws IOException if reading field 0 of {@code input} fails
 */
protected DataBag getDistinct(Tuple input) throws IOException {
    final DataBag source = (DataBag) input.get(0);
    final DataBag distinct = createDataBag();

    if (source != null) {
        long seen = 0;
        for (Tuple row : source) {
            distinct.add(row);
            seen++;
            // Report progress once per 1000 tuples added.
            if (seen % 1000 == 0) {
                progress();
            }
        }
    }
    return distinct;
}
static private DataBag getDistinctFromNestedBags(Tuple input, EvalFunc evalFunc) throws IOException { DataBag result = createDataBag(); long progressCounter = 0; try { DataBag bg = (DataBag)input.get(0); if (bg == null) { return result; } for (Tuple tuple : bg) { // Each tuple has a single column // which is a bag. Get tuples out of it // and distinct over all tuples for (Tuple t : (DataBag)tuple.get(0)) { result.add(t); ++progressCounter; if((progressCounter % 1000) == 0){ evalFunc.progress(); } } } } catch (ExecException e) { throw e; } return result; }