private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception { if (list == null) { return null; } HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); DataBag db = new DefaultDataBag(); for (Object o : list) { Tuple tuple; if (elementSubFieldSchema.getType() == Type.STRUCT) { tuple = transformToTuple((List<?>) o, elementSubFieldSchema); } else { // bags always contain tuples tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); } db.add(tuple); } return db; }
// Creates a new bag and adds the tuple that columnToTuple builds from the entry's value.
// NOTE(review): `entry`, `cfInfo` and `cfDef` are defined in surrounding code not visible here;
// the comparator type string is run through parseType before being passed on.
DefaultDataBag bag = new DefaultDataBag(); bag.add(columnToTuple(entry.getValue(), cfInfo, parseType(cfDef.getComparator_type())));
DataOutputStream out = null; try { out = getSpillFile(); } catch (IOException ioe) { warn( "Unable to create tmp file to spill to disk", PigWarning.UNABLE_TO_CREATE_FILE_TO_SPILL, ioe); return 0; spilled++; if ((spilled & 0x3fff) == 0) reportProgress(); warn( "Unable to spill contents to disk", PigWarning.UNABLE_TO_SPILL, e); return 0; out.close(); } catch (IOException e) { warn("Error closing spill", PigWarning.UNABLE_TO_CLOSE_SPILL_FILE, e); incSpillCount(PigCounters.SPILLABLE_MEMORY_MANAGER_SPILL_COUNT); return spilled;
/**
 * Builds a bag directly on top of a caller-supplied list of tuples.
 *
 * <p>The list reference is stored as the bag's contents; nothing is
 * copied, so the bag takes ownership of the list. The bag's size is
 * initialised from the list's current size.
 *
 * @param listOfTuples the {@code List<Tuple>} that becomes the bag's contents
 */
public DefaultDataBag(List<Tuple> listOfTuples) {
    this.mContents = listOfTuples;
    this.mSize = listOfTuples.size();
    markSpillableIfNecessary();
}
ByteBuffer key = null; Tuple tuple = null; DefaultDataBag bag = new DefaultDataBag(); try bag.add(columnToTuple(entry.getValue(), cfInfo, parseType(cfDef.getComparator_type()))); for (Map.Entry<ByteBuffer, Cell> entry : lastRow.entrySet()) bag.add(columnToTuple(entry.getValue(), cfInfo, parseType(cfDef.getComparator_type()))); bag.add(columnToTuple(entry.getValue(), cfInfo, parseType(cfDef.getComparator_type()))); bag.add(columnToTuple(entry.getValue(), cfInfo, parseType(cfDef.getComparator_type())));
/**
 * A bag declared as "bag" holding three identical two-field tuples with
 * unnamed fields must serialise to the expected JSON below.
 */
@Test
public void testBagWithAnonTuple() {
    String expected = "{\"bag\":[[\"xxx\",\"yyy\"],[\"xxx\",\"yyy\"],[\"xxx\",\"yyy\"]]}";

    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList("xxx", "yyy"));
    DefaultDataBag threeRows = new DefaultDataBag(Arrays.asList(row, row, row));

    assertThat(
            pigTypeToJson(createTuple(threeRows, createSchema("bag: {t:(chararray, chararray)}"))),
            is(expected));
}
/**
 * A bag declared as "bag" holding three identical tuples with named
 * fields (first, second, third) must serialise to the expected JSON below.
 */
@Test
public void testNamedBag() {
    String expected = "{\"bag\":[[\"one\",\"two\",\"three\"],[\"one\",\"two\",\"three\"],[\"one\",\"two\",\"three\"]]}";

    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList("one", "two", "three"));
    DefaultDataBag threeRows = new DefaultDataBag(Arrays.asList(row, row, row));

    assertThat(
            pigTypeToJson(createTuple(threeRows,
                    createSchema("bag: {t:(first:chararray, second:chararray, third: chararray)}"))),
            is(expected));
}
/**
 * A bag declared as "bag" holding three identical tuples with named
 * fields (first, second, third) must serialise to the expected JSON
 * below, in which every tuple appears as an object keyed by field name.
 */
@Test
public void testNamedBag() {
    String expected = "{\"bag\":[[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}],"
            + "[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}],"
            + "[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}]]}";

    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList("one", "two", "three"));
    DefaultDataBag threeRows = new DefaultDataBag(Arrays.asList(row, row, row));

    assertThat(
            pigTypeToJson(createTuple(threeRows,
                    createSchema("bag: {t:(first:chararray, second:chararray, third: chararray)}"))),
            is(expected));
}
/**
 * A bag with no alias in the schema, holding three identical two-field
 * tuples with unnamed fields, must serialise to the expected JSON below
 * (keys "bag_0", "val_0" and "val_1").
 */
@Test
public void testBagWithAnonTuple() {
    String expected = "{\"bag_0\":[[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}],"
            + "[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}],"
            + "[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}]]}";

    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList("xxx", "yyy"));
    DefaultDataBag threeRows = new DefaultDataBag(Arrays.asList(row, row, row));

    assertThat(
            pigTypeToJson(createTuple(threeRows, createSchema("{t:(chararray, chararray)}"))),
            is(expected));
}
/**
 * Creates a new default data bag (unordered, not distinct).
 *
 * @return a newly constructed {@link DefaultDataBag}
 */
@Override
public DataBag newDefaultBag() {
    return new DefaultDataBag();
}
/**
 * Creates a default data bag (unordered, not distinct) on top of an
 * existing list of tuples.
 *
 * <p>The tuples are NOT copied: the supplied list is used as the bag's
 * backing store, so the bag takes ownership of the list.
 *
 * @param listOfTuples the list handed to the new bag
 * @return a {@link DefaultDataBag} constructed from {@code listOfTuples}
 */
@Override
public DataBag newDefaultBag(List<Tuple> listOfTuples) {
    return new DefaultDataBag(listOfTuples);
}
DataBag ret = new DefaultDataBag(); String vals = raw.toString(); if (vals.length() <= 2) {
/**
 * Turns the string in the first field of {@code input} into a bag with one
 * tuple per string returned by {@code getStemmedPairs}.
 *
 * @param input the input tuple; its first field is cast to {@code String}
 * @return the bag of tuples, or {@code null} when {@code input} is
 *         {@code null}, has no fields, or its first field is {@code null}
 * @throws IOException wrapping any exception raised while the row is processed
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    // Nothing to process: hand back null rather than an empty bag.
    if (input == null) {
        return null;
    }
    if (input.size() == 0) {
        return null;
    }
    if (input.get(0) == null) {
        return null;
    }

    try {
        List<Tuple> rows = new ArrayList<Tuple>();
        String text = (String) input.get(0);
        for (String pair : getStemmedPairs(text)) {
            Tuple row = TupleFactory.getInstance().newTuple(pair);
            rows.add(row);
        }
        return new DefaultDataBag(rows);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
/**
 * Turns the string in the first field of {@code input} into a bag with one
 * tuple per string returned by {@code getStemmedPairs}.
 *
 * @param input the input tuple; its first field is cast to {@code String}
 * @return the bag of tuples, or {@code null} when {@code input} is
 *         {@code null}, has no fields, or its first field is {@code null}
 * @throws IOException wrapping any exception raised while the row is processed
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    // Nothing to process: hand back null rather than an empty bag.
    if (input == null) {
        return null;
    }
    if (input.size() == 0) {
        return null;
    }
    if (input.get(0) == null) {
        return null;
    }

    try {
        List<Tuple> rows = new ArrayList<Tuple>();
        String text = (String) input.get(0);
        for (String pair : getStemmedPairs(text)) {
            Tuple row = TupleFactory.getInstance().newTuple(pair);
            rows.add(row);
        }
        return new DefaultDataBag(rows);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
// NOTE(review): this brace presumably closes the enclosing class, whose header is outside this view.
}
/**
 * Flattens the values of every classification code into one bag.
 *
 * @param classifCodeList the codes whose value lists are read
 * @return a bag holding one tuple per value, built with
 *         {@code TupleFactory.newTuple(value)}
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            Tuple wrapped = TupleFactory.getInstance().newTuple(value);
            categories.add(wrapped);
        }
    }
    return categories;
}
/**
 * Flattens the values of every classification code into one bag.
 *
 * @param classifCodeList the codes whose value lists are read
 * @return a bag holding one tuple per value, built with
 *         {@code TupleFactory.newTuple(value)}
 */
private static DataBag getCategories(List<ClassifCode> classifCodeList) {
    DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            Tuple wrapped = TupleFactory.getInstance().newTuple(value);
            categories.add(wrapped);
        }
    }
    return categories;
}
/**
 * Flattens the values of every classification code into one bag.
 *
 * @param classifCodeList the codes whose value lists are read
 * @return a bag holding one tuple per value, built with
 *         {@code TupleFactory.newTuple(value)}
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            Tuple wrapped = TupleFactory.getInstance().newTuple(value);
            categories.add(wrapped);
        }
    }
    return categories;
}
return new DefaultDataBag(alt); } catch (Exception e) { logger.error("Error in processing input row:", e);
DataBag db = new DefaultDataBag(); for (String s : ctgs) { db.add(TupleFactory.getInstance().newTuple(s));
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception { if (list == null) { return null; } HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); DataBag db = new DefaultDataBag(); for (Object o : list) { Tuple tuple; if (elementSubFieldSchema.getType() == Type.STRUCT) { tuple = transformToTuple((List<?>) o, elementSubFieldSchema); } else { // bags always contain tuples tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); } db.add(tuple); } return db; }