/**
 * Converts an HCatalog array value into a Pig {@link DataBag}.
 *
 * Struct elements are converted recursively via transformToTuple; all other
 * element types are wrapped in a fresh single-field tuple, since bags may
 * only contain tuples.
 *
 * @param list the array value read from HCatalog; may be null
 * @param hfs  the HCatalog schema of the array field
 * @return a bag with one tuple per element, or null when {@code list} is null
 * @throws Exception propagated from schema access or element conversion
 */
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception {
    if (list == null) {
        return null;
    }
    // NOTE(review): assumes the array element schema's first (only?) field
    // describes every element — TODO confirm against HCatFieldSchema docs.
    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
        Tuple tuple;
        if (elementSubFieldSchema.getType() == Type.STRUCT) {
            tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
        } else {
            // bags always contain tuples
            tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
        }
        db.add(tuple);
    }
    return db;
}
/** A bag of anonymous tuples should serialize each tuple as a plain JSON array. */
@Test
public void testBagWithAnonTuple() {
    String json = "{\"bag\":[[\"xxx\",\"yyy\"],[\"xxx\",\"yyy\"],[\"xxx\",\"yyy\"]]}";
    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList(new String[] { "xxx", "yyy" }));
    DataBag bag = new DefaultDataBag(Arrays.asList(new Tuple[] { row, row, row }));
    Tuple top = createTuple(bag, createSchema("bag: {t:(chararray, chararray)}"));
    assertThat((pigTypeToJson(top)), is(json));
}
/**
 * Serializes a bag of named tuples to JSON.
 *
 * NOTE(review): the schema names every tuple field (first/second/third), yet
 * the expected JSON renders each tuple as a bare array with no field names —
 * confirm this is the intended rendering for named bags; another variant of
 * this test expects objects keyed by field name.
 */
@Test
public void testNamedBag() {
    String expected = "{\"bag\":[[\"one\",\"two\",\"three\"],[\"one\",\"two\",\"three\"],[\"one\",\"two\",\"three\"]]}";
    Tuple tuple = TupleFactory.getInstance().newTuple(Arrays.asList(new String[] { "one", "two", "three" }));
    assertThat(pigTypeToJson(createTuple(new DefaultDataBag(Arrays.asList(new Tuple[] { tuple, tuple, tuple })),
        createSchema("bag: {t:(first:chararray, second:chararray, third: chararray)}"))), is(expected));
}
/** A bag with named tuple fields should serialize each tuple as a JSON object keyed by field name. */
@Test
public void testNamedBag() {
    String json = "{\"bag\":[[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}],"
            + "[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}],"
            + "[{\"first\":\"one\",\"second\":\"two\",\"third\":\"three\"}]]}";
    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList(new String[] { "one", "two", "three" }));
    DataBag bag = new DefaultDataBag(Arrays.asList(new Tuple[] { row, row, row }));
    Tuple top = createTuple(bag, createSchema("bag: {t:(first:chararray, second:chararray, third: chararray)}"));
    assertThat(pigTypeToJson(top), is(json));
}
/** Anonymous tuple fields get synthetic names (val_0, val_1) in the JSON output. */
@Test
public void testBagWithAnonTuple() {
    String json = "{\"bag_0\":[[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}],"
            + "[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}],"
            + "[{\"val_0\":\"xxx\",\"val_1\":\"yyy\"}]]}";
    Tuple row = TupleFactory.getInstance().newTuple(Arrays.asList(new String[] { "xxx", "yyy" }));
    DataBag bag = new DefaultDataBag(Arrays.asList(new Tuple[] { row, row, row }));
    Tuple top = createTuple(bag, createSchema("{t:(chararray, chararray)}"));
    assertThat(pigTypeToJson(top), is(json));
}
/**
 * Creates an empty default bag: unordered and allowing duplicate tuples.
 *
 * @return a fresh, empty {@link DefaultDataBag}
 */
@Override
public DataBag newDefaultBag() {
    return new DefaultDataBag();
}
/**
 * Creates a default (unordered, not distinct) bag backed directly by the
 * given list. The tuples are NOT copied: the bag takes ownership of the
 * list, so callers must not modify it afterwards.
 *
 * @param listOfTuples the list to use as the bag's backing store
 * @return a {@link DefaultDataBag} wrapping {@code listOfTuples}
 */
@Override
public DataBag newDefaultBag(List<Tuple> listOfTuples) {
    return new DefaultDataBag(listOfTuples);
}
/**
 * Splits the term string in field 0 into stemmed pairs and returns them as a
 * bag of single-field tuples, one tuple per pair.
 *
 * @param input tuple whose first field is the term string
 * @return a bag of one-field tuples, or null when the input tuple is
 *         null/empty or its first field is null
 * @throws IOException wrapping any failure while stemming or building tuples
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0 || input.get(0) == null) {
        return null;
    }
    try {
        String terms = (String) input.get(0);
        // Hoisted: look the singleton factory up once, not once per pair.
        TupleFactory tupleFactory = TupleFactory.getInstance();
        List<Tuple> tuples = new ArrayList<Tuple>();
        for (String s : getStemmedPairs(terms)) {
            tuples.add(tupleFactory.newTuple(s));
        }
        return new DefaultDataBag(tuples);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
/**
 * Turns the term string in field 0 into a bag of single-field tuples, one per
 * stemmed pair produced by getStemmedPairs. Returns null for a null/empty
 * input tuple or a null first field.
 *
 * @throws IOException wrapping any failure during stemming or tuple creation
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0 || input.get(0) == null) {
        return null;
    }
    try {
        String terms = (String) input.get(0);
        List<Tuple> rows = new ArrayList<Tuple>();
        for (String pair : getStemmedPairs(terms)) {
            rows.add(TupleFactory.getInstance().newTuple(pair));
        }
        return new DefaultDataBag(rows);
    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}
}
/**
 * Flattens every category value of the given classification codes into a bag
 * of single-field tuples.
 *
 * @param classifCodeList the classification codes to read values from
 * @return a bag with one tuple per category value (duplicates kept)
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    // Hoisted: look the singleton factory up once, not once per value.
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag db = new DefaultDataBag();
    for (ClassifCode code : classifCodeList) {
        for (String co_str : code.getValueList()) {
            db.add(tupleFactory.newTuple(co_str));
        }
    }
    return db;
}
/**
 * Collects every category value from the given classification codes into a
 * bag of one-field tuples (duplicates are kept).
 */
private static DataBag getCategories(List<ClassifCode> classifCodeList) {
    DataBag bag = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            bag.add(TupleFactory.getInstance().newTuple(value));
        }
    }
    return bag;
}
/**
 * Flattens every category value of the given classification codes into a bag
 * of single-field tuples.
 *
 * @param classifCodeList the classification codes to read values from
 * @return a bag with one tuple per category value (duplicates kept)
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    DataBag db = new DefaultDataBag();
    for (ClassifCode code : classifCodeList) {
        for (String co_str : code.getValueList()) {
            db.add(TupleFactory.getInstance().newTuple(co_str));
        }
    }
    return db;
}
DataBag db = new DefaultDataBag(); for (String s : ctgs) { db.add(TupleFactory.getInstance().newTuple(s));
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception { if (list == null) { return null; } HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); DataBag db = new DefaultDataBag(); for (Object o : list) { Tuple tuple; if (elementSubFieldSchema.getType() == Type.STRUCT) { tuple = transformToTuple((List<?>) o, elementSubFieldSchema); } else { // bags always contain tuples tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); } db.add(tuple); } return db; }
/**
 * Converts an HCatalog array value into a Pig {@link DataBag}.
 *
 * Struct elements are converted recursively via transformToTuple; all other
 * element types are wrapped in a fresh single-field tuple, since bags may
 * only contain tuples.
 *
 * @param list the array value read from HCatalog; may be null
 * @param hfs  the HCatalog schema of the array field
 * @return a bag with one tuple per element, or null when {@code list} is null
 * @throws Exception propagated from schema access or element conversion
 */
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception {
    if (list == null) {
        return null;
    }
    // NOTE(review): assumes the array element schema's first (only?) field
    // describes every element — TODO confirm against HCatFieldSchema docs.
    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
        Tuple tuple;
        if (elementSubFieldSchema.getType() == Type.STRUCT) {
            tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
        } else {
            // bags always contain tuples
            tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
        }
        db.add(tuple);
    }
    return db;
}
/**
 * Squares the numeric first field of every tuple in the bag held in field 0
 * of the input, returning a new bag of single-field tuples.
 *
 * NOTE(review): on any failure the stack trace is printed to stdout and null
 * is returned instead of propagating — deliberate best-effort behavior kept
 * as-is; confirm callers expect null on error.
 */
public DataBag exec(Tuple input) throws IOException {
    try {
        TupleFactory tupleFactory = TupleFactory.getInstance();
        DataBag inputBag = (DataBag) input.get(0);
        DataBag squared = new DefaultDataBag();
        for (Tuple row : inputBag) {
            Float value = getNumber(row.get(0));
            Tuple out = tupleFactory.newTuple();
            out.append(value * value);
            squared.add(out);
        }
        return squared;
    } catch (Exception e) {
        System.out.println(StackTraceExtractor.getStackTrace(e));
        return null;
    }
}
/**
 * Extracts a (document key, bag of MSC classification values) pair from a
 * serialized DocumentMetadata protobuf held in field 1 of the input tuple.
 *
 * NOTE(review): the guard only checks size() == 0 but field 1 is read —
 * a one-field tuple would throw here and be rethrown as IOException; confirm
 * whether that is intended or the guard should check size() < 2.
 *
 * @param input tuple whose second field is the serialized protobuf payload
 * @return a two-field tuple (key, bag), or null for a null/empty input
 * @throws IOException wrapping any parse or extraction failure
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    try {
        // Fixed: the original went through a redundant intermediate
        // Object local with a double cast; one cast is sufficient.
        DataByteArray dba = (DataByteArray) input.get(1);
        DocumentMetadata dm = DocumentMetadata.parseFrom(dba.get());
        String key = dm.getKey();
        DataBag db = new DefaultDataBag();
        for (ClassifCode code : dm.getBasicMetadata().getClassifCodeList()) {
            // Keep only codes from the MSC classification source.
            if (ProtoConstants.documentClassifCodeMsc.equals(code.getSource())) {
                db.add(TupleFactory.getInstance().newTuple(code.getValueList()));
            }
        }
        Object[] to = new Object[]{key, db};
        return TupleFactory.getInstance().newTuple(Arrays.asList(to));
    } catch (Exception e) {
        logger.error("Error in processing input row:", e);
        throw new IOException("Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e));
    }
}
}
/**
 * Builds a bag of documents for one author from extracted ORCID data, and
 * bumps the extraction counters on success.
 *
 * NOTE(review): assumes extractedData holds three strings (author fields) at
 * indices 0-2 and a List of OrcidDTO at index 3 — can't verify the layout
 * from here; confirm against the code that produces the array.
 *
 * @param extractedData positional record as described above
 * @return a bag with one document tuple per OrcidDTO
 */
private DataBag createOutput(Object[] extractedData) {
    Author author = buildAuthor((String) extractedData[0], (String) extractedData[1], (String) extractedData[2]);
    DataBag ret = new DefaultDataBag();
    // Unchecked cast: element 3 is expected to be the list of ORCID records.
    for (OrcidDTO orcidDTO : (List<OrcidDTO>) extractedData[3]) {
        ret.add(createDocument(author, orcidDTO));
    }
    if (myReporter != null) {
        // NOTE(review): TOTAL is only incremented when the SUCCESSES counter
        // is non-null — confirm the two counters are meant to rise together.
        Counter cntr = myReporter.getCounter("Orcid Exctaction Summary", "TOTAL SUCCESSES");
        if(cntr!= null){
            cntr.increment(1);
            myReporter.getCounter("Orcid Exctaction Summary", "TOTAL")
                .increment(1);
        }
    }
    return ret;
}
DataBag db = new DefaultDataBag(); int bagsize = 0; for (ClassifCode code : dm.getBasicMetadata().getClassifCodeList()) {
List<Author> aths = dw.getDocumentMetadata().getBasicMetadata().getAuthorList(); DataBag db = new DefaultDataBag(); for(Author a : aths){ for(KeyValue kv : a.getExtIdList()){