/**
 * Converts one Pig tuple into an HCat record and hands it to the record writer.
 *
 * <p>Fields are converted positionally: the i-th field of {@code tuple} is passed to
 * {@code getJavaObj} together with the i-th field schema of {@code computedSchema}.
 * The record is written with a {@code null} key.
 *
 * @param tuple the Pig tuple to store
 * @throws IOException if a field cannot be read or converted, or if the write fails; an
 *     interrupted write is reported as a {@code BackendException} whose cause is the
 *     original {@code InterruptedException}
 */
@Override
public void putNext(Tuple tuple) throws IOException {
    List<Object> outgoing = new ArrayList<Object>(tuple.size());

    int fieldIndex = 0;
    for (HCatFieldSchema fieldSchema : computedSchema.getFields()) {
        outgoing.add(getJavaObj(tuple.get(fieldIndex), fieldSchema));
        fieldIndex++;
    }

    try {
        writer.write(null, new DefaultHCatRecord(outgoing));
    } catch (InterruptedException e) {
        // The InterruptedException is translated into another exception type, so restore the
        // interrupt status; otherwise code further up the stack cannot see the interruption.
        Thread.currentThread().interrupt();
        throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
/**
 * Builds a Pig tuple from a list of values described by an HCat schema.
 *
 * <p>The tuple is sized from {@code objList}; one slot is filled per field of {@code hs},
 * each value being converted with {@code extractPigObject} using the field schema at the
 * same position.
 *
 * @param objList the values to convert; may be {@code null}
 * @param hs schema whose fields describe {@code objList} position by position
 * @return the populated tuple, or {@code null} when {@code objList} is {@code null}
 * @throws Exception if a value cannot be converted or stored
 */
private static Tuple transformToTuple(List<?> objList, HCatSchema hs) throws Exception {
    if (objList == null) {
        return null;
    }

    final Tuple result = tupFac.newTuple(objList.size());
    int position = 0;
    for (HCatFieldSchema fieldSchema : hs.getFields()) {
        result.set(position, extractPigObject(objList.get(position), fieldSchema));
        position++;
    }
    return result;
}
// NOTE(review): this line is a collapsed excerpt of a larger type-dispatching switch; the case
// labels for the statements after BINARY and all closing braces are not visible here.
// Visible behavior: for BINARY, pigObj is cast to DataByteArray and the result of get() is
// returned. The later statements read a Tuple's fields via getAll() into a pre-sized list, and
// convert bag elements with getJavaObj using the first array-element schema: the whole tuple is
// passed when that schema's type is STRUCT, otherwise only the tuple's first field.
switch (type) { case BINARY: return ((DataByteArray) pigObj).get(); List<Object> all = ((Tuple) pigObj).getAll(); ArrayList<Object> converted = new ArrayList<Object>(all.size()); for (int i = 0; i < all.size(); i++) { HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); boolean needTuple = tupFS.getType() == Type.STRUCT; List<Object> bagContents = new ArrayList<Object>((int) pigBag.size()); Iterator<Tuple> bagItr = pigBag.iterator(); bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
@Override public Tuple exec(Tuple input) throws IOException { try { Tuple t = mTupleFactory.newTuple(2); // input is a bag with one tuple containing // the column we are trying to avg on DataBag bg = (DataBag) input.get(0); Long l = null; if(bg.iterator().hasNext()) { Tuple tp = bg.iterator().next(); l = (Long)(tp.get(0)); } t.set(0, l); if (l != null) t.set(1, 1L); else t.set(1, 0L); return t; } catch (ExecException ee) { throw ee; } catch (Exception e) { int errCode = 2106; String msg = "Error while computing average in " + this.getClass().getSimpleName(); throw new ExecException(msg, errCode, PigException.BUG, e); } } }
/**
 * Splits a serialized {@code DocumentWrapper} into its metadata and media parts.
 *
 * @param tuple exactly two fields: a chararray row id and a bytearray holding the
 *     serialized {@code DocumentWrapper}
 * @return a three-field tuple: the row id, the serialized document metadata and the
 *     serialized media container, the latter two as {@code DataByteArray}
 * @throws IOException if {@code tuple} is {@code null}, does not have two fields, or the
 *     fields are not of the expected types; also if reading or parsing fails
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    final boolean validInput = tuple != null
            && tuple.size() == 2
            && tuple.getType(0) == DataType.CHARARRAY
            && tuple.getType(1) == DataType.BYTEARRAY;
    if (!validInput) {
        throw new IOException(this.getClass().getName()
                + " expects 2 arguments, first string, second byte array");
    }

    final String rowId = (String) tuple.get(0);
    final DataByteArray serializedWrapper = (DataByteArray) tuple.get(1);
    final DocumentWrapper wrapper = DocumentWrapper.parseFrom(serializedWrapper.get());

    final DocumentMetadata metadata = wrapper.getDocumentMetadata();
    final MediaContainer media = wrapper.getMediaContainer();

    final Tuple result = TupleFactory.getInstance().newTuple();
    result.append(rowId);
    result.append(new DataByteArray(metadata.toByteArray()));
    result.append(new DataByteArray(media.toByteArray()));
    return result;
}
} // closes the enclosing class, whose header is outside this chunk
/**
 * Extracts document metadata fields from a serialized {@code DocumentWrapper}.
 *
 * <p>The first field of {@code input} is read as a {@code DataByteArray}, parsed into a
 * {@code DocumentWrapper}, and its metadata is passed to
 * {@code addDocumentMetatdataFields} together with a new tuple sized to
 * {@code fieldNumberMap.size()}.
 *
 * @param input tuple whose first field holds the serialized wrapper
 * @return the tuple returned by {@code addDocumentMetatdataFields}
 * @throws IOException if the field cannot be read or the bytes cannot be parsed
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    final DataByteArray serializedWrapper = (DataByteArray) input.get(0);
    final DocumentWrapper wrapper = DocumentWrapper.parseFrom(serializedWrapper.get());
    final DocumentMetadata metadata = wrapper.getDocumentMetadata();

    final Tuple emptyRow = TupleFactory.getInstance().newTuple(fieldNumberMap.size());
    return addDocumentMetatdataFields(metadata, emptyRow);
}
/**
 * Generates {@code numKeys} (key, bucket) pairs derived from a base identifier.
 *
 * <p>For each {@code k} in {@code [0, numKeys)} the key is the first input field, read as a
 * {@code String}, concatenated with {@code k}; the bucket is
 * {@code random.nextInt(maxRandom)}.
 *
 * @param input tuple whose first field is the base identifier
 * @return a bag of two-field tuples (key, bucket), one per generated key
 * @throws IOException if the input field cannot be read or a tuple field cannot be set
 */
public DataBag exec(Tuple input) throws IOException {
    final DataBag generated = bagFactory.newDefaultBag();
    final String idBase = (String) input.get(0);

    int k = 0;
    while (k < numKeys) {
        final String key = idBase + k;
        final int bucket = random.nextInt(maxRandom);

        final Tuple pair = tupleFactory.newTuple(2);
        pair.set(0, key);
        pair.set(1, bucket);
        generated.add(pair);

        k++;
    }
    return generated;
}
/**
 * Writes a (key, value) pair of byte arrays taken from a two-field tuple.
 *
 * <p>Both fields are read as {@code DataByteArray}; their bytes are each wrapped in a
 * single-field tuple, then in a {@code NullableTuple}, and passed to the writer.
 *
 * @param t tuple with exactly two {@code DataByteArray} fields: key and value
 * @throws IOException an {@code ExecException} if the tuple does not have two fields, or if
 *     either field or its byte content is {@code null}; an {@code IOException} wrapping the
 *     cause if the write is interrupted
 */
@Override
public void putNext(Tuple t) throws IOException {
    if (t.size() != 2) {
        throw new ExecException("Output tuple has wrong size: is " + t.size() + ", should be 2");
    }

    // Check the fields themselves before dereferencing them: a null field would otherwise
    // surface as a NullPointerException instead of the intended ExecException below.
    DataByteArray keyField = (DataByteArray) t.get(0);
    DataByteArray valueField = (DataByteArray) t.get(1);
    if (keyField == null || valueField == null) {
        throw new ExecException("Output tuple contains null");
    }

    byte[] keyBytes = keyField.get();
    byte[] valueBytes = valueField.get();
    if (keyBytes == null || valueBytes == null) {
        throw new ExecException("Output tuple contains null");
    }

    ArrayList<byte[]> alk = new ArrayList<byte[]>();
    alk.add(keyBytes);
    NullableTuple key = new NullableTuple(TupleFactory.getInstance().newTuple(alk));

    ArrayList<byte[]> alv = new ArrayList<byte[]>();
    alv.add(valueBytes);
    NullableTuple val = new NullableTuple(TupleFactory.getInstance().newTuple(alv));

    try {
        writer.write(key, val);
    } catch (InterruptedException e) {
        // The interruption is converted to an IOException, so restore the interrupt status
        // for code further up the stack.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
} // closes the enclosing class, whose header is outside this chunk
/**
 * Appends the running index {@code i} to every tuple of the incoming bag and collects the
 * results in {@code outputBag}.
 *
 * <p>After each addition, {@code outputBag.spill()} is called and {@code count} reset to
 * zero whenever {@code count} is a multiple of 1,000,000; both {@code i} and {@code count}
 * are then incremented.
 *
 * @param arg0 tuple whose first field is the bag to enumerate
 * @throws IOException if the bag cannot be read from {@code arg0}
 */
@Override
public void accumulate(Tuple arg0) throws IOException {
    final DataBag incoming = (DataBag) arg0.get(0);

    for (Tuple source : incoming) {
        // Copy the source fields so the appended index does not modify the input tuple.
        final Tuple enumerated = TupleFactory.getInstance().newTuple(source.getAll());
        enumerated.append(i);
        outputBag.add(enumerated);

        final boolean spillDue = (count % 1000000 == 0);
        if (spillDue) {
            outputBag.spill();
            count = 0;
        }

        i++;
        count++;
    }
}
/**
 * Extracts the document key from a serialized {@code DocumentWrapper}.
 *
 * @param tuple exactly two fields: a row id (cast to {@code String}) and a bytearray
 *     holding the serialized {@code DocumentWrapper}
 * @return a three-field tuple: the row id, the key of the document's metadata, and the
 *     original bytearray unchanged
 * @throws IOException if {@code tuple} is {@code null}, does not have two fields, or its
 *     second field is not a bytearray; also if reading or parsing fails
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    final boolean wellFormed = tuple != null
            && tuple.size() == 2
            && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
        throw new IOException(this.getClass().getName()
                + " expects 2 arguments, 2nd must be a bytearray");
    }

    final String rowId = (String) tuple.get(0);
    final DataByteArray serializedDoc = (DataByteArray) tuple.get(1);
    final DocumentWrapper wrapper = DocumentProtos.DocumentWrapper.parseFrom(serializedDoc.get());

    final Tuple row = tupleFactory.newTuple();
    row.append(rowId);
    row.append(wrapper.getDocumentMetadata().getKey());
    row.append(serializedDoc);
    return row;
}
} // closes the enclosing class, whose header is outside this chunk
/**
 * Concatenates the fields of several tuples, in array order, into one new tuple.
 *
 * @param data the tuples whose fields are appended one after another
 * @return the result of {@code illustratorMarkup} applied to the concatenated tuple
 * @throws ExecException if a field cannot be read from one of the tuples
 */
private Tuple createTuple(Tuple[] data) throws ExecException {
    final Tuple joined = TupleFactory.getInstance().newTuple();

    for (Tuple part : data) {
        final int fieldCount = part.size();
        for (int field = 0; field < fieldCount; field++) {
            joined.append(part.get(field));
        }
    }

    return illustratorMarkup(joined, joined, 0);
}
/**
 * Rewrites a tuple by dropping its first two fields and shifting the third by an offset.
 *
 * <p>The output has two fields fewer than the input. Output field 0 is the input's field 2
 * (a {@code Long}) plus {@code calculateOffset} of the input's field 0 (an
 * {@code Integer}); output field {@code i >= 1} is the input's field {@code i + 2}.
 *
 * @param input the tuple to transform
 * @return the transformed tuple
 * @throws Exception if a field cannot be read, cast, or stored
 */
@Override
public Tuple call(Tuple input) throws Exception {
    final int outputSize = input.getAll().size() - 2;
    final Tuple output = TupleFactory.getInstance().newTuple(outputSize);

    // Slot 0 is filled separately below; copy the trailing fields first.
    int target = 1;
    while (target < outputSize) {
        output.set(target, input.get(target + 2));
        target++;
    }

    final long offset = calculateOffset((Integer) input.get(0));
    output.set(0, offset + (Long) input.get(2));
    return output;
}
public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() < 1 || input.isNull(0)) return null; // Output bag DataBag bagOfTokens = bagFactory.newDefaultBag(); StringReader textInput = new StringReader(input.get(0).toString()); PTBTokenizer ptbt = new PTBTokenizer(textInput, new CoreLabelTokenFactory(), ""); for (CoreLabel label; ptbt.hasNext(); ) { label = (CoreLabel)ptbt.next(); Tuple termText = tupleFactory.newTuple(label.toString()); bagOfTokens.add(termText); } return bagOfTokens; } }
// NOTE(review): collapsed excerpt of a record-reading routine; the declarations of dataMap, i and
// result, and the closing brace of the loop, are outside this view.
// Visible behavior: the reader's current value is taken as a map, a tuple sized to the map is
// created, `result` is stored at index i, and each map entry's value is then stored at index i
// (post-incremented). As shown, the first entry value would overwrite `result` at the same index,
// unless code not visible here changes i in between; confirm against the full method.
dataMap = reader.getCurrentValue(); Tuple tuple = TupleFactory.getInstance().newTuple(dataMap.size()); tuple.set(i, result); Set<Entry<?, ?>> entrySet = dataMap.entrySet(); for (Map.Entry entry : entrySet) { tuple.set(i++, entry.getValue());
@Override public Tuple exec(Tuple input) throws IOException { // Since Initial is guaranteed to be called // only in the map, it will be called with an // input of a bag with a single tuple - the // count should always be 1 if bag is non empty DataBag bag = (DataBag)input.get(0); return mTupleFactory.newTuple(bag.iterator().hasNext()? Long.valueOf(1L) : Long.valueOf(0L)); } }
/**
 * Converts the bytearray in the first input field to its string form.
 *
 * @param input tuple whose first field is a {@code DataByteArray}
 * @return a single-field tuple holding the result of {@code DataByteArray.toString()}
 * @throws IOException if the field cannot be read
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    final Tuple result = TupleFactory.getInstance().newTuple();

    final DataByteArray raw = (DataByteArray) input.get(0);
    final String text = raw.toString();

    result.append(text);
    return result;
}