/**
 * Builds a bag holding the given tuples, in the order they were passed.
 *
 * @param tuples the tuples to place in the bag
 * @return a bag containing the provided objects
 */
public static DataBag bag(Tuple... tuples) {
    // Arrays.asList yields a fixed-size view backed by the varargs array. Copy it into a
    // growable list so the list handed to the bag supports add() and is not aliased to
    // the caller's array.
    // NOTE(review): presumably NonSpillableDataBag keeps the list it is given -- confirm.
    return new NonSpillableDataBag(new java.util.ArrayList<Tuple>(Arrays.asList(tuples)));
}
/**
 * Read a bag from disk.
 * <p>
 * The stream is expected to start with a long element count, followed by that many
 * datums; each datum is cast to a Tuple and added to this bag.
 *
 * @param in DataInput to read data from.
 * @throws IOException (passes it on from underlying calls).
 */
public void readFields(DataInput in) throws IOException {
    // Count down from the serialized size; any exception raised while reading or
    // adding propagates to the caller unchanged.
    for (long remaining = in.readLong(); remaining > 0; remaining--) {
        Object datum = DataReaderWriter.readDatum(in);
        add((Tuple) datum);
    }
}
/**
 * Equality is defined through {@code compareTo}: two objects are equal exactly when
 * comparing this object against {@code obj} yields zero.
 *
 * @param obj the object to compare against
 * @return true if {@code compareTo(obj)} returns 0
 */
@Override
public boolean equals(Object obj) {
    final int comparison = compareTo(obj);
    return comparison == 0;
}
/** * Write a bag's contents to disk. * @param out DataOutput to write data to. * @throws IOException (passes it on from underlying calls). */ public void write(DataOutput out) throws IOException { // We don't care whether this bag was sorted or distinct because // using the iterator to write it will guarantee those things come // correctly. And on the other end there'll be no reason to waste // time re-sorting or re-applying distinct. out.writeLong(size()); Iterator<Tuple> it = iterator(); while (it.hasNext()) { Tuple item = it.next(); item.write(out); } }
public Tuple next() { // This will report progress every 1024 times through next. // This should be much faster than using mod. if ((mCntr & 0x3ff) == 0) reportProgress(); return mContents.get(mCntr++); }
/**
 * Write the bag into a string.
 * <p>
 * The result is the string form of every tuple, in iteration order, separated by
 * commas and enclosed in curly braces, e.g. <code>{t1,t2}</code>; an empty bag
 * renders as <code>{}</code>.
 *
 * @return the textual representation of this bag
 */
@Override
public String toString() {
    // The buffer never leaves this method, so the unsynchronized StringBuilder is
    // sufficient; StringBuffer's locking would be pure overhead.
    StringBuilder sb = new StringBuilder();
    sb.append('{');
    Iterator<Tuple> it = iterator();
    while (it.hasNext()) {
        Tuple t = it.next();
        sb.append(t.toString());
        // Only emit a separator when another tuple follows.
        if (it.hasNext()) {
            sb.append(',');
        }
    }
    sb.append('}');
    return sb.toString();
}
if (other instanceof DataBag) { DataBag bOther = (DataBag) other; if (this.size() != bOther.size()) { if (this.size() > bOther.size()) return 1; else return -1; DataBag otherClone; thisClone = factory.newSortedBag(null); Iterator<Tuple> i = iterator(); while (i.hasNext()) thisClone.add(i.next()); if (((DataBag) other).isSorted() || ((DataBag) other).isDistinct()) {
/**
 * Hands the buffered tuples to the parent: the current contents of {@code buffer}
 * are copied into a new list, wrapped in a NonSpillableDataBag, and added to
 * {@code parent}.
 */
@Override
public void end() {
    // Copy so the bag does not share storage with the buffer.
    ArrayList<Tuple> snapshot = new ArrayList<Tuple>(buffer);
    NonSpillableDataBag collected = new NonSpillableDataBag(snapshot);
    parent.add(collected);
}
/**
 * Creates the bag implementation selected by configuration.
 * <p>
 * On the first call the job configuration (if one is available) is consulted: when
 * {@code PigConfiguration.PIG_CACHEDBAG_TYPE} is set to "default" (case-insensitive),
 * NonSpillableDataBag is used from then on. Otherwise an InternalCachedBag is created.
 *
 * @param numBags passed to the InternalCachedBag constructor when that type is used
 * @return a new, empty bag
 */
private DataBag createDataBag(int numBags) {
    if (!initialized) {
        // The configuration lookup is done at most once.
        initialized = true;
        if (PigMapReduce.sJobConfInternal.get() != null) {
            String bagType =
                    PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_TYPE);
            // Constant-first comparison is null-safe: a missing setting is simply not "default".
            if ("default".equalsIgnoreCase(bagType)) {
                useDefaultBag = true;
            }
        }
    }

    if (useDefaultBag) {
        return new NonSpillableDataBag();
    }
    return new InternalCachedBag(numBags);
}
private static DataBag toPigBag(Field field, Collection<Object> values, boolean lazy) { List<Tuple> tuples = Lists.newArrayListWithExpectedSize(values.size()); for(Object value : values) { Object pValue = toPigObject(field, value, lazy); if (pValue instanceof Tuple) { // DataBag should contain Tuples tuples.add((Tuple)pValue); } else { tuples.add(tupleFactory.newTuple(pValue)); } } return new NonSpillableDataBag(tuples); }
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp, List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes, FileSpec[] replFiles, int fragment, boolean isLeftOuter, Tuple nullTuple, Schema[] inputSchemas, Schema[] keySchemas) throws ExecException { super(k, rp, inp); phyPlanLists = ppLists; this.fragment = fragment; this.keyTypes = keyTypes; this.replFiles = replFiles; LRs = new POLocalRearrange[ppLists.size()]; constExps = new ConstantExpression[ppLists.size()]; createJoinPlans(k); List<Tuple> tupList = new ArrayList<Tuple>(); tupList.add(nullTuple); nullBag = new NonSpillableDataBag(tupList); this.isLeftOuterJoin = isLeftOuter; if (inputSchemas != null) { this.inputSchemas = inputSchemas; } else { this.inputSchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } if (keySchemas != null) { this.keySchemas = keySchemas; } else { this.keySchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } }
private HashMap<String, DataBag> doInverse(Map<String,Object> original) throws ExecException { final HashMap<String, DataBag> inverseMap = new HashMap<String, DataBag>(original.size()); for (Map.Entry<String, Object> entry : original.entrySet()) { Object o = entry.getValue(); String newKey; // Call toString for all primitive types, else throw an Exception if (!(o instanceof Tuple || o instanceof DataBag)) { newKey = o.toString(); } else { throw new ExecException("Wrong type. Value is of type " + o.getClass()); } // Create a new bag if "newKey" does not exist in Map DataBag bag = inverseMap.get(newKey); if (bag == null) { bag = new NonSpillableDataBag(); bag.add(TUPLE_FACTORY.newTuple(entry.getKey())); inverseMap.put(newKey, bag); } else { bag.add(TUPLE_FACTORY.newTuple(entry.getKey())); } } return inverseMap; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if(input == null || input.size() == 0) { return null; } Map<String, Object> m = null; //Input must be of type Map. This is verified at compile time m = (Map<String, Object>)(input.get(0)); if(m == null) { return null; } Collection c = m.values(); DataBag bag = new NonSpillableDataBag(c.size()); Iterator<Object> iter = c.iterator(); while(iter.hasNext()) { Tuple t = TUPLE_FACTORY.newTuple(iter.next()); bag.add(t); } return bag; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() == 0) { return null; } Map<String, Object> m = null; // Input must be of type Map. This is verified at compile time m = (Map<String, Object>) (input.get(0)); if (m == null) { return null; } int initialSetSize = getInitialSetSize(m.values()); Set<Object> uniqueElements = new HashSet<Object>(initialSetSize); DataBag bag = new NonSpillableDataBag(); Iterator<Object> iter = m.values().iterator(); while (iter.hasNext()) { Object val = iter.next(); if (!uniqueElements.contains(val)) { uniqueElements.add(val); Tuple t = TUPLE_FACTORY.newTuple(val); bag.add(t); } } return bag; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if(input == null || input.size() == 0) { return null; } Map<String, Object> m = null; //Input must be of type Map. This is verified at compile time m = (Map<String, Object>)(input.get(0)); if(m == null) { return null; } DataBag bag = new NonSpillableDataBag(m.size()); for (String s : m.keySet()) { Tuple t = TUPLE_FACTORY.newTuple(s); bag.add(t); } return bag; }
/** * Translate a nested message to a tuple. If the field is repeated, it walks the list and adds each to a bag. * Otherwise, it just adds the given one. * @param fieldDescriptor the descriptor object for the given field. * @param fieldValue the object representing the value of this field, possibly null. * @return the object representing fieldValue in Pig -- either a bag or a tuple. */ @SuppressWarnings("unchecked") protected Object messageToTuple(FieldDescriptor fieldDescriptor, Object fieldValue) { if (fieldValue == null) { // protobufs unofficially ensures values are not null. just in case: return null; } assert fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE : "messageToTuple called with field of type " + fieldDescriptor.getType(); if (fieldDescriptor.isRepeated()) { // The protobuf contract is that if the field is repeated, then the object returned is actually a List // of the underlying datatype, which in this case is a nested message. List<Message> messageList = (List<Message>) (fieldValue != null ? fieldValue : Lists.newArrayList()); DataBag bag = new NonSpillableDataBag(messageList.size()); for (Message m : messageList) { bag.add(new ProtobufTuple(m)); } return bag; } else { return new ProtobufTuple((Message)fieldValue); } }
/**
 * Converts a parsed JSON value into the object stored for it.
 * <p>
 * With nested loading enabled, a JSONObject is converted via {@code walkJson} and a
 * JSONArray becomes a bag with one single-field tuple per element (each element is
 * wrapped recursively). In every other case the value's string form is returned,
 * or null for a null value.
 *
 * @param value the JSON value to convert; may be null
 * @return the converted value, or null if {@code value} is null
 */
private Object wrap(Object value) {
    if (isNestedLoadEnabled) {
        if (value instanceof JSONObject) {
            return walkJson((JSONObject) value);
        }
        if (value instanceof JSONArray) {
            JSONArray array = (JSONArray) value;
            DataBag elements = new NonSpillableDataBag(array.size());
            for (int idx = 0; idx < array.size(); idx++) {
                Object wrapped = wrap(array.get(idx));
                elements.add(tupleFactory.newTuple(wrapped));
            }
            return elements;
        }
    }

    // Scalars (and nested structures when nested loading is off) are stringified.
    if (value == null) {
        return null;
    }
    return value.toString();
}
DataBag bag = new NonSpillableDataBag(fieldValueList.size()); for (Object singleFieldValue : fieldValueList) { Object nonEnumFieldValue = coerceToPigTypes(fieldDescriptor, singleFieldValue);
output.put(QUANTILES_LIST, new NonSpillableDataBag(quantilesList)); output.put(WEIGHTED_PARTS, weightedParts); return output;
input.result = new NonSpillableDataBag(); input.returnStatus = POStatus.STATUS_OK;