type = pt.getTuple().getType(0); } catch (Exception ex) { throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
/**
 * Returns the type code of a single field by forwarding the request to the delegate {@code t}.
 *
 * @param fieldNum index of the field whose type is requested
 * @return whatever {@code t.getType(fieldNum)} reports
 * @throws ExecException propagated unchanged from the delegate
 */
@Override
public byte getType(int fieldNum) throws ExecException {
    final byte fieldType = t.getType(fieldNum);
    return fieldType;
}
/**
 * Pure pass-through: asks the underlying {@code t} for the type of the given field.
 *
 * @param fieldNum position of the field to inspect
 * @return the type byte obtained from {@code t.getType(fieldNum)}
 * @throws ExecException if the underlying call throws it
 */
@Override
public byte getType(int fieldNum) throws ExecException {
    final byte delegatedType = t.getType(fieldNum);
    return delegatedType;
}
/**
 * Looks up the type of the {@code i}-th entry of {@code appendedFields}.
 *
 * @param i index into {@code appendedFields}
 * @return {@code DataType.UNKNOWN} when {@code appendedFields} is {@code null};
 *         otherwise the result of {@code appendedFields.getType(i)}
 * @throws ExecException propagated from {@code appendedFields.getType(i)}
 */
private byte getAppendedFieldType(int i) throws ExecException {
    if (appendedFields != null) {
        return appendedFields.getType(i);
    }
    // Nothing has been appended, so there is no field to describe.
    return DataType.UNKNOWN;
}
/**
 * Returns the type of field {@code idx} as reported by {@code realTuple}.
 *
 * @param idx index of the field to inspect
 * @return the type byte from {@code realTuple.getType(idx)}
 * @throws ExecException if either {@code get(idx)} or the type lookup fails
 */
@Override
public byte getType(int idx) throws ExecException {
    // The result of get(idx) is intentionally discarded; the call must still run first.
    // NOTE(review): presumably it ensures the field is present in realTuple - confirm.
    get(idx);
    final byte fieldType = realTuple.getType(idx);
    return fieldType;
}
/**
 * Validates that the input tuple has exactly four fields laid out as
 * CHARARRAY, BYTEARRAY, CHARARRAY-or-NULL, BYTEARRAY-or-NULL.
 *
 * @param tuple the tuple to validate
 * @throws IOException if the tuple is {@code null}, does not have four fields, or its
 *                     field types do not match the expected schema; the schema error
 *                     message lists the actual type name of every field
 */
protected void checkCorrectness(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() != 4) {
        // The message previously said "5 arguments" although the check requires 4 fields.
        throw new IOException(this.getClass().getName() + " expects 4 arguments");
    }

    final byte t0 = tuple.getType(0);
    final byte t1 = tuple.getType(1);
    final byte t2 = tuple.getType(2);
    final byte t3 = tuple.getType(3);

    // Fields 0 and 1 are mandatory; fields 2 and 3 may also be NULL.
    final boolean badSchema = t0 != DataType.CHARARRAY
            || t1 != DataType.BYTEARRAY
            || (t2 != DataType.CHARARRAY && t2 != DataType.NULL)
            || (t3 != DataType.BYTEARRAY && t3 != DataType.NULL);

    if (badSchema) {
        throw new IOException(this.getClass().getName()
                + " expects input schema of: "
                + "CHARARRAY,BYTEARRAY,CHARARRAY,BYTEARRAY\n"
                + "tuple schema is: "
                + DataType.findTypeName(t0) + ","
                + DataType.findTypeName(t1) + ","
                + DataType.findTypeName(t2) + ","
                + DataType.findTypeName(t3));
    }
}
}
/**
 * Validates that the tuple is a flat sequence of (CHARARRAY, BYTEARRAY-or-NULL) pairs:
 * even-indexed fields must be CHARARRAY, odd-indexed fields must be BYTEARRAY or NULL,
 * and the total number of fields must be even.
 *
 * <p>Fixes relative to the previous version: the type violations were detected but never
 * reported because the "throw" flag was never set; the field dump indexed with the outer
 * loop variable, so it printed the same field repeatedly; and two messages described the
 * wrong parity.
 *
 * @param tuple the tuple to validate
 * @throws IOException if the tuple is {@code null}, has an odd number of fields, or
 *                     contains a field of an unexpected type; in the last case the
 *                     message includes a dump of every field
 */
private void checkCorrectness(Tuple tuple) throws IOException {
    if (tuple == null) {
        throw new IOException(this.getClass().getName() + " received null tuple");
    } else if (tuple.size() % 2 != 0) {
        throw new IOException(this.getClass().getName()
                + " received tuple with odd( " + tuple.size() + ") size - even size expected");
    }

    for (int i = 0; i < tuple.size(); i++) {
        final byte type = tuple.getType(i);

        // Decide whether field i violates the expected layout, and why.
        String problem = null;
        if (i % 2 == 0) {
            if (type != DataType.CHARARRAY) {
                problem = " even fields of a tuple should be of type CHARARRAY, was ";
            }
        } else if (type != DataType.BYTEARRAY && type != DataType.NULL) {
            problem = " odd fields of a tuple should be of type BYTEARRAY/NULL, was ";
        }
        if (problem == null) {
            continue;
        }

        // Report the first offending field together with the full tuple content.
        final StringBuilder errMsg = new StringBuilder(this.getClass().getName());
        errMsg.append(problem)
                .append(DataType.findTypeName(type))
                .append("; The tuple content is as follows: \n");
        for (int j = 0; j < tuple.size(); j++) {
            errMsg.append("f").append(j).append(" -- ")
                    .append(tuple.get(j)).append("\n");
        }
        throw new IOException(errMsg.toString());
    }
}
byte t0 = tuple.getType(0); byte t2 = tuple.getType(size-2); throw new IOException("" + this.getClass().getName() + ": type of tuple fields number 0th and @Tuple.size()-2 have to be CHARARRAY or NULL." + " Get types: "+DataType.findTypeName(tuple.getType(0))+", "+DataType.findTypeName(tuple.getType(size-2)));
/**
 * Consumes one batch of input: the single field of {@code input} must be a bag, and every
 * tuple of that bag whose distinct-string has not been seen yet is added to
 * {@code outputBag} and remembered in {@code seen}.
 *
 * @param input a one-field tuple holding a bag
 * @throws IOException propagated from tuple access or from {@code getDistinctString}
 * @throws RuntimeException if {@code input} does not have exactly one field or that field
 *                          is not a bag
 */
@Override
public void accumulate(Tuple input) throws IOException {
    final boolean singleField = input.size() == 1;
    if (!singleField) {
        throw new RuntimeException("Expected input to have only a single field");
    }
    final boolean holdsBag = input.getType(0) == DataType.BAG;
    if (!holdsBag) {
        throw new RuntimeException("Expected a BAG as input");
    }

    final DataBag batch = (DataBag) input.get(0);
    for (Tuple candidate : batch) {
        final String distinctKey = getDistinctString(candidate, this.fields);
        if (seen.contains(distinctKey)) {
            // Already emitted an equivalent tuple.
            continue;
        }
        outputBag.add(candidate);
        seen.add(distinctKey);
    }
}
/**
 * Sends one CQL query per non-empty inner tuple found in {@code t}, starting at
 * {@code offset}.
 *
 * @param key    key map handed to {@code sendCqlQuery} unchanged (may be {@code null})
 * @param t      outer tuple; every field from {@code offset} onward must itself be a tuple
 * @param offset index of the first field to process
 * @throws IOException if a field is not a tuple, or a non-empty inner tuple yields no
 *                     bound variables
 */
private void cqlQueryFromTuple(Map<String, ByteBuffer> key, Tuple t, int offset) throws IOException {
    for (int pos = offset; pos < t.size(); pos++) {
        if (t.getType(pos) != DataType.TUPLE) {
            throw new IOException("Output type was not a tuple");
        }

        final Tuple row = (Tuple) t.get(pos);
        if (row.size() <= 0) {
            // Empty inner tuples are skipped silently.
            continue;
        }

        final List<ByteBuffer> boundValues = bindedVariablesFromTuple(row);
        if (boundValues.size() <= 0) {
            throw new IOException("Missing binded variables");
        }
        sendCqlQuery(key, boundValues);
    }
}
/** * output: (((name, value), (name, value)), (value ... value), (value...value)) * bulk output: ((value ... value), (value...value)) * * */ public void putNext(Tuple t) throws IOException { if (t.size() < 1) { // simply nothing here, we can't even delete without a key logger.warn("Empty output skipped, filter empty tuples to suppress this warning"); return; } if (t.getType(0) != DataType.TUPLE) throw new IOException("First argument in output must be a tuple"); if (!bulkOutputFormat && t.getType(1) != DataType.TUPLE) throw new IOException("Second argument in output must be a tuple"); if (bulkOutputFormat) { cqlQueryFromTuple(null, t, 0); } else { Map<String, ByteBuffer> key = tupleToKeyMap((Tuple)t.get(0)); cqlQueryFromTuple(key, t, 1); } }
/**
 * Converts a tuple of (name, value) pairs into a map from name to encoded value.
 *
 * @param t tuple whose every field is a two-element inner tuple
 * @return a new map keyed by {@code name.toString()} with values produced by
 *         {@code objToBB}
 * @throws IOException if a field is not a tuple, an inner tuple does not have exactly two
 *                     elements, or a name is {@code null}
 */
private Map<String, ByteBuffer> tupleToKeyMap(Tuple t) throws IOException {
    final Map<String, ByteBuffer> result = new HashMap<String, ByteBuffer>();

    for (int pos = 0; pos < t.size(); pos++) {
        if (t.getType(pos) != DataType.TUPLE) {
            throw new IOException("keys was not a tuple");
        }

        final Tuple pair = (Tuple) t.get(pos);
        if (pair.size() != 2) {
            throw new IOException("Keys were not in name and value pairs");
        }

        final Object keyName = pair.get(0);
        if (keyName == null) {
            throw new IOException("Key name was empty");
        }

        final ByteBuffer encodedValue = objToBB(pair.get(1));
        result.put(keyName.toString(), encodedValue);
    }

    return result;
}
/** write tuple data to cassandra */ private void writeColumnsFromTuple(ByteBuffer key, Tuple t, int offset) throws IOException { ArrayList<Mutation> mutationList = new ArrayList<Mutation>(); for (int i = offset; i < t.size(); i++) { if (t.getType(i) == DataType.BAG) writeColumnsFromBag(key, (DataBag) t.get(i)); else if (t.getType(i) == DataType.TUPLE) { Tuple inner = (Tuple) t.get(i); if (inner.size() > 0) // may be empty, for an indexed column that wasn't present mutationList.add(mutationFromTuple(inner)); } else if (!usePartitionFilter) { throw new IOException("Output type was not a bag or a tuple"); } } if (mutationList.size() > 0) writeMutations(key, mutationList); }
/**
 * Computes the size in bytes of the first field of {@code input}.
 *
 * @param input tuple whose first field is measured
 * @return {@code 0} for a {@code null} or empty tuple and for any field type other than
 *         BYTEARRAY or CHARARRAY; the byte-array size for BYTEARRAY; the length of
 *         {@code String.getBytes()} (platform default charset) for CHARARRAY
 * @throws IOException propagated from tuple access
 */
public Long exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return 0L;
    }

    final byte fieldType = input.getType(0);

    if (fieldType == DataType.BYTEARRAY) {
        final DataByteArray raw = (DataByteArray) input.get(0);
        final long rawSize = raw.size();
        return rawSize;
    }

    if (fieldType == DataType.CHARARRAY) {
        final String text = (String) input.get(0);
        final long encodedSize = text.getBytes().length;
        return encodedSize;
    }

    // Every other type counts as zero bytes.
    return 0L;
}
/** write next row */ public void putNext(Tuple t) throws IOException { /* We support two cases for output: First, the original output: (key, (name, value), (name,value), {(name,value)}) (tuples or bag is optional) For supers, we only accept the original output. */ if (t.size() < 1) { // simply nothing here, we can't even delete without a key logger.warn("Empty output skipped, filter empty tuples to suppress this warning"); return; } ByteBuffer key = objToBB(t.get(0)); if (t.getType(1) == DataType.TUPLE) writeColumnsFromTuple(key, t, 1); else if (t.getType(1) == DataType.BAG) { if (t.size() > 2) throw new IOException("No arguments allowed after bag"); writeColumnsFromBag(key, (DataBag) t.get(1)); } else throw new IOException("Second argument in output must be a tuple or bag"); }
protected void translatePigDataTypeToWritable(Tuple t, int fieldNum, Writable writable) throws ExecException { byte dataType = t.getType(fieldNum); Object dataValue = t.get(fieldNum);
@Override public Integer exec(Tuple input) throws IOException { if (input.size() != 1) { throw new RuntimeException("Expected input to have only a single field"); } if (input.getType(0) != DataType.BAG) { throw new RuntimeException("Expected a BAG as input"); } // guava bloom BloomFilter<CharSequence> filter = BloomFilter.create(Funnels.stringFunnel(), n, p); // hadoop bloom //BloomFilter filter = new BloomFilter(m, k, Hash.MURMUR_HASH); int uniq = 0; DataBag db = (DataBag) input.get(0); for (Iterator<Tuple> iter = db.iterator(); iter.hasNext();) { Tuple t = iter.next(); if (!filter.mightContain((String)t.get(0))) { filter.put((String)t.get(0)); //filter.add(t); uniq++; } } return uniq; }
/**
 * Splits a serialized {@code DocumentWrapper} into its metadata and media parts.
 *
 * @param tuple a two-field tuple: (CHARARRAY row id, BYTEARRAY serialized DocumentWrapper)
 * @return a new tuple (row id, serialized DocumentMetadata, serialized MediaContainer)
 * @throws IOException if the tuple is {@code null}, does not have two fields, or the
 *                     field types are not CHARARRAY and BYTEARRAY; also propagated from
 *                     parsing the wrapper bytes
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    final boolean wellFormed = tuple != null
            && tuple.size() == 2
            && tuple.getType(0) == DataType.CHARARRAY
            && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
        throw new IOException(this.getClass().getName()
                + " expects 2 arguments, first string, second byte array");
    }

    final String rowId = (String) tuple.get(0);
    final DataByteArray wrapperBytes = (DataByteArray) tuple.get(1);

    final DocumentWrapper wrapper = DocumentWrapper.parseFrom(wrapperBytes.get());
    final DocumentMetadata metadata = wrapper.getDocumentMetadata();
    final MediaContainer media = wrapper.getMediaContainer();

    final Tuple result = TupleFactory.getInstance().newTuple();
    result.append(rowId);
    result.append(new DataByteArray(metadata.toByteArray()));
    result.append(new DataByteArray(media.toByteArray()));
    return result;
}
}
/**
 * Extracts the document key from a serialized {@code DocumentWrapper}.
 *
 * @param tuple a two-field tuple whose second field is a BYTEARRAY holding the serialized
 *              wrapper; the first field is cast to {@code String} without a type check
 * @return a new tuple (row id, document metadata key, original wrapper bytes)
 * @throws IOException if the tuple is {@code null}, does not have two fields, or the
 *                     second field is not a BYTEARRAY; also propagated from parsing
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    final boolean wellFormed = tuple != null
            && tuple.size() == 2
            && tuple.getType(1) == DataType.BYTEARRAY;
    if (!wellFormed) {
        throw new IOException(this.getClass().getName()
                + " expects 2 arguments, 2nd must be a bytearray");
    }

    final String rowId = (String) tuple.get(0);
    final DataByteArray serialized = (DataByteArray) tuple.get(1);

    final byte[] rawProto = serialized.get();
    final DocumentWrapper wrapper = DocumentProtos.DocumentWrapper.parseFrom(rawProto);

    final Tuple out = tupleFactory.newTuple();
    out.append(rowId);
    out.append(wrapper.getDocumentMetadata().getKey());
    out.append(serialized);
    return out;
}
}
} else if (!poCounter.isDenseRank()) { int positionBag = inp.getAll().size()-1; if (inp.getType(positionBag) == DataType.BAG) { sizeBag = ((org.apache.pig.data.DefaultAbstractBag) inp.get(positionBag)).size();