@Override
public void collect(byte[] key, byte[] value, int hash) throws IOException {
  // Wrap the raw key bytes (with its precomputed hash) and the raw value
  // bytes into Writable types, then hand them to the writable-based overload.
  final HiveKey wrappedKey = new HiveKey(key, hash);
  final BytesWritable wrappedValue = new BytesWritable(value);
  collect(wrappedKey, wrappedValue);
}
@Override
public void collect(byte[] key, byte[] value, int hash) throws IOException {
  // Adapt the raw byte arrays into Writable wrappers and delegate the
  // actual emission to doCollect.
  doCollect(new HiveKey(key, hash), new BytesWritable(value));
}
@Override
public void collect(byte[] key, byte[] value, int hash) throws IOException {
  // Convert raw key/value bytes to the Writable forms the downstream
  // collector expects; the hash travels inside the HiveKey.
  collect(new HiveKey(key, hash), new BytesWritable(value));
}
HiveKey.class, NullWritable.class, isCompressed, progress); keyWritable = new HiveKey(); keyIsText = valueClass.equals(Text.class); return new RecordWriter() {
// Deserialize a HiveKey from the Kryo stream: a varint length prefix
// (optimize-positive encoding) followed by that many raw key bytes.
// NOTE(review): the hash code is NOT restored here — the single-argument
// HiveKey constructor is used, unlike other serializers in this codebase
// that also read a trailing varint hash. Presumably the matching write()
// does not emit a hash; verify against the writer before changing.
public HiveKey read(Kryo kryo, Input input, Class<HiveKey> type) {
  int len = input.readVarInt(true);
  byte[] bytes = new byte[len];
  input.readBytes(bytes);
  return new HiveKey(bytes);
}
}
HiveKey.class, NullWritable.class, isCompressed, progress); keyWritable = new HiveKey(); keyIsText = valueClass.equals(Text.class); return new RecordWriter() {
public void collect(String prefix, long id) {
  // Build a deterministic synthetic key/value pair from the prefix and id,
  // then push it into the collector. IO failures are only printed — this
  // helper is best-effort by design.
  final String keyText = prefix + "_key_" + id;
  final String valueText = prefix + "_value_" + id;
  final HiveKey hiveKey = new HiveKey(keyText.getBytes(), keyText.hashCode());
  final BytesWritable writableValue = new BytesWritable(valueText.getBytes());
  try {
    collect(hiveKey, writableValue);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
// Deserialize a HiveKey: varint (optimize-positive) length, the key bytes,
// then a varint (signed) hash code. Read order must mirror write().
public HiveKey read(Kryo kryo, Input input, Class<HiveKey> type) {
  final int keyLength = input.readVarInt(true);
  final byte[] keyBytes = new byte[keyLength];
  input.readBytes(keyBytes);
  final int storedHash = input.readVarInt(false);
  return new HiveKey(keyBytes, storedHash);
}
}
/**
 * Returns the key that caused a given batch row to be forwarded, for use
 * after the vectorized batch has been processed. A row is only marked for
 * forwarding when its key matches one already held in the heap (for GBY),
 * so the heap's copy of that key can be emitted with the forwarded row.
 *
 * @param batchIndex index of the key in the batch.
 * @return the HiveKey corresponding to that batch index.
 */
public HiveKey getVectorizedKeyToForward(int batchIndex) {
  // batchIndexToResult encodes the heap slot as (MAY_FORWARD - slot);
  // invert that encoding to recover the slot.
  final int heapSlot = MAY_FORWARD - batchIndexToResult[batchIndex];
  final HiveKey forwardedKey = new HiveKey();
  forwardedKey.set(keys[heapSlot], 0, keys[heapSlot].length);
  forwardedKey.setHashCode(hashes[heapSlot]);
  forwardedKey.setDistKeyLength(distKeyLengths[heapSlot]);
  return forwardedKey;
}
/**
 * Recovers, after a vectorized batch is processed, the key responsible for
 * forwarding a particular row. Since forwarding only happens when the row's
 * key equals one already in the heap (for GBY), the heap's key is a valid
 * stand-in for emitting the forwarded row.
 *
 * @param batchIndex index of the key in the batch.
 * @return the key stored in the heap for that batch position.
 */
public HiveKey getVectorizedKeyToForward(int batchIndex) {
  // Decode the heap position from the MAY_FORWARD-relative marker.
  final int slot = MAY_FORWARD - batchIndexToResult[batchIndex];
  final HiveKey result = new HiveKey();
  result.set(keys[slot], 0, keys[slot].length);
  result.setHashCode(hashes[slot]);
  result.setDistKeyLength(distKeyLengths[slot]);
  return result;
}
/** Produces an independent copy of {@code key}, including its metadata. */
public static HiveKey copyHiveKey(HiveKey key) {
  final HiveKey duplicate = new HiveKey();
  duplicate.setDistKeyLength(key.getDistKeyLength());
  // NOTE(review): presumably HiveKey.hashCode() returns the stored hash
  // (not an identity hash), so this carries the hash over — verify.
  duplicate.setHashCode(key.hashCode());
  duplicate.set(key);
  return duplicate;
}
// Reads a HiveKey from the stream. Layout: key length, key bytes,
// hash code, distribution-key length — consumed in exactly that order.
private HiveKey readHiveKey(Input input) {
  final int keyLength = input.readInt();
  final byte[] keyBytes = input.readBytes(keyLength);
  final int hash = input.readInt();
  final HiveKey restored = new HiveKey(keyBytes, hash);
  restored.setDistKeyLength(input.readInt());
  return restored;
}
// Rebuilds a HiveKey from its serialized form: length-prefixed key bytes,
// then the hash code, then the distribution-key length.
private HiveKey readHiveKey(Input input) {
  final byte[] keyBytes = input.readBytes(input.readInt());
  final HiveKey key = new HiveKey(keyBytes, input.readInt());
  key.setDistKeyLength(input.readInt());
  return key;
}
/** Clones {@code key} into a fresh HiveKey, preserving its metadata. */
public static HiveKey copyHiveKey(HiveKey key) {
  final HiveKey clone = new HiveKey();
  clone.setDistKeyLength(key.getDistKeyLength());
  // NOTE(review): relies on HiveKey.hashCode() exposing the stored hash
  // rather than Object's identity hash — confirm in HiveKey.
  clone.setHashCode(key.hashCode());
  clone.set(key);
  return clone;
}
// Deserializes one HiveKey. Stream order: key length, key bytes,
// hash code, dist-key length.
private HiveKey readHiveKey(Input input) {
  final int length = input.readInt();
  final byte[] bytes = input.readBytes(length);
  final int hashCode = input.readInt();
  final int distKeyLength = input.readInt();
  final HiveKey hiveKey = new HiveKey(bytes, hashCode);
  hiveKey.setDistKeyLength(distKeyLength);
  return hiveKey;
}
// Reconstructs a HiveKey from the input stream; field order matters and
// must match the corresponding writer (length, bytes, hash, dist length).
private HiveKey readHiveKey(Input input) {
  final byte[] raw = input.readBytes(input.readInt());
  final HiveKey out = new HiveKey(raw, input.readInt());
  out.setDistKeyLength(input.readInt());
  return out;
}
/**
 * Inserts {@code numRecords} synthetic rows into the cache, drains them
 * back in order, and verifies both content and count.
 */
private void testSpillingHelper(HiveKVResultCache cache, int numRecords) {
  // Phase 1: populate with deterministically named key/value pairs.
  for (int i = 0; i < numRecords; i++) {
    String k = "key_" + i;
    String v = "value_" + i;
    cache.add(new HiveKey(k.getBytes(), k.hashCode()),
        new BytesWritable(v.getBytes()));
  }
  // Phase 2: drain and verify insertion order is preserved.
  int seen = 0;
  while (cache.hasNext()) {
    String expectedKey = "key_" + seen;
    String expectedValue = "value_" + seen;
    Tuple2<HiveKey, BytesWritable> row = cache.next();
    assertTrue("Unexpected key at position: " + seen,
        new String(row._1().getBytes()).equals(expectedKey));
    assertTrue("Unexpected value at position: " + seen,
        new String(row._2().getBytes()).equals(expectedValue));
    seen++;
  }
  assertTrue("Retrieved record count doesn't match inserted record count",
      numRecords == seen);
  cache.clear();
}
/**
 * Computes the Hive bucket number for a row. The bound column values are
 * hashed with Hive's {@link GenericUDFHash} and the resulting hash is
 * mapped to a bucket by the default partitioner.
 *
 * @param columnBindings inspector/value pairs for the bucketing columns.
 * @param bucketCount total number of buckets.
 * @return the bucket index for the given values.
 * @throws HiveException if the hash UDF fails to initialize or evaluate.
 */
public static int getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings,
    int bucketCount) throws HiveException {
  GenericUDFHash hashUdf = new GenericUDFHash();
  int columnCount = columnBindings.size();
  ObjectInspector[] inspectors = new ObjectInspector[columnCount];
  GenericUDF.DeferredObject[] arguments = new GenericUDF.DeferredObject[columnCount];
  int idx = 0;
  for (Entry<ObjectInspector, Object> binding : columnBindings) {
    inspectors[idx] = binding.getKey();
    Object boundValue = binding.getValue();
    if (boundValue != null && binding.getKey() instanceof JavaHiveVarcharObjectInspector) {
      // Varchar values are re-wrapped as HiveVarchar using the inspector's
      // declared max length so the UDF sees the proper varchar semantics.
      JavaHiveVarcharObjectInspector varcharInspector =
          (JavaHiveVarcharObjectInspector) binding.getKey();
      arguments[idx] = new GenericUDF.DeferredJavaObject(
          new HiveVarchar((String) boundValue, varcharInspector.getMaxLength()));
    } else {
      arguments[idx] = new GenericUDF.DeferredJavaObject(boundValue);
    }
    idx++;
  }
  IntObjectInspector resultInspector = (IntObjectInspector) hashUdf.initialize(inspectors);
  Object hashResult = hashUdf.evaluate(arguments);
  HiveKey bucketKey = new HiveKey();
  bucketKey.setHashCode(resultInspector.get(hashResult));
  return new DefaultHivePartitioner<>().getBucket(bucketKey, null, bucketCount);
}
keyWritable = new HiveKey();
@Test public void testSimple() throws Exception { // Create KV result cache object, add one (k,v) pair and retrieve them. HiveKVResultCache cache = new HiveKVResultCache(); HiveKey key = new HiveKey("key".getBytes(), "key".hashCode()); BytesWritable value = new BytesWritable("value".getBytes()); cache.add(key, value); assertTrue("KV result cache should have at least one element", cache.hasNext()); Tuple2<HiveKey, BytesWritable> row = cache.next(); assertTrue("Incorrect key", row._1().equals(key)); assertTrue("Incorrect value", row._2().equals(value)); assertTrue("Cache shouldn't have more records", !cache.hasNext()); }