@Override
public void write(Writable r) throws IOException {
  if (keyIsText) {
    Text text = (Text) r;
    keyWritable.set(text.getBytes(), 0, text.getLength());
  } else {
    BytesWritable bw = (BytesWritable) r;
    // Once we drop support for old Hadoop versions, change these
    // to getBytes() and getLength() to fix the deprecation warnings.
    // Not worth a shim.
    keyWritable.set(bw.get(), 0, bw.getSize());
  }
  keyWritable.setHashCode(r.hashCode());
  outStream.append(keyWritable, NULL_WRITABLE);
}
private int findWriterOffset(Object row) throws HiveException {
  if (!multiFileSpray) {
    return 0;
  } else {
    Object[] bucketFieldValues = new Object[partitionEval.length];
    for (int i = 0; i < partitionEval.length; i++) {
      bucketFieldValues[i] = partitionEval[i].evaluate(row);
    }
    int keyHashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, partitionObjectInspectors);
    key.setHashCode(keyHashCode);
    int bucketNum = prtner.getBucket(key, null, totalFiles);
    return bucketMap.get(bucketNum);
  }
}
/**
 * After a vectorized batch is processed, returns the key that caused a particular row
 * to be forwarded. A row can only be marked for forwarding because it has the same key
 * as some row already in the heap (for GBY), so we can use that key from the heap to
 * emit the forwarded row.
 * @param batchIndex index of the key in the batch.
 * @return The key corresponding to the index.
 */
public HiveKey getVectorizedKeyToForward(int batchIndex) {
  int index = MAY_FORWARD - batchIndexToResult[batchIndex];
  HiveKey hk = new HiveKey();
  hk.set(keys[index], 0, keys[index].length);
  hk.setHashCode(hashes[index]);
  hk.setDistKeyLength(distKeyLengths[index]);
  return hk;
}
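A minimal usage sketch for the method above; the enclosing operator, the topNHash field, and the valueWritable variable are assumptions for illustration, not taken from the source:

// Hypothetical caller (illustrative only): once the batch has been processed,
// fetch the stored key for a row that was flagged for forwarding and emit it.
HiveKey forwardKey = topNHash.getVectorizedKeyToForward(batchIndex);
// The returned key already carries its bytes, hash code and distribution key length,
// so it can be handed straight to the collector alongside the row's value.
collect(forwardKey, valueWritable);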
private void postProcess(VectorizedRowBatch batch, int batchIndex, int tag, int hashCode) throws HiveException {
  try {
    processKey(batch, batchIndex, tag);
    keyWritable.setHashCode(hashCode);
    processValue(batch, batchIndex);
    collect(keyWritable, valueBytesWritable);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
protected void initializeEmptyKey(int tag) {
  // Use the same logic as ReduceSinkOperator.toHiveKey.
  if (tag == -1 || reduceSkipTag) {
    keyWritable.setSize(0);
  } else {
    keyWritable.setSize(1);
    keyWritable.get()[0] = reduceTagByte;
  }
  keyWritable.setDistKeyLength(0);
  keyWritable.setHashCode(0);
}
public static HiveKey copyHiveKey(HiveKey key) {
  HiveKey copy = new HiveKey();
  copy.setDistKeyLength(key.getDistKeyLength());
  copy.setHashCode(key.hashCode());
  copy.set(key);
  return copy;
}
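A minimal sketch of how such a copy might be used; the buffering scenario, the bufferedKeys field, and the buffer method are assumptions for illustration only:

import java.util.ArrayList;
import java.util.List;

// Hypothetical buffering scenario (illustrative only): HiveKey instances coming out of
// an operator are typically reused, so a standalone copy is stored instead of the original.
private final List<HiveKey> bufferedKeys = new ArrayList<>();

void buffer(HiveKey reusedKey) {
  // copyHiveKey preserves the key bytes, the distribution key length and the hash code.
  bufferedKeys.add(copyHiveKey(reusedKey));
}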
private int findWriterOffset(Object row) throws HiveException {
  if (!multiFileSpray) {
    return 0;
  } else {
    assert getConf().getWriteType() != AcidUtils.Operation.DELETE &&
        getConf().getWriteType() != AcidUtils.Operation.UPDATE :
        "Unexpected operation type: " + getConf().getWriteType();
    // This is not used for DELETE commands (partitionEval is not set up correctly,
    // or needed, for that).
    Object[] bucketFieldValues = new Object[partitionEval.length];
    for (int i = 0; i < partitionEval.length; i++) {
      bucketFieldValues[i] = partitionEval[i].evaluate(row);
    }
    int keyHashCode = hashFunc.apply(bucketFieldValues, partitionObjectInspectors);
    key.setHashCode(keyHashCode);
    int bucketNum = prtner.getBucket(key, null, totalFiles);
    return bucketMap.get(bucketNum);
  }
}
key.setHashCode(idx);
public static int getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings, int bucketCount)
    throws HiveException {
  GenericUDFHash udf = new GenericUDFHash();
  ObjectInspector[] objectInspectors = new ObjectInspector[columnBindings.size()];
  GenericUDF.DeferredObject[] deferredObjects = new GenericUDF.DeferredObject[columnBindings.size()];
  int i = 0;
  for (Entry<ObjectInspector, Object> entry : columnBindings) {
    objectInspectors[i] = entry.getKey();
    if (entry.getValue() != null && entry.getKey() instanceof JavaHiveVarcharObjectInspector) {
      JavaHiveVarcharObjectInspector varcharObjectInspector =
          (JavaHiveVarcharObjectInspector) entry.getKey();
      deferredObjects[i] = new GenericUDF.DeferredJavaObject(
          new HiveVarchar((String) entry.getValue(), varcharObjectInspector.getMaxLength()));
    } else {
      deferredObjects[i] = new GenericUDF.DeferredJavaObject(entry.getValue());
    }
    i++;
  }
  ObjectInspector udfInspector = udf.initialize(objectInspectors);
  IntObjectInspector inspector = (IntObjectInspector) udfInspector;
  Object result = udf.evaluate(deferredObjects);
  HiveKey hiveKey = new HiveKey();
  hiveKey.setHashCode(inspector.get(result));
  return new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);
}
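A hedged usage sketch for the method above, assuming a single int-valued key column and that getHiveBucket is reachable from the calling class; the inspector choice and values here are illustrative only:

import java.util.AbstractMap.SimpleEntry;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Illustrative call: hash one int key column into one of 4 buckets.
ObjectInspector intInspector = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
List<Entry<ObjectInspector, Object>> bindings =
    Collections.singletonList(new SimpleEntry<ObjectInspector, Object>(intInspector, 42));
int bucket = getHiveBucket(bindings, 4); // result is expected to fall in [0, 4)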
hashCode = computeHashCode(row, bucketNumber);
firstKey.setHashCode(hashCode);
populateCachedDistinctKeys(row, i);
HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
hiveKey.setHashCode(hashCode);
collect(hiveKey, value);
keyWritable.setHashCode(nullKeyHashCode);
keyWritable.setHashCode(serializedKeySeries.getCurrentHashCode());
keyWritable.setHashCode(hashCode);