/**
 * Computes murmur hash codes for a selection of byte-array slices.
 *
 * <p>For each output position {@code p} in {@code [0, count)}, the source row is
 * {@code valueSelected[p]}; the slice {@code bytesArrays[row]} starting at
 * {@code starts[row]} with length {@code lengths[row]} is hashed and the result is
 * written to {@code hashCodes[p]}. Note that the output is indexed by position, not by row.
 *
 * @param bytesArrays   per-row backing byte arrays
 * @param starts        per-row start offset into the backing array
 * @param lengths       per-row slice length
 * @param valueSelected row indices to hash, read for positions {@code 0..count-1}
 * @param hashCodes     output array, written for positions {@code 0..count-1}
 * @param count         number of selected entries to process
 */
public static void calculateBytesArrayHashCodes(byte[][] bytesArrays, int[] starts, int[] lengths,
    int[] valueSelected, int[] hashCodes, final int count) {
  for (int position = 0; position < count; position++) {
    final int row = valueSelected[position];
    final byte[] rowBytes = bytesArrays[row];
    final int rowStart = starts[row];
    final int rowLength = lengths[row];
    hashCodes[position] = murmurHash(rowBytes, rowStart, rowLength);
  }
}
/**
 * Computes the hash code of a single byte-array slice by delegating to {@code murmurHash}.
 *
 * @param keyBytes  backing array holding the key
 * @param keyStart  offset of the first key byte
 * @param keyLength number of key bytes
 * @return the value returned by {@code murmurHash} for the slice
 */
public static int calculateBytesHashCode(byte[] keyBytes, int keyStart, int keyLength) {
  final int hash = murmurHash(keyBytes, keyStart, keyLength);
  return hash;
}
/**
 * Hashes a slice of a byte array with {@code HashCodeUtil.murmurHash}.
 *
 * @param key    backing array holding the key
 * @param offset offset of the first key byte
 * @param length number of key bytes
 * @return the murmur hash of the slice
 */
public int hashCode(byte[] key, int offset, int length) {
  final int hash = HashCodeUtil.murmurHash(key, offset, length);
  return hash;
}
/**
 * Hashes the current key: the first {@code key.getLength()} bytes of {@code key.getBytes()},
 * starting at offset 0.
 *
 * @return the murmur hash of the key bytes
 * @throws SerDeException declared by the overridden method; not thrown directly here
 */
@Override
public int getHashFromKey() throws SerDeException {
  // Arguments are evaluated left to right, so getBytes() is still read before getLength().
  return HashCodeUtil.murmurHash(key.getBytes(), 0, key.getLength());
}
/**
 * Returns the murmur hash of the current key, taken over the bytes
 * {@code [0, key.getLength())} of {@code key.getBytes()}.
 *
 * @return the murmur hash of the key bytes
 * @throws SerDeException declared by the overridden method; not thrown directly here
 */
@Override
public int getHashFromKey() throws SerDeException {
  final byte[] rawKey = key.getBytes();
  final int usedLength = key.getLength();
  final int hash = HashCodeUtil.murmurHash(rawKey, 0, usedLength);
  return hash;
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Computes the hash code of every serialized key in one pass.
 *
 * <p>The keys are read back-to-back from {@code output.getData()}: key {@code k} occupies
 * {@code serializedKeyLengths[k]} bytes immediately after key {@code k - 1}, and its hash is
 * stored in {@code hashCodes[k]} for {@code k} in {@code [0, nonNullKeyCount)}.
 *
 * <p>NOTE: MAJOR MAJOR ASSUMPTION:
 * We assume that HashCodeUtil.murmurHash produces the same result
 * as MurmurHash.hash with seed = 0 (the method used by ReduceSinkOperator for
 * UNIFORM distribution).
 */
protected void computeSerializedHashCodes() {
  final byte[] serializedBytes = output.getData();
  int keyStart = 0;
  for (int keyIndex = 0; keyIndex < nonNullKeyCount; keyIndex++) {
    final int length = serializedKeyLengths[keyIndex];
    hashCodes[keyIndex] = HashCodeUtil.murmurHash(serializedBytes, keyStart, length);
    // The next key begins right where this one ends.
    keyStart += length;
  }
}
public int hashCode(long offset, int length, Position readPos) { setReadPoint(offset, readPos); if (isAllInOneReadBuffer(length, readPos)) { int result = HashCodeUtil.murmurHash(readPos.buffer, readPos.offset, length); readPos.offset += length; return result; } // Rare case of buffer boundary. Unfortunately we'd have to copy some bytes. byte[] bytes = new byte[length]; int destOffset = 0; while (destOffset < length) { ponderNextBufferToRead(readPos); int toRead = Math.min(length - destOffset, wbSize - readPos.offset); System.arraycopy(readPos.buffer, readPos.offset, bytes, destOffset, toRead); readPos.offset += toRead; destOffset += toRead; } return HashCodeUtil.murmurHash(bytes, 0, bytes.length); }
/**
 * Hashes the current key, which must be a {@code BinaryComparable}.
 *
 * <p>The hash covers the key bytes from offset 0; when {@code hasTag} is set the hashed length
 * is one byte shorter than {@code getLength()} (presumably to leave out a trailing tag byte -
 * confirm against {@code sanityCheckKeyForTag}).
 *
 * @return the murmur hash of the key bytes
 * @throws SerDeException if the key is not a {@code BinaryComparable}
 */
@Override
public int getHashFromKey() throws SerDeException {
  if (key instanceof BinaryComparable) {
    sanityCheckKeyForTag();
    final BinaryComparable comparable = (BinaryComparable) key;
    final byte[] keyBytes = comparable.getBytes();
    int hashedLength = comparable.getLength();
    if (hasTag) {
      hashedLength -= 1;
    }
    return HashCodeUtil.murmurHash(keyBytes, 0, hashedLength);
  }
  throw new SerDeException("Unexpected type " + key.getClass().getCanonicalName());
}
/**
 * Returns the murmur hash of the current key.
 *
 * <p>The key has to be a {@code BinaryComparable}; anything else is rejected. Hashing starts at
 * offset 0 and spans {@code getLength()} bytes, minus one when {@code hasTag} is set.
 *
 * @return the murmur hash of the key bytes
 * @throws SerDeException if the key is not a {@code BinaryComparable}
 */
@Override
public int getHashFromKey() throws SerDeException {
  final boolean supported = key instanceof BinaryComparable;
  if (!supported) {
    final String typeName = key.getClass().getCanonicalName();
    throw new SerDeException("Unexpected type " + typeName);
  }
  sanityCheckKeyForTag();

  final BinaryComparable binaryKey = (BinaryComparable) key;
  final byte[] data = binaryKey.getBytes();
  final int fullLength = binaryKey.getLength();
  final int tagBytes = hasTag ? 1 : 0;
  return HashCodeUtil.murmurHash(data, 0, fullLength - tagBytes);
}
/**
 * Looks up a bytes key in the hash map.
 *
 * <p>The supplied result object is cast to the store's result type, reset with
 * {@code forget()}, and then filled in by {@code doHashMapMatch}.
 *
 * @param keyBytes      backing array holding the key
 * @param keyStart      offset of the first key byte
 * @param keyLength     number of key bytes
 * @param hashMapResult result holder; must be a
 *                      {@code VectorMapJoinFastBytesHashMapStore.HashMapResult}
 * @return the join result reported by the result holder after the match attempt
 */
@Override
public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashMapResult hashMapResult) {
  final VectorMapJoinFastBytesHashMapStore.HashMapResult storeResult =
      (VectorMapJoinFastBytesHashMapStore.HashMapResult) hashMapResult;
  storeResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  doHashMapMatch(keyBytes, keyStart, keyLength, keyHash, storeResult);

  return storeResult.joinResult();
}
/**
 * Tests whether a bytes key is present in the hash set.
 *
 * <p>The supplied result object is cast to the store's result type, reset with
 * {@code forget()}, and then filled in by {@code doHashSetContains}.
 *
 * @param keyBytes      backing array holding the key
 * @param keyStart      offset of the first key byte
 * @param keyLength     number of key bytes
 * @param hashSetResult result holder; must be a
 *                      {@code VectorMapJoinFastBytesHashSetStore.HashSetResult}
 * @return the join result reported by the result holder after the containment check
 */
@Override
public JoinUtil.JoinResult contains(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashSetResult hashSetResult) {
  final VectorMapJoinFastBytesHashSetStore.HashSetResult storeResult =
      (VectorMapJoinFastBytesHashSetStore.HashSetResult) hashSetResult;
  storeResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  doHashSetContains(keyBytes, keyStart, keyLength, keyHash, storeResult);

  return storeResult.joinResult();
}
/**
 * Tests whether a bytes key is present in the hash multi-set.
 *
 * <p>The supplied result object is cast to the store's result type, reset with
 * {@code forget()}, and then filled in by {@code doHashMultiSetContains}.
 *
 * @param keyBytes           backing array holding the key
 * @param keyStart           offset of the first key byte
 * @param keyLength          number of key bytes
 * @param hashMultiSetResult result holder; must be a
 *                           {@code VectorMapJoinFastBytesHashMultiSetStore.HashMultiSetResult}
 * @return the join result reported by the result holder after the containment check
 */
@Override
public JoinUtil.JoinResult contains(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashMultiSetResult hashMultiSetResult) {
  final VectorMapJoinFastBytesHashMultiSetStore.HashMultiSetResult storeResult =
      (VectorMapJoinFastBytesHashMultiSetStore.HashMultiSetResult) hashMultiSetResult;
  storeResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  doHashMultiSetContains(keyBytes, keyStart, keyLength, keyHash, storeResult);

  return storeResult.joinResult();
}
/**
 * Looks up a bytes key in the hash map and optionally records the matched slot.
 *
 * <p>The supplied result object is cast to the store's result type, reset with
 * {@code forget()}, and filled in by {@code doHashMapMatch}. If that call returns a slot other
 * than {@code -1} and a tracker was supplied, the slot is passed to
 * {@code matchTracker.trackMatch}.
 *
 * @param keyBytes      backing array holding the key
 * @param keyStart      offset of the first key byte
 * @param keyLength     number of key bytes
 * @param hashMapResult result holder; must be a
 *                      {@code VectorMapJoinFastBytesHashMapStore.HashMapResult}
 * @param matchTracker  tracker to notify on a match; may be {@code null}
 * @return the join result reported by the result holder after the match attempt
 */
@Override
public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) {
  final VectorMapJoinFastBytesHashMapStore.HashMapResult storeResult =
      (VectorMapJoinFastBytesHashMapStore.HashMapResult) hashMapResult;
  storeResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  final int matchedSlot = doHashMapMatch(keyBytes, keyStart, keyLength, keyHash, storeResult);

  final boolean matched = matchedSlot != -1;
  if (matched && matchTracker != null) {
    matchTracker.trackMatch(matchedSlot);
  }

  return storeResult.joinResult();
}
/**
 * Tests whether a bytes key is present in the hash set.
 *
 * <p>The result holder is reset with {@code forget()}, the key is hashed, and
 * {@code findReadSlot} is consulted. A return value of {@code -1} yields {@code NOMATCH};
 * any other value yields {@code MATCH}. The outcome is stored on the result holder and
 * also returned.
 *
 * @param keyBytes      backing array holding the key
 * @param keyStart      offset of the first key byte
 * @param keyLength     number of key bytes
 * @param hashSetResult result holder; must be a {@code VectorMapJoinFastHashSet.HashSetResult}
 * @return {@code MATCH} if the key was found, otherwise {@code NOMATCH}
 */
@Override
public JoinUtil.JoinResult contains(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashSetResult hashSetResult) {
  final VectorMapJoinFastHashSet.HashSetResult setResult =
      (VectorMapJoinFastHashSet.HashSetResult) hashSetResult;
  setResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  final long found =
      findReadSlot(keyBytes, keyStart, keyLength, keyHash, hashSetResult.getReadPos());

  final JoinUtil.JoinResult outcome =
      (found == -1) ? JoinUtil.JoinResult.NOMATCH : JoinUtil.JoinResult.MATCH;
  setResult.setJoinResult(outcome);
  return outcome;
}
/**
 * Tests whether a bytes key is present in the hash multi-set and, if so, records the value
 * found for it.
 *
 * <p>The result holder is reset with {@code forget()}, the key is hashed, and
 * {@code findReadSlot} is consulted. A return value of {@code -1} yields {@code NOMATCH};
 * any other value is passed to the result holder's {@code set} and yields {@code MATCH}.
 * The outcome is stored on the result holder and also returned.
 *
 * @param keyBytes           backing array holding the key
 * @param keyStart           offset of the first key byte
 * @param keyLength          number of key bytes
 * @param hashMultiSetResult result holder; must be a
 *                           {@code VectorMapJoinFastHashMultiSet.HashMultiSetResult}
 * @return {@code MATCH} if the key was found, otherwise {@code NOMATCH}
 */
@Override
public JoinUtil.JoinResult contains(byte[] keyBytes, int keyStart, int keyLength,
    VectorMapJoinHashMultiSetResult hashMultiSetResult) {
  final VectorMapJoinFastHashMultiSet.HashMultiSetResult multiSetResult =
      (VectorMapJoinFastHashMultiSet.HashMultiSetResult) hashMultiSetResult;
  multiSetResult.forget();

  final long keyHash = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
  final long found =
      findReadSlot(keyBytes, keyStart, keyLength, keyHash, hashMultiSetResult.getReadPos());

  final JoinUtil.JoinResult outcome;
  if (found != -1) {
    multiSetResult.set(found);
    outcome = JoinUtil.JoinResult.MATCH;
  } else {
    outcome = JoinUtil.JoinResult.NOMATCH;
  }

  multiSetResult.setJoinResult(outcome);
  return outcome;
}
@Override public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) { VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); long valueRefWord = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); JoinUtil.JoinResult joinResult; if (valueRefWord == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); optimizedHashMapResult.set(valueStore, valueRefWord); joinResult = JoinUtil.JoinResult.MATCH; } optimizedHashMapResult.setJoinResult(joinResult); return joinResult; }
public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { int keyHash = HashCodeUtil.murmurHash(bytes, offset, length); partitionId = keyHash & (hashPartitions.length - 1);
public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, BytesBytesMultiHashMap.Result hashMapResult) { int keyHash = HashCodeUtil.murmurHash(bytes, offset, length); partitionId = keyHash & (hashPartitions.length - 1); if (bloom1 != null && !bloom1.testLong(keyHash)) { /* * if the keyHash is missing in the bloom filter, then the value cannot exist in any of the * spilled partition - return NOMATCH */ dummyRow = null; aliasFilter = (byte) 0xff; hashMapResult.forget(); return JoinResult.NOMATCH; } // If the target hash table is on disk, spill this row to disk as well to be processed later if (isOnDisk(partitionId)) { return JoinUtil.JoinResult.SPILL; } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(bytes, offset, length, hashMapResult); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; } else { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } } }
public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { int keyHash = HashCodeUtil.murmurHash(output.getData(), 0, output.getLength());
public JoinUtil.JoinResult setFromOutput(Output output) throws HiveException { int keyHash = HashCodeUtil.murmurHash(output.getData(), 0, output.getLength()); if (bloom1 != null && !bloom1.testLong(keyHash)) { /* * if the keyHash is missing in the bloom filter, then the value cannot * exist in any of the spilled partition - return NOMATCH */ dummyRow = null; aliasFilter = (byte) 0xff; hashMapResult.forget(); return JoinResult.NOMATCH; } partitionId = keyHash & (hashPartitions.length - 1); // If the target hash table is on disk, spill this row to disk as well to be processed later if (isOnDisk(partitionId)) { toSpillPartitionId = partitionId; hashMapResult.forget(); return JoinUtil.JoinResult.SPILL; } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; } else { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } } }