/**
 * Hashes a single {@code int} by delegating to the two-argument {@code hashInt} overload,
 * passing the {@code seed} that is in scope for this hasher.
 *
 * @param input the value to hash
 * @return the 32-bit hash produced by {@code hashInt(input, seed)}
 */
public int hashInt(int input) {
  final int hash = hashInt(input, seed);
  return hash;
}
/**
 * Hashes a single {@code long} by delegating to the two-argument {@code hashLong} overload,
 * passing the {@code seed} that is in scope for this hasher.
 *
 * @param input the value to hash
 * @return the 32-bit hash produced by {@code hashLong(input, seed)}
 */
public int hashLong(long input) {
  final int hash = hashLong(input, seed);
  return hash;
}
/**
 * Hashes a word-aligned memory region by delegating to the static four-argument
 * {@code hashUnsafeWords} overload, passing the {@code seed} that is in scope for this hasher.
 *
 * @param base          object the region lives in (as interpreted by the static overload)
 * @param offset        offset of the first byte of the region
 * @param lengthInBytes number of bytes to hash; the static overload asserts it is a multiple of 8
 * @return the 32-bit hash of the region
 */
public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
  final int hash = hashUnsafeWords(base, offset, lengthInBytes, seed);
  return hash;
}
public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) { // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method. assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)"; int h1 = hashBytesByInt(base, offset, lengthInBytes, seed); return fmix(h1, lengthInBytes); }
/**
 * Hashes a region of arbitrary (non-negative) byte length.
 *
 * <p>The largest prefix whose length is a multiple of 4 is consumed by {@code hashBytesByInt}.
 * Each remaining trailing byte (at most three) is then mixed into the state on its own, as if it
 * were a separate block, before the result is finalized with {@code fmix}.
 *
 * <p>NOTE(review): mixing every tail byte as its own block is not how the reference MurmurHash3
 * handles the tail, so for lengths that are not a multiple of 4 the result is not expected to
 * match other Murmur3 implementations. The behavior is kept as-is because changing it would
 * change existing hash values.
 *
 * @param base          object the region lives in
 * @param offset        offset of the first byte of the region
 * @param lengthInBytes number of bytes to hash; asserted to be non-negative
 * @param seed          initial hash state
 * @return the finalized 32-bit hash
 */
public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
  assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
  final int tailLength = lengthInBytes % 4;
  final int alignedLength = lengthInBytes - tailLength;
  int hash = hashBytesByInt(base, offset, alignedLength, seed);

  int position = alignedLength;
  while (position < lengthInBytes) {
    // The byte is widened to int before mixing (presumably sign-extended, if Platform.getByte
    // returns a Java byte -- confirm against Platform).
    final int tailByte = Platform.getByte(base, offset + position);
    hash = mixH1(hash, mixK1(tailByte));
    position++;
  }
  return fmix(hash, lengthInBytes);
}
@Test public void randomizedStressTest() { final Murmur3_x86_32 hasher = new Murmur3_x86_32(0); final int size = 65536; final Random random = new Random(); // A set used to track collision rate. final Set<Integer> hashcodes = new HashSet<>(); for (int i = 0; i < size; i++) { final int vint = random.nextInt(); final long lint = random.nextLong(); assertEquals(hasher.hashInt(vint), hasher.hashInt(vint)); assertEquals(hasher.hashLong(lint), hasher.hashLong(lint)); hashcodes.add(hasher.hashLong(lint)); } // A very loose bound. assertTrue(hashcodes.size() > size * 0.95); }
@Test public void randomizedStressTestBytes() { final Murmur3_x86_32 hasher = new Murmur3_x86_32(0); final int size = 65536; final Random random = new Random(); // A set used to track collision rate. final Set<Integer> hashcodes = new HashSet<>(); for (int i = 0; i < size; i++) { final int byteArrSize = random.nextInt(100) * 8; final byte[] bytes = new byte[byteArrSize]; random.nextBytes(bytes); assertEquals( hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize), hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize)); hashcodes.add(hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize)); } // A very loose bound. assertTrue(hashcodes.size() > size * 0.95); }
/**
 * Pins the hash of a few fixed {@code long} inputs for a zero-seeded hasher, so that any change
 * to the produced values is detected.
 */
@Test
public void testKnownLongInputs() {
  final Murmur3_x86_32 murmur = new Murmur3_x86_32(0);

  // inputs[i] is expected to hash to expectedHashes[i].
  final long[] inputs = {0L, -42L, 42L, Long.MIN_VALUE, Long.MAX_VALUE};
  final int[] expectedHashes = {1669671676, -846261623, 1871679806, 1366273829, -2106506049};

  for (int i = 0; i < inputs.length; i++) {
    assertEquals(expectedHashes[i], murmur.hashLong(inputs[i]));
  }
}
/**
 * Pins the hash of a few fixed {@code int} inputs for a zero-seeded hasher, so that any change
 * to the produced values is detected.
 */
@Test
public void testKnownIntegerInputs() {
  final Murmur3_x86_32 murmur = new Murmur3_x86_32(0);

  // inputs[i] is expected to hash to expectedHashes[i].
  final int[] inputs = {0, -42, 42, Integer.MIN_VALUE, Integer.MAX_VALUE};
  final int[] expectedHashes = {593689054, -189366624, -1134849565, -1718298732, -1653689534};

  for (int i = 0; i < inputs.length; i++) {
    assertEquals(expectedHashes[i], murmur.hashInt(inputs[i]));
  }
}
/**
 * Fills every slot of {@code hashBuffer} with a non-negative hash derived from {@code element}.
 *
 * <p>Two hashes are computed with {@code Murmur3_x86_32.hashUnsafeBytes}: the first with seed 0,
 * the second seeded with the first. Slot {@code i} receives {@code hash1 + (i + 1) * hash2}
 * (with int wrap-around), bit-flipped when negative.
 *
 * @param element    bytes to hash
 * @param hashBuffer output array; all of its slots are overwritten
 */
private void fillHashBuffer(byte[] element, int[] hashBuffer) {
  /*
   * Adam Kirsch and Michael Mitzenmacher. 2008. Less hashing, same performance: Building a better Bloom filter.
   * Random Struct. Algorithms 33, 2 (September 2008), 187-218. DOI=http://dx.doi.org/10.1002/rsa.v33:2
   */
  final int slots = hashBuffer.length;
  final int baseHash =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, 0);
  final int stepHash =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, baseHash);

  // Adding stepHash once per slot equals baseHash + (slot + 1) * stepHash under int wrap-around.
  int running = baseHash;
  for (int slot = 0; slot < slots; slot++) {
    running += stepHash;
    // Flip all bits of a negative value so the stored hash is never negative.
    hashBuffer[slot] = running < 0 ? ~running : running;
  }
}
@Test public void randomizedStressTestPaddedStrings() { final Murmur3_x86_32 hasher = new Murmur3_x86_32(0); final int size = 64000; // A set used to track collision rate. final Set<Integer> hashcodes = new HashSet<>(); for (int i = 0; i < size; i++) { final int byteArrSize = 8; final byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8); final byte[] paddedBytes = new byte[byteArrSize]; System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length); assertEquals( hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize), hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize)); hashcodes.add(hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize)); } // A very loose bound. assertTrue(hashcodes.size() > size * 0.95); } }
/**
 * Populates {@code hashBuffer} with one non-negative hash per slot, all derived from two
 * {@code Murmur3_x86_32.hashUnsafeBytes} hashes of {@code element}.
 *
 * <p>The first hash uses seed 0 and the second uses the first hash as its seed. Slot {@code i}
 * is set to {@code first + (i + 1) * second} (int arithmetic, overflow wraps), with all bits
 * inverted if that value is negative.
 *
 * @param element    bytes to hash
 * @param hashBuffer destination array; every slot is overwritten
 */
private void fillHashBuffer(byte[] element, int[] hashBuffer) {
  /*
   * Adam Kirsch and Michael Mitzenmacher. 2008. Less hashing, same performance: Building a better Bloom filter.
   * Random Struct. Algorithms 33, 2 (September 2008), 187-218. DOI=http://dx.doi.org/10.1002/rsa.v33:2
   */
  final int capacity = hashBuffer.length;
  final int first =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, 0);
  final int second =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, first);

  int position = 0;
  while (position < capacity) {
    final int mixed = first + ((position + 1) * second);
    // A negative value is replaced by its bitwise complement, which is non-negative.
    hashBuffer[position] = (mixed >= 0) ? mixed : ~mixed;
    position++;
  }
}
/**
 * Writes a sequence of non-negative hashes of {@code element} into {@code hashBuffer}.
 *
 * <p>Only two {@code Murmur3_x86_32.hashUnsafeBytes} calls are made: one seeded with 0 and one
 * seeded with the result of the first. The value for slot {@code i} is
 * {@code primary + (i + 1) * secondary} in wrapping int arithmetic; a negative value is stored as
 * its bitwise complement.
 *
 * @param element    bytes to hash
 * @param hashBuffer array to fill; its length determines how many hashes are produced
 */
private void fillHashBuffer(byte[] element, int[] hashBuffer) {
  /*
   * Adam Kirsch and Michael Mitzenmacher. 2008. Less hashing, same performance: Building a better Bloom filter.
   * Random Struct. Algorithms 33, 2 (September 2008), 187-218. DOI=http://dx.doi.org/10.1002/rsa.v33:2
   */
  final int count = hashBuffer.length;
  final int primary =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, 0);
  final int secondary =
      Murmur3_x86_32.hashUnsafeBytes(element, Platform.BYTE_ARRAY_OFFSET, element.length, primary);

  for (int i = 0; i < count; i++) {
    final int multiplier = i + 1;
    final int candidate = primary + (multiplier * secondary);
    if (candidate < 0) {
      // Complementing a negative int always yields a non-negative int.
      hashBuffer[i] = ~candidate;
    } else {
      hashBuffer[i] = candidate;
    }
  }
}