/**
 * Generates a 64 bit hash from a byte array using the default seed value
 * ({@code 0xe17a1465}).
 *
 * @param data   byte array to hash
 * @param length length of the array to hash; passed through unchanged to the seeded overload
 * @return 64 bit hash of the given byte array
 */
public static long hash64(final byte[] data, int length) {
    // Default seed used whenever the caller does not supply one.
    final int defaultSeed = 0xe17a1465;
    return hash64(data, length, defaultSeed);
}
/**
 * Same as lookup3, except that {@code initval} is biased by {@code -(length<<2)}.
 * This is equivalent to leaving out the length factor in the initial state, so:
 * {@code lookup3ycs(k, offset, length, initval) == lookup3(k, offset, length, initval-(length<<2))}
 * and
 * {@code lookup3ycs(k, offset, length, initval+(length<<2)) == lookup3(k, offset, length, initval)}
 *
 * @param k       the 32 bit words to hash
 * @param offset  offset argument forwarded unchanged to lookup3
 * @param length  length argument forwarded unchanged to lookup3
 * @param initval initial value (seed) before the length bias is applied
 * @return the lookup3 hash computed with the biased initial value
 */
public static int lookup3ycs(int[] k, int offset, int length, int initval) {
    final int biasedInitval = initval - (length << 2);
    return lookup3(k, offset, length, biasedInitval);
}
/**
 * Hashes the whole byte array with a seed of {@code -1}.
 *
 * @param data byte array to hash; its full length is used
 * @return the hash produced by the seeded overload
 */
public static int hash(byte[] data) {
    final int seed = -1;
    final int length = data.length;
    return hash(data, length, seed);
}
/**
 * Checks that hashing the UTF-16 encoded Java character sequence yields the same
 * value as hashing the corresponding Unicode code points directly.
 *
 * @param utf32 array holding the code points to hash
 * @param len   number of leading entries of {@code utf32} to use
 */
void tstEquiv(int[] utf32, int len) {
    final int seed = 100;

    // Build the UTF-16 representation of the first len code points.
    final StringBuilder utf16 = new StringBuilder();
    for (int idx = 0; idx < len; idx++) {
        utf16.appendCodePoint(utf32[idx]);
    }

    // Reference value: plain lookup3 with the length bias applied by hand.
    final int expected = Lookup3Hash.lookup3(utf32, 0, len, seed - (len << 2));

    final int fromCodePoints = Lookup3Hash.lookup3ycs(utf32, 0, len, seed);
    assertEquals(expected, fromCodePoints);

    final int fromChars = Lookup3Hash.lookup3ycs(utf16, 0, utf16.length(), seed);
    assertEquals(expected, fromChars);

    // The low 32 bits of the 64 bit variant must agree with the 32 bit hash.
    final long wide = Lookup3Hash.lookup3ycs64(utf16, 0, utf16.length(), seed);
    assertEquals((int) wide, expected);
}
/**
 * Hashes the entire character sequence with an initial value of {@code -1}.
 *
 * @param s character sequence to hash, from index 0 through {@code s.length()}
 * @return the 64 bit hash produced by the four-argument overload
 */
public static long lookup3ycs64(CharSequence s) {
    final int start = 0;
    final int end = s.length();
    final long initial = -1;
    return lookup3ycs64(s, start, end, initial);
}
/**
 * Compares the Java lookup3 variants against reference values produced by the
 * C implementation for every prefix of "hello world".
 */
@Test
public void testEqualsLOOKUP3() {
    /*
     * The expected values were generated by adding the following to lookup3.c:
     *
     * char* s = "hello world";
     * int len = strlen(s);
     * uint32_t a[len];
     * for (int i=0; i<len; i++) {
     *   a[i]=s[i];
     *   uint32_t result = hashword(a, i+1, i*12345);
     *   printf("0x%.8x\n", result);
     * }
     */
    final int[] expected = {
        0xc4c20dd5, 0x3ab04cc3, 0xebe874a3, 0x0e770ef3, 0xec321498, 0x73845e86,
        0x8a2db728, 0x03c313bb, 0xfe5b9199, 0x95965125, 0xcbc4e7c2
    };

    final String text = "hello world";
    final int[] words = new int[text.length()];

    for (int idx = 0; idx < text.length(); idx++) {
        // Extend the prefix by one character, then hash the prefix.
        words[idx] = text.charAt(idx);
        final int prefixLen = idx + 1;
        final int seed = idx * 12345;

        final int plain = Lookup3Hash.lookup3(words, 0, prefixLen, seed);
        assertEquals(expected[idx], plain);

        // The ycs variants need the length bias added back to match lookup3.
        final int ycsWords = Lookup3Hash.lookup3ycs(words, 0, prefixLen, seed + (prefixLen << 2));
        assertEquals(expected[idx], ycsWords);

        final int ycsChars = Lookup3Hash.lookup3ycs(text, 0, prefixLen, seed + (prefixLen << 2));
        assertEquals(expected[idx], ycsChars);
    }
}
@Test public void testHash() { Random r = new Random(0); int[] utf32 = new int[20]; tstEquiv(utf32, 0); utf32[0] = 0x10000; tstEquiv(utf32, 1); utf32[0] = 0x8000; tstEquiv(utf32, 1); utf32[0] = Character.MAX_CODE_POINT; tstEquiv(utf32, 1); for (int iter = 0; iter < 10000; iter++) { int len = r.nextInt(utf32.length + 1); for (int i = 0; i < len; i++) { int codePoint; do { codePoint = r.nextInt(Character.MAX_CODE_POINT + 1); } while ((codePoint & 0xF800) == 0xD800); // avoid surrogate code points utf32[i] = codePoint; } // System.out.println("len="+len + ","+utf32[0]+","+utf32[1]); tstEquiv(utf32, len); } }
/**
 * Hashes the whole byte array with the given seed.
 *
 * @param data byte array to hash; its full length is used
 * @param seed seed forwarded to the three-argument overload
 * @return the hash produced by the three-argument overload
 */
public static int hash(byte[] data, int seed) {
    final int length = data.length;
    return hash(data, length, seed);
}
/**
 * Generates a 64 bit hash for an arbitrary object.
 * <ul>
 * <li>{@code null} hashes to {@code 0}.</li>
 * <li>A {@code String} is hashed over its UTF-8 encoded bytes.</li>
 * <li>A {@code byte[]} is hashed over its full contents.</li>
 * <li>Any other object is hashed via its {@code toString()} value.</li>
 * </ul>
 *
 * @param o object to hash, may be {@code null}
 * @return 64 bit hash of the object, or {@code 0} for {@code null}
 */
public static long hash64(Object o) {
    if (o == null) {
        return 0L;
    }
    if (o instanceof String) {
        // Encode with an explicit charset: the no-arg getBytes() uses the platform
        // default, which would make the hash of a non-ASCII string differ between
        // JVMs/locales.
        final byte[] bytes = ((String) o).getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return hash64(bytes, bytes.length);
    }
    if (o instanceof byte[]) {
        final byte[] bytes = (byte[]) o;
        return hash64(bytes, bytes.length);
    }
    return hash64(o.toString());
}
/**
 * Offers an element, hashing its {@code toString()} value with
 * {@code Lookup3Hash.lookup3ycs64}. The top {@code k} bits of the hash select a
 * slot in {@code M}; the remaining bits determine the rank stored there.
 *
 * @param o element to offer; must not be {@code null} because {@code toString()} is called on it
 * @return {@code true} if the selected slot was raised to a larger value
 */
@Override
public boolean offer(Object o) {
    final long hash = Lookup3Hash.lookup3ycs64(o.toString());

    // Slot index: the k most significant bits of the hash.
    final int slot = (int) (hash >>> (Long.SIZE - k));

    // Rank: 1 + leading zeros of the remaining bits; the OR-ed in bit bounds the count.
    final byte rank = (byte) (Long.numberOfLeadingZeros((hash << k) | (1 << (k - 1))) + 1);

    if (M[slot] >= rank) {
        return false;
    }

    // Keep the running totals in step with the slot change.
    // NOTE(review): Rsum looks like the sum of all slot values and b_e the number
    // of still-zero slots - confirm against the field declarations.
    Rsum += rank - M[slot];
    if (M[slot] == 0) {
        b_e--;
    }
    M[slot] = rank;
    return true;
}
/**
 * Offers an element by setting the bit of {@code map} selected by the element's
 * MurmurHash (treated as unsigned) modulo {@code length}.
 *
 * @param o element to offer
 * @return {@code true} if the selected bit was previously clear and is now set
 */
@Override
public boolean offer(Object o) {
    // Mask to the low 32 bits so the hash is treated as an unsigned value.
    final long unsignedHash = ((long) MurmurHash.hash(o)) & 0xFFFFFFFFL;
    final int bit = (int) (unsignedHash % (long) length);

    final int byteIndex = bit / 8;
    final byte current = map[byteIndex];
    final byte mask = (byte) (1 << (bit % 8));

    if ((mask & current) != 0) {
        // Bit already set: nothing changes.
        return false;
    }

    map[byteIndex] = (byte) (current | mask);
    count--;
    return true;
}
/**
 * Offers an element by hashing it with {@code MurmurHash.hash64} and passing
 * the hash to {@code offerHashed}.
 *
 * @param o element to offer
 * @return whatever {@code offerHashed} returns for the element's 64 bit hash
 */
@Override
public boolean offer(Object o) {
    return offerHashed(MurmurHash.hash64(o));
}
/**
 * Derives {@code hashCount} bucket indexes for a key from two MurmurHash values:
 * the first is computed with seed 0, the second is seeded with the first.
 * Bucket {@code i} is {@code |(hash1 + i * hash2) % max|}.
 *
 * @param b         key bytes
 * @param hashCount number of bucket indexes to produce
 * @param max       modulus applied to each combined hash
 * @return array of {@code hashCount} bucket indexes
 */
static int[] getHashBuckets(byte[] b, int hashCount, int max) {
    final int[] buckets = new int[hashCount];
    final int base = MurmurHash.hash(b, b.length, 0);
    final int step = MurmurHash.hash(b, b.length, base);

    // Running value equals base + i * step; int wrap-around makes repeated
    // addition identical to the multiplied form.
    int combined = base;
    for (int i = 0; i < hashCount; i++) {
        buckets[i] = Math.abs(combined % max);
        combined += step;
    }
    return buckets;
}
}
/**
 * Offers an element: computes its 64 bit MurmurHash and hands that hash to
 * {@code offerHashed}.
 *
 * @param o element to offer
 * @return the result of {@code offerHashed} for the computed hash
 */
@Override
public boolean offer(Object o) {
    final long hashed = MurmurHash.hash64(o);
    final boolean result = offerHashed(hashed);
    return result;
}
/**
 * Offers an element by hashing it with {@code MurmurHash.hash} and passing the
 * 32 bit hash to {@code offerHashed}.
 *
 * @param o element to offer
 * @return whatever {@code offerHashed} returns for the element's hash
 */
@Override
public boolean offer(Object o) {
    return offerHashed(MurmurHash.hash(o));
}
/**
 * Verifies that {@code MurmurHash.hash64} gives the same value for a String,
 * for that String's bytes, and for those bytes passed as a plain {@code Object}.
 */
@Test
public void testHash64ByteArrayOverload() {
    final String text = "hashthis";
    final byte[] textBytes = text.getBytes();
    final long expected = MurmurHash.hash64(text);

    final long fromBytes = MurmurHash.hash64(textBytes);
    assertEquals("MurmurHash.hash64(byte[]) did not match MurmurHash.hash64(String)",
            expected, fromBytes);

    // Same array, but statically typed as Object.
    final Object asObject = textBytes;
    final long fromObject = MurmurHash.hash64(asObject);
    assertEquals("MurmurHash.hash64(Object) given a byte[] did not match MurmurHash.hash64(String)",
            expected, fromObject);
}
/**
 * Offers an element: computes its 32 bit MurmurHash and forwards that hash to
 * {@code offerHashed}.
 *
 * @param o element to offer
 * @return the result of {@code offerHashed} for the computed hash
 */
@Override
public boolean offer(Object o) {
    final int hashed = MurmurHash.hash(o);
    final boolean result = offerHashed(hashed);
    return result;
}
/**
 * Pins the 64 bit MurmurHash of "hashthis" to a known reference value.
 */
@Test
public void testHash64() throws Exception {
    final long expected = -8896273065425798843L;
    final long actual = MurmurHash.hash64("hashthis");
    assertEquals("MurmurHash.hash64(String) returns wrong hash value", expected, actual);
}
/**
 * Verifies that {@code MurmurHash.hash} gives the same value for a String,
 * for that String's bytes, and for those bytes passed as a plain {@code Object}.
 */
@Test
public void testHashByteArrayOverload() {
    final String text = "hashthis";
    final byte[] textBytes = text.getBytes();
    final int expected = MurmurHash.hash(text);

    final int fromBytes = MurmurHash.hash(textBytes);
    assertEquals("MurmurHash.hash(byte[]) did not match MurmurHash.hash(String)",
            expected, fromBytes);

    // Same array, but statically typed as Object.
    final Object asObject = textBytes;
    final int fromObject = MurmurHash.hash(asObject);
    assertEquals("MurmurHash.hash(Object) given a byte[] did not match MurmurHash.hash(String)",
            expected, fromObject);
}
/**
 * Pins the 32 bit MurmurHash of "hashthis" to a known reference value.
 */
@Test
public void testHash() throws Exception {
    final long expected = -1974946086L;
    final long actual = MurmurHash.hash("hashthis");
    assertEquals("MurmurHash.hash(String) returns wrong hash value", expected, actual);
}
}