/**
 * Hashes the whole input array with the default seed of -1.
 *
 * @param bytes input bytes; every byte of the array takes part in the hash
 * @return hash value
 */
public int hash(byte[] bytes) {
  final int length = bytes.length;
  return hash(bytes, length, -1);
}
/**
 * Resolves the hash type named in the configuration to its symbolic constant.
 * The name is read from {@code HADOOP_UTIL_HASH_TYPE_KEY}, falling back to
 * {@code HADOOP_UTIL_HASH_TYPE_DEFAULT} when the key is not set.
 *
 * @param conf configuration
 * @return one of the predefined constants
 */
public static int getHashType(Configuration conf) {
  return parseHashType(
      conf.get(HADOOP_UTIL_HASH_TYPE_KEY, HADOOP_UTIL_HASH_TYPE_DEFAULT));
}
/**
 * Returns the singleton hash function whose type is selected by the
 * configuration.
 *
 * @param conf current configuration
 * @return defined hash type, or null if type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
int iterations = 30; assertTrue("testHash jenkins error !!!", Hash.JENKINS_HASH == Hash.parseHashType("jenkins")); assertTrue("testHash murmur error !!!", Hash.MURMUR_HASH == Hash.parseHashType("murmur")); assertTrue("testHash undefined", Hash.INVALID_HASH == Hash.parseHashType("undefined")); assertTrue("testHash", MurmurHash.getInstance() == Hash.getInstance(cfg)); JenkinsHash.getInstance() == Hash.getInstance(cfg)); MurmurHash.getInstance() == Hash.getInstance(cfg)); JenkinsHash.getInstance() == Hash.getInstance(Hash.JENKINS_HASH)); assertTrue("testHash error murmur getInstance !!!", MurmurHash.getInstance() == Hash.getInstance(Hash.MURMUR_HASH)); Hash.getInstance(Hash.INVALID_HASH)); int murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes()); for (int i = 0; i < iterations; i++) { assertTrue("multiple evaluation murmur hash error !!!", murmurHash == Hash.getInstance(Hash.MURMUR_HASH) .hash(LINE.getBytes())); murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes(), 67); for (int i = 0; i < iterations; i++) { assertTrue(
/** * Constructor. * <p> * Builds a hash function that must obey to a given maximum number of returned values and a highest value. * @param maxValue The maximum highest returned value. * @param nbHash The number of resulting hashed values. * @param hashType type of the hashing function (see {@link Hash}). */ public HashFunction(int maxValue, int nbHash, int hashType) { if (maxValue <= 0) { throw new IllegalArgumentException("maxValue must be > 0"); } if (nbHash <= 0) { throw new IllegalArgumentException("nbHash must be > 0"); } this.maxValue = maxValue; this.nbHash = nbHash; this.hashFunction = Hash.getInstance(hashType); if (this.hashFunction == null) throw new IllegalArgumentException("hashType must be known"); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt( IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat( IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
int iterations = 30; assertTrue("testHash jenkins error !!!", Hash.JENKINS_HASH == Hash.parseHashType("jenkins")); assertTrue("testHash murmur error !!!", Hash.MURMUR_HASH == Hash.parseHashType("murmur")); assertTrue("testHash undefined", Hash.INVALID_HASH == Hash.parseHashType("undefined")); assertTrue("testHash", MurmurHash.getInstance() == Hash.getInstance(cfg)); JenkinsHash.getInstance() == Hash.getInstance(cfg)); MurmurHash.getInstance() == Hash.getInstance(cfg)); JenkinsHash.getInstance() == Hash.getInstance(Hash.JENKINS_HASH)); assertTrue("testHash error murmur getInstance !!!", MurmurHash.getInstance() == Hash.getInstance(Hash.MURMUR_HASH)); Hash.getInstance(Hash.INVALID_HASH)); int murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes()); for (int i = 0; i < iterations; i++) { assertTrue("multiple evaluation murmur hash error !!!", murmurHash == Hash.getInstance(Hash.MURMUR_HASH) .hash(LINE.getBytes())); murmurHash = Hash.getInstance(Hash.MURMUR_HASH).hash(LINE.getBytes(), 67); for (int i = 0; i < iterations; i++) { assertTrue(
/** * Constructor. * <p> * Builds a hash function that must obey to a given maximum number of returned values and a highest value. * @param maxValue The maximum highest returned value. * @param nbHash The number of resulting hashed values. * @param hashType type of the hashing function (see {@link Hash}). */ public HashFunction(int maxValue, int nbHash, int hashType) { if (maxValue <= 0) { throw new IllegalArgumentException("maxValue must be > 0"); } if (nbHash <= 0) { throw new IllegalArgumentException("nbHash must be > 0"); } this.maxValue = maxValue; this.nbHash = nbHash; this.hashFunction = Hash.getInstance(hashType); if (this.hashFunction == null) throw new IllegalArgumentException("hashType must be known"); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Hashes the whole input array using the caller-supplied seed.
 *
 * @param bytes input bytes; every byte of the array takes part in the hash
 * @param initval seed value
 * @return hash value
 */
public int hash(byte[] bytes, int initval) {
  final int length = bytes.length;
  return hash(bytes, length, initval);
}
/**
 * Returns the singleton hash function whose type is selected by the
 * configuration.
 *
 * @param conf current configuration
 * @return defined hash type, or null if type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
.ceil(-HASH_COUNT * numKeys / Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.parseHashType(acuconf.get(Property.TABLE_BLOOM_HASHTYPE)), numKeys);
/** * Constructor. * <p> * Builds a hash function that must obey to a given maximum number of returned values and a highest value. * @param maxValue The maximum highest returned value. * @param nbHash The number of resulting hashed values. * @param hashType type of the hashing function (see {@link Hash}). */ public HashFunction(int maxValue, int nbHash, int hashType) { if (maxValue <= 0) { throw new IllegalArgumentException("maxValue must be > 0"); } if (nbHash <= 0) { throw new IllegalArgumentException("nbHash must be > 0"); } this.maxValue = maxValue; this.nbHash = nbHash; this.hashFunction = Hash.getInstance(hashType); if (this.hashFunction == null) throw new IllegalArgumentException("hashType must be known"); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Hashes a specified key into several integers.
 * <p>
 * Produces {@code nbHash} values by chaining: each round hashes the key bytes
 * with the previous round's raw hash as the seed (starting from 0), then
 * reduces the result modulo {@code maxValue} and takes its absolute value.
 *
 * @param k The specified key.
 * @return The array of hashed values.
 * @throws NullPointerException if the key's byte buffer is null
 * @throws IllegalArgumentException if the key's byte buffer is empty
 */
public int[] hash(Key k) {
  final byte[] keyBytes = k.getBytes();
  if (keyBytes == null) {
    throw new NullPointerException("buffer reference is null");
  }
  if (keyBytes.length == 0) {
    throw new IllegalArgumentException("key length must be > 0");
  }

  final int[] hashes = new int[nbHash];
  int seed = 0;
  for (int round = 0; round < nbHash; round++) {
    // The unreduced hash is carried forward as the next round's seed.
    seed = hashFunction.hash(keyBytes, seed);
    hashes[round] = Math.abs(seed % maxValue);
  }
  return hashes;
}
}
/**
 * Returns the singleton hash function whose type is selected by the
 * configuration.
 *
 * @param conf current configuration
 * @return defined hash type, or null if type is invalid
 */
public static Hash getInstance(Configuration conf) {
  return getInstance(getHashType(conf));
}
/**
 * Resolves the hash type named in the configuration to its symbolic constant.
 * The name is read from the {@code hadoop.util.hash.type} key, falling back
 * to {@code murmur} when the key is not set.
 *
 * @param conf configuration
 * @return one of the predefined constants
 */
public static int getHashType(Configuration conf) {
  return parseHashType(conf.get("hadoop.util.hash.type", "murmur"));
}
/** * Constructor. * <p> * Builds a hash function that must obey to a given maximum number of returned values and a highest value. * @param maxValue The maximum highest returned value. * @param nbHash The number of resulting hashed values. * @param hashType type of the hashing function (see {@link Hash}). */ public HashFunction(int maxValue, int nbHash, int hashType) { if (maxValue <= 0) { throw new IllegalArgumentException("maxValue must be > 0"); } if (nbHash <= 0) { throw new IllegalArgumentException("nbHash must be > 0"); } this.maxValue = maxValue; this.nbHash = nbHash; this.hashFunction = Hash.getInstance(hashType); if (this.hashFunction == null) throw new IllegalArgumentException("hashType must be known"); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
for (int i = 0; i < numcat; ++i) { ByteBuffer buf = ByteBuffer.allocate(4); int hashval = murmur.hash(buf.putInt(cats[i]).array(), 4, (int)params.seed); // turn horizontalized categorical integer into another integer, based on seed