public BytesBytesMultiHashMap getHashMapFromDisk(int rowCount) throws IOException, ClassNotFoundException { if (hashMapSpilledOnCreation) { return new BytesBytesMultiHashMap(rowCount, loadFactor, wbSize, -1); } else { InputStream inputStream = Files.newInputStream(hashMapLocalPath); com.esotericsoftware.kryo.io.Input input = new com.esotericsoftware.kryo.io.Input(inputStream); Kryo kryo = SerializationUtilities.borrowKryo(); BytesBytesMultiHashMap restoredHashMap = null; try { restoredHashMap = kryo.readObject(input, BytesBytesMultiHashMap.class); } finally { SerializationUtilities.releaseKryo(kryo); } if (rowCount > 0) { restoredHashMap.expandAndRehashToTarget(rowCount); } // some bookkeeping rowsOnDisk = 0; hashMapOnDisk = false; input.close(); inputStream.close(); Files.delete(hashMapLocalPath); return restoredHashMap; } }
/**
 * Probes the hash map directly with the given serialized key bytes and reports
 * whether the key matched.
 */
public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
    BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) {
  // getValueResult populates hashMapResult and yields the alias filter byte for the key.
  aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker);
  dummyRow = null;
  if (!hashMapResult.hasRows()) {
    // No rows for this key: reset the alias filter before reporting the miss.
    aliasFilter = (byte) 0xff;
    return JoinUtil.JoinResult.NOMATCH;
  }
  return JoinUtil.JoinResult.MATCH;
}
}
/**
 * Writes a key/value pair straight into the hash map via the direct-write helper.
 * Passing -1 as the hash code makes the map compute it from the key bytes itself.
 */
@Override
public void put(Writable currentKey, Writable currentValue) throws SerDeException {
  directWriteHelper.setKeyValue(currentKey, currentValue);
  hashMap.put(directWriteHelper, -1);
}
@Test public void testCapacityValidation() { BytesBytesMultiHashMap map = new BytesBytesMultiHashMap(CAPACITY, LOAD_FACTOR, WB_SIZE); assertEquals(CAPACITY, map.getCapacity()); map = new BytesBytesMultiHashMap(9, LOAD_FACTOR, WB_SIZE); assertEquals(16, map.getCapacity()); // Verify the scenario when maxProbeSize is a very small value, it doesn't fail BytesBytesMultiHashMap map1 = new BytesBytesMultiHashMap(1024, (float) 0.75, 524288, 1); }
// NOTE(review): truncated fragment of a partition-spill routine — the LOG.warn message
// is cut off mid-concatenation and the braces are unbalanced, so the code is left
// byte-identical. Visible logic: record memory usage before/after, subtract the hash
// map's memory size from memoryUsed, clear and null the map, and return bytes freed.
int inMemRowCount = partition.hashMap.getNumValues(); if (inMemRowCount == 0) { LOG.warn("Trying to spill an empty hash partition! It may be due to " + ", Mem size: " + partition.hashMap.memorySize() + "): " + file); LOG.info("Memory usage before spilling: " + memoryUsed); long memFreed = partition.hashMap.memorySize(); memoryUsed -= memFreed; LOG.info("Memory usage after spilling: " + memoryUsed); partition.hashMap.clear(); partition.hashMap = null; return memFreed;
// NOTE(review): truncated fragment of BytesBytesMultiHashMap.put — the if/else
// structure around ref == 0 is missing its braces, so the code is left byte-identical.
// Visible logic: expand when at the resize threshold; compute the hash (keyHashCode of
// -1 means "compute from key bytes"); for an empty slot (ref == 0) write a first-value
// record, otherwise append the value to the key's existing record list.
public void put(KvSource kv, int keyHashCode) throws SerDeException { if (resizeThreshold <= keysAssigned) { expandAndRehash(); int hashCode = (keyHashCode == -1) ? writeBuffers.unsafeHashCode(keyOffset, keyLength) : keyHashCode; int slot = findKeySlotToWrite(keyOffset, keyLength, hashCode); if (ref == 0) { long tailOffset = writeFirstValueRecord(kv, keyOffset, keyLength, hashCode); byte stateByte = kv.updateStateByte(null); refs[slot] = Ref.makeFirstRef(tailOffset, stateByte, hashCode, startingHashBitCount); long lrPtrOffset = createOrGetListRecord(ref); long tailOffset = writeValueAndLength(kv); addRecordToList(lrPtrOffset, tailOffset); byte oldStateByte = Ref.getStateByte(ref); byte stateByte = kv.updateStateByte(oldStateByte);
// NOTE(review): truncated fragment of a debug-dump routine — left byte-identical.
// Visible logic: read the key bytes for a ref out of the write buffers, append the
// key and ref to a dump string, then re-probe the map via the 5-arg getValueResult
// and iterate the resulting byte segments.
long recOffset = getFirstRecordLengthsOffset(ref, null); long tailOffset = Ref.getOffset(ref); writeBuffers.setUnsafeReadPoint(recOffset); writeBuffers.populateValue(fakeRef); System.arraycopy(fakeRef.getBytes(), (int)fakeRef.getOffset(), key, 0, keyLength); dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); getValueResult(key, 0, key.length, hashMapResult, null); List<WriteBuffers.ByteSegmentRef> results = new ArrayList<WriteBuffers.ByteSegmentRef>(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
@Test public void testPutWithFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. BytesBytesMultiHashMap map = new BytesBytesMultiHashMap(CAPACITY, 1f, WB_SIZE); UniqueKeysKvSource kv = new UniqueKeysKvSource(); for (int i = 0; i < CAPACITY; ++i) { map.put(kv, -1); } for (int i = 0; i < kv.keys.size(); ++i) { verifyHashMapResult(map, kv.keys.get(i), kv.values.get(i)); } assertEquals(CAPACITY, map.getCapacity()); // Get of non-existent key should terminate.. BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); map.getValueResult(new byte[0], 0, 0, hashMapResult, null); }
/** * Same as {@link #isSameKey(long, int, long, int)} but for externally stored key. */ private boolean isSameKey(byte[] key, int offset, int length, long ref, int hashCode, WriteBuffers.Position readPos) { if (!compareHashBits(ref, hashCode)) { return false; // Hash bits don't match. } writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); int valueLength = (int)writeBuffers.readVLong(readPos), keyLength = (int)writeBuffers.readVLong(readPos); long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); // See the comment in the other isSameKey if (offset == 0) { return writeBuffers.isEqual(key, length, keyOffset, keyLength); } else { return writeBuffers.isEqual(key, offset, length, keyOffset, keyLength); } }
@Test
public void testGetNonExistent() throws Exception {
  BytesBytesMultiHashMap m = new BytesBytesMultiHashMap(CAPACITY, LOAD_FACTOR, WB_SIZE);
  RandomKvSource randomSource = new RandomKvSource(1, 100);
  m.put(randomSource, -1);
  // Perturb a copy of the last key so it no longer matches anything in the map.
  byte[] probe = randomSource.getLastKey();
  probe[0] = (byte) (probe[0] + 1);
  FixedKeyKvSource fixedSource = new FixedKeyKvSource(randomSource.getLastKey(), 0, 100);
  m.put(fixedSource, -1);
  probe[0] = (byte) (probe[0] + 1);
  BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result();
  // Full-length probe of the mutated key: must not match.
  m.getValueResult(probe, 0, probe.length, hashMapResult, null);
  assertTrue(!hashMapResult.hasRows());
  // Zero-length probe: must not match either.
  m.getValueResult(probe, 0, 0, hashMapResult, null);
  assertTrue(!hashMapResult.hasRows());
}
// NOTE(review): truncated fragment of a spilled-partition restore loop — the trailing
// "+ restoredHashMap.getNumValues());" is the tail of a cut-off log statement, so the
// code is left byte-identical. Visible logic: accumulate the restored row count, log,
// re-insert key/value pairs from the side-file container into the restored map, then
// clear the container.
rowCount += restoredHashMap.getNumValues(); LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition..."); LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount); Writable val = pair.getSecond(); writeHelper.setKeyValue(key, val); restoredHashMap.put(writeHelper, -1); + restoredHashMap.getNumValues()); kvContainer.clear();
@Test public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. BytesBytesMultiHashMap map = new BytesBytesMultiHashMap(1, 0.0000001f, WB_SIZE); UniqueKeysKvSource kv = new UniqueKeysKvSource(); for (int i = 0; i < 18; ++i) { map.put(kv, -1); for (int j = 0; j <= i; ++j) { verifyHashMapResult(map, kv.keys.get(j), kv.values.get(j)); } } assertEquals(1 << 18, map.getCapacity()); }
@Test
public void testPutGetMultiple() throws Exception {
  BytesBytesMultiHashMap m = new BytesBytesMultiHashMap(CAPACITY, LOAD_FACTOR, WB_SIZE);
  RandomKvSource randomSource = new RandomKvSource(0, 100);
  m.put(randomSource, -1);
  verifyHashMapResult(m, randomSource.getLastKey(), randomSource.getLastValue());
  // Re-insert under the same key several times; each value must accumulate in the list.
  FixedKeyKvSource fixedSource = new FixedKeyKvSource(randomSource.getLastKey(), 0, 100);
  fixedSource.values.add(randomSource.getLastValue());
  for (int round = 0; round < 3; ++round) {
    m.put(fixedSource, -1);
    verifyHashMapResult(m, fixedSource.key,
        fixedSource.values.toArray(new byte[fixedSource.values.size()][]));
  }
}
// NOTE(review): truncated statement fragment — left byte-identical. Visible logic:
// subtract partition i's in-memory row count from the container total, then clear
// that partition's hash map.
hybridHtContainer.setTotalInMemRowCount( hybridHtContainer.getTotalInMemRowCount() - hashPartitions[i].getHashMapFromMemory().getNumValues()); hashPartitions[i].getHashMapFromMemory().clear();
/**
 * Creates a hash partition. When {@code createHashMap} is false the partition is
 * marked as spilled-on-creation (on disk) and no in-memory map is built; the sizing
 * parameters are retained so the map can be created later.
 */
public HashPartition(int initialCapacity, float loadFactor, int wbSize, long maxProbeSize,
    boolean createHashMap, String spillLocalDirs) {
  // Retain sizing parameters for deferred map creation.
  this.initialCapacity = initialCapacity;
  this.loadFactor = loadFactor;
  this.wbSize = wbSize;
  this.spillLocalDirs = spillLocalDirs;
  if (createHashMap) {
    // Probe space should be at least equal to the size of our designated wbSize
    long effectiveProbeSize = Math.max(maxProbeSize, wbSize);
    hashMap = new BytesBytesMultiHashMap(initialCapacity, loadFactor, wbSize, effectiveProbeSize);
  } else {
    hashMapSpilledOnCreation = true;
    hashMapOnDisk = true;
  }
}
/** Dumps debug metrics for every partition that still holds an in-memory hash map. */
@Override
public void dumpMetrics() {
  for (HashPartition partition : hashPartitions) {
    if (partition.hashMap != null) {
      partition.hashMap.debugDumpMetrics();
    }
  }
}
/**
 * Releases everything this partition holds: the in-memory hash map, the on-disk
 * spill file, and all side-file/match-file containers. Safe to call repeatedly.
 */
public void clear() {
  if (hashMap != null) {
    hashMap.clear();
    hashMap = null;
  }
  if (hashMapLocalPath != null) {
    try {
      Files.delete(hashMapLocalPath);
    } catch (Throwable ignored) {
      // Best-effort cleanup of the spill file; failure to delete is not fatal here.
    }
    hashMapLocalPath = null;
    rowsOnDisk = 0;
    hashMapOnDisk = false;
  }
  if (sidefileKVContainer != null) {
    sidefileKVContainer.clear();
    sidefileKVContainer = null;
  }
  if (matchfileObjContainer != null) {
    matchfileObjContainer.clear();
    matchfileObjContainer = null;
  }
  if (matchfileRowBytesContainer != null) {
    matchfileRowBytesContainer.clear();
    matchfileRowBytesContainer = null;
  }
}
// NOTE(review): truncated fragment of BytesBytesMultiHashMap.put (duplicate chunk) —
// the if/else structure around ref == 0 is missing its braces, so the code is left
// byte-identical. Visible logic: expand when at the resize threshold; compute the
// hash (keyHashCode of -1 means "compute from key bytes"); for an empty slot write a
// first-value record, otherwise append the value to the key's existing record list.
public void put(KvSource kv, int keyHashCode) throws SerDeException { if (resizeThreshold <= keysAssigned) { expandAndRehash(); int hashCode = (keyHashCode == -1) ? writeBuffers.unsafeHashCode(keyOffset, keyLength) : keyHashCode; int slot = findKeySlotToWrite(keyOffset, keyLength, hashCode); if (ref == 0) { long tailOffset = writeFirstValueRecord(kv, keyOffset, keyLength, hashCode); byte stateByte = kv.updateStateByte(null); refs[slot] = Ref.makeFirstRef(tailOffset, stateByte, hashCode, startingHashBitCount); long lrPtrOffset = createOrGetListRecord(ref); long tailOffset = writeValueAndLength(kv); addRecordToList(lrPtrOffset, tailOffset); byte oldStateByte = Ref.getStateByte(ref); byte stateByte = kv.updateStateByte(oldStateByte);
// NOTE(review): truncated fragment of a debug-dump routine — left byte-identical.
// Same shape as the other dump fragment but calls the 4-arg getValueResult (no
// MatchTracker argument), so this appears to come from an older/variant revision.
long recOffset = getFirstRecordLengthsOffset(ref, null); long tailOffset = Ref.getOffset(ref); writeBuffers.setUnsafeReadPoint(recOffset); writeBuffers.populateValue(fakeRef); System.arraycopy(fakeRef.getBytes(), (int)fakeRef.getOffset(), key, 0, keyLength); dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); getValueResult(key, 0, key.length, hashMapResult); List<WriteBuffers.ByteSegmentRef> results = new ArrayList<WriteBuffers.ByteSegmentRef>(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
// NOTE(review): truncated fragment of a partition-spill routine (duplicate chunk) —
// the LOG.warn message is cut off mid-concatenation and the braces are unbalanced, so
// the code is left byte-identical. Visible logic: record memory usage before/after,
// subtract the hash map's memory size from memoryUsed, clear and null the map, and
// return bytes freed.
int inMemRowCount = partition.hashMap.getNumValues(); if (inMemRowCount == 0) { LOG.warn("Trying to spill an empty hash partition! It may be due to " + ", Mem size: " + partition.hashMap.memorySize() + "): " + file); LOG.info("Memory usage before spilling: " + memoryUsed); long memFreed = partition.hashMap.memorySize(); memoryUsed -= memFreed; LOG.info("Memory usage after spilling: " + memoryUsed); partition.hashMap.clear(); partition.hashMap = null; return memFreed;