/**
 * Builds the result list over the given source of input records.
 *
 * @param inputIterator iterator supplying the records to be processed
 */
public HiveBaseFunctionResultList(Iterator<T> inputIterator) {
  // The cache buffers (key, value) pairs emitted while consuming the input.
  this.lastRecordOutput = new HiveKVResultCache();
  this.inputIterator = inputIterator;
}
/**
 * Buffers an emitted (key, value) pair into the KV result cache.
 *
 * @param key   record key produced by the operator pipeline
 * @param value record value produced by the operator pipeline
 * @throws IOException declared by the collector contract
 */
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  // Copies are made before caching — presumably because the caller may
  // reuse the Writable instances; confirm against the upstream contract.
  HiveKey keyCopy = SparkUtilities.copyHiveKey(key);
  BytesWritable valueCopy = SparkUtilities.copyBytesWritable(value);
  lastRecordOutput.add(keyCopy, valueCopy);
}
public synchronized void add(HiveKey key, BytesWritable value) { if (writeCursor >= IN_MEMORY_NUM_ROWS) { // Write buffer is full if (!readBufferUsed) { // Read buffer isn't used, switch buffer switchBufferAndResetCursor(); } else { // Need to spill from write buffer to disk try { if (output == null) { setupOutput(); } for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) { ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[i]; writeHiveKey(output, pair.getFirst()); writeValue(output, pair.getSecond()); pair.setFirst(null); pair.setSecond(null); } writeCursor = 0; } catch (Exception e) { clear(); // Clean up the cache throw new RuntimeException("Failed to spill rows to disk", e); } } } ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[writeCursor++]; pair.setFirst(key); pair.setSecond(value); }
// NOTE(review): this method is truncated/garbled in this copy of the source:
// the `try` block has no `catch`/`finally`, an `else` is attached directly to
// the `try`, `e` is referenced but never declared, and the body is not
// closed. It will not compile as-is — recover the complete implementation
// from upstream before editing. Only this comment was added; the code below
// is byte-identical to the original.
public synchronized Tuple2<HiveKey, BytesWritable> next() { Preconditions.checkState(hasNext()); if (!readBufferUsed) { try { pair.setFirst(readHiveKey(input)); pair.setSecond(readValue(input)); } else { switchBufferAndResetCursor(); clear(); // Clean up the cache throw new RuntimeException("Failed to load rows from disk", e);
@Test public void testSimple() throws Exception { // Create KV result cache object, add one (k,v) pair and retrieve them. HiveKVResultCache cache = new HiveKVResultCache(); HiveKey key = new HiveKey("key".getBytes(), "key".hashCode()); BytesWritable value = new BytesWritable("value".getBytes()); cache.add(key, value); assertTrue("KV result cache should have at least one element", cache.hasNext()); Tuple2<HiveKey, BytesWritable> row = cache.next(); assertTrue("Incorrect key", row._1().equals(key)); assertTrue("Incorrect value", row._2().equals(value)); assertTrue("Cache shouldn't have more records", !cache.hasNext()); }
/** Helper method which inserts numRecords and retrieves them from cache and verifies */
private void testSpillingHelper(HiveKVResultCache cache, int numRecords) {
  // Fill the cache with numRecords deterministic rows.
  for (int i = 0; i < numRecords; i++) {
    String key = "key_" + i;
    String value = "value_" + i;
    HiveKey hiveKey = new HiveKey(key.getBytes(), key.hashCode());
    BytesWritable writable = new BytesWritable(value.getBytes());
    cache.add(hiveKey, writable);
  }
  // Drain the cache and check that rows come back in insertion order.
  int recordsSeen = 0;
  while (cache.hasNext()) {
    String key = "key_" + recordsSeen;
    String value = "value_" + recordsSeen;
    Tuple2<HiveKey, BytesWritable> row = cache.next();
    String actualKey = new String(row._1().getBytes());
    String actualValue = new String(row._2().getBytes());
    assertTrue("Unexpected key at position: " + recordsSeen, actualKey.equals(key));
    assertTrue("Unexpected value at position: " + recordsSeen, actualValue.equals(value));
    recordsSeen++;
  }
  assertTrue("Retrieved record count doesn't match inserted record count",
      numRecords == recordsSeen);
  cache.clear();
}
/**
 * Returns the next buffered (key, value) pair.
 *
 * @return the next cached row
 * @throws NoSuchElementException when the cache has been exhausted
 */
@Override
public Tuple2<HiveKey, BytesWritable> next() {
  // Guard clause: fail fast when no more records are available.
  if (!hasNext()) {
    throw new NoSuchElementException("There are no more elements");
  }
  return lastRecordOutput.next();
}
// NOTE(review): this method is truncated/garbled in this copy of the source:
// the `try` block has no `catch`/`finally`, an `else` is attached directly to
// the `try`, `e` is referenced but never declared, and the body is not
// closed. It will not compile as-is — recover the complete implementation
// from upstream before editing. Only this comment was added; the code below
// is byte-identical to the original.
public synchronized Tuple2<HiveKey, BytesWritable> next() { Preconditions.checkState(hasNext()); if (!readBufferUsed) { try { pair.setFirst(readHiveKey(input)); pair.setSecond(readValue(input)); } else { switchBufferAndResetCursor(); clear(); // Clean up the cache throw new RuntimeException("Failed to load rows from disk", e);
/**
 * Returns the next buffered (key, value) pair.
 *
 * @return the next cached row
 * @throws NoSuchElementException when the cache has been exhausted
 */
@Override
public Tuple2<HiveKey, BytesWritable> next() {
  // Guard clause: fail fast when no more records are available.
  if (!hasNext()) {
    throw new NoSuchElementException("There are no more elements");
  }
  return lastRecordOutput.next();
}
// NOTE(review): this method is truncated/garbled in this copy of the source:
// the `try` block has no `catch`/`finally`, an `else` is attached directly to
// the `try`, `e` is referenced but never declared, and the body is not
// closed. It will not compile as-is — recover the complete implementation
// from upstream before editing. Only this comment was added; the code below
// is byte-identical to the original.
public synchronized Tuple2<HiveKey, BytesWritable> next() { Preconditions.checkState(hasNext()); if (!readBufferUsed) { try { pair.setFirst(readHiveKey(input)); pair.setSecond(readValue(input)); } else { switchBufferAndResetCursor(); clear(); // Clean up the cache throw new RuntimeException("Failed to load rows from disk", e);
public synchronized void add(HiveKey key, BytesWritable value) { if (writeCursor >= IN_MEMORY_NUM_ROWS) { // Write buffer is full if (!readBufferUsed) { // Read buffer isn't used, switch buffer switchBufferAndResetCursor(); } else { // Need to spill from write buffer to disk try { if (output == null) { setupOutput(); } for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) { ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[i]; writeHiveKey(output, pair.getFirst()); writeValue(output, pair.getSecond()); pair.setFirst(null); pair.setSecond(null); } writeCursor = 0; } catch (Exception e) { clear(); // Clean up the cache throw new RuntimeException("Failed to spill rows to disk", e); } } } ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[writeCursor++]; pair.setFirst(key); pair.setSecond(value); }
/**
 * Builds the result list over the given source of input records.
 *
 * @param inputIterator iterator supplying the records to be processed
 */
public HiveBaseFunctionResultList(Iterator<T> inputIterator) {
  // The cache buffers (key, value) pairs emitted while consuming the input.
  this.lastRecordOutput = new HiveKVResultCache();
  this.inputIterator = inputIterator;
}
/**
 * Buffers an emitted (key, value) pair into the KV result cache.
 *
 * @param key   record key produced by the operator pipeline
 * @param value record value produced by the operator pipeline
 * @throws IOException declared by the collector contract
 */
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  // Copies are made before caching — presumably because the caller may
  // reuse the Writable instances; confirm against the upstream contract.
  HiveKey keyCopy = SparkUtilities.copyHiveKey(key);
  BytesWritable valueCopy = SparkUtilities.copyBytesWritable(value);
  lastRecordOutput.add(keyCopy, valueCopy);
}
public synchronized void add(HiveKey key, BytesWritable value) { if (writeCursor >= IN_MEMORY_NUM_ROWS) { // Write buffer is full if (!readBufferUsed) { // Read buffer isn't used, switch buffer switchBufferAndResetCursor(); } else { // Need to spill from write buffer to disk try { if (output == null) { setupOutput(); } for (int i = 0; i < IN_MEMORY_NUM_ROWS; i++) { ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[i]; writeHiveKey(output, pair.getFirst()); writeValue(output, pair.getSecond()); pair.setFirst(null); pair.setSecond(null); } writeCursor = 0; } catch (Exception e) { clear(); // Clean up the cache throw new RuntimeException("Failed to spill rows to disk", e); } } } ObjectPair<HiveKey, BytesWritable> pair = writeBuffer[writeCursor++]; pair.setFirst(key); pair.setSecond(value); }
@Test public void testSpilling() throws Exception { HiveKVResultCache cache = new HiveKVResultCache(); final int recordCount = HiveKVResultCache.IN_MEMORY_NUM_ROWS * 3; // Test using the same cache where first n rows are inserted then cache is cleared. // Next reuse the same cache and insert another m rows and verify the cache stores correctly. // This simulates reusing the same cache over and over again. testSpillingHelper(cache, recordCount); testSpillingHelper(cache, 1); testSpillingHelper(cache, recordCount); }
/**
 * Buffers an emitted (key, value) pair into the KV result cache.
 *
 * @param key   record key produced by the operator pipeline
 * @param value record value produced by the operator pipeline
 * @throws IOException declared by the collector contract
 */
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  // Copies are made before caching — presumably because the caller may
  // reuse the Writable instances; confirm against the upstream contract.
  HiveKey keyCopy = SparkUtilities.copyHiveKey(key);
  BytesWritable valueCopy = SparkUtilities.copyBytesWritable(value);
  lastRecordOutput.add(keyCopy, valueCopy);
}
/**
 * Builds the result list over the given source of input records.
 *
 * @param inputIterator iterator supplying the records to be processed
 */
public HiveBaseFunctionResultList(Iterator<T> inputIterator) {
  // The cache buffers (key, value) pairs emitted while consuming the input.
  this.lastRecordOutput = new HiveKVResultCache();
  this.inputIterator = inputIterator;
}