/**
 * Returns a newly constructed {@link IOContext} to serve as the initial value.
 */
@Override
protected IOContext initialValue() {
  final IOContext freshContext = new IOContext();
  return freshContext;
}
}; // closes the enclosing declaration, which starts outside this excerpt
protected void updateIOContext() throws IOException { long pointerPos = this.getPos(); if (!ioCxtRef.isBlockPointer()) { ioCxtRef.setCurrentBlockStart(pointerPos); ioCxtRef.setCurrentRow(0); return; } ioCxtRef.setCurrentRow(ioCxtRef.getCurrentRow() + 1); if (ioCxtRef.getNextBlockStart() == -1) { ioCxtRef.setNextBlockStart(pointerPos); ioCxtRef.setCurrentRow(0); } if (pointerPos != ioCxtRef.getNextBlockStart()) { // the reader pointer has moved to the end of next block, or the end of // current record. ioCxtRef.setCurrentRow(0); if (ioCxtRef.getCurrentBlockStart() == ioCxtRef.getNextBlockStart()) { ioCxtRef.setCurrentRow(1); } ioCxtRef.setCurrentBlockStart(ioCxtRef.getNextBlockStart()); ioCxtRef.setNextBlockStart(pointerPos); } }
if (conf.isSortedFilter() && ioContext.useSorted()) { if (!(conditionEvaluator instanceof ExprNodeGenericFuncEvaluator)) { LOG.error("Attempted to use the fact data is sorted when the conditionEvaluator is not " + "of type ExprNodeGenericFuncEvaluator"); ioContext.setUseSorted(false); return; } else { ioContext.setComparison(((ExprNodeGenericFuncEvaluator)conditionEvaluator).compare(row)); if (ioContext.getGenericUDFClassName() == null) { ioContext.setGenericUDFClassName( ((ExprNodeGenericFuncEvaluator)conditionEvaluator).genericUDF.getClass().getName()); if (ioContext.isBinarySearching()) { consecutiveSearches++; if (ioContext.isBinarySearching()) { return;
private void initIOContext(long startPos, boolean isBlockPointer, Path inputPath) { ioCxtRef = this.getIOContext(); ioCxtRef.setCurrentBlockStart(startPos); ioCxtRef.setBlockPointer(isBlockPointer); ioCxtRef.setInputPath(inputPath); LOG.debug("Processing file " + inputPath); // Logged at INFO in multiple other places. initDone = true; }
/**
 * Sets the input name on {@code conf}, looks up the matching context through
 * {@code IOContextMap.get(conf)} and clears its sorted-search related state.
 */
private void resetIOContext() {
  conf.set(Utilities.INPUT_NAME, "TestHiveBinarySearchRecordReader");
  ioContext = IOContextMap.get(conf);

  // Flags go back to false.
  ioContext.setUseSorted(false);
  ioContext.setBinarySearching(false);
  ioContext.setEndBinarySearch(false);

  // Reference-valued state goes back to null.
  ioContext.setComparison(null);
  ioContext.setGenericUDFClassName(null);
}
public boolean doNext(K key, V value) throws IOException { if (this.isSorted) { if (this.getIOContext().shouldEndBinarySearch() || (!this.getIOContext().useSorted() && this.wasUsingSortedSearch)) { beginLinearSearch(); this.wasUsingSortedSearch = false; this.getIOContext().setEndBinarySearch(false); if (this.getIOContext().useSorted()) { if (this.genericUDFClassName == null && this.getIOContext().getGenericUDFClassName() != null) { setGenericUDFClassName(this.getIOContext().getGenericUDFClassName()); if (this.getIOContext().isBinarySearching()) { if (this.getIOContext().getComparison() != null) { switch (this.getIOContext().getComparison()) { case GREATER: case EQUAL: this.getIOContext().setBinarySearching(false); sync(rangeStart); if (this.ioCxtRef.getCurrentBlockStart() == 0) { Path filePath = this.ioCxtRef.getInputPath(); PartitionDesc part = null; try { filePath, IOPrepareCache.get().getPartitionDescMap());
/**
 * Drives the reader with {@code GenericUDFOPEqual} as the UDF class name and
 * checks the result of {@code executeDoNext} for comparison values -1, 0 and 1.
 */
public void testEqualOpClass() throws Exception {
  init();

  final String equalUdfName = GenericUDFOPEqual.class.getName();
  ioContext.setGenericUDFClassName(equalUdfName);
  Assert.assertTrue(ioContext.isBinarySearching());
  Assert.assertTrue(executeDoNext(hbsReader));

  // Turn binary searching off before stepping through the comparison values.
  ioContext.setBinarySearching(false);

  // Comparison -1: a record is still expected.
  ioContext.setComparison(-1);
  Assert.assertTrue(executeDoNext(hbsReader));

  // Comparison 0: a record is still expected.
  ioContext.setComparison(0);
  Assert.assertTrue(executeDoNext(hbsReader));

  // Comparison 1: no further record is expected.
  ioContext.setComparison(1);
  Assert.assertFalse(executeDoNext(hbsReader));
}
long current = ctx.getIoCxt().getCurrentBlockStart(); LongWritable old = (LongWritable) vcValues[i]; if (old == null) { long current = ctx.getIoCxt().getCurrentRow(); LongWritable old = (LongWritable) vcValues[i]; if (old == null) { if(ctx.getIoCxt().getRecordIdentifier() == null) { vcValues[i] = null; vcValues[i] = new Object[RecordIdentifier.Field.values().length]; RecordIdentifier.StructInfo.toArray(ctx.getIoCxt().getRecordIdentifier(), (Object[])vcValues[i]); ctx.getIoCxt().setRecordIdentifier(null);//so we don't accidentally cache the value; shouldn't
public void testResetRange() throws Exception { init(); InOrder inOrder = inOrder(rcfReader); Assert.assertTrue(executeDoNext(hbsReader)); inOrder.verify(rcfReader).sync(50); ioContext.setComparison(-1); when(rcfReader.getPos()).thenReturn(75L); Assert.assertTrue(executeDoNext(hbsReader)); inOrder.verify(rcfReader).sync(75); ioContext.setEndBinarySearch(true); // This should make the search linear, sync to the beginning of the block being searched // [50, 100], set the comparison to be null, and the flag to reset the range should be unset Assert.assertTrue(executeDoNext(hbsReader)); inOrder.verify(rcfReader).sync(50); Assert.assertFalse(ioContext.isBinarySearching()); Assert.assertFalse(ioContext.shouldEndBinarySearch()); }
public void initIOContextSortedProps(FileSplit split, RecordReader recordReader, JobConf job) { this.jobConf = job; this.getIOContext().resetSortingValues(); this.isSorted = jobConf.getBoolean("hive.input.format.sorted", false); this.rangeStart = split.getStart(); this.rangeEnd = split.getStart() + split.getLength(); this.splitEnd = rangeEnd; if (recordReader instanceof RCFileRecordReader && rangeEnd != 0 && this.isSorted) { // Binary search only works if we know the size of the split, and the recordReader is an // RCFileRecordReader this.getIOContext().setUseSorted(true); this.getIOContext().setBinarySearching(true); this.wasUsingSortedSearch = true; } else { // Use the defalut methods for next in the child class this.isSorted = false; } }
/**
 * Drives the reader with {@code GenericUDFOPLessThan} as the UDF class name and
 * checks the result of {@code executeDoNext} for comparison values -1, 0 and 1.
 */
public void testLessThanOpClass() throws Exception {
  init();

  final String lessThanUdfName = GenericUDFOPLessThan.class.getName();
  ioContext.setGenericUDFClassName(lessThanUdfName);

  // After the first call binary searching is expected to be switched off.
  Assert.assertTrue(executeDoNext(hbsReader));
  Assert.assertFalse(ioContext.isBinarySearching());

  // Comparison -1: a record is still expected.
  ioContext.setComparison(-1);
  Assert.assertTrue(executeDoNext(hbsReader));

  // Comparison 0: no further record is expected.
  ioContext.setComparison(0);
  Assert.assertFalse(executeDoNext(hbsReader));

  // Comparison 1: no further record is expected.
  ioContext.setComparison(1);
  Assert.assertFalse(executeDoNext(hbsReader));
}
abort = execContext.getIoCxt().getIOExceptions();
@Override public boolean next(K key, V value) throws IOException { if(!initDone) { throw new IOException("Hive IOContext is not inited."); } updateIOContext(); try { boolean retVal = doNext(key, value); if(retVal) { if(key instanceof RecordIdentifier) { //supports AcidInputFormat which uses the KEY pass ROW__ID info ioCxtRef.setRecordIdentifier((RecordIdentifier)key); } else if(recordReader instanceof AcidInputFormat.AcidRecordReader) { //supports AcidInputFormat which do not use the KEY pass ROW__ID info ioCxtRef.setRecordIdentifier(((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier()); } } return retVal; } catch (IOException e) { ioCxtRef.setIOExceptions(true); throw e; } }
/**
 * Reports progress: 0 while the IO context says a binary search is in progress,
 * otherwise whatever the wrapped {@code recordReader} reports.
 *
 * @return 0 during binary search, else {@code recordReader.getProgress()}
 * @throws IOException declared on the signature (the body calls
 *         {@code recordReader.getProgress()})
 */
@Override
public float getProgress() throws IOException {
  final boolean binarySearching = this.getIOContext().isBinarySearching();
  if (!binarySearching) {
    return recordReader.getProgress();
  }
  return 0;
}
public void testHitSamePositionTwice() throws Exception { init(); Assert.assertTrue(executeDoNext(hbsReader)); verify(rcfReader).sync(50); ioContext.setComparison(1); // When getPos is called it should return the same value, signaling the end of the search, so // the search should continue linearly and it should sync to the beginning of the block [0, 50] Assert.assertTrue(executeDoNext(hbsReader)); InOrder inOrder = inOrder(rcfReader); inOrder.verify(rcfReader).sync(25); inOrder.verify(rcfReader).sync(0); Assert.assertFalse(ioContext.isBinarySearching()); }
@Override public boolean doNext(WritableComparable key, Writable value) throws IOException { // if current pos is larger than shrinkedLength which is calculated for // each split by table sampling, stop fetching any more (early exit) if (target.shrinkedLength > 0 && context.getIoCxt().getCurrentBlockStart() > target.shrinkedLength) { return false; } return super.doNext(key, value); } };
public void testNonLinearEqualTo() throws Exception { init(); Assert.assertTrue(executeDoNext(hbsReader)); verify(rcfReader).sync(50); ioContext.setComparison(0); when(rcfReader.getPos()).thenReturn(25L); // By setting the comparison to equal, the search should use the block [0, 50] Assert.assertTrue(executeDoNext(hbsReader)); verify(rcfReader).sync(25); }
/**
 * Switches from binary search to a linear scan. Call this once the binary
 * search has finished and before the linear scan starts.
 *
 * <p>Syncs to {@code rangeStart}, then clears the context's binary-searching
 * flag and this reader's {@code wasUsingSortedSearch} flag.
 *
 * @throws IOException declared on the signature (the body calls {@code sync})
 */
private void beginLinearSearch() throws IOException {
  // Reposition first; the flags are only cleared if this does not throw.
  sync(rangeStart);

  this.getIOContext().setBinarySearching(false);
  this.wasUsingSortedSearch = false;
}
/**
 * Returns the {@link IOContext} for the given configuration.
 *
 * <p>When the configured execution engine equals {@code "spark"}, this defers to
 * the no-argument {@code get()}. Otherwise the context is looked up in
 * {@code inputNameIOContextMap} under the configuration's input name, and a new
 * one is registered first if none exists for that name.
 *
 * @param conf configuration supplying the execution engine and input name
 * @return the context selected as described above
 */
public static IOContext get(Configuration conf) {
  final String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  if (engine.equals("spark")) {
    return get();
  }

  final String inputName = conf.get(Utilities.INPUT_NAME);
  final boolean alreadyRegistered = inputNameIOContextMap.containsKey(inputName);
  if (!alreadyRegistered) {
    // First request for this input name: register a fresh context.
    inputNameIOContextMap.put(inputName, new IOContext());
  }
  return inputNameIOContextMap.get(inputName);
}
/** * @return true if there are more key-values and advances to next key-values * @throws IOException */ @Override public boolean next() throws IOException { //add the previous nextKVReader back to queue if(nextKVReader != null){ addToQueue(nextKVReader); } //get the new nextKVReader with lowest key nextKVReader = pQueue.poll(); if (nextKVReader == null) return false; if (nextKVReader != prevKVReader) { prevKVReader = nextKVReader; // update path in IOContext ioCxt.setInputPath(kvReaderPathMap.get(nextKVReader)); } return true; }