public DummyOperator() { super(new CompilationOpContext()); }
public void resetOpContext() {
  opContext = new CompilationOpContext();
  sequencer = new AtomicInteger();
}
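// A minimal sketch (hypothetical, not from the Hive tests) of why the reset above
// matters: a CompilationOpContext numbers the operators created against it, so a
// fresh context restarts that numbering for the next compilation. The concrete
// starting id (0) is an assumption.
CompilationOpContext freshCtx = new CompilationOpContext();
Operator<SelectDesc> first = OperatorFactory.get(freshCtx, SelectDesc.class);  // id 0 (assumed)
Operator<SelectDesc> second = OperatorFactory.get(freshCtx, SelectDesc.class); // id 1 (assumed)
freshCtx = new CompilationOpContext(); // same effect as resetOpContext() above
Operator<SelectDesc> third = OperatorFactory.get(freshCtx, SelectDesc.class);  // id 0 again (assumed)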
/**
 * Create a Context with a given executionId. ExecutionId, together with
 * user name and conf, will determine the temporary directory locations.
 */
private Context(Configuration conf, String executionId) {
  this.conf = conf;
  this.executionId = executionId;
  this.rewrittenStatementContexts = new HashSet<>();

  // The local and non-local tmp locations are configurable; however, each is the
  // same across all external file systems.
  nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
  localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
  scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
  stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
  opContext = new CompilationOpContext();
  viewsTokenRewriteStreams = new HashMap<>();
}
/**
 * Create a Context with a given executionId. ExecutionId, together with
 * user name and conf, will determine the temporary directory locations.
 */
public Context(Configuration conf, String executionId) {
  this.conf = conf;
  this.executionId = executionId;

  // The local and non-local tmp locations are configurable; however, each is the
  // same across all external file systems.
  nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
  localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
  scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
  stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
  opContext = new CompilationOpContext();
  viewsTokenRewriteStreams = new HashMap<>();
}
@Override
protected void setUp() {
  mr = PlanUtils.getMapRedWork();
  ctx = new CompilationOpContext();
}
public static FakeVectorDataSourceOperator addFakeVectorDataSourceParent(
    Iterable<VectorizedRowBatch> source, Operator<? extends OperatorDesc> op) {
  FakeVectorDataSourceOperator parent =
      new FakeVectorDataSourceOperator(new CompilationOpContext(), source);
  List<Operator<? extends OperatorDesc>> listParents =
      new ArrayList<Operator<? extends OperatorDesc>>(1);
  listParents.add(parent);
  op.setParentOperators(listParents);
  List<Operator<? extends OperatorDesc>> listChildren =
      new ArrayList<Operator<? extends OperatorDesc>>(1);
  listChildren.add(op);
  parent.setChildOperators(listChildren);
  return parent;
}
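// Hypothetical usage sketch: wrap an operator under test with the fake source and
// check the wiring. The batch construction mirrors FakeVectorRowBatchFromObjectIterables
// as used in the limit test below; the "bigint" column and its values are made up here.
FakeVectorRowBatchFromObjectIterables frb = new FakeVectorRowBatchFromObjectIterables(
    2, new String[] {"bigint"}, Arrays.asList(new Object[] {1L, 2L}));
List<VectorizedRowBatch> batches = Arrays.asList(frb.produceNextBatch());
Operator<? extends OperatorDesc> op =
    OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
FakeVectorDataSourceOperator source =
    FakeVectorDataSourceOperator.addFakeVectorDataSourceParent(batches, op);
Assert.assertSame(source, op.getParentOperators().get(0));
Assert.assertSame(op, source.getChildOperators().get(0));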
MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
HashTableSinkOperator sink =
    new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
public void testScriptOperatorBlacklistedEnvVarsProcessing() {
  ScriptOperator scriptOperator = new ScriptOperator(new CompilationOpContext());
  Configuration hconf = new JobConf(ScriptOperator.class);
  Map<String, String> env = new HashMap<String, String>();
  HiveConf.setVar(hconf, HiveConf.ConfVars.HIVESCRIPT_ENV_BLACKLIST, "foobar");
  hconf.set("foobar", "foobar");
  hconf.set("barfoo", "barfoo");
  scriptOperator.addJobConfToEnvironment(hconf, env);
  Assert.assertFalse(env.containsKey("foobar"));
  Assert.assertTrue(env.containsKey("barfoo"));
}
private void validateVectorLimitOperator(int limit, int batchSize, int expectedBatchSize)
    throws HiveException {

  @SuppressWarnings("unchecked")
  FakeVectorRowBatchFromObjectIterables frboi = new FakeVectorRowBatchFromObjectIterables(
      batchSize,
      new String[] {"tinyint", "double"},
      Arrays.asList(new Object[] {1, 2, 3, 4}),
      Arrays.asList(new Object[] {323.0, 34.5, null, 89.3}));

  // Get next batch
  VectorizedRowBatch vrb = frboi.produceNextBatch();

  // Create limit desc with limit value
  LimitDesc ld = new LimitDesc(limit);
  VectorLimitDesc vectorDesc = new VectorLimitDesc();
  VectorLimitOperator lo =
      new VectorLimitOperator(new CompilationOpContext(), ld, null, vectorDesc);
  lo.initialize(new Configuration(), null);

  // Process the batch
  lo.process(vrb, 0);

  // Verify batch size
  Assert.assertEquals(vrb.size, expectedBatchSize);
}
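// Illustrative call (values chosen here, not taken from the suite): with a 4-row
// batch and a limit of 2, the operator is expected to trim the batch down to 2 rows.
validateVectorLimitOperator(2, 4, 2);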
private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
  List<String> columns = new ArrayList<String>();
  columns.add("col1");
  FilterDesc fdesc = new FilterDesc();
  fdesc.setPredicate(col1Expr);
  VectorFilterDesc vectorDesc = new VectorFilterDesc();
  Operator<? extends OperatorDesc> filterOp =
      OperatorFactory.get(new CompilationOpContext(), fdesc);
  VectorizationContext vc = new VectorizationContext("name", columns);
  return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(filterOp, vc, vectorDesc);
}
/**
 * testValidateMapJoinOperator validates that the Map join operator can be vectorized.
 */
@Test
public void testValidateMapJoinOperator() {
  MapJoinOperator map = new MapJoinOperator(new CompilationOpContext());
  MapJoinDesc mjdesc = new MapJoinDesc();
  prepareAbstractMapJoin(map, mjdesc);
  map.setConf(mjdesc);

  Vectorizer vectorizer = new Vectorizer();
  vectorizer.testSetCurrentBaseWork(new MapWork());
  // UNDONE
  // Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false));
}
/**
 * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized.
 */
@Test
public void testValidateSMBJoinOperator() {
  SMBMapJoinOperator map = new SMBMapJoinOperator(new CompilationOpContext());
  SMBJoinDesc mjdesc = new SMBJoinDesc();
  prepareAbstractMapJoin(map, mjdesc);
  map.setConf(mjdesc);

  Vectorizer vectorizer = new Vectorizer();
  vectorizer.testSetCurrentBaseWork(new MapWork());
  // UNDONE
  // Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false));
}
public void testAggregateDoubleIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, aggregateName,
      GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.doubleTypeInfo);
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);

  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());

  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
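// Illustrative call (hypothetical data and expected value): run "min" over a single
// 3-row double batch built the same way as in the limit test above. The shape the
// validator accepts for `expected` ultimately depends on getValidator("min").
FakeVectorRowBatchFromObjectIterables src = new FakeVectorRowBatchFromObjectIterables(
    3, new String[] {"double"}, Arrays.asList(new Object[] {13.0, 7.0, 23.0}));
testAggregateDoubleIterable("min", Arrays.asList(src.produceNextBatch()), 7.0);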
public void testAggregateCountReduceIterable(
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, "count",
      GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo);
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;
  vectorDesc.setProcessingMode(ProcessingMode.GLOBAL); // Use GLOBAL when no key for Reduce.

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);

  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());

  Object result = outBatchList.get(0);
  Validator validator = getValidator("count");
  validator.validate("_total", expected, result);
}
public void testAggregateLongIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, aggregateName,
      GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo);
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);

  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());

  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
public void testAggregateDecimalIterable(String aggregateName,
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, aggregateName,
      GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4));
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);

  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());

  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
public void testAggregateCountStarIterable(
    Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);

  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescCountStar(ctx);
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;
  vectorDesc.setProcessingMode(ProcessingMode.HASH);

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);

  FakeCaptureVectorToRowOutputOperator out =
      FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());

  Object result = outBatchList.get(0);
  Validator validator = getValidator("count");
  validator.validate("_total", expected, result);
}
/**
 * This method drives the test. It takes the data from getBaseTable() and
 * feeds it through a SELECT operator with a COLLECT operator after. Each
 * row produced by the collect operator is compared to getExpectedResult(),
 * and if every row matches, the method completes without an assertion failure.
 * @throws HiveException
 */
public final void testUdf() throws HiveException {
  InspectableObject[] data = getBaseTable();
  List<ExprNodeDesc> expressionList = getExpressionList();
  SelectDesc selectCtx =
      new SelectDesc(expressionList, OperatorTestUtils.createOutputColumnNames(expressionList));
  Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
  op.setConf(selectCtx);
  CollectDesc cd = new CollectDesc(Integer.valueOf(10));
  CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
  op.initialize(new JobConf(OperatorTestUtils.class), new ObjectInspector[] {data[0].oi});
  OperatorTestUtils.assertResults(op, cdop, data, getExpectedResult());
}