/**
 * Translates a PTF invocation spec into a {@link PTFDesc} plan node.
 * Initializes shared translator state, builds the descriptor, walks the
 * invocation chain, and finally adjusts output OIs for streaming evaluation.
 */
public PTFDesc translate(PTFInvocationSpec qSpec, SemanticAnalyzer semAly, HiveConf hCfg,
    RowResolver inputRR, UnparseTranslator unparseT) throws SemanticException {
  // Shared translator state must be set up before the chain walk.
  init(semAly, hCfg, inputRR, unparseT);
  ptfInvocation = qSpec;
  PTFDesc desc = new PTFDesc();
  desc.setCfg(hCfg);
  desc.setLlInfo(llInfo);
  ptfDesc = desc;
  // Translate the chain bottom-up into ptfDesc's function definitions.
  translatePTFChain();
  // Rewrites output ObjectInspectors so streaming-mode evaluation works.
  PTFDeserializer.alterOutputOIForStreaming(ptfDesc);
  return ptfDesc;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs) throws SemanticException { ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; PTFOperator op = (PTFOperator) nd; PTFDesc conf = op.getConf(); //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. PartitionedTableFunctionDef funcDef = conf.getFuncDef(); List<String> referencedColumns = funcDef.getReferencedColumns(); if (!conf.forWindowing() && !conf.forNoop() && referencedColumns == null) { return super.process(nd, stack, cppCtx, nodeOutputs); } List<FieldNode> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); if (conf.forWindowing()) { WindowTableFunctionDef def = (WindowTableFunctionDef) funcDef; prunedCols = mergeFieldNodes(prunedCols, getWindowFunctionColumns(def)); } else if (conf.forNoop()) { prunedCols = new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); } else { prunedCols = fromColumnNames(referencedColumns); } List<ColumnInfo> newRS = prunedColumnsList(prunedCols, op.getSchema(), funcDef); op.getSchema().setSignature(new ArrayList<ColumnInfo>(newRS)); ShapeDetails outputShape = funcDef.getStartOfChain().getInput().getOutputShape(); cppCtx.getPrunedColLists().put(op, fromColumnNames(outputShape.getColumnNames())); return null; }
PartitionedTableFunctionDef tabDef = tabFn.getTableDef(); PTFInputDef inputDef = tabDef.getInput(); ObjectInspector inputOI = conf.getStartOfChain() == tabDef ? inputObjInspectors[0] : inputDef.getOutputShape().getOI(); AbstractSerDe serde = conf.isMapSide() ? tabDef.getInput().getOutputShape().getSerde() : tabDef.getRawInputShape().getSerde(); StructObjectInspector outputOI = conf.isMapSide() ? tabDef.getInput().getOutputShape().getOI() : tabDef.getRawInputShape().getOI(); inputPart = PTFPartition.create(conf.getCfg(), serde, (StructObjectInspector) inputOI,
/**
 * Initializes the PTF operator: reconstructs the chain's runtime metadata
 * from the plan, selects the output ObjectInspector for this side of the
 * job, and wires up the key wrapper plus the streaming function chain.
 */
@Override
protected void initializeOp(Configuration jobConf) throws HiveException {
  super.initializeOp(jobConf);
  hiveConf = jobConf;
  isMapOperator = conf.isMapSide();
  currentKeys = null;
  // Rebuild OIs/serdes of the PTF chain from the serialized descriptor.
  reconstructQueryDef(hiveConf);
  // Map side emits the raw-input shape of the chain's first function;
  // reduce side emits the final function's output shape.
  outputObjInspector = isMapOperator
      ? conf.getStartOfChain().getRawInputShape().getOI()
      : conf.getFuncDef().getOutputShape().getOI();
  setupKeysWrapper(inputObjInspectors[0]);
  ptfInvocation = setupChain();
  ptfInvocation.initializeStreaming(jobConf, isMapOperator);
  firstMapRow = true;
}
init(semAly, hCfg, inputRR, unparseT); windowingSpec = wdwSpec; ptfDesc = new PTFDesc(); ptfDesc.setCfg(hCfg); ptfDesc.setLlInfo(llInfo); WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef(); ptfDesc.setFuncDef(wdwTFnDef);
boolean isMapSide = ptfDesc.isMapSide(); if (isMapSide) { setOperatorIssue("PTF Mapper not supported"); boolean forNoop = ptfDesc.forNoop(); if (forNoop) { setOperatorIssue("NOOP not supported"); return false; boolean forWindowing = ptfDesc.forWindowing(); if (!forWindowing) { setOperatorIssue("Windowing required"); return false; PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef); if (!isWindowTableFunctionDef) {
PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain(); ptfDesc.setMapSide(true); input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR); RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()),
/**
 * Runs this PTF over one input partition and returns the resulting
 * partition. On the map side only the raw-input transformation is applied.
 * The output partition is created lazily and reused (reset) across calls.
 */
public PTFPartition execute(PTFPartition iPart) throws HiveException {
  // Map-side invocation only reshapes the raw input.
  if (ptfDesc.isMapSide()) {
    return transformRawInput(iPart);
  }
  PTFPartitionIterator<Object> rows = iPart.iterator();
  // Hook up lead/lag UDFs so they can look around within this partition.
  PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc.getLlInfo(), rows);
  if (outputPartition != null) {
    outputPartition.reset();
  } else {
    outputPartition = PTFPartition.create(ptfDesc.getCfg(),
        tableDef.getOutputShape().getSerde(), OI, tableDef.getOutputShape().getOI());
  }
  execute(rows, outputPartition);
  return outputPartition;
}
/**
 * Rebuilds the runtime metadata (ObjectInspectors, serdes) of the PTF
 * chain held in {@code conf}, starting from the operator's input OI.
 *
 * @param hiveConf job configuration used by the deserializer
 * @throws HiveException if the chain cannot be re-initialized
 */
protected void reconstructQueryDef(Configuration hiveConf) throws HiveException {
  PTFDeserializer deserializer =
      new PTFDeserializer(conf, (StructObjectInspector) inputObjInspectors[0], hiveConf);
  deserializer.initializePTFChain(conf.getFuncDef());
}
private void pushRankLimit(PTFOperator ptfOp, OpWalkerInfo owi) throws SemanticException { PTFDesc conf = ptfOp.getConf(); if ( !conf.forWindowing() ) { return; WindowTableFunctionDef wTFn = (WindowTableFunctionDef) conf.getFuncDef(); List<Integer> rFnIdxs = rankingFunctions(wTFn);
/**
 * Computes the value of one window function for a single row of the
 * given partition.
 *
 * @param wFn          the window function definition to evaluate
 * @param rowToProcess index of the row within {@code partition}
 * @param partition    the partition the window frame ranges over
 * @return the function's value for that row
 * @throws HiveException if evaluation fails
 */
private Object evaluateWindowFunction(WindowFunctionDef wFn, int rowToProcess,
    PTFPartition partition) throws HiveException {
  // Obtain a partition-scoped evaluator for this frame, then iterate to
  // the requested row; lead/lag info is needed for LEAD/LAG navigation.
  BasePartitionEvaluator evaluator = wFn.getWFnEval().getPartitionWindowingEvaluator(
      wFn.getWindowFrame(), partition, wFn.getArgs(), wFn.getOI(), nullsLast);
  return evaluator.iterate(rowToProcess, ptfDesc.getLlInfo());
}
/**
 * Initializes streaming-mode evaluation for this link of the PTF chain,
 * then recurses into the next link (if any).
 */
void initializeStreaming(Configuration cfg, boolean isMapSide) throws HiveException {
  PartitionedTableFunctionDef tabDef = tabFn.getTableDef();
  PTFInputDef inputDef = tabDef.getInput();
  // The first function in the chain reads the operator's input OI; every
  // later function reads the previous function's output shape.
  ObjectInspector inputOI;
  if (conf.getStartOfChain() == tabDef) {
    inputOI = inputObjInspectors[0];
  } else {
    inputOI = inputDef.getOutputShape().getOI();
  }
  tabFn.initializeStreaming(cfg, (StructObjectInspector) inputOI, isMapSide);
  // Propagate initialization down the rest of the chain.
  if (next != null) {
    next.initializeStreaming(cfg, isMapSide);
  }
}
// Runs this PTF over one input partition; map side only reshapes raw input,
// otherwise lead/lag functions are wired to the partition iterator and the
// (lazily created, reused) output partition is filled.
// NOTE(review): this variant passes the whole PTFDesc to
// connectLeadLagFunctionsToPartition, while the sibling implementations in
// this source pass ptfDesc.getLlInfo() — confirm which overload this build
// of PTFOperator declares.
public PTFPartition execute(PTFPartition iPart) throws HiveException { if ( ptfDesc.isMapSide() ) { return transformRawInput(iPart); } PTFPartitionIterator<Object> pItr = iPart.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc, pItr); if ( outputPartition == null ) { outputPartition = PTFPartition.create(ptfDesc.getCfg(), tableDef.getOutputShape().getSerde(), OI, tableDef.getOutputShape().getOI()); } else { outputPartition.reset(); } execute(pItr, outputPartition); return outputPartition; }
/**
 * Returns an iterator over this PTF's output for the given input rows.
 * Map side delegates to the raw-input transformation; otherwise this base
 * implementation always throws — subclasses that can stream their output
 * must override it, and PTFs that cannot iterate at all fail with a
 * distinct message.
 */
public Iterator<Object> iterator(PTFPartitionIterator<Object> pItr) throws HiveException {
  if (ptfDesc.isMapSide()) {
    return transformRawInputIterator(pItr);
  }
  if (canIterateOutput()) {
    // Capable of iteration but no override was supplied by the subclass.
    throw new HiveException(String.format(
        "Internal error: PTF %s, provides no iterator method", getClass().getName()));
  }
  throw new HiveException(
      "Internal error: iterator called on a PTF that cannot provide its output as an Iterator");
}
/**
 * Translates the PTF invocation chain bottom-up. The specs are linked
 * outermost-first, so they are pushed onto a stack and popped so that the
 * innermost input is translated first; each translated def feeds the next.
 */
private void translatePTFChain() throws SemanticException {
  Deque<PTFInputSpec> chain = new ArrayDeque<PTFInvocationSpec.PTFInputSpec>();
  for (PTFInputSpec spec = ptfInvocation.getFunction(); spec != null; spec = spec.getInput()) {
    chain.push(spec);
  }
  PTFInputDef prevDef = null;
  int inputNum = 0;
  while (!chain.isEmpty()) {
    PTFInputSpec spec = chain.pop();
    if (spec instanceof PTFQueryInputSpec) {
      prevDef = translate((PTFQueryInputSpec) spec, inputNum);
    } else {
      prevDef = translate((PartitionedTableFunctionSpec) spec, prevDef, inputNum);
    }
    inputNum++;
  }
  // The last def translated is the top-most partitioned table function.
  ptfDesc.setFuncDef((PartitionedTableFunctionDef) prevDef);
}
init(semAly, hCfg, inputRR, unparseT); windowingSpec = wdwSpec; ptfDesc = new PTFDesc(); ptfDesc.setCfg(hCfg); ptfDesc.setLlInfo(llInfo); WindowTableFunctionDef wdwTFnDef = new WindowTableFunctionDef(); ptfDesc.setFuncDef(wdwTFnDef);
// Operator init: rebuilds the PTF chain's runtime metadata from the plan,
// picks the output ObjectInspector (raw-input shape of the chain's first
// function on the map side, the final function's output shape otherwise),
// then sets up the partition-key wrapper and the streaming function chain.
@Override protected void initializeOp(Configuration jobConf) throws HiveException { super.initializeOp(jobConf); hiveConf = jobConf; isMapOperator = conf.isMapSide(); currentKeys = null; reconstructQueryDef(hiveConf); if (isMapOperator) { PartitionedTableFunctionDef tDef = conf.getStartOfChain(); outputObjInspector = tDef.getRawInputShape().getOI(); } else { outputObjInspector = conf.getFuncDef().getOutputShape().getOI(); } setupKeysWrapper(inputObjInspectors[0]); ptfInvocation = setupChain(); ptfInvocation.initializeStreaming(jobConf, isMapOperator); firstMapRow = true; }
PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain(); ptfDesc.setMapSide(true); input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR); RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr(); input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()),
// Runs this PTF over one input partition and returns the result partition.
// Map side only applies the raw-input transformation; otherwise lead/lag
// functions are connected to the partition iterator, and the output
// partition is created lazily on first use and reset/reused afterwards.
public PTFPartition execute(PTFPartition iPart) throws HiveException { if ( ptfDesc.isMapSide() ) { return transformRawInput(iPart); } PTFPartitionIterator<Object> pItr = iPart.iterator(); PTFOperator.connectLeadLagFunctionsToPartition(ptfDesc.getLlInfo(), pItr); if ( outputPartition == null ) { outputPartition = PTFPartition.create(ptfDesc.getCfg(), tableDef.getOutputShape().getSerde(), OI, tableDef.getOutputShape().getOI()); } else { outputPartition.reset(); } execute(pItr, outputPartition); return outputPartition; }