/**
 * Copy constructor.
 *
 * Builds a descriptor that has the same source path as {@code o} and shares its
 * field values; references are copied as-is, no deep copy is made.
 *
 * @param o the descriptor to copy from; must not be {@code null}
 */
public LoadFileDesc(final LoadFileDesc o) {
  super(o.getSourcePath());

  // Column metadata.
  this.columns = o.columns;
  this.columnTypes = o.columnTypes;

  // Destination of the move.
  this.targetDir = o.targetDir;
  this.isDfsDir = o.isDfsDir;
  this.destinationCreateTable = o.destinationCreateTable;
}
/**
 * Copy constructor.
 *
 * Builds a descriptor with the same source path and write type as {@code o} and
 * shares its field values; references are copied as-is, no deep copy is made.
 *
 * @param o the descriptor to copy from; must not be {@code null}
 */
public LoadFileDesc(final LoadFileDesc o) {
  super(o.getSourcePath(), o.getWriteType());

  // Column metadata.
  this.columns = o.columns;
  this.columnTypes = o.columnTypes;

  // Destination of the move.
  this.targetDir = o.targetDir;
  this.isDfsDir = o.isDfsDir;

  // CTAS / create-view related state.
  this.isMmCtas = o.isMmCtas;
  this.ctasCreateTableDesc = o.ctasCreateTableDesc;
  this.createViewDesc = o.createViewDesc;
}
public static Task<MoveWork> findMoveTaskForFsopOutput( List<Task<MoveWork>> mvTasks, Path fsopFinalDir, boolean isMmFsop) { // find the move task for (Task<MoveWork> mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); Path srcDir = null; boolean isLfd = false; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourcePath(); isLfd = true; if (isMmFsop) { srcDir = srcDir.getParent(); } } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourcePath(); } if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Observing MoveWork " + System.identityHashCode(mvWork) + " with " + srcDir + "(from " + (isLfd ? "LFD" : "LTD") + ") while looking for " + fsopFinalDir + "(mm = " + isMmFsop + ")"); } if ((srcDir != null) && srcDir.equals(fsopFinalDir)) { return mvTsk; } } return null; }
private void setLoadFileLocation( final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException { // CTAS; make the movetask's destination directory the table's destination. Long txnIdForCtas = null; int stmtId = 0; // CTAS cannot be part of multi-txn stmt FileSinkDesc dataSinkForCtas = null; String loc = null; if (pCtx.getQueryProperties().isCTAS()) { CreateTableDesc ctd = pCtx.getCreateTable(); dataSinkForCtas = ctd.getAndUnsetWriter(); txnIdForCtas = ctd.getInitialMmWriteId(); loc = ctd.getLocation(); } else { loc = pCtx.getCreateViewDesc().getLocation(); } Path location = (loc == null) ? getDefaultCtasLocation(pCtx) : new Path(loc); if (txnIdForCtas != null) { dataSinkForCtas.setDirName(location); location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId)); lfd.setSourcePath(location); if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location); } } if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to " + location + "; moving from " + lfd.getSourcePath()); } lfd.setTargetDir(location); }
MoveTask mt = (MoveTask)ti.task; if (mt.getWork().getLoadFileWork() != null) { ti.path = mt.getWork().getLoadFileWork().getSourcePath().toUri().toString();
// Build the FetchWork from the source path of the first load-file descriptor,
// passing resultTab and outerQueryLimit through to the constructor.
fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
/**
 * Combines a Conditional input path and its linked MoveWork into a single MoveWork.
 * This is an optimization for BlobStore systems that avoids two unnecessary renames or copies.
 *
 * @param condInputPath  Path the ConditionalTask uses as input for its sub-tasks.
 * @param linkedMoveWork MoveWork the ConditionalTask uses to link to its sub-tasks.
 * @param lineageState   LineageState that is told about the replaced source directory.
 * @return A new MoveWork whose source is the Conditional input path and whose target is that of
 *         the linkedMoveWork.
 * @throws IllegalArgumentException if the linked MoveWork has neither a load-file nor a
 *         load-table work.
 */
@VisibleForTesting
protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMoveWork,
    LineageState lineageState) {
  final MoveWork merged = new MoveWork(linkedMoveWork);
  LoadFileDesc mergedFileDesc = null;
  LoadTableDesc mergedTableDesc = null;

  final LoadFileDesc linkedFileDesc = linkedMoveWork.getLoadFileWork();
  if (linkedFileDesc != null) {
    mergedFileDesc = new LoadFileDesc(linkedFileDesc);
    mergedFileDesc.setSourcePath(condInputPath);
    lineageState.updateDirToOpMap(condInputPath, linkedFileDesc.getSourcePath());
  } else {
    final LoadTableDesc linkedTableDesc = linkedMoveWork.getLoadTableWork();
    if (linkedTableDesc == null) {
      throw new IllegalArgumentException("Merging a path with a MoveWork with multi-files work is not allowed.");
    }
    mergedTableDesc = new LoadTableDesc(linkedTableDesc);
    mergedTableDesc.setSourcePath(condInputPath);
    lineageState.updateDirToOpMap(condInputPath, linkedTableDesc.getSourcePath());
  }

  // Exactly one of the two descriptors is non-null here; the other one is reset to null.
  merged.setLoadFileWork(mergedFileDesc);
  merged.setLoadTableWork(mergedTableDesc);
  return merged;
}
public static Task<MoveWork> findMoveTask( List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<MoveWork> mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); Path srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourcePath(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourcePath(); } if ((srcDir != null) && (srcDir.equals(fsOp.getConf().getFinalDirName()))) { return mvTsk; } } return null; }
if (lfd != null) { Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); if (targetPath.equals(sourcePath)) { Utilities.FILE_OP_LOGGER.debug("MoveTask not moving " + sourcePath);
/**
 * Asserts that the given task is a MoveTask whose load-file work moves
 * from {@code source} to {@code target}.
 *
 * @param task   task to check; cast to MoveTask
 * @param source expected source path of the load-file work
 * @param target expected target directory of the load-file work
 */
private void verifyMoveTask(Task<? extends Serializable> task, Path source, Path target) {
  final MoveTask moveTask = (MoveTask) task;

  final Path actualSource = moveTask.getWork().getLoadFileWork().getSourcePath();
  assertEquals(source, actualSource);

  final Path actualTarget = moveTask.getWork().getLoadFileWork().getTargetDir();
  assertEquals(target, actualTarget);
}
}
// Build the FetchWork from the source path of the first load-file descriptor,
// passing resultTab and outerQueryLimit through to the constructor.
fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
/**
 * Decides whether the Conditional input/output paths and a linked MoveWork should be collapsed
 * into a single MoveWork.
 * This is an optimization for BlobStore systems that avoids two unnecessary renames/copies.
 *
 * @param conf           HiveConf used to check whether BlobStorage optimizations are enabled.
 * @param condInputPath  Path the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath Path the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork MoveWork the ConditionalTask uses to link to its sub-tasks.
 * @return True only if optimizations are enabled, the linked MoveWork has exactly one of
 *         load-file/load-table work, its source equals the Conditional output path, and both the
 *         Conditional input path and the linked target are blob storage paths.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath, Path condOutputPath,
    MoveWork linkedMoveWork) {
  if (!(linkedMoveWork != null && BlobStorageUtils.areOptimizationsEnabled(conf))) {
    return false;
  }

  final boolean hasFileWork = linkedMoveWork.getLoadFileWork() != null;
  final boolean hasTableWork = linkedMoveWork.getLoadTableWork() != null;
  if (hasFileWork == hasTableWork) {
    // Either both kinds of work are present or neither is: not mergeable.
    return false;
  }

  final Path linkedSourcePath;
  final Path linkedTargetPath;
  if (hasFileWork) {
    linkedSourcePath = linkedMoveWork.getLoadFileWork().getSourcePath();
    linkedTargetPath = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else {
    linkedSourcePath = linkedMoveWork.getLoadTableWork().getSourcePath();
    linkedTargetPath = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  }

  if (!condOutputPath.equals(linkedSourcePath)) {
    return false;
  }
  if (!BlobStorageUtils.isBlobStoragePath(conf, condInputPath)) {
    return false;
  }
  return BlobStorageUtils.isBlobStoragePath(conf, linkedTargetPath);
}
@Test public void testMergePathValidMoveWorkReturnsNewMoveWork() { final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000"); final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002"); final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); final LineageState lineageState = new LineageState(); MoveWork newWork; // test using loadFileWork when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath()); assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir()); // test using loadTableWork TableDesc tableDesc = new TableDesc(); reset(mockWork); when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( condOutputPath, tableDesc, null)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath()); assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc)); }
// Create the FetchWork over the load-file descriptor's source path, then record on it
// whether the current session (per SessionState) is a HiveServer query.
FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); fetch.setHiveServerQuery(isHiveServerQuery);
if (lfd != null) { Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
/**
 * Decides whether the Conditional input/output paths and a linked MoveWork should be collapsed
 * into a single MoveWork.
 * This is an optimization for BlobStore systems that avoids two unnecessary renames/copies.
 *
 * @param conf           HiveConf used to check whether BlobStorage optimizations are enabled.
 * @param condInputPath  Path the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath Path the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork MoveWork the ConditionalTask uses to link to its sub-tasks.
 * @return True only if optimizations are enabled, the linked MoveWork has exactly one of
 *         load-file/load-table work, its source equals the Conditional output path, and both the
 *         Conditional input path and the linked target are blob storage paths.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath, Path condOutputPath,
    MoveWork linkedMoveWork) {
  if (!(linkedMoveWork != null && BlobStorageUtils.areOptimizationsEnabled(conf))) {
    return false;
  }

  final boolean hasFileWork = linkedMoveWork.getLoadFileWork() != null;
  final boolean hasTableWork = linkedMoveWork.getLoadTableWork() != null;
  if (hasFileWork == hasTableWork) {
    // Either both kinds of work are present or neither is: not mergeable.
    return false;
  }

  final Path linkedSourcePath;
  final Path linkedTargetPath;
  if (hasFileWork) {
    linkedSourcePath = linkedMoveWork.getLoadFileWork().getSourcePath();
    linkedTargetPath = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else {
    linkedSourcePath = linkedMoveWork.getLoadTableWork().getSourcePath();
    linkedTargetPath = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  }

  if (!condOutputPath.equals(linkedSourcePath)) {
    return false;
  }
  if (!BlobStorageUtils.isBlobStoragePath(conf, condInputPath)) {
    return false;
  }
  return BlobStorageUtils.isBlobStoragePath(conf, linkedTargetPath);
}
// Create the FetchWork over the load-file descriptor's source path, then record on it
// whether the current session (per SessionState) is a HiveServer query.
FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); fetch.setHiveServerQuery(isHiveServerQuery);
// Obtain the default query-output TableDesc for cols, colTypes and resFileFormat, then
// build the FetchWork from the source path of the first load-file descriptor.
TableDesc resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat); fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
public static Task<MoveWork> findMoveTask( List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) { // find the move task for (Task<MoveWork> mvTsk : mvTasks) { MoveWork mvWork = mvTsk.getWork(); Path srcDir = null; if (mvWork.getLoadFileWork() != null) { srcDir = mvWork.getLoadFileWork().getSourcePath(); } else if (mvWork.getLoadTableWork() != null) { srcDir = mvWork.getLoadTableWork().getSourcePath(); } if ((srcDir != null) && (srcDir.equals(fsOp.getConf().getFinalDirName()))) { return mvTsk; } } return null; }
// Create the FetchWork over the load-file descriptor's source path, then attach the
// fetch source and fetch sink obtained from pCtx.
FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); fetch.setSource(pCtx.getFetchSource()); fetch.setSink(pCtx.getFetchSink());