/**
 * Builds a mocked {@link MoveTask} whose {@link MoveWork} describes a file move
 * from {@code source} to {@code destination}.
 *
 * @param source      path the move reads from
 * @param destination path the move writes to
 * @return a Mockito mock whose {@code getWork()} yields the configured work
 */
private Task<MoveWork> createMoveTask(Path source, Path destination) {
  // Configure the work first, then stub the task to hand it back.
  final MoveWork work = new MoveWork();
  work.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null, false));
  final Task<MoveWork> task = mock(MoveTask.class);
  when(task.getWork()).thenReturn(work);
  return task;
}
// NOTE(review): partial excerpt — the tokens "targetDirs, lfd.getIsDfsDir(), ..." are the
// tail of a constructor/method call whose opening line is outside this view; this line does
// not compile as-is. Appears to collect target dirs and then clear both kinds of work on
// mvWork — TODO confirm against the full method.
Path targetDir = lfd.getTargetDir(); List<Path> targetDirs = new ArrayList<Path>(toMove.size()); targetDirs, lfd.getIsDfsDir(), lfd.getColumns(), lfd.getColumnTypes()); mvWork.setLoadFileWork(null); mvWork.setLoadTableWork(null);
// NOTE(review): partial excerpt — builds a FetchWork from the first LoadFileDesc's source
// path, columns, and column types; `resultTab` and `outerQueryLimit` are defined outside
// this view.
boolean isTblLevel = analyzeRewrite.isTblLvl(); String cols = loadFileWork.get(0).getColumns(); String colTypes = loadFileWork.get(0).getColumnTypes(); fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
/**
 * Copy constructor: duplicates {@code o}'s source path, write type, and all
 * file-load attributes.
 * NOTE(review): this is a shallow copy — targetDir, the descriptor objects
 * (ctasCreateTableDesc, createViewDesc), and the column strings are shared
 * with {@code o}; confirm callers do not mutate them after copying.
 */
public LoadFileDesc(final LoadFileDesc o) {
  super(o.getSourcePath(), o.getWriteType());
  this.targetDir = o.targetDir;
  this.isDfsDir = o.isDfsDir;
  this.columns = o.columns;
  this.columnTypes = o.columnTypes;
  this.isMmCtas = o.isMmCtas;
  this.ctasCreateTableDesc = o.ctasCreateTableDesc;
  this.createViewDesc = o.createViewDesc;
}
/**
 * Points a CTAS / CREATE-VIEW LoadFileDesc at the final table or view location.
 * Resolves the destination from the create-table/view descriptor (or derives a
 * default CTAS location when none is set), and for MM (insert-only ACID) CTAS
 * additionally redirects the source path into the delta subdirectory.
 *
 * @param pCtx parse context carrying query properties and create descriptors
 * @param lfd  the load-file descriptor to retarget (mutated in place)
 * @throws SemanticException propagated from default-location resolution
 */
private void setLoadFileLocation( final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException {
  // CTAS; make the movetask's destination directory the table's destination.
  Long txnIdForCtas = null;
  int stmtId = 0; // CTAS cannot be part of multi-txn stmt
  FileSinkDesc dataSinkForCtas = null;
  String loc = null;
  if (pCtx.getQueryProperties().isCTAS()) {
    CreateTableDesc ctd = pCtx.getCreateTable();
    // getAndUnsetWriter(): the sink is consumed here so it is not reused elsewhere.
    dataSinkForCtas = ctd.getAndUnsetWriter();
    txnIdForCtas = ctd.getInitialMmWriteId();
    loc = ctd.getLocation();
  } else {
    loc = pCtx.getCreateViewDesc().getLocation();
  }
  // No explicit location => fall back to the default CTAS location.
  Path location = (loc == null) ? getDefaultCtasLocation(pCtx) : new Path(loc);
  if (txnIdForCtas != null) {
    // MM CTAS: the sink writes to the table dir, but the move reads from the
    // delta subdirectory named by the initial MM write id.
    dataSinkForCtas.setDirName(location);
    location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId));
    lfd.setSourcePath(location);
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
      Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location);
    }
  }
  if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
    Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to " + location + "; moving from " + lfd.getSourcePath());
  }
  lfd.setTargetDir(location);
}
// NOTE(review): partial excerpt — the first half builds a FetchWork and flags HiveServer
// queries; the trailing "location = new Path(loc); lfd.setTargetDir(location);" appears to
// come from a different method in the original file. Do not read as one contiguous flow.
String cols = loadFileDesc.getColumns(); String colTypes = loadFileDesc.getColumnTypes(); FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); fetch.setHiveServerQuery(isHiveServerQuery); location = new Path(loc); lfd.setTargetDir(location);
// NOTE(review): partial excerpt — moves lfd's source to its target, then (presumably from
// a different method) extracts a MoveTask's load-file source path as a URI string; the
// `task` and `path` variables are declared outside this view.
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
/**
 * Asserts that {@code task} is a {@link MoveTask} whose load-file work moves
 * {@code source} to {@code target}.
 */
private void verifyMoveTask(Task<? extends Serializable> task, Path source, Path target) {
  final LoadFileDesc lfd = ((MoveTask) task).getWork().getLoadFileWork();
  assertEquals(source, lfd.getSourcePath());
  assertEquals(target, lfd.getTargetDir());
}
}
// NOTE(review): partial excerpt — several braces opened here are closed outside this view;
// this does not compile as-is. Logic shown: skip the move when source equals target;
// otherwise, for ACID INSERT writes, glob the source files before moving (continuation
// truncated), and finally perform the move.
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); if (targetPath.equals(sourcePath)) { Utilities.FILE_OP_LOGGER.debug("MoveTask not moving " + sourcePath); } else { Utilities.FILE_OP_LOGGER.debug("MoveTask moving " + sourcePath + " to " + targetPath); if(lfd.getWriteType() == AcidUtils.Operation.INSERT) { assert lfd.getIsDfsDir(); FileSystem srcFs = sourcePath.getFileSystem(conf); FileStatus[] srcs = srcFs.globStatus(sourcePath); moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
/**
 * Verifies that mergeMovePaths produces a NEW MoveWork whose source is the
 * conditional input path and whose destination is taken from the linked work,
 * for both the load-file and load-table flavors.
 */
@Test public void testMergePathValidMoveWorkReturnsNewMoveWork() {
  final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
  final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
  final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
  final MoveWork mockWork = mock(MoveWork.class);
  final LineageState lineageState = new LineageState();
  MoveWork newWork;
  // test using loadFileWork
  when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, targetMoveWorkPath, false, "", "", false));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  // A distinct work object must be returned, with the source rewritten to the
  // conditional input path and the original target preserved.
  assertNotNull(newWork);
  assertNotEquals(newWork, mockWork);
  assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath());
  assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir());
  // test using loadTableWork
  TableDesc tableDesc = new TableDesc();
  // reset() clears the loadFileWork stub so only loadTableWork is present below.
  reset(mockWork);
  when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( condOutputPath, tableDesc, null));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  assertNotNull(newWork);
  assertNotEquals(newWork, mockWork);
  assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath());
  assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc));
}
// NOTE(review): partial excerpt — the throw belongs to a conditional outside this view, and
// "lfd.setTargetDir(location);" appears to come from a different method. Uses the older
// String-based getSourceDir()/FetchWork(String, ...) API.
throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg()); String cols = loadFileWork.get(0).getColumns(); String colTypes = loadFileWork.get(0).getColumnTypes(); fetch = new FetchWork(new Path(loadFileWork.get(0).getSourceDir()).toString(), resultTab, qb.getParseInfo().getOuterQueryLimit()); lfd.setTargetDir(location);
/**
 * Merges the given Conditional input path and the linked MoveWork into one only MoveWork.
 * This is an optimization for BlobStore systems to avoid doing two renames or copies
 * that are not necessary.
 *
 * @param condInputPath  A path that the ConditionalTask uses as input for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @param lineageState   A LineageState used to track what changes.
 * @return A new MoveWork that has the Conditional input path as source and the
 *         linkedMoveWork as target.
 * @throws IllegalArgumentException if the linked work carries neither (or both kinds of)
 *         file/table work.
 */
@VisibleForTesting
protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMoveWork,
    LineageState lineageState) {
  final MoveWork merged = new MoveWork(linkedMoveWork);
  LoadFileDesc newFileWork = null;
  LoadTableDesc newTableWork = null;

  if (linkedMoveWork.getLoadFileWork() != null) {
    // Clone the file work and redirect its source to the conditional input.
    newFileWork = new LoadFileDesc(linkedMoveWork.getLoadFileWork());
    newFileWork.setSourcePath(condInputPath);
    lineageState.updateDirToOpMap(condInputPath, linkedMoveWork.getLoadFileWork().getSourcePath());
  } else if (linkedMoveWork.getLoadTableWork() != null) {
    // Same treatment for table work.
    newTableWork = new LoadTableDesc(linkedMoveWork.getLoadTableWork());
    newTableWork.setSourcePath(condInputPath);
    lineageState.updateDirToOpMap(condInputPath, linkedMoveWork.getLoadTableWork().getSourcePath());
  } else {
    throw new IllegalArgumentException("Merging a path with a MoveWork with multi-files work is not allowed.");
  }

  // Exactly one of the two is non-null; the other is cleared on the merged work.
  merged.setLoadFileWork(newFileWork);
  merged.setLoadTableWork(newTableWork);
  return merged;
}
// NOTE(review): partial excerpt — older API variant where getTargetDir()/getSourceDir()
// return Strings that must be wrapped in Path before moving.
Path targetPath = new Path(lfd.getTargetDir()); Path sourcePath = new Path(lfd.getSourceDir()); moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
/**
 * Copy constructor (older variant: the base class takes only the source path).
 * NOTE(review): shallow copy — targetDir, columns, and destinationCreateTable
 * are shared with {@code o}; confirm callers do not mutate them afterwards.
 */
public LoadFileDesc(final LoadFileDesc o) {
  super(o.getSourcePath());
  this.targetDir = o.targetDir;
  this.isDfsDir = o.isDfsDir;
  this.columns = o.columns;
  this.columnTypes = o.columnTypes;
  this.destinationCreateTable = o.destinationCreateTable;
}
/**
 * Returns whether this move operates purely on the local file system.
 *
 * A table load is never local. A file load is local exactly when its
 * destination is not a DFS directory. With neither kind of work, the
 * move is not local.
 *
 * @return true only for a load-file work targeting a non-DFS directory
 */
public boolean isLocal() {
  // Loading into a table always goes through the warehouse file system.
  if (work.getLoadTableWork() != null) {
    return false;
  }
  LoadFileDesc lfd = work.getLoadFileWork();
  // Collapses the original if/else-return-true/false ladder: local iff a
  // file load exists and its target is not a DFS dir.
  return lfd != null && !lfd.getIsDfsDir();
}
/**
 * Merges the given Conditional input path and the linked MoveWork into one only MoveWork.
 * This is an optimization for BlobStore systems to avoid doing two renames or copies
 * that are not necessary.
 *
 * @param condInputPath  A path that the ConditionalTask uses as input for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return A new MoveWork that has the Conditional input path as source and the
 *         linkedMoveWork as target.
 * @throws IllegalArgumentException if the linked work carries neither file nor table work.
 */
@VisibleForTesting
protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMoveWork) {
  final MoveWork merged = new MoveWork(linkedMoveWork);
  LoadFileDesc newFileWork = null;
  LoadTableDesc newTableWork = null;

  if (linkedMoveWork.getLoadFileWork() != null) {
    // Clone the file work and redirect its source to the conditional input.
    newFileWork = new LoadFileDesc(linkedMoveWork.getLoadFileWork());
    newFileWork.setSourcePath(condInputPath);
  } else if (linkedMoveWork.getLoadTableWork() != null) {
    // Same treatment for table work.
    newTableWork = new LoadTableDesc(linkedMoveWork.getLoadTableWork());
    newTableWork.setSourcePath(condInputPath);
  } else {
    throw new IllegalArgumentException("Merging a path with a MoveWork with multi-files work is not allowed.");
  }

  // Exactly one of the two is non-null; the other is cleared on the merged work.
  merged.setLoadFileWork(newFileWork);
  merged.setLoadTableWork(newTableWork);
  return merged;
}
// NOTE(review): partial excerpt — the if/else this belongs to opens and closes outside this
// view. Picks the table name from table specs when present, otherwise from the load-file
// descriptor's destination-create-table name.
tableName = work.getTableSpecs().tableName; } else { tableName = work.getLoadFileDesc().getDestinationCreateTable();
// NOTE(review): partial excerpt — builds a FetchWork and wires the fetch source/sink from
// the parse context; the trailing "location = new Path(loc); lfd.setTargetDir(location);"
// appears to come from a different method in the original file.
String cols = loadFileDesc.getColumns(); String colTypes = loadFileDesc.getColumnTypes(); FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); fetch.setSource(pCtx.getFetchSource()); fetch.setSink(pCtx.getFetchSink()); location = new Path(loc); lfd.setTargetDir(location);
// NOTE(review): partial excerpt (duplicate of an earlier fragment) — moves lfd's source to
// its target, then extracts a MoveTask's load-file source path as a URI string; `task` and
// `path` are declared outside this view.
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir()); path = ((MoveTask)task).getWork().getLoadFileWork().getSourcePath().toUri().toString();
/**
 * Checks whether the given input/output paths and a linked MoveWork should be merged into
 * one only MoveWork. This is an optimization for BlobStore systems to avoid doing two
 * renames/copies that are not necessary.
 *
 * @param conf           A HiveConf object to check if BlobStorage optimizations are enabled.
 * @param condInputPath  A path that the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath A path that the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return True if both Conditional input/output paths and the linked MoveWork should be merged.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath,
    Path condOutputPath, MoveWork linkedMoveWork) {
  // Guard: nothing to merge, or blob-store optimizations turned off.
  if (linkedMoveWork == null || !BlobStorageUtils.areOptimizationsEnabled(conf)) {
    return false;
  }

  final Path linkedSourcePath;
  final Path linkedTargetPath;
  if (linkedMoveWork.getLoadFileWork() != null && linkedMoveWork.getLoadTableWork() == null) {
    linkedSourcePath = linkedMoveWork.getLoadFileWork().getSourcePath();
    linkedTargetPath = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else if (linkedMoveWork.getLoadTableWork() != null && linkedMoveWork.getLoadFileWork() == null) {
    linkedSourcePath = linkedMoveWork.getLoadTableWork().getSourcePath();
    linkedTargetPath = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  } else {
    // Both or neither kind of work present: cannot merge safely.
    return false;
  }

  // Merge only when the linked work consumes the conditional output, and both the
  // conditional input and the final target live on blob storage.
  return condOutputPath.equals(linkedSourcePath)
      && BlobStorageUtils.isBlobStoragePath(conf, condInputPath)
      && BlobStorageUtils.isBlobStoragePath(conf, linkedTargetPath);
}