/**
 * Returns the temporary directory to use for the output of a final job.
 *
 * <p>This method delegates unconditionally to {@code getExtTmpPathRelTo(path)}; no
 * filesystem-specific branching (object store vs. HDFS vs. local) is performed here.
 *
 * @param path Path the temporary directory is resolved relative to
 * @return the path produced by {@code getExtTmpPathRelTo} for {@code path}
 */
public Path getTempDirForFinalJobPath(Path path) {
  final Path stagingDir = getExtTmpPathRelTo(path);
  return stagingDir;
}
/** * Get a path to store tmp data destined for external Path. * * @param path external Path to which the tmp data has to be eventually moved * @return next available tmp path on the file system corresponding extURI */ public Path getExternalTmpPath(Path path) { URI extURI = path.toUri(); if ("viewfs".equals(extURI.getScheme())) { // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/.. // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir // on same namespace as tbl dir. return getExtTmpPathRelTo(path.getParent()); } return new Path(getExternalScratchDir(extURI), EXT_PREFIX + nextPathId()); }
/** * Get a path to store tmp data destined for external Path. * * @param path external Path to which the tmp data has to be eventually moved * @return next available tmp path on the file system corresponding extURI */ public Path getExternalTmpPath(Path path) { URI extURI = path.toUri(); if (extURI.getScheme().equals("viewfs")) { // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/.. // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir // on same namespace as tbl dir. return getExtTmpPathRelTo(path.getParent()); } return new Path(getExternalScratchDir(extURI), EXT_PREFIX + nextPathId()); }
/** * Create a temporary directory depending of the path specified. * - If path is an Object store filesystem, then use the default MR scratch directory (HDFS), unless isFinalJob and * {@link BlobStorageUtils#areOptimizationsEnabled(Configuration)} are both true, then return a path on * the blobstore. * - If path is on HDFS, then create a staging directory inside the path * * @param path Path used to verify the Filesystem to use for temporary directory * * @return A path to the new temporary directory */ public Path getTempDirForInterimJobPath(Path path) { // For better write performance, we use HDFS for temporary data when object store is used. // Note that the scratch directory configuration variable must use HDFS or any other // non-blobstorage system to take advantage of this performance. boolean isBlobstorageOptimized = BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(conf) && BlobStorageUtils.areOptimizationsEnabled(conf); if (isPathLocal(path) || isBlobstorageOptimized) { return getMRTmpPath(); } return getExtTmpPathRelTo(path); }
/** * Create a temporary directory depending of the path specified. * - If path is an Object store filesystem, then use the default MR scratch directory (HDFS), unless isFinalJob and * {@link BlobStorageUtils#areOptimizationsEnabled(Configuration)} are both true, then return a path on * the blobstore. * - If path is on HDFS, then create a staging directory inside the path * * @param path Path used to verify the Filesystem to use for temporary directory * @param isFinalJob true if the required {@link Path} will be used for the final job (e.g. the final FSOP) * * @return A path to the new temporary directory */ public Path getTempDirForPath(Path path, boolean isFinalJob) { if (((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir( conf)) || isPathLocal(path))) { if (!(isFinalJob && BlobStorageUtils.areOptimizationsEnabled(conf))) { // For better write performance, we use HDFS for temporary data when object store is used. // Note that the scratch directory configuration variable must use HDFS or any other non-blobstorage system // to take advantage of this performance. return getMRTmpPath(); } } return getExtTmpPathRelTo(path); }
@Test public void testGetScratchDirectoriesForPaths() throws IOException { Context spyContext = spy(context); // When Object store paths are used, then getMRTmpPatch() is called to get a temporary // directory on the default scratch diretory location (usually /temp) Path mrTmpPath = new Path("hdfs://hostname/tmp/scratch"); doReturn(mrTmpPath).when(spyContext).getMRTmpPath(); assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir"))); // When local filesystem paths are used, then getMRTmpPatch() should be called to // get a temporary directory assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:/user"))); assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user"))); // When Non-Object store paths are used, then getExtTmpPathRelTo is called to get a temporary // directory on the same path passed as a parameter Path tmpPathRelTo = new Path("hdfs://hostname/user"); doReturn(tmpPathRelTo).when(spyContext).getExtTmpPathRelTo(any(Path.class)); assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("/user"))); conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, false); assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir"))); assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user"))); conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, true); } }
fname = FileUtils.makeQualified(location, conf).toString(); } else { fname = ctx.getExtTmpPathRelTo( FileUtils.makeQualified(location, conf)).toString();
fname = ctx.getExtTmpPathRelTo( FileUtils.makeQualified(location, conf)).toString(); } catch (Exception e) {
/** * Get a path to store tmp data destined for external Path. * * @param path external Path to which the tmp data has to be eventually moved * @return next available tmp path on the file system corresponding extURI */ public Path getExternalTmpPath(Path path) { URI extURI = path.toUri(); if (extURI.getScheme().equals("viewfs")) { // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/.. // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir // on same namespace as tbl dir. return getExtTmpPathRelTo(path.getParent()); } return new Path(getExternalScratchDir(extURI), EXT_PREFIX + nextPathId()); }
} else { if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { String statsTmpLoc = ctx.getExtTmpPathRelTo(tab.getPath()).toString(); LOG.info("Set stats collection dir : " + statsTmpLoc); conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
queryTmpdir = dest_path; } else { queryTmpdir = ctx.getExtTmpPathRelTo(dest_path); queryTmpdir = ctx.getExtTmpPathRelTo(qPath); } catch (Exception e) { throw new SemanticException("Error creating temporary folder on: " String statsTmpLoc = ctx.getExtTmpPathRelTo(queryTmpdir).toString(); LOG.info("Set stats collection dir : " + statsTmpLoc); conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
fname = ctx.getExtTmpPathRelTo( FileUtils.makeQualified(location, conf)).toString(); } catch (Exception e) {