/**
 * Runs {@code initializeLogDir} once for every log directory currently
 * reported by {@code dirsHandler}.
 *
 * @param lfs file context passed through unchanged to
 *          {@code initializeLogDir}
 */
private void initializeLogDirs(FileContext lfs) {
  // The handler's list is fetched once and walked in its own order.
  for (String dir : dirsHandler.getLogDirs()) {
    initializeLogDir(lfs, dir);
  }
}
/**
 * Runs {@code initializeLogDir} once for every log directory currently
 * reported by {@code dirsHandler}.
 *
 * @param lfs file context passed through unchanged to
 *          {@code initializeLogDir}
 */
private void initializeLogDirs(FileContext lfs) {
  // The handler's list is fetched once and walked in its own order.
  for (String dir : dirsHandler.getLogDirs()) {
    initializeLogDir(lfs, dir);
  }
}
/**
 * Runs {@code initializeLogDir} once for every log directory currently
 * reported by {@code dirsHandler}.
 *
 * @param lfs file context passed through unchanged to
 *          {@code initializeLogDir}
 */
private void initializeLogDirs(FileContext lfs) {
  // The handler's list is fetched once and walked in its own order.
  for (String dir : dirsHandler.getLogDirs()) {
    initializeLogDir(lfs, dir);
  }
}
/**
 * Runs {@code initializeLogDir} once for every log directory currently
 * reported by {@code dirsHandler}.
 *
 * @param lfs file context passed through unchanged to
 *          {@code initializeLogDir}
 */
private void initializeLogDirs(FileContext lfs) {
  // The handler's list is fetched once and walked in its own order.
  for (String dir : dirsHandler.getLogDirs()) {
    initializeLogDir(lfs, dir);
  }
}
/**
 * Synchronized accessor for the log dirs. The list is read from
 * {@code dirsHandler} first, then {@code initializeLogDirs} is run with the
 * {@code lfs} file context, and finally the list read in the first step is
 * returned.
 *
 * @return the log dirs reported by {@code dirsHandler} before the
 *         initialization pass ran
 */
private synchronized List<String> getInitializedLogDirs() {
  // Snapshot first: the value handed back is the one observed before init.
  final List<String> reportedLogDirs = dirsHandler.getLogDirs();
  initializeLogDirs(lfs);
  return reportedLogDirs;
}
// NOTE(review): unmatched brace kept from the original line; presumably it
// closes the enclosing type, whose header is outside this view.
}
/**
 * Synchronized accessor for the log dirs. The list is read from
 * {@code dirsHandler} first, then {@code initializeLogDirs} is run with the
 * {@code lfs} file context, and finally the list read in the first step is
 * returned.
 *
 * @return the log dirs reported by {@code dirsHandler} before the
 *         initialization pass ran
 */
private synchronized List<String> getInitializedLogDirs() {
  // Snapshot first: the value handed back is the one observed before init.
  final List<String> reportedLogDirs = dirsHandler.getLogDirs();
  initializeLogDirs(lfs);
  return reportedLogDirs;
}
// NOTE(review): unmatched brace kept from the original line; presumably it
// closes the enclosing type, whose header is outside this view.
}
/**
 * Checks {@code path} against the current log dirs.
 *
 * @param path the path to look up
 * @return whatever {@code isInGoodDirs} reports for {@code path} when given
 *         the list returned by {@code getLogDirs()}
 */
public boolean isGoodLogDir(String path) {
  final List<String> currentLogDirs = getLogDirs();
  return isInGoodDirs(currentLogDirs, path);
}
/** * Set good local dirs and good log dirs in the configuration so that the * LocalDirAllocator objects will use this updated configuration only. */ private void updateDirsAfterTest() { Configuration conf = getConfig(); List<String> localDirs = getLocalDirs(); conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, localDirs.toArray(new String[localDirs.size()])); List<String> logDirs = getLogDirs(); conf.setStrings(YarnConfiguration.NM_LOG_DIRS, logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. LOG.error("Most of the disks failed. " + getDisksHealthReport(false)); } }
/** * Set good local dirs and good log dirs in the configuration so that the * LocalDirAllocator objects will use this updated configuration only. */ private void updateDirsAfterTest() { Configuration conf = getConfig(); List<String> localDirs = getLocalDirs(); conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, localDirs.toArray(new String[localDirs.size()])); List<String> logDirs = getLogDirs(); conf.setStrings(YarnConfiguration.NM_LOG_DIRS, logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. LOG.error("Most of the disks failed. " + getDisksHealthReport(false)); } }
/** * Set good local dirs and good log dirs in the configuration so that the * LocalDirAllocator objects will use this updated configuration only. */ private void updateDirsAfterTest() { Configuration conf = getConfig(); List<String> localDirs = getLocalDirs(); conf.setStrings(NM_GOOD_LOCAL_DIRS, localDirs.toArray(new String[localDirs.size()])); List<String> logDirs = getLogDirs(); conf.setStrings(NM_GOOD_LOG_DIRS, logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. LOG.error("Most of the disks failed. " + getDisksHealthReport(false)); } }
/** * Set good local dirs and good log dirs in the configuration so that the * LocalDirAllocator objects will use this updated configuration only. */ private void updateDirsAfterTest() { Configuration conf = getConfig(); List<String> localDirs = getLocalDirs(); conf.setStrings(NM_GOOD_LOCAL_DIRS, localDirs.toArray(new String[localDirs.size()])); List<String> logDirs = getLogDirs(); conf.setStrings(NM_GOOD_LOG_DIRS, logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. LOG.error("Most of the disks failed. " + getDisksHealthReport(false)); } }
/** * The minimum fraction of number of disks needed to be healthy for a node to * be considered healthy in terms of disks is configured using * {@link YarnConfiguration#NM_MIN_HEALTHY_DISKS_FRACTION}, with a default * value of {@link YarnConfiguration#DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION}. * @return <em>false</em> if either (a) more than the allowed percentage of * nm-local-dirs failed or (b) more than the allowed percentage of * nm-log-dirs failed. */ public boolean areDisksHealthy() { if (!isDiskHealthCheckerEnabled) { return true; } int goodDirs = getLocalDirs().size(); int failedDirs = localDirs.getFailedDirs().size(); int totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy local-dirs } goodDirs = getLogDirs().size(); failedDirs = logDirs.getFailedDirs().size(); totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy log-dirs } return true; }
/** * The minimum fraction of number of disks needed to be healthy for a node to * be considered healthy in terms of disks is configured using * {@link YarnConfiguration#NM_MIN_HEALTHY_DISKS_FRACTION}, with a default * value of {@link YarnConfiguration#DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION}. * @return <em>false</em> if either (a) more than the allowed percentage of * nm-local-dirs failed or (b) more than the allowed percentage of * nm-log-dirs failed. */ public boolean areDisksHealthy() { if (!isDiskHealthCheckerEnabled) { return true; } int goodDirs = getLocalDirs().size(); int failedDirs = localDirs.getFailedDirs().size(); int totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy local-dirs } goodDirs = getLogDirs().size(); failedDirs = logDirs.getFailedDirs().size(); totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy log-dirs } return true; }
/** * The minimum fraction of number of disks needed to be healthy for a node to * be considered healthy in terms of disks is configured using * {@link YarnConfiguration#NM_MIN_HEALTHY_DISKS_FRACTION}, with a default * value of {@link YarnConfiguration#DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION}. * @return <em>false</em> if either (a) more than the allowed percentage of * nm-local-dirs failed or (b) more than the allowed percentage of * nm-log-dirs failed. */ public boolean areDisksHealthy() { if (!isDiskHealthCheckerEnabled) { return true; } int goodDirs = getLocalDirs().size(); int failedDirs = localDirs.getFailedDirs().size(); int totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy local-dirs } goodDirs = getLogDirs().size(); failedDirs = logDirs.getFailedDirs().size(); totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy log-dirs } return true; }
/** * The minimum fraction of number of disks needed to be healthy for a node to * be considered healthy in terms of disks is configured using * {@link YarnConfiguration#NM_MIN_HEALTHY_DISKS_FRACTION}, with a default * value of {@link YarnConfiguration#DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION}. * @return <em>false</em> if either (a) more than the allowed percentage of * nm-local-dirs failed or (b) more than the allowed percentage of * nm-log-dirs failed. */ public boolean areDisksHealthy() { if (!isDiskHealthCheckerEnabled) { return true; } int goodDirs = getLocalDirs().size(); int failedDirs = localDirs.getFailedDirs().size(); int totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy local-dirs } goodDirs = getLogDirs().size(); failedDirs = logDirs.getFailedDirs().size(); totalConfiguredDirs = goodDirs + failedDirs; if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) { return false; // Not enough healthy log-dirs } return true; }
/**
 * Configures two local dirs and two log dirs, passes one of each to
 * {@code prepareDirToFail} before the service is initialized, and verifies
 * that after {@code init} only the other directory of each kind is reported.
 *
 * @throws IOException declared for the setup helpers used here
 */
@Test
public void testDirFailuresOnStartup() throws IOException {
  final Configuration conf = new YarnConfiguration();

  // Names reflect each directory's role in the scenario.
  final String failingLocalDir = new File(testDir, "localDir1").getPath();
  final String healthyLocalDir = new File(testDir, "localDir2").getPath();
  final String healthyLogDir = new File(testDir, "logDir1").getPath();
  final String failingLogDir = new File(testDir, "logDir2").getPath();

  conf.set(YarnConfiguration.NM_LOCAL_DIRS,
      failingLocalDir + "," + healthyLocalDir);
  conf.set(YarnConfiguration.NM_LOG_DIRS,
      healthyLogDir + "," + failingLogDir);

  prepareDirToFail(failingLocalDir);
  prepareDirToFail(failingLogDir);

  final LocalDirsHandlerService service = new LocalDirsHandlerService();
  service.init(conf);

  final List<String> reportedLocal = service.getLocalDirs();
  Assert.assertEquals(1, reportedLocal.size());
  Assert.assertEquals(new Path(healthyLocalDir).toString(),
      reportedLocal.get(0));

  final List<String> reportedLog = service.getLogDirs();
  Assert.assertEquals(1, reportedLog.size());
  Assert.assertEquals(new Path(healthyLogDir).toString(),
      reportedLog.get(0));
}
/**
 * Configures two local dirs and two log dirs, passes one of each to
 * {@code prepareDirToFail} before the service is initialized, and verifies
 * that after {@code init} only the other directory of each kind is reported.
 *
 * @throws IOException declared for the setup helpers used here
 */
@Test
public void testDirFailuresOnStartup() throws IOException {
  final Configuration conf = new YarnConfiguration();

  // Names reflect each directory's role in the scenario.
  final String failingLocalDir = new File(testDir, "localDir1").getPath();
  final String healthyLocalDir = new File(testDir, "localDir2").getPath();
  final String healthyLogDir = new File(testDir, "logDir1").getPath();
  final String failingLogDir = new File(testDir, "logDir2").getPath();

  conf.set(YarnConfiguration.NM_LOCAL_DIRS,
      failingLocalDir + "," + healthyLocalDir);
  conf.set(YarnConfiguration.NM_LOG_DIRS,
      healthyLogDir + "," + failingLogDir);

  prepareDirToFail(failingLocalDir);
  prepareDirToFail(failingLogDir);

  final LocalDirsHandlerService service = new LocalDirsHandlerService();
  service.init(conf);

  final List<String> reportedLocal = service.getLocalDirs();
  Assert.assertEquals(1, reportedLocal.size());
  Assert.assertEquals(new Path(healthyLocalDir).toString(),
      reportedLocal.get(0));

  final List<String> reportedLog = service.getLogDirs();
  Assert.assertEquals(1, reportedLog.size());
  Assert.assertEquals(new Path(healthyLogDir).toString(),
      reportedLog.get(0));
}
@Override public synchronized void startLocalizer(Path nmPrivateContainerTokensPath, InetSocketAddress nmAddr, String user, String appId, String locId, LocalDirsHandlerService dirsHandler) throws IOException, InterruptedException { List<String> localDirs = dirsHandler.getLocalDirs(); List<String> logDirs = dirsHandler.getLogDirs(); ContainerLocalizer localizer = new ContainerLocalizer(lfs, user, appId, locId, getPaths(localDirs), RecordFactoryProvider.getRecordFactory(getConf())); createUserLocalDirs(localDirs, user); createUserCacheDirs(localDirs, user); createAppDirs(localDirs, user, appId); createAppLogDirs(appId, logDirs, user); // randomly choose the local directory Path appStorageDir = getWorkingDir(localDirs, user, appId); String tokenFn = String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, locId); Path tokenDst = new Path(appStorageDir, tokenFn); copyFile(nmPrivateContainerTokensPath, tokenDst, user); LOG.info("Copying from " + nmPrivateContainerTokensPath + " to " + tokenDst); lfs.setWorkingDirectory(appStorageDir); LOG.info("CWD set to " + appStorageDir + " = " + lfs.getWorkingDirectory()); // TODO: DO it over RPC for maintaining similarity? localizer.runLocalization(nmAddr); }
@Override public synchronized void startLocalizer(Path nmPrivateContainerTokensPath, InetSocketAddress nmAddr, String user, String appId, String locId, LocalDirsHandlerService dirsHandler) throws IOException, InterruptedException { List<String> localDirs = dirsHandler.getLocalDirs(); List<String> logDirs = dirsHandler.getLogDirs(); ContainerLocalizer localizer = new ContainerLocalizer(lfs, user, appId, locId, getPaths(localDirs), RecordFactoryProvider.getRecordFactory(getConf())); createUserLocalDirs(localDirs, user); createUserCacheDirs(localDirs, user); createAppDirs(localDirs, user, appId); createAppLogDirs(appId, logDirs, user); // randomly choose the local directory Path appStorageDir = getWorkingDir(localDirs, user, appId); String tokenFn = String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, locId); Path tokenDst = new Path(appStorageDir, tokenFn); copyFile(nmPrivateContainerTokensPath, tokenDst, user); LOG.info("Copying from " + nmPrivateContainerTokensPath + " to " + tokenDst); lfs.setWorkingDirectory(appStorageDir); LOG.info("CWD set to " + appStorageDir + " = " + lfs.getWorkingDirectory()); // TODO: DO it over RPC for maintaining similarity? localizer.runLocalization(nmAddr); }
@Override public void startLocalizer(Path nmPrivateContainerTokensPath, InetSocketAddress nmAddr, String user, String appId, String locId, LocalDirsHandlerService dirsHandler) throws IOException, InterruptedException { List<String> localDirs = dirsHandler.getLocalDirs(); List<String> logDirs = dirsHandler.getLogDirs(); createUserLocalDirs(localDirs, user); createUserCacheDirs(localDirs, user); createAppDirs(localDirs, user, appId); createAppLogDirs(appId, logDirs, user); // randomly choose the local directory Path appStorageDir = getWorkingDir(localDirs, user, appId); String tokenFn = String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT, locId); Path tokenDst = new Path(appStorageDir, tokenFn); copyFile(nmPrivateContainerTokensPath, tokenDst, user); LOG.info("Copying from " + nmPrivateContainerTokensPath + " to " + tokenDst); FileContext localizerFc = FileContext.getFileContext( lfs.getDefaultFileSystem(), getConf()); localizerFc.setUMask(lfs.getUMask()); localizerFc.setWorkingDirectory(appStorageDir); LOG.info("Localizer CWD set to " + appStorageDir + " = " + localizerFc.getWorkingDirectory()); ContainerLocalizer localizer = new ContainerLocalizer(localizerFc, user, appId, locId, getPaths(localDirs), RecordFactoryProvider.getRecordFactory(getConf())); // TODO: DO it over RPC for maintaining similarity? localizer.runLocalization(nmAddr); }