/**
 * Builds the combined list of file work units for the given files.
 *
 * <p>One {@code BlockMapReader} is created per file status and all of them are executed through
 * {@code TimedRunnable.run} with a parallelism of 16. Each reader yields a list of work units;
 * those lists are concatenated, in the order returned by the run, into one flat list.
 *
 * @param files    the file statuses to create readers for
 * @param blockify flag handed unchanged to every {@code BlockMapReader}
 * @return a single list holding every work unit produced by every reader
 * @throws IOException declared by the method signature (propagated from {@code TimedRunnable.run})
 */
public List<CompleteFileWork> generateFileWork(List<FileStatus> files, boolean blockify) throws IOException {
  final List<TimedRunnable<List<CompleteFileWork>>> blockMapTasks = Lists.newArrayList();
  for (FileStatus fileStatus : files) {
    final BlockMapReader reader = new BlockMapReader(fileStatus, blockify);
    blockMapTasks.add(reader);
  }

  final List<List<CompleteFileWork>> perFileWork =
      TimedRunnable.run("Get block maps", logger, blockMapTasks, 16);

  // Flatten the per-reader results into one list.
  final List<CompleteFileWork> flattened = Lists.newArrayList();
  for (List<CompleteFileWork> fileWork : perFileWork) {
    flattened.addAll(fileWork);
  }
  return flattened;
}
/**
 * Collects file metadata for a list of parquet files.
 *
 * <p>A {@code MetadataGatherer} is created for each file status and the gatherers are executed
 * through {@code TimedRunnable.run} with a parallelism of 16. The results are copied into a
 * freshly allocated list before being returned.
 *
 * @param fileStatuses the statuses of the files to gather metadata for
 * @return a new list containing the metadata objects returned by the run
 * @throws IOException declared by the method signature (propagated from {@code TimedRunnable.run})
 */
private List<ParquetFileMetadata> getParquetFileMetadata(List<FileStatus> fileStatuses) throws IOException {
  final List<TimedRunnable<ParquetFileMetadata>> metadataTasks = Lists.newArrayList();
  for (FileStatus status : fileStatuses) {
    metadataTasks.add(new MetadataGatherer(status));
  }

  final List<ParquetFileMetadata> gathered =
      TimedRunnable.run("Fetch parquet metadata", logger, metadataTasks, 16);

  // Hand back an independent copy rather than the list produced by the run.
  return Lists.newArrayList(gathered);
}
/**
 * Runs one {@code FsTask} of type {@code TaskType.FS_PERMISSION} per partition update key and
 * reports whether every task returned {@code true}.
 *
 * <p>The tasks are executed through {@code TimedRunnable.run} with a parallelism of 16 and the
 * elapsed time is logged at debug level.
 *
 * @param user       user name handed to each {@code FsTask}
 * @param key        dataset key, used only in the task description and the log message
 * @param updateKeys partition update keys; one task is created per key
 * @param tableXattr table attributes handed to each {@code FsTask}
 * @return {@code false} as soon as a task result is {@code false}; {@code true} otherwise
 *         (including when there are no update keys)
 * @throws UserException (data read error) wrapping any {@link IOException} raised by the run
 */
private boolean hasFSPermission(String user, NamespaceKey key, List<FileSystemPartitionUpdateKey> updateKeys,
    HiveTableXattr tableXattr) {
  final List<TimedRunnable<Boolean>> checks = Lists.newArrayList();
  for (FileSystemPartitionUpdateKey partitionKey : updateKeys) {
    checks.add(new FsTask(user, partitionKey, tableXattr, TaskType.FS_PERMISSION));
  }

  final List<Boolean> results;
  try {
    final Stopwatch timer = Stopwatch.createStarted();
    results = TimedRunnable.run("check access permission for " + key, logger, checks, 16);
    timer.stop();
    logger.debug("Checking access permission for {} took {} ms", key, timer.elapsed(TimeUnit.MILLISECONDS));
  } catch (IOException ioe) {
    throw UserException.dataReadError(ioe).build(logger);
  }

  // A single negative result denies access.
  for (Boolean granted : results) {
    if (!granted) {
      return false;
    }
  }
  return true;
}
@Override public boolean hasAccessPermission(String user, NamespaceKey key, DatasetConfig datasetConfig) { if (config.isImpersonationEnabled()) { if (datasetConfig.getReadDefinition() != null) { // allow accessing partial datasets final FileSystemWrapper userFs = getFS(user); final List<TimedRunnable<Boolean>> permissionCheckTasks = Lists.newArrayList(); permissionCheckTasks.addAll(getUpdateKeyPermissionTasks(datasetConfig, userFs)); permissionCheckTasks.addAll(getSplitPermissiomTasks(datasetConfig, userFs, user)); try { Stopwatch stopwatch = Stopwatch.createStarted(); final List<Boolean> accessPermissions = TimedRunnable.run("check access permission for " + key, logger, permissionCheckTasks, 16); stopwatch.stop(); logger.debug("Checking access permission for {} took {} ms", key, stopwatch.elapsed(TimeUnit.MILLISECONDS)); for (Boolean permission : accessPermissions) { if (!permission) { return false; } } } catch (IOException ioe) { throw UserException.dataReadError(ioe).build(logger); } } } return true; }
/**
 * Submits 100 {@code TestTask(2000)} instances with a parallelism of 16 and expects
 * {@code TimedRunnable.run} to return without throwing.
 */
@Test
public void withoutAnyTasksTriggeringTimeout() throws Exception {
  final int taskCount = 100;
  final List<TimedRunnable<Void>> shortTasks = Lists.newArrayList();
  for (int remaining = taskCount; remaining > 0; remaining--) {
    shortTasks.add(new TestTask(2000));
  }

  TimedRunnable.run("Execution without triggering timeout", logger, shortTasks, 16);
}
/**
 * Submits 150000 {@code TestTask(0)} instances with a parallelism of 16 and expects
 * {@code TimedRunnable.run} to return without throwing.
 */
@Test
public void withManyTasks() throws Exception {
  final int taskCount = 150000;
  final List<TimedRunnable<Void>> manyTasks = Lists.newArrayList();
  for (int remaining = taskCount; remaining > 0; remaining--) {
    manyTasks.add(new TestTask(0));
  }

  TimedRunnable.run("Execution with lots of tasks", logger, manyTasks, 16);
}
}
if(runnables.size() == 1){ parallelism = 1; runnables.get(0).run(); }else{ parallelism = Math.min(parallelism, runnables.size());
/**
 * Submits 100 tasks with a parallelism of 16 and expects {@code TimedRunnable.run} to fail with
 * a {@code UserException} whose message reports the incomplete tasks.
 *
 * <p>Tasks at indices where {@code (i & (i + 1)) == 0} (0, 1, 3, 7, 15, 31, 63) are created as
 * {@code TestTask(2000)}; all others as {@code TestTask(20000)}.
 */
@Test
public void withTasksExceedingTimeout() throws Exception {
  UserException caught = null;
  try {
    final List<TimedRunnable<Void>> mixedTasks = Lists.newArrayList();
    for (int index = 0; index < 100; index++) {
      final boolean shortTask = (index & (index + 1)) == 0;
      mixedTasks.add(shortTask ? new TestTask(2000) : new TestTask(20000));
    }

    TimedRunnable.run("Execution with some tasks triggering timeout", logger, mixedTasks, 16);
  } catch (UserException e) {
    caught = e;
  }

  assertNotNull("Expected a UserException", caught);
  final String expectedFragment =
      "Waited for 93750ms, but tasks for 'Execution with some tasks triggering timeout' are not "
          + "complete. Total runnable size 100, parallelism 16.";
  assertThat(caught.getMessage(), containsString(expectedFragment));
}
final List<Boolean> validations = TimedRunnable.run("check read signature for " + key, logger, signatureValidators, 16); stopwatch.stop(); logger.debug("Checking read signature for {} took {} ms", key, stopwatch.elapsed(TimeUnit.MILLISECONDS));
final List<HiveSplitWork> hiveSplitWorks = TimedRunnable.run("Get splits for hive table " + tableName, logger, splitsGenerators, HIVE_SPLITS_GENERATOR_PARALLELISM); for (HiveSplitWork splitWork : hiveSplitWorks) { splits.addAll(splitWork.getSplits());