/**
 * Gets the local temporary file copy of a distributed cache file.
 *
 * @param name distributed cache file name
 * @return local temporary file copy of a distributed cache file
 */
public File getCachedFile(String name) {
    return context.getDistributedCache().getFile(name);
}
/**
 * Registers a file at the distributed cache under the given name. The file will be accessible
 * from any user-defined function in the (distributed) runtime under a local path. Files
 * may be local files (which will be distributed via BlobServer), or files in a distributed file system.
 * The runtime will copy the files temporarily to a local cache, if needed.
 *
 * <p>The {@link org.apache.flink.api.common.functions.RuntimeContext} can be obtained inside UDFs via
 * {@link org.apache.flink.api.common.functions.RichFunction#getRuntimeContext()} and provides access to
 * {@link org.apache.flink.api.common.cache.DistributedCache} via
 * {@link org.apache.flink.api.common.functions.RuntimeContext#getDistributedCache()}.
 *
 * @param filePath The path of the file, as a URI (e.g. "file:///some/path" or "hdfs://host:port/and/path")
 * @param name The name under which the file is registered
 * @param executable flag indicating whether the file should be executable
 */
public void registerCachedFile(String filePath, String name, boolean executable) {
    this.cacheFile.add(new Tuple2<>(name, new DistributedCache.DistributedCacheEntry(filePath, executable)));
}
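For context, a minimal usage sketch of the register/retrieve cycle described above. The class name LookupMapper, the registered name "lookup", and the file URI are illustrative assumptions, not part of the API:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

// Hypothetical UDF; on the driver side one would have called, e.g.:
//   env.registerCachedFile("hdfs://host:port/path/to/lookup.txt", "lookup", false);
public class LookupMapper extends RichMapFunction<String, String> {
    private final Set<String> keys = new HashSet<>();

    @Override
    public void open(Configuration parameters) throws Exception {
        // The registered name resolves to the local temporary copy at runtime.
        File lookup = getRuntimeContext().getDistributedCache().getFile("lookup");
        try (BufferedReader reader = new BufferedReader(new FileReader(lookup))) {
            String line;
            while ((line = reader.readLine()) != null) {
                keys.add(line);
            }
        }
    }

    @Override
    public String map(String value) {
        return keys.contains(value) ? value : "unknown";
    }
}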
public AbstractRuntimeUDFContext(
        TaskInfo taskInfo,
        ClassLoader userCodeClassLoader,
        ExecutionConfig executionConfig,
        Map<String, Accumulator<?, ?>> accumulators,
        Map<String, Future<Path>> cpTasks,
        MetricGroup metrics) {
    this.taskInfo = checkNotNull(taskInfo);
    this.userCodeClassLoader = userCodeClassLoader;
    this.executionConfig = executionConfig;
    this.distributedCache = new DistributedCache(checkNotNull(cpTasks));
    this.accumulators = checkNotNull(accumulators);
    this.metrics = metrics;
}
public void writeUserArtifactEntriesToConfiguration() {
    for (Map.Entry<String, DistributedCache.DistributedCacheEntry> userArtifact : userArtifacts.entrySet()) {
        DistributedCache.writeFileInfoToConfig(
                userArtifact.getKey(),
                userArtifact.getValue(),
                jobConfiguration);
    }
}
@Override
public String map(Integer value) throws Exception {
    final Path testFile = getRuntimeContext().getDistributedCache().getFile("test_data").toPath();
    final Path testDir = getRuntimeContext().getDistributedCache().getFile("test_dir").toPath();

    if (testFile.toAbsolutePath().toString().equals(initialPath)) {
        throw new RuntimeException(String.format(
                "Operator should access copy from cache rather than the initial file. "
                        + "Input file path: %s. Cache file path: %s", initialPath, testFile));
    }

    long testFileSize = Files.size(testFile);
    if (testFileSize != fileSize) {
        throw new RuntimeException(String.format(
                "File size does not match. Expected: %s Actual: %s", fileSize, testFileSize));
    }

    if (testDir.toAbsolutePath().toString().equals(initialDirPath)) {
        throw new RuntimeException(String.format(
                "Operator should access copy from cache rather than the initial dir. "
                        + "Input dir path: %s. Cache dir path: %s", initialDirPath, testDir));
    }

    try (Stream<Path> files = Files.list(testDir)) {
        if (files.map(Path::getFileName).map(Path::toString).noneMatch(path -> path.equals(containedFileName))) {
            throw new RuntimeException(String.format("Cached directory %s should not be empty.", testDir));
        }
    }

    return Files.readAllLines(testFile)
            .stream()
            .collect(Collectors.joining("\n"));
}
public static Set<Entry<String, DistributedCacheEntry>> readFileInfoFromConfig(Configuration conf) {
    int num = conf.getInteger(CACHE_FILE_NUM, 0);
    if (num == 0) {
        return Collections.emptySet();
    }

    Map<String, DistributedCacheEntry> cacheFiles = new HashMap<>();
    for (int i = 1; i <= num; i++) {
        String name = conf.getString(CACHE_FILE_NAME + i, null);
        String filePath = conf.getString(CACHE_FILE_PATH + i, null);
        boolean isExecutable = conf.getBoolean(CACHE_FILE_EXE + i, false);
        boolean isDirectory = conf.getBoolean(CACHE_FILE_DIR + i, false);
        byte[] blobKey = conf.getBytes(CACHE_FILE_BLOB_KEY + i, null);
        cacheFiles.put(name, new DistributedCacheEntry(filePath, isExecutable, blobKey, isDirectory));
    }
    return cacheFiles.entrySet();
}
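A minimal round-trip sketch of this config layout, assuming the writeFileInfoToConfig counterpart shown elsewhere in this section; the entry name and path are invented examples:

// Write one cache entry into the indexed CACHE_FILE_* keys, then read it back.
Configuration conf = new Configuration();
DistributedCache.writeFileInfoToConfig(
        "lookup",
        new DistributedCacheEntry("hdfs://host:port/data/lookup.txt", false),
        conf);

// readFileInfoFromConfig reconstructs the entries from the same indexed keys.
for (Entry<String, DistributedCacheEntry> e : DistributedCache.readFileInfoFromConfig(conf)) {
    System.out.println(e.getKey() + " -> " + e.getValue().filePath);
}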
@Override
public void open(Configuration conf) throws IOException {
    File file = getRuntimeContext().getDistributedCache().getFile("cache_test");
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String tempString;
        while ((tempString = reader.readLine()) != null) {
            wordList.add(tempString);
        }
    }
}
public AbstractRuntimeUDFContext(
        TaskInfo taskInfo,
        ClassLoader userCodeClassLoader,
        ExecutionConfig executionConfig,
        AbstractAccumulatorRegistry accumulatorRegistry,
        Map<String, Future<Path>> cpTasks,
        MetricGroup metrics) {
    this.taskInfo = checkNotNull(taskInfo);
    this.userCodeClassLoader = userCodeClassLoader;
    this.executionConfig = executionConfig;
    this.distributedCache = new DistributedCache(checkNotNull(cpTasks));
    this.accumulatorRegistry = checkNotNull(accumulatorRegistry);
    this.metrics = metrics;
}
public static void addUserArtifactEntries(
        Collection<Tuple2<String, DistributedCache.DistributedCacheEntry>> userArtifacts,
        JobGraph jobGraph) {
    if (!userArtifacts.isEmpty()) {
        try {
            java.nio.file.Path tmpDir = Files.createTempDirectory("flink-distributed-cache-" + jobGraph.getJobID());
            for (Tuple2<String, DistributedCache.DistributedCacheEntry> originalEntry : userArtifacts) {
                Path filePath = new Path(originalEntry.f1.filePath);
                boolean isLocalDir = false;
                try {
                    FileSystem sourceFs = filePath.getFileSystem();
                    isLocalDir = !sourceFs.isDistributedFS() && sourceFs.getFileStatus(filePath).isDir();
                } catch (IOException ioe) {
                    LOG.warn("Could not determine whether {} denotes a local path.", filePath, ioe);
                }
                // zip local directories because we only support file uploads
                DistributedCache.DistributedCacheEntry entry;
                if (isLocalDir) {
                    Path zip = FileUtils.compressDirectory(
                            filePath, new Path(tmpDir.toString(), filePath.getName() + ".zip"));
                    entry = new DistributedCache.DistributedCacheEntry(zip.toString(), originalEntry.f1.isExecutable, true);
                } else {
                    entry = new DistributedCache.DistributedCacheEntry(filePath.toString(), originalEntry.f1.isExecutable, false);
                }
                jobGraph.addUserArtifact(originalEntry.f0, entry);
            }
        } catch (IOException ioe) {
            throw new FlinkRuntimeException("Could not compress distributed-cache artifacts.", ioe);
        }
    }
}
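For illustration, a hypothetical invocation (job name, artifact name, and path are assumptions): a local directory handed in as a user artifact is compressed and attached to the JobGraph as a zipped entry, while plain files and distributed-FS paths are attached unchanged:

// Register a local directory as a user artifact; it will be zipped to
// "<tmpDir>/conf.zip" before being attached to the JobGraph.
JobGraph jobGraph = new JobGraph("distributed-cache-example");
Collection<Tuple2<String, DistributedCache.DistributedCacheEntry>> artifacts =
        Collections.singletonList(Tuple2.of(
                "config-dir",
                new DistributedCache.DistributedCacheEntry("/local/path/conf", false)));
JobGraphGenerator.addUserArtifactEntries(artifacts, jobGraph);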
@Override
public void open(Configuration parameters) throws Exception {
    File file = getRuntimeContext().getDistributedCache().getFile(testName);
    // try-with-resources closes the reader even if the assertion fails;
    // assertEquals takes (expected, actual) in that order.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        String output = reader.readLine();
        assertEquals(testString, output);
    }
}
@Test
public void testArtifactCompression() throws IOException {
    Path plainFile1 = tmp.newFile("plainFile1").toPath();
    Path plainFile2 = tmp.newFile("plainFile2").toPath();

    Path directory1 = tmp.newFolder("directory1").toPath();
    Files.createFile(directory1.resolve("containedFile1"));

    Path directory2 = tmp.newFolder("directory2").toPath();
    Files.createFile(directory2.resolve("containedFile2"));

    JobGraph jb = new JobGraph();

    final String executableFileName = "executableFile";
    final String nonExecutableFileName = "nonExecutableFile";
    final String executableDirName = "executableDir";
    final String nonExecutableDirName = "nonExecutableDir";

    Collection<Tuple2<String, DistributedCache.DistributedCacheEntry>> originalArtifacts = Arrays.asList(
            Tuple2.of(executableFileName, new DistributedCache.DistributedCacheEntry(plainFile1.toString(), true)),
            Tuple2.of(nonExecutableFileName, new DistributedCache.DistributedCacheEntry(plainFile2.toString(), false)),
            Tuple2.of(executableDirName, new DistributedCache.DistributedCacheEntry(directory1.toString(), true)),
            Tuple2.of(nonExecutableDirName, new DistributedCache.DistributedCacheEntry(directory2.toString(), false)));

    JobGraphGenerator.addUserArtifactEntries(originalArtifacts, jb);

    Map<String, DistributedCache.DistributedCacheEntry> submittedArtifacts = jb.getUserArtifacts();

    DistributedCache.DistributedCacheEntry executableFileEntry = submittedArtifacts.get(executableFileName);
    assertState(executableFileEntry, true, false);

    DistributedCache.DistributedCacheEntry nonExecutableFileEntry = submittedArtifacts.get(nonExecutableFileName);
    assertState(nonExecutableFileEntry, false, false);

    DistributedCache.DistributedCacheEntry executableDirEntry = submittedArtifacts.get(executableDirName);
    assertState(executableDirEntry, true, true);

    DistributedCache.DistributedCacheEntry nonExecutableDirEntry = submittedArtifacts.get(nonExecutableDirName);
    assertState(nonExecutableDirEntry, false, true);
}
if (!jythonInitialized) {
    String path = context.getDistributedCache().getFile(PythonConstants.FLINK_PYTHON_DC_ID).getAbsolutePath();
public void setUserArtifactBlobKey(String entryName, PermanentBlobKey blobKey) throws IOException {
    byte[] serializedBlobKey = InstantiationUtil.serializeObject(blobKey);

    userArtifacts.computeIfPresent(entryName, (key, originalEntry) -> new DistributedCache.DistributedCacheEntry(
            originalEntry.filePath,
            originalEntry.isExecutable,
            serializedBlobKey,
            originalEntry.isZipped));
}
receiver.open(outputFile);
String path = function.getRuntimeContext().getDistributedCache().getFile(FLINK_PYTHON_DC_ID).getAbsolutePath();
/**
 * Gets the local temporary file copy of a distributed cache file.
 *
 * @param name distributed cache file name
 * @return local temporary file copy of a distributed cache file
 */
@Override
public File getCachedFile(String name) {
    return context.getDistributedCache().getFile(name);
}