public synchronized void removePath(Path remoteFile) {
    if (paths.containsKey(remoteFile)) {
        URL fileURL = null;
        try {
            fileURL = new URL(baseURL, remoteFile.toString());
        } catch (MalformedURLException e) {
            throw new RuntimeException(e);
        }
        router.removePathPattern(fileURL.getPath());
        // drop the registration as well, so the same remote path can be added again later;
        // without this, addPath would keep rejecting it as a duplicate
        paths.remove(remoteFile);
    }
}
public VirtualFileServerHandler(Path path) throws IOException {
    if (!path.isAbsolute()) {
        throw new IllegalArgumentException("path must be absolute: " + path);
    }
    this.path = path;
    this.fs = path.getFileSystem();
    if (!fs.exists(path) || fs.getFileStatus(path).isDir()) {
        throw new IllegalArgumentException("no such file: " + path);
    }
}
private static void internalCopyFile(
        Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS)
        throws IOException {
    try (FSDataOutputStream lfsOutput = tFS.create(targetPath, FileSystem.WriteMode.NO_OVERWRITE);
            FSDataInputStream fsInput = sFS.open(sourcePath)) {
        IOUtils.copyBytes(fsInput, lfsOutput);
        //noinspection ResultOfMethodCallIgnored
        new File(targetPath.toString()).setExecutable(executable);
    }
}
/**
 * Adds a path to the artifact server.
 *
 * @param path the qualified FS path to serve (local, hdfs, etc.).
 * @param remoteFile the remote path with which to locate the file.
 * @return the fully-qualified remote path to the file.
 * @throws MalformedURLException if the remote path is invalid.
 */
public synchronized URL addPath(Path path, Path remoteFile) throws IOException, MalformedURLException {
    if (paths.containsKey(remoteFile)) {
        throw new IllegalArgumentException("duplicate path registered");
    }
    if (remoteFile.isAbsolute()) {
        throw new IllegalArgumentException("not expecting an absolute path");
    }
    URL fileURL = new URL(baseURL, remoteFile.toString());
    router.addAny(fileURL.getPath(), new VirtualFileServerHandler(path));
    paths.put(remoteFile, fileURL);
    return fileURL;
}
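// A minimal usage sketch for addPath/removePath above. The server instance
// name ("artifactServer") and the file locations are placeholders for
// illustration, not taken from the source.
Path localJar = new Path("file:///tmp/job.jar");   // hypothetical local artifact
Path remoteName = new Path("artifacts/job.jar");   // must be relative, see addPath

URL served = artifactServer.addPath(localJar, remoteName);
System.out.println("Serving " + localJar + " at " + served);

// registering the same remote path twice throws IllegalArgumentException,
// so unregister first when replacing an artifact
artifactServer.removePath(remoteName);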
@Before
public void initConfig() {
    hadoopConfig = new org.apache.hadoop.conf.Configuration();
    hadoopConfig.set(org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY, hdfsRootPath.toString());
}
private static void internalCopyDirectory(
        Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS)
        throws IOException {
    tFS.mkdirs(targetPath);
    FileStatus[] contents = sFS.listStatus(sourcePath);
    for (FileStatus content : contents) {
        String distPath = content.getPath().toString();
        if (content.isDir() && distPath.endsWith("/")) {
            distPath = distPath.substring(0, distPath.length() - 1);
        }
        String localPath = targetPath + distPath.substring(distPath.lastIndexOf("/"));
        copy(content.getPath(), new Path(localPath), executable);
    }
}
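// A sketch of the dispatching copy entry point that the two internal helpers
// suggest: pick file or directory copy based on the source status. This is an
// assumption reconstructed from the recursive copy(...) call above, not the
// verbatim source.
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    FileSystem sFS = sourcePath.getFileSystem();
    FileSystem tFS = targetPath.getFileSystem();
    if (sFS.getFileStatus(sourcePath).isDir()) {
        internalCopyDirectory(sourcePath, targetPath, executable, sFS, tFS);
    } else {
        internalCopyFile(sourcePath, targetPath, executable, sFS, tFS);
    }
}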
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    List<String> files = new ArrayList<>();
    FileStatus[] statuses = fileSystem.listStatus(new Path(path));
    if (statuses == null) {
        LOG.warn("Path does not exist: {}", path);
    } else {
        for (FileStatus status : statuses) {
            Path filePath = status.getPath();
            String fileName = filePath.getName();
            long modificationTime = status.getModificationTime();
            if (!isFiltered(fileName, modificationTime)) {
                files.add(filePath.toString());
                modificationTimes.put(fileName, modificationTime);
            }
        }
    }
    return files;
}
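// A hedged sketch of the isFiltered helper assumed by listNewFiles: a file is
// skipped when it has already been seen with the same or a newer modification
// time. The "modificationTimes" field comes from the method above; the exact
// filtering rules in the source may differ.
private boolean isFiltered(String fileName, long modificationTime) {
    Long seen = modificationTimes.get(fileName);
    return seen != null && seen >= modificationTime;
}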
@Override
public void flatMap(FileCopyTask task, Collector<Object> out) throws Exception {
    LOGGER.info("Processing task: " + task);
    Path outPath = new Path(targetPath, task.getRelativePath());
    FileSystem targetFs = targetPath.getFileSystem();
    // create parent folders in case of a local FS
    if (!targetFs.isDistributedFS()) {
        // deal with cases like file:///tmp or just /tmp
        File outFile = outPath.toUri().isAbsolute()
                ? new File(outPath.toUri())
                : new File(outPath.toString());
        File parentFile = outFile.getParentFile();
        if (!parentFile.mkdirs() && !parentFile.exists()) {
            throw new RuntimeException("Cannot create local file system directories: " + parentFile);
        }
    }
    FSDataOutputStream outputStream = null;
    FSDataInputStream inputStream = null;
    try {
        outputStream = targetFs.create(outPath, FileSystem.WriteMode.OVERWRITE);
        inputStream = task.getPath().getFileSystem().open(task.getPath());
        int bytes = IOUtils.copy(inputStream, outputStream);
        bytesCounter.add(bytes);
    } finally {
        IOUtils.closeQuietly(inputStream);
        IOUtils.closeQuietly(outputStream);
    }
    fileCounter.add(1L);
}
} else {
    if (logExcludedFiles && LOG.isDebugEnabled()) {
        LOG.debug("Directory " + dir.getPath().toString() + " did not pass the file-filter and is excluded.");
    }
}
public ContinuousFileMonitoringFunction(
        FileInputFormat<OUT> format,
        FileProcessingMode watchType,
        int readerParallelism,
        long interval) {

    Preconditions.checkArgument(
            watchType == FileProcessingMode.PROCESS_ONCE || interval >= MIN_MONITORING_INTERVAL,
            "The specified monitoring interval (" + interval + " ms) is smaller than the minimum "
                    + "allowed one (" + MIN_MONITORING_INTERVAL + " ms).");

    Preconditions.checkArgument(
            format.getFilePaths().length == 1,
            "FileInputFormats with multiple paths are not supported yet.");

    this.format = Preconditions.checkNotNull(format, "Unspecified File Input Format.");
    this.path = Preconditions.checkNotNull(format.getFilePaths()[0].toString(), "Unspecified Path.");
    this.interval = interval;
    this.watchType = watchType;
    this.readerParallelism = Math.max(readerParallelism, 1);
    this.globalModificationTime = Long.MIN_VALUE;
}
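// A minimal usage sketch for the constructor above, assuming Flink's standard
// TextInputFormat and continuous monitoring; the concrete path and interval
// are placeholders.
TextInputFormat format = new TextInputFormat(new Path("file:///tmp/input"));
ContinuousFileMonitoringFunction<String> monitor =
        new ContinuousFileMonitoringFunction<>(
                format,
                FileProcessingMode.PROCESS_CONTINUOUSLY,
                1,          // reader parallelism
                10_000L);   // monitoring interval in ms, must be >= MIN_MONITORING_INTERVAL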
public static void addUserArtifactEntries(
        Collection<Tuple2<String, DistributedCache.DistributedCacheEntry>> userArtifacts,
        JobGraph jobGraph) {
    if (!userArtifacts.isEmpty()) {
        try {
            java.nio.file.Path tmpDir =
                    Files.createTempDirectory("flink-distributed-cache-" + jobGraph.getJobID());
            for (Tuple2<String, DistributedCache.DistributedCacheEntry> originalEntry : userArtifacts) {
                Path filePath = new Path(originalEntry.f1.filePath);
                boolean isLocalDir = false;
                try {
                    FileSystem sourceFs = filePath.getFileSystem();
                    isLocalDir = !sourceFs.isDistributedFS() && sourceFs.getFileStatus(filePath).isDir();
                } catch (IOException ioe) {
                    LOG.warn("Could not determine whether {} denotes a local path.", filePath, ioe);
                }
                // zip local directories because we only support file uploads
                DistributedCache.DistributedCacheEntry entry;
                if (isLocalDir) {
                    Path zip = FileUtils.compressDirectory(
                            filePath, new Path(tmpDir.toString(), filePath.getName() + ".zip"));
                    entry = new DistributedCache.DistributedCacheEntry(
                            zip.toString(), originalEntry.f1.isExecutable, true);
                } else {
                    entry = new DistributedCache.DistributedCacheEntry(
                            filePath.toString(), originalEntry.f1.isExecutable, false);
                }
                jobGraph.addUserArtifact(originalEntry.f0, entry);
            }
        } catch (IOException ioe) {
            throw new FlinkRuntimeException("Could not compress distributed-cache artifacts.", ioe);
        }
    }
}
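// A hedged sketch of how user artifacts might be assembled before calling
// addUserArtifactEntries; the entry name, file location, and executable flag
// are placeholders, not taken from the source.
List<Tuple2<String, DistributedCache.DistributedCacheEntry>> artifacts = new ArrayList<>();
artifacts.add(Tuple2.of(
        "config-dir",
        new DistributedCache.DistributedCacheEntry("file:///tmp/conf", false)));

addUserArtifactEntries(artifacts, jobGraph);
// local directories such as file:///tmp/conf get zipped into the temp dir and
// registered as single-file uploads; remote/DFS paths are passed through as-is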
/**
 * Construct a Mesos URI.
 */
public static Protos.CommandInfo.URI uri(MesosArtifactResolver resolver, ContainerSpecification.Artifact artifact) {
    checkNotNull(resolver);
    checkNotNull(artifact);

    Option<URL> url = resolver.resolve(artifact.dest);
    if (url.isEmpty()) {
        throw new IllegalArgumentException("Unresolvable artifact: " + artifact.dest);
    }

    return Protos.CommandInfo.URI.newBuilder()
            .setValue(url.get().toExternalForm())
            .setOutputFile(artifact.dest.toString())
            .setExtract(artifact.extract)
            .setCache(artifact.cachable)
            .setExecutable(artifact.executable)
            .build();
}
@VisibleForTesting
void serializeV1(BucketState<BucketID> state, DataOutputView out) throws IOException {
    SimpleVersionedSerialization.writeVersionAndSerialize(bucketIdSerializer, state.getBucketId(), out);
    out.writeUTF(state.getBucketPath().toString());
    out.writeLong(state.getInProgressFileCreationTime());

    // put the current open part file
    if (state.hasInProgressResumableFile()) {
        final RecoverableWriter.ResumeRecoverable resumable = state.getInProgressResumableFile();
        out.writeBoolean(true);
        SimpleVersionedSerialization.writeVersionAndSerialize(resumableSerializer, resumable, out);
    } else {
        out.writeBoolean(false);
    }

    // put the map of pending files per checkpoint
    final Map<Long, List<RecoverableWriter.CommitRecoverable>> pendingCommitters =
            state.getCommittableFilesPerCheckpoint();

    // manually keep the version here to save some bytes
    out.writeInt(commitableSerializer.getVersion());

    out.writeInt(pendingCommitters.size());
    for (Entry<Long, List<RecoverableWriter.CommitRecoverable>> resumablesForCheckpoint :
            pendingCommitters.entrySet()) {
        List<RecoverableWriter.CommitRecoverable> resumables = resumablesForCheckpoint.getValue();

        out.writeLong(resumablesForCheckpoint.getKey());
        out.writeInt(resumables.size());

        for (RecoverableWriter.CommitRecoverable resumable : resumables) {
            byte[] serialized = commitableSerializer.serialize(resumable);
            out.writeInt(serialized.length);
            out.write(serialized);
        }
    }
}
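// A hedged sketch of the matching read path, reconstructed from the write
// order above (bucket id, bucket path, creation time, optional in-progress
// resumable, then pending committables keyed by checkpoint id). The BucketState
// constructor arguments are an assumption; the real deserializer may differ.
@VisibleForTesting
BucketState<BucketID> deserializeV1(DataInputView in) throws IOException {
    final BucketID bucketId =
            SimpleVersionedSerialization.readVersionAndDeSerialize(bucketIdSerializer, in);
    final String bucketPath = in.readUTF();
    final long creationTime = in.readLong();

    // the optional in-progress part file
    RecoverableWriter.ResumeRecoverable current = null;
    if (in.readBoolean()) {
        current = SimpleVersionedSerialization.readVersionAndDeSerialize(resumableSerializer, in);
    }

    // pending committables, all serialized with the single version written above
    final int committableVersion = in.readInt();
    final int numCheckpoints = in.readInt();
    final Map<Long, List<RecoverableWriter.CommitRecoverable>> pending = new HashMap<>(numCheckpoints);
    for (int i = 0; i < numCheckpoints; i++) {
        final long checkpointId = in.readLong();
        final int numCommittables = in.readInt();
        final List<RecoverableWriter.CommitRecoverable> committables = new ArrayList<>(numCommittables);
        for (int j = 0; j < numCommittables; j++) {
            final byte[] bytes = new byte[in.readInt()];
            in.readFully(bytes);
            committables.add(commitableSerializer.deserialize(committableVersion, bytes));
        }
        pending.put(checkpointId, committables);
    }
    return new BucketState<>(bucketId, new Path(bucketPath), creationTime, current, pending);
}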
@Test
public void testSetPath() {
    final DummyFileInputFormat format = new DummyFileInputFormat();
    format.setFilePath("/some/imaginary/path");
    // assertEquals takes the expected value first
    Assert.assertEquals("/some/imaginary/path", format.getFilePath().toString());
}
@Test
public void testCreateInputSplitSingleFile() throws IOException {
    String tempFile = TestFileUtils.createTempFile("Hello World");
    FileInputFormat fif = new DummyFileInputFormat();
    fif.setFilePath(tempFile);

    fif.configure(new Configuration());
    FileInputSplit[] splits = fif.createInputSplits(2);

    Assert.assertEquals(2, splits.length);
    Assert.assertEquals(tempFile, splits[0].getPath().toString());
    Assert.assertEquals(tempFile, splits[1].getPath().toString());
}
for (int i = 0; i < inputSplits.length; i++) {
    Assert.assertEquals(String.format("%d. split has block size length.", i), blockSize, inputSplits[i].getLength());
    if (inputSplits[i].getPath().toString().equals(pathFile1)) {
        numSplitsFile1++;
    } else if (inputSplits[i].getPath().toString().equals(pathFile2)) {
        numSplitsFile2++;
    } else {
for (FileInputSplit fis : splits) {
    Assert.assertEquals(0, fis.getStart());
    if (fis.getPath().toString().equals(tempFile1)) {
        numSplitsFile1++;
        Assert.assertEquals(21, fis.getLength());
    } else if (fis.getPath().toString().equals(tempFile2)) {
        numSplitsFile2++;
        Assert.assertEquals(22, fis.getLength());
    } else if (fis.getPath().toString().equals(tempFile3)) {
        numSplitsFile3++;
        Assert.assertEquals(23, fis.getLength());
@Test
public void testSerializationOnlyInProgress() throws IOException {
    final File testFolder = tempFolder.newFolder();
    final FileSystem fs = FileSystem.get(testFolder.toURI());
    final Path testBucket = new Path(testFolder.getPath(), "test");

    final RecoverableWriter writer = fs.createRecoverableWriter();
    final RecoverableFsDataOutputStream stream = writer.open(testBucket);
    stream.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8")));

    final RecoverableWriter.ResumeRecoverable current = stream.persist();

    final BucketState<String> bucketState = new BucketState<>(
            "test", testBucket, Long.MAX_VALUE, current, new HashMap<>());

    final SimpleVersionedSerializer<BucketState<String>> serializer =
            new BucketStateSerializer<>(
                    writer.getResumeRecoverableSerializer(),
                    writer.getCommitRecoverableSerializer(),
                    SimpleVersionedStringSerializer.INSTANCE);

    final byte[] bytes = SimpleVersionedSerialization.writeVersionAndSerialize(serializer, bucketState);

    // simulate that everything is over for the file
    stream.close();

    final BucketState<String> recoveredState =
            SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes);

    Assert.assertEquals(testBucket, recoveredState.getBucketPath());

    FileStatus[] statuses = fs.listStatus(testBucket.getParent());
    Assert.assertEquals(1L, statuses.length);
    Assert.assertTrue(
            statuses[0].getPath().getPath().startsWith(
                    (new Path(testBucket.getParent(), ".test.inprogress")).toString()));
}