private static <E> boolean usePerTaskAttemptDatasets(View<E> target, Configuration conf) { // For performance reasons we should skip the intermediate task attempt and job output datasets if the // file system does not support efficient renaming (such as S3), and write to the target dataset directly. if (!FileSystemUtil.supportsRename(URI.create(target.getUri().getSchemeSpecificPart()), conf)) { return false; } // new API output committers are not called properly in Hadoop 1 return !Hadoop.isHadoop1() && target.getDataset() instanceof Mergeable; }
/**
 * With no rename property set on the configuration, the S3A and S3N schemes are expected
 * to report no rename support, while HDFS and local file URIs are expected to report it.
 */
@Test
public void testSupportsRenameConfigNotSet() {
  // Each check uses a fresh Configuration so that nothing is shared between the calls.
  boolean s3a = FileSystemUtil.supportsRename(
      URI.create("s3a://bucket/path"), new Configuration());
  Assert.assertFalse("Should default to false for S3A", s3a);

  boolean s3n = FileSystemUtil.supportsRename(
      URI.create("s3n://bucket/path"), new Configuration());
  Assert.assertFalse("Should default to false for S3N", s3n);

  boolean hdfs = FileSystemUtil.supportsRename(
      URI.create("hdfs://cluster/path"), new Configuration());
  Assert.assertTrue("Should default to true for HDFS", hdfs);

  boolean localFile = FileSystemUtil.supportsRename(
      URI.create("file:///path"), new Configuration());
  Assert.assertTrue("Should default to true for FILE", localFile);
}
private FileSystemWriter(FileSystem fs, Path path, long rollIntervalMillis, long targetFileSize, DatasetDescriptor descriptor, Schema writerSchema) { Preconditions.checkNotNull(fs, "File system is not defined"); Preconditions.checkNotNull(path, "Destination directory is not defined"); Preconditions.checkNotNull(descriptor, "Descriptor is not defined"); this.fs = fs; this.directory = path; this.rollIntervalMillis = rollIntervalMillis; this.targetFileSize = targetFileSize; this.descriptor = descriptor; this.conf = new Configuration(fs.getConf()); this.state = ReaderWriterState.NEW; this.schema = writerSchema; // copy file format settings from custom properties to the Configuration for (String prop : descriptor.listProperties()) { conf.set(prop, descriptor.getProperty(prop)); } // For performance reasons we will skip temp file creation if the file system does not support // efficient renaming, and write the file directly. this.useTempPath = FileSystemUtil.supportsRename(fs.getUri(), conf); }
/**
 * Setting the rename property to {@code false} is expected to turn rename support off
 * for HDFS and local file URIs.
 */
@Test
public void testSupportsRenameConfigFalse() {
  Configuration renameDisabled = new Configuration();
  renameDisabled.setBoolean(FileSystemProperties.SUPPORTS_RENAME_PROP, false);

  URI hdfs = URI.create("hdfs://cluster/path");
  Assert.assertFalse("Should override via config to false for HDFS",
      FileSystemUtil.supportsRename(hdfs, renameDisabled));

  URI localFile = URI.create("file:///path");
  Assert.assertFalse("Should override via config to false for FILE",
      FileSystemUtil.supportsRename(localFile, renameDisabled));
}
/**
 * Setting the rename property to {@code true} is expected to turn rename support on
 * for the S3A and S3N schemes.
 */
@Test
public void testSupportsRenameConfigTrue() {
  Configuration conf = new Configuration();
  conf.setBoolean(FileSystemProperties.SUPPORTS_RENAME_PROP, true);

  Assert.assertTrue("Should override via config to true for S3A",
      FileSystemUtil.supportsRename(URI.create("s3a://bucket/path"), conf));
  // Failure message reworded to include the missing "to", matching the S3A message.
  Assert.assertTrue("Should override via config to true for S3N",
      FileSystemUtil.supportsRename(URI.create("s3n://bucket/path"), conf));
}