/**
 * Returns a {@link FileSystem} based on the fs.defaultFS property defined in conf.
 */
public static FileSystem getFs(final Configuration conf) throws IOException {
    return FileSystem.get(new HadoopConfiguration(conf).getHadoopConf());
}
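A minimal usage sketch. The prefixed key follows the contract exercised by the HadoopConfiguration test further below; the HDFS URI is a placeholder:

// Sketch: build a Configuration whose prefixed entries feed the Hadoop conf,
// then resolve the FileSystem through the helper above.
final Configuration conf = new Configuration();
conf.setProperty(HadoopConfiguration.HADOOP_COMMON_PROPERTY_PREFIX + "fs.defaultFS",
    "hdfs://localhost:8020"); // placeholder URI
final FileSystem fs = FSUtils.getFs(conf); // may throw IOException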
private Schema getSchemaFromPath(@NonNull final Path resultSchemaPath) throws IOException {
    final FileSystem fs = FileSystem.get(new HadoopConfiguration(this.conf).getHadoopConf());
    // try-with-resources so the stream is closed even if parsing fails
    try (FSDataInputStream inputStream = fs.open(resultSchemaPath)) {
        final String schemaString = IOUtils.toString(inputStream, UTF_8);
        return new Schema.Parser().parse(schemaString);
    }
}
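A hedged call-site sketch; the schema path is a placeholder:

// Hypothetical: parse the Avro schema that an earlier run wrote to the result path.
final Schema resultSchema = getSchemaFromPath(new Path("/tmp/result-schema.avsc")); // placeholder path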
public FileSystem getFileSystem() throws IOException {
    return FileSystem.get(new HadoopConfiguration(this.conf).getHadoopConf());
}
public void loadYamlFile(@NonNull final File yamlFile, final Optional<String> scope) {
    try {
        final FileSystem localFs = FileSystem.getLocal(
            new HadoopConfiguration(new Configuration()).getHadoopConf());
        final InputStream yamlInputStream = localFs.open(new Path(yamlFile.getPath()));
        loadYamlStream(yamlInputStream, scope);
    } catch (IOException e) {
        final String errorMsg = String.format("Error loading yaml config file %s", yamlFile.getAbsolutePath());
        log.error(errorMsg, e);
        throw new JobRuntimeException(errorMsg, e);
    }
}
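A hedged usage sketch; the file path and scope name are placeholders, and whether Optional is Guava's or java.util's follows the class's imports, which this snippet doesn't show:

// Hypothetical call site: load a yaml config, optionally narrowed to one named scope.
loadYamlFile(new File("/path/to/job.yaml"), Optional.of("production"));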
/**
 * Creates the JavaSparkContext if it hasn't been created yet, or returns the existing instance.
 * {@link #addSchema(Schema)} and {@link #addSchemas(Collection)} must not be called once the
 * JavaSparkContext has been created.
 *
 * @return the JavaSparkContext that will be used to execute the JobDags
 */
public JavaSparkContext getOrCreateSparkContext() {
    if (!this.sparkContext.isPresent()) {
        this.sparkContext = Optional.of(new JavaSparkContext(
            SparkUtil.getSparkConf(
                this.appName,
                Optional.of(this.schemas),
                this.serializationClasses,
                this.conf)));
        this.sparkContext.get().sc().addSparkListener(new SparkEventListener());
        // Add the job's hadoop configuration as a default resource.
        this.sparkContext.get().sc().hadoopConfiguration().addResource(
            new HadoopConfiguration(conf).getHadoopConf());
        this.appId = this.sparkContext.get().sc().applicationId();
    }
    return this.sparkContext.get();
}
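A sketch of the call-order contract stated in the javadoc; the holder variable and schema are hypothetical:

// Hypothetical: schemas must be registered before the first getOrCreateSparkContext() call.
sparkFactory.addSchema(recordSchema);                                  // hypothetical holder + schema
final JavaSparkContext jsc = sparkFactory.getOrCreateSparkContext();
final JavaSparkContext same = sparkFactory.getOrCreateSparkContext(); // returns the same instance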
try {
    final FileSystem fileSystem =
        dataFolder.getFileSystem(new HadoopConfiguration(this.conf.getConf()).getHadoopConf());
    if (fileSystem.exists(dataFolder)) {
        fileSystem.delete(dataFolder, true);
    }

try {
    final FileSystem fileSystem =
        destPath.getFileSystem(new HadoopConfiguration(this.conf.getConf()).getHadoopConf());
    final FileStatus[] status = fileSystem.listStatus(new Path(this.conf.getFullPath()));
    int partitionId = 0;
@Test
public void testHadoopConf() {
    final Configuration conf = new Configuration();

    final Map<String, String> hadoopProps = new HashMap<>();
    hadoopProps.put("hadoopProp1", "value1");
    hadoopProps.put("hadoopProp2", "value2");
    hadoopProps.entrySet().stream().forEach(entry ->
        conf.setProperty(HadoopConfiguration.HADOOP_COMMON_PROPERTY_PREFIX + entry.getKey(), entry.getValue()));

    final Map<String, String> nonHadoopProps = new HashMap<>();
    nonHadoopProps.put("nonHadoopProp1", "value1");
    nonHadoopProps.put("nonHadoopProp2", "value2");
    nonHadoopProps.entrySet().stream().forEach(entry -> conf.setProperty(entry.getKey(), entry.getValue()));

    final org.apache.hadoop.conf.Configuration hadoopConf = new HadoopConfiguration(conf).getHadoopConf();
    // Prefixed properties must be forwarded with the prefix stripped ...
    Assert.assertTrue(hadoopProps.entrySet().stream()
        .allMatch(entry -> hadoopConf.get(entry.getKey()).equals(entry.getValue())));
    // ... while un-prefixed properties must be dropped entirely.
    Assert.assertTrue(nonHadoopProps.entrySet().stream()
        .noneMatch(entry -> hadoopConf.get(entry.getKey()) != null));
}
}
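The test above pins down HadoopConfiguration's contract: only keys carrying HADOOP_COMMON_PROPERTY_PREFIX reach the Hadoop configuration, with the prefix stripped. A minimal sketch of the same contract outside the test harness (the property names are illustrative):

final Configuration conf = new Configuration();
conf.setProperty(HadoopConfiguration.HADOOP_COMMON_PROPERTY_PREFIX + "dfs.replication", "2");
conf.setProperty("spark.executor.memory", "4g"); // no prefix, so it never reaches hadoopConf
final org.apache.hadoop.conf.Configuration hadoopConf = new HadoopConfiguration(conf).getHadoopConf();
// hadoopConf.get("dfs.replication")       -> "2"
// hadoopConf.get("spark.executor.memory") -> null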
final Path destPath = new Path(this.awsConf.getSourcePath());
final FileSystem fs = destPath.getFileSystem(new HadoopConfiguration(this.conf.getConf()).getHadoopConf());
if (fs.exists(destPath)) {
    fs.delete(destPath, true);
}

try {
    final FileSystem fileSystem =
        destPath.getFileSystem(new HadoopConfiguration(this.conf.getConf()).getHadoopConf());
private static Map<String, String> readMetadataInfo(@NonNull final HoodieConfiguration hoodieConf) {
    try {
        final FileSystem fs = FSUtils.getFs(hoodieConf.getConf());
        HoodieUtil.initHoodieDataset(fs, hoodieConf);
        final HoodieTableMetaClient hoodieTableMetaClient = new HoodieTableMetaClient(
            new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(), hoodieConf.getBasePath(), true);
        final HoodieActiveTimeline hoodieActiveTimeline = hoodieTableMetaClient.getActiveTimeline();
        final java.util.Optional<HoodieInstant> lastInstant =
            hoodieActiveTimeline.getCommitTimeline().filterCompletedInstants().lastInstant();
        if (lastInstant.isPresent()) {
            log.info("using hoodie instant for reading checkpoint info: {}", lastInstant.get().getTimestamp());
            final HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
                hoodieActiveTimeline.getInstantDetails(lastInstant.get()).get());
            final String serCommitInfo = commitMetadata.getMetadata(HOODIE_METADATA_KEY);
            if (!Strings.isNullOrEmpty(serCommitInfo)) {
                return MapUtil.deserializeMap(serCommitInfo);
            }
        }
        return new HashMap<>();
    } catch (IOException e) {
        log.error("failed to read metadata info", e);
        throw new JobRuntimeException("failed to read metadata information", e);
    }
}
}
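A hedged sketch of consuming the map this returns; the checkpoint key name is an assumption, since only HOODIE_METADATA_KEY and MapUtil.deserializeMap appear in the snippet:

// Hypothetical caller within the same class: look up a value persisted with the last commit.
final Map<String, String> metadata = readMetadataInfo(hoodieConf);
final String lastCheckpoint = metadata.get("checkpoint"); // key name is illustrative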
LocatedFileStatus resultSchemaFile = null;
int resultSchemaVersion = -1;
final FileSystem fileSystem = FileSystem.get(hadoopConfiguration.getHadoopConf());
final RemoteIterator<LocatedFileStatus> fileIterator = fileSystem.listFiles(conf.getPath(), false);
Assert.assertTrue(
    new HoodieTableMetaClient(
        new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(), basePath.toString(), true)
        .getActiveTimeline().getCommitTimeline().filterCompletedInstants().empty());

Assert.assertFalse(
    new HoodieTableMetaClient(
        new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(), basePath.toString(), true)
        .getActiveTimeline().getCommitTimeline().filterCompletedInstants().empty());