Refine search
private static List<String> readLines(Path location, Configuration conf) throws Exception { FileSystem fileSystem = HadoopUtil.getWorkingFileSystem(); CompressionCodecFactory factory = new CompressionCodecFactory(conf); FileStatus[] items = fileSystem.listStatus(location); if (items == null) return new ArrayList<String>(); List<String> results = new ArrayList<String>(); for (FileStatus item : items) { // ignoring files like _SUCCESS if (item.getPath().getName().startsWith("_")) { continue; } CompressionCodec codec = factory.getCodec(item.getPath()); InputStream stream = null; // check if we have a compression codec we need to use if (codec != null) { stream = codec.createInputStream(fileSystem.open(item.getPath())); } else { stream = fileSystem.open(item.getPath()); } StringWriter writer = new StringWriter(); IOUtils.copy(stream, writer, "UTF-8"); String raw = writer.toString(); for (String str : StringUtil.split(raw, "\n")) { results.add(str); } } return results; }
/** * Returns an {@link InputStream} to the specified file. * <p> * Note: It is the caller's responsibility to close the returned {@link InputStream}. * </p> * * @param path The path to the file to open. * @return An {@link InputStream} for the specified file. * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file. */ @Override public InputStream getFileStream(String path) throws FileBasedHelperException { try { Path p = new Path(path); InputStream in = this.getFileSystem().open(p); // Account for compressed files (e.g. gzip). // https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala CompressionCodecFactory factory = new CompressionCodecFactory(this.getFileSystem().getConf()); CompressionCodec codec = factory.getCodec(p); return (codec == null) ? in : codec.createInputStream(in); } catch (IOException e) { throw new FileBasedHelperException("Cannot open file " + path + " due to " + e.getMessage(), e); } }
compressionCodecs = new CompressionCodecFactory(job); codec = compressionCodecs.getCodec(file); FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(fileSplit.getPath());
CompressionCodecFactory factory = new CompressionCodecFactory(conf); boolean encode = false; for(int i=0; i < args.length; ++i) { encode = false; } else { CompressionCodec codec = factory.getCodec(new Path(args[i])); if (codec == null) { System.out.println("Codec for " + args[i] + " not found.");
CompressionCodecFactory cf = new CompressionCodecFactory(getConf()); CompressionCodec codec = cf.getCodec(item.path); if (codec != null) { i.seek(0);
CompressionCodec codec = null; Configuration conf = getConfiguration(); final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf); final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString()); final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC; codec = compressionCodecFactory.getCodec(path); } else if (compressionType != CompressionType.NONE) { codec = getCompressionCodec(context, getConfiguration());
codec = getCompressionCodec(context, getConfiguration()); final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf); for (final Path file : files) { try { codec = compressionCodecFactory.getCodec(file);
/**
 * Decides whether {@code file} may be split.
 *
 * @param context job context supplying the configuration for the codec lookup
 * @param file    the input file being considered
 * @return {@code true} only when the codec factory finds no compression codec for {@code file}
 */
@Override
protected boolean isSplitable(final JobContext context, final Path file) {
    final CompressionCodecFactory codecs = new CompressionCodecFactory(context.getConfiguration());
    return codecs.getCodec(file) == null;
}
}
/**
 * Reports a file as splittable unless a compression codec is registered for its path.
 *
 * @param context job context whose configuration drives the codec lookup
 * @param file    the candidate input file
 * @return {@code false} if a codec matches {@code file}, {@code true} otherwise
 */
@Override
protected boolean isSplitable(final JobContext context, final Path file) {
    if (new CompressionCodecFactory(context.getConfiguration()).getCodec(file) != null) {
        return false;
    }
    return true;
}
private static List<String> readLines(Path location, Configuration conf) throws Exception { FileSystem fileSystem = FileSystem.get(location.toUri(), conf); CompressionCodecFactory factory = new CompressionCodecFactory(conf); FileStatus[] items = fileSystem.listStatus(location); if (items == null) return new ArrayList<String>(); List<String> results = new ArrayList<String>(); for (FileStatus item : items) { // ignoring files like _SUCCESS if (item.getPath().getName().startsWith("_")) { continue; } CompressionCodec codec = factory.getCodec(item.getPath()); InputStream stream = null; // check if we have a compression codec we need to use if (codec != null) { stream = codec.createInputStream(fileSystem.open(item.getPath())); } else { stream = fileSystem.open(item.getPath()); } StringWriter writer = new StringWriter(); IOUtils.copy(stream, writer, "UTF-8"); String raw = writer.toString(); for (String str : raw.split("\n")) { results.add(str); } } return results; }
/**
 * Builds a {@link TextReader} for the block-pool-specific file that sits next to {@code file}.
 * <p>
 * The target file name comes from {@code fileNameFromBlockPoolID(blockPoolID)}; when a
 * compression codec is registered for {@code file}, that codec's default extension is appended.
 * A {@link LocalFileSystem} is replaced by its raw file system before the reader is created.
 * </p>
 *
 * @param file        path used for the file system lookup, the codec lookup and the parent directory
 * @param delim       delimiter handed to the reader
 * @param cfg         configuration for the file system and codec lookups
 * @param blockPoolID block pool id from which the target file name is derived
 * @return a reader over the derived path, using the detected codec (possibly {@code null})
 * @throws IOException if the file system cannot be obtained or the reader cannot be created
 */
@VisibleForTesting
TextReader createReader(Path file, String delim, Configuration cfg, String blockPoolID) throws IOException {
    FileSystem fileSystem = file.getFileSystem(cfg);
    if (fileSystem instanceof LocalFileSystem) {
        fileSystem = ((LocalFileSystem) fileSystem).getRaw();
    }

    final CompressionCodec codec = new CompressionCodecFactory(cfg).getCodec(file);
    final String baseName = fileNameFromBlockPoolID(blockPoolID);
    final String targetName = (codec == null) ? baseName : baseName + codec.getDefaultExtension();

    return new TextReader(fileSystem, new Path(file.getParent(), targetName), codec, delim);
}
end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file);
@Override public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException { final FileSplit split = (FileSplit) genericSplit; final Configuration configuration = context.getConfiguration(); if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null) this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER); this.gryoReader = GryoReader.build().mapper( GryoMapper.build().addRegistries(IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create(); long start = split.getStart(); final Path file = split.getPath(); if (null != new CompressionCodecFactory(configuration).getCodec(file)) { throw new IllegalStateException("Compression is not supported for the (binary) Gryo format"); } // open the file and seek to the start of the split this.inputStream = file.getFileSystem(configuration).open(split.getPath()); this.splitLength = split.getLength(); if (this.splitLength > 0) this.splitLength -= (seekToHeader(this.inputStream, start) - start); }
/**
 * Looks up the compression codec registered for {@code path}.
 * <p>
 * The codec factory is created on first use from the {@code configuration} passed to that
 * call and kept in {@code compressionCodecFactory}; once it exists, the
 * {@code configuration} argument of later calls is not consulted.
 * </p>
 *
 * @param path          the file whose codec is wanted
 * @param configuration configuration used only when the factory has not been created yet
 * @return the codec returned by the factory for {@code path}
 */
private CompressionCodec getCompressionCodec(Path path, Configuration configuration) {
    if (compressionCodecFactory != null) {
        return compressionCodecFactory.getCodec(path);
    }
    compressionCodecFactory = new CompressionCodecFactory(configuration);
    return compressionCodecFactory.getCodec(path);
}
}
/**
 * Allows splitting only for files that have no registered compression codec.
 *
 * @param context job context supplying the configuration for the codec lookup
 * @param file    the input file being considered
 * @return {@code true} when no codec is found for {@code file}, {@code false} otherwise
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    CompressionCodec matched = codecFactory.getCodec(file);
    if (matched != null) {
        return false;
    }
    return true;
}
}
/**
 * Treats a file as splittable exactly when the codec factory has no codec for it.
 *
 * @param context job context whose configuration is used to build the codec factory
 * @param file    the candidate input file
 * @return whether the codec lookup for {@code file} came back {@code null}
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodecFactory lookup = new CompressionCodecFactory(context.getConfiguration());
    final CompressionCodec found = lookup.getCodec(file);
    return found == null;
}
/**
 * Decides whether {@code file} may be split, based on the codec registered for its path.
 *
 * @param fs   file system whose configuration is used to build the codec factory
 * @param file the input file being considered
 * @return {@code true} if no codec matches the file or the matching codec is a
 *         {@link SplittableCompressionCodec}; {@code false} otherwise
 */
protected boolean isSplitable(FileSystem fs, Path file) {
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(fs.getConf());
    final CompressionCodec matched = codecFactory.getCodec(file);
    return matched == null || matched instanceof SplittableCompressionCodec;
}
}
/**
 * Refuses to split any file for which a compression codec is registered.
 *
 * @param context job context supplying the configuration for the codec lookup
 * @param file    the input file being considered
 * @return {@code true} if the codec lookup yields nothing, {@code false} if a codec is found
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec detected =
            new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final boolean uncompressed = (detected == null);
    return uncompressed;
}
/**
 * Decides whether {@code file} may be split, based on the codec registered for its path.
 *
 * @param context job context whose configuration is used to build the codec factory
 * @param file    the input file being considered
 * @return {@code true} if no codec matches the file or the matching codec is a
 *         {@link SplittableCompressionCodec}; {@code false} otherwise
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    final CompressionCodec matched = codecFactory.getCodec(file);
    return matched == null || matched instanceof SplittableCompressionCodec;
}
/**
 * Reports whether the given file can be split.
 * <p>
 * Files with no registered codec are splittable; files with a codec are splittable only when
 * that codec is a {@link SplittableCompressionCodec}.
 * </p>
 *
 * @param context job context supplying the configuration for the codec lookup
 * @param file    the candidate input file
 * @return whether {@code file} may be split
 */
@Override
protected boolean isSplitable(JobContext context, Path file) {
    final CompressionCodec detected =
            new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    final boolean splittable;
    if (detected != null) {
        splittable = detected instanceof SplittableCompressionCodec;
    } else {
        splittable = true;
    }
    return splittable;
}