/**
 * Collects the files whose names start with "part" beneath each location of
 * the form {@code paths[i]/subdir}.
 *
 * @param paths
 *            the base paths to look under
 * @param subdir
 *            the sub-directory name combined with each base path
 * @return the paths to the part files
 * @throws IOException
 *             propagated from {@code SequenceFileUtility.getFilePaths}
 */
public static Path[] getInputPaths(String[] paths, String subdir) throws IOException {
	final Path[] partFiles = SequenceFileUtility.getFilePaths(paths, subdir, "part");
	return partFiles;
}
/**
 * @return the output file location, built from the URI form of the
 *         {@code output} field
 */
public Path getOutputPath() {
	final String outputUri = SequenceFileUtility.convertToURI(output).toString();
	return new Path(outputUri);
}
/**
 * Extracts file to a directory. Read mode only.
 * <p>
 * Resolves {@code uriOrPath} to a filesystem and path and delegates to the
 * filesystem-based overload. When {@code uriOrPath} is {@code null}, both
 * the filesystem and the path are passed on as {@code null}.
 *
 * @param uriOrPath
 *            path or uri to extract to; may be {@code null}
 * @param naming
 *            the naming strategy
 * @param extrState
 *            the extraction state
 * @param addExtension
 *            if true, then file extensions are added to each record
 *            automatically
 * @param offset
 *            offset from which to start. Can be used to reduce number of
 *            files extracted.
 * @throws IOException
 */
public void exportData(String uriOrPath, NamingStrategy naming, ExtractionState extrState, boolean addExtension,
		long offset) throws IOException
{
	final FileSystem targetFs;
	final Path targetPath;

	if (uriOrPath == null) {
		targetFs = null;
		targetPath = null;
	} else {
		final URI target = convertToURI(uriOrPath);
		targetFs = getFileSystem(target);
		targetPath = new Path(target.toString());
	}

	exportData(targetFs, targetPath, naming, extrState, addExtension, offset);
}
public SequenceFileUtility(String uriOrPath, CompressionType compressionType, Map<String, String> metadata) throws IOException { this.compressionType = compressionType; setup(convertToURI(uriOrPath), false); }
/**
 * When both the {@code replace} flag and {@code initial} are true,
 * recursively deletes the location named by {@code output}.
 * <p>
 * Deletion is best-effort: an {@link IOException} does not abort the caller,
 * but it is reported on standard error rather than silently discarded.
 *
 * @param initial
 *            whether this is the initial validation pass; nothing is deleted
 *            otherwise
 */
private void validate(boolean initial) {
	if (!replace || !initial) {
		return;
	}

	try {
		final URI outuri = SequenceFileUtility.convertToURI(output);
		final FileSystem fs = SequenceFileUtility.getFileSystem(outuri, new Configuration());
		fs.delete(new Path(outuri.toString()), true);
	} catch (final IOException e) {
		// Still non-fatal, but make the failure visible to the user.
		System.err.println("Warning: failed to delete existing output '" + output + "': " + e);
	}
}
/**
 * Find a record and write the value to a file.
 * <p>
 * Resolves {@code uriOrPath} to a filesystem and path and delegates to the
 * filesystem-based overload. When {@code uriOrPath} is {@code null}, both
 * are passed on as {@code null}.
 *
 * @param key
 *            the key of the record to look for
 * @param uriOrPath
 *            path or uri to write to; may be {@code null}
 * @param offset
 *            offset passed through to the filesystem-based overload
 * @return false if record not found, true otherwise.
 * @throws IOException
 */
public boolean findAndExport(K key, String uriOrPath, long offset) throws IOException {
	final FileSystem targetFs;
	final Path targetPath;

	if (uriOrPath == null) {
		targetFs = null;
		targetPath = null;
	} else {
		final URI target = convertToURI(uriOrPath);
		targetFs = getFileSystem(target);
		targetPath = new Path(target.toString());
	}

	return findAndExport(key, targetFs, targetPath, offset);
}
Path outpath = new Path(SequenceFileUtility.convertToURI(tmpFile.getAbsolutePath()).toString()); System.out.println("It is all going to: " + outpath); Path[] sequenceFiles = SequenceFileUtility.getFilePaths(sequenceFilePath, "part"); Configuration conf = new Configuration();
final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part"); final ExtractionState nps = new ExtractionState(); nps.setMaxFileExtract(max); final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility( path.toUri(), true); totalRecords += utility.getNumberRecords(); zos = SequenceFileUtility.openZipOutputStream(outputPathOrUri); if (queryKey == null) { if (zipMode) { utility.exportDataToZip(zos, np, nps, autoExtension, offset); } else { utility.exportData(outputPathOrUri, np, nps, autoExtension, offset); throw new UnsupportedOperationException("Not implemented yet"); } else { if (!utility.findAndExport(new Text(queryKey), outputPathOrUri, offset)) { if (offset == 0) System.err.format("Key '%s' was not found in the file.\n", queryKey);
final URI uri = SequenceFileUtility.convertToURI(input); final FileSystem fs = utility.getFileSystem(uri); final Path path = utility.getPath(uri); map.putAll(utility.appendFiles(fs, path, recurse, pathFilter, strategy.getKeyProvider())); utility.writePathMap(map); utility.close(); System.err.println("Created " + utility.getSequenceFilePath());
/**
 * Processes every "part" file found for {@code inputPathOrUri}.
 * <p>
 * For each file: when {@code options} is set, the extract policy derived
 * from it is applied and written to standard output using {@code delim};
 * otherwise the keys are printed one per line, preceded by their offsets
 * when {@code printOffsets} is set.
 *
 * @throws IOException
 */
@Override
public void execute() throws IOException {
	final Path[] partFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");

	for (final Path partFile : partFiles) {
		System.err.println("Outputting from seqfile: " + partFile);

		final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(partFile.toUri(),
				true);

		if (options != null) {
			utility.extract(ListModeOptions.listOptionsToExtractPolicy(options), System.out, delim);
			continue;
		}

		if (!printOffsets) {
			for (final Text key : utility.listKeys()) {
				System.out.println(key.toString());
			}
			continue;
		}

		for (final Entry<Text, Long> entry : utility.listKeysAndOffsets().entrySet()) {
			System.out.format("%10d %s\n", entry.getValue(), entry.getKey().toString());
		}
	}
}
} // closes the enclosing type, whose header lies outside this snippet
private void setup(URI uri, boolean read, Map<String, String> metadata) throws IOException { fileSystem = getFileSystem(uri); sequenceFilePath = new Path(uri.toString()); reader = createReader(); final Text uuidText = reader.getMetadata().get(new Text(MetadataConfiguration.UUID_KEY)); if (uuidText != null) writer = createWriter(metadata);
/**
 * Extracts file to a directory. Read mode only.
 * <p>
 * Convenience form that uses {@code NamingStrategy.KEY}, a fresh
 * {@code ExtractionState}, no automatic file extensions and an offset of 0.
 *
 * @param uriOrPath
 *            path or uri to extract to.
 * @throws IOException
 */
public void exportData(String uriOrPath) throws IOException {
	final NamingStrategy keyNaming = NamingStrategy.KEY;
	final ExtractionState freshState = new ExtractionState();

	exportData(uriOrPath, keyNaming, freshState, false, 0);
}
/**
 * Recursively appends the file(s) at {@code path}.
 * <p>
 * A plain file is appended (subject to {@code pathFilter}) under the key
 * supplied by {@code keyProvider} and recorded in {@code addedFiles}.
 * Anything else is listed and each entry is processed recursively.
 * <p>
 * Directory handling is best-effort: an exception raised while listing or
 * processing the entries is reported on standard error together with its
 * cause, and processing of that directory stops. JVM {@link Error}s are no
 * longer caught and propagate to the caller.
 *
 * @param fs
 *            the filesystem of the files being added
 * @param path
 *            the file or directory to process
 * @param base
 *            the base path handed to the key provider
 * @param pathFilter
 *            filter for omitting files; may be {@code null}
 * @param keyProvider
 *            supplies the key for each file
 * @param addedFiles
 *            receives each appended path and its key
 * @throws IOException
 *             if appending a plain file at {@code path} fails
 */
private void appendFiles(FileSystem fs, Path path, Path base, PathFilter pathFilter, KeyProvider<K> keyProvider,
		Map<Path, K> addedFiles) throws IOException
{
	if (fs.isFile(path)) {
		if (pathFilter == null || pathFilter.accept(path)) {
			final K key = keyProvider.getKey(fs, path, base);
			appendFile(key, fs, path);
			addedFiles.put(path, key);
		}
		return;
	}

	try {
		final FileStatus[] status = fs.listStatus(path);
		for (final FileStatus stat : status) {
			appendFiles(fs, stat.getPath(), base, pathFilter, keyProvider, addedFiles);
		}
	} catch (final Exception e) {
		// Exception rather than Throwable: do not mask OutOfMemoryError and friends.
		System.err.println("Failed listing status on path: " + path + " (" + e + ")");
	}
}
zos = openZipOutputStream(uriOrPath); exportDataToZip(zos, naming, state, addExtension, offset); } finally { if (zos != null)
/**
 * Get the filesystem associated with a uri, using this instance's
 * {@code config}.
 *
 * @param uri
 *            the uri to resolve
 * @return the filesystem
 * @throws IOException
 */
public FileSystem getFileSystem(URI uri) throws IOException {
	final FileSystem resolved = getFileSystem(uri, config);
	return resolved;
}
/**
 * Append files to a sequenceFile.
 * <p>
 * Creates an insertion-ordered map, lets the collecting overload fill it,
 * and returns it.
 *
 * @param fs
 *            The filesystem of the files being added.
 * @param path
 *            The path of the file(s) being added.
 * @param recurse
 *            If true, then subdirectories are also searched
 * @param pathFilter
 *            Filter for omitting files. Can be null.
 * @param keyProvider
 *            Object that can return a key for a given file.
 * @return Paths and their respective keys for files that were added.
 * @throws IOException
 */
public Map<Path, K> appendFiles(FileSystem fs, Path path, boolean recurse, PathFilter pathFilter,
		KeyProvider<K> keyProvider) throws IOException
{
	final LinkedHashMap<Path, K> collected = new LinkedHashMap<Path, K>();

	appendFiles(fs, path, recurse, pathFilter, keyProvider, collected);

	return collected;
}
@Override public int run(String[] args) throws Exception { Path outpath = new Path(SequenceFileUtility.convertToURI(this.outputFilePath).toString()); System.out.println("It is all going to: " + outpath); Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputFilePath, "part");
public SequenceFileUtility(String uriOrPath, CompressionType compressionType) throws IOException { this.compressionType = compressionType; setup(convertToURI(uriOrPath), false); }
/**
 * Creates the file at the given location and wraps it in a
 * {@link ZipOutputStream} set to {@link Deflater#BEST_COMPRESSION}.
 * <p>
 * The filesystem is resolved with a fresh {@code Configuration}. The caller
 * receives the open stream; this method does not close it.
 *
 * @param uriOrPath
 *            path or uri of the zip file to create
 * @return the opened zip output stream
 * @throws IOException
 *             propagated from filesystem resolution or file creation
 */
public static ZipOutputStream openZipOutputStream(String uriOrPath) throws IOException {
	final URI target = convertToURI(uriOrPath);
	final FileSystem targetFs = getFileSystem(target, new Configuration());

	final ZipOutputStream zipStream = new ZipOutputStream(targetFs.create(new Path(target.toString())));
	zipStream.setLevel(Deflater.BEST_COMPRESSION);

	return zipStream;
}
/**
 * Find a record and write the value to a file.
 * <p>
 * Resolves {@code uriOrPath} to a filesystem and path and delegates to the
 * filesystem-based overload. When {@code uriOrPath} is {@code null}, both
 * are passed on as {@code null}.
 *
 * @param key
 *            the key of the record to look for
 * @param uriOrPath
 *            path or uri to write to; may be {@code null}
 * @param offset
 *            offset passed through to the filesystem-based overload
 * @return false if record not found, true otherwise.
 * @throws IOException
 */
public boolean findAndExport(K key, String uriOrPath, long offset) throws IOException {
	final FileSystem targetFs;
	final Path targetPath;

	if (uriOrPath == null) {
		targetFs = null;
		targetPath = null;
	} else {
		final URI target = convertToURI(uriOrPath);
		targetFs = getFileSystem(target);
		targetPath = new Path(target.toString());
	}

	return findAndExport(key, targetFs, targetPath, offset);
}