/**
 * Prepares the output sequence file for writing. If {@code force} is set, any
 * existing file at the target location is deleted (recursively) first.
 * 
 * @throws IOException if the existing file cannot be removed or the writer
 *             cannot be opened
 */
@Override
public void start() throws IOException {
	// When forcing, clear out any previous output at the target location.
	if (force) {
		final URI target = SequenceFileUtility.convertToURI(sequenceFile);
		final FileSystem fileSystem = getFileSystem(target);
		fileSystem.delete(new Path(target.toString()), true);
	}

	// false == open for writing (not read-only)
	utility = new TextBytesSequenceFileUtility(sequenceFile, false);
}
/**
 * Encodes the given image as PNG and appends it to the sequence file, keyed by
 * the running count of images processed so far.
 * 
 * @param image the collection entry holding the image to store
 * @return the key the image was stored under
 * @throws Exception if encoding or appending fails
 */
@Override
public String process(ImageCollectionEntry<T> image) throws Exception {
	// Encode the image as PNG into an in-memory buffer.
	final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
	ImageUtilities.write(image.image, "png", buffer);

	// Key each record by the number of images seen before this one.
	final String key = "" + this.seen;
	final BytesWritable value = new BytesWritable(buffer.toByteArray());
	utility.appendData(new Text(key), value);
	this.seen++;

	return key;
}
/**
 * Finishes the collection: closes the underlying sequence-file writer opened in
 * {@code start()}, flushing any buffered records.
 * 
 * @throws IOException if closing the sequence file fails
 */
@Override public void end() throws IOException{ utility.close(); }
/**
 * Reads every record from the "part-m-" sequence files of a hard-coded HDFS
 * dataset and concatenates them into a single flat local file. Each output
 * record is the 8-byte id (parsed from the record key) followed by exactly 512
 * bytes of feature data.
 * <p>
 * Fix: the output stream and each sequence-file reader are now closed even when
 * an exception occurs part-way through; previously both leaked on error.
 * 
 * @throws IOException if reading or writing fails
 */
public static void extractSequenceFileData() throws IOException {
	final URI[] paths = TextBytesSequenceFileUtility
			.getFiles("hdfs://seurat/data/flickr-all-geo-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.seq",
					"part-m-");
	final File output = new File("/Volumes/My Book/flickr46m-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.dat");

	final DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
	try {
		// assumes every record's value holds at least 512 bytes — TODO confirm
		final byte[] tmp = new byte[512];

		for (final URI p : paths) {
			System.out.println("Starting file " + p);

			final TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(p, true);
			try {
				for (final Entry<Text, BytesWritable> rec : sf) {
					final long id = Long.parseLong(rec.getKey().toString().trim());
					// Copy a fixed-size window; BytesWritable's backing array
					// may be larger than the logical record length.
					System.arraycopy(rec.getValue().getBytes(), 0, tmp, 0, tmp.length);

					dos.writeLong(id);
					dos.write(tmp);
				}
			} finally {
				sf.close();
			}

			dos.flush();
		}
	} finally {
		dos.close();
	}
}
@Override public int run(String[] args) throws Exception { HadoopEXIFOptions options = new HadoopEXIFOptions(args,true); options.prepare(); // String clusterFileString = options.getInputString(); Path[] paths = options.getInputPaths(); TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri() , true); Map<String,String> metadata = new HashMap<String,String>(); if (util.getUUID() != null) metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID()); metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/imageexif"); Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata,this.getConf()); // job.setOutputValueClass(Text.class); job.setJarByClass(this.getClass()); job.setMapperClass(HadoopEXIF.HadoopEXIFMapper.class); job.getConfiguration().setStrings(ARGS_KEY, args); job.setNumReduceTasks(0); SequenceFileOutputFormat.setCompressOutput(job, false); long start,end; start = System.currentTimeMillis(); job.waitForCompletion(true); end = System.currentTimeMillis(); System.out.println("Took: " + (end - start) + "ms"); return 0; }
/**
 * Configures and runs the local-feature-extraction MapReduce job.
 * <p>
 * Fixes: the read-only sequence-file utility used to recover the input UUID is
 * now closed (it previously leaked); the job's success flag is propagated as
 * the exit code instead of always returning 0.
 * 
 * @param args command-line arguments, parsed by
 *            {@link HadoopLocalFeaturesToolOptions}
 * @return 0 on job success, 1 on failure
 * @throws Exception if option parsing or job submission fails
 */
@Override
public int run(String[] args) throws Exception {
	final HadoopLocalFeaturesToolOptions options = new HadoopLocalFeaturesToolOptions(args, true);
	options.prepare();

	final Path[] paths = options.getInputPaths();

	// Carry the input's UUID (if any) through to the output metadata.
	final Map<String, String> metadata = new HashMap<String, String>();
	final TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
	try {
		if (util.getUUID() != null)
			metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
	} finally {
		util.close();
	}
	// Content type records the feature mode and whether output is ascii or binary.
	metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/localfeatures-" + options.getMode().name()
			+ "-" + (options.isAsciiMode() ? "ascii" : "bin"));

	final Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata, this.getConf());
	job.setJarByClass(this.getClass());
	options.mapperModeOp.prepareJobMapper(job, LocalFeaturesMapper.class);
	job.getConfiguration().setStrings(ARGS_KEY, args);
	job.setNumReduceTasks(0); // map-only job
	SequenceFileOutputFormat.setCompressOutput(job, !options.dontcompress);

	final long start = System.currentTimeMillis();
	// Propagate job success/failure instead of unconditionally returning 0.
	final boolean success = job.waitForCompletion(true);
	final long end = System.currentTimeMillis();
	System.out.println("Took: " + (end - start) + "ms");

	// Preserved behaviour: the extractor is serialised regardless of job outcome.
	options.serialiseExtractor();

	return success ? 0 : 1;
}
/**
 * Reads every record from the "part-m-" sequence files of a hard-coded HDFS
 * dataset and concatenates them into a single flat local file. Each output
 * record is the 8-byte id (parsed from the record key) followed by exactly 512
 * bytes of feature data.
 * <p>
 * Fix: the output stream and each sequence-file reader are now closed even when
 * an exception occurs part-way through; previously both leaked on error.
 * 
 * @throws IOException if reading or writing fails
 */
public static void extractSequenceFileData() throws IOException {
	final URI[] paths = TextBytesSequenceFileUtility
			.getFiles("hdfs://seurat/data/flickr-all-geo-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.seq",
					"part-m-");
	final File output = new File("/Volumes/My Book/flickr46m-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.dat");

	final DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
	try {
		// assumes every record's value holds at least 512 bytes — TODO confirm
		final byte[] tmp = new byte[512];

		for (final URI p : paths) {
			System.out.println("Starting file " + p);

			final TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(p, true);
			try {
				for (final Entry<Text, BytesWritable> rec : sf) {
					final long id = Long.parseLong(rec.getKey().toString().trim());
					// Copy a fixed-size window; BytesWritable's backing array
					// may be larger than the logical record length.
					System.arraycopy(rec.getValue().getBytes(), 0, tmp, 0, tmp.length);

					dos.writeLong(id);
					dos.write(tmp);
				}
			} finally {
				sf.close();
			}

			dos.flush();
		}
	} finally {
		dos.close();
	}
}
/**
 * Configures and runs the local-feature-extraction MapReduce job.
 * <p>
 * Fixes: the read-only sequence-file utility used to recover the input UUID is
 * now closed (it previously leaked); the job's success flag is propagated as
 * the exit code instead of always returning 0.
 * 
 * @param args command-line arguments, parsed by
 *            {@link HadoopLocalFeaturesToolOptions}
 * @return 0 on job success, 1 on failure
 * @throws Exception if option parsing or job submission fails
 */
@Override
public int run(String[] args) throws Exception {
	final HadoopLocalFeaturesToolOptions options = new HadoopLocalFeaturesToolOptions(args, true);
	options.prepare();

	final Path[] paths = options.getInputPaths();

	// Carry the input's UUID (if any) through to the output metadata.
	final Map<String, String> metadata = new HashMap<String, String>();
	final TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
	try {
		if (util.getUUID() != null)
			metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
	} finally {
		util.close();
	}
	// Content type records the feature mode and whether output is ascii or binary.
	metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/localfeatures-" + options.getMode().name()
			+ "-" + (options.isAsciiMode() ? "ascii" : "bin"));

	final Job job = TextBytesJobUtil.createJob(paths, options.getOutputPath(), metadata, this.getConf());
	job.setJarByClass(this.getClass());
	options.mapperModeOp.prepareJobMapper(job, LocalFeaturesMapper.class);
	job.getConfiguration().setStrings(ARGS_KEY, args);
	job.setNumReduceTasks(0); // map-only job
	SequenceFileOutputFormat.setCompressOutput(job, !options.dontcompress);

	final long start = System.currentTimeMillis();
	// Propagate job success/failure instead of unconditionally returning 0.
	final boolean success = job.waitForCompletion(true);
	final long end = System.currentTimeMillis();
	System.out.println("Took: " + (end - start) + "ms");

	// Preserved behaviour: the extractor is serialised regardless of job outcome.
	options.serialiseExtractor();

	return success ? 0 : 1;
}
@Override public int run(String[] args) throws Exception { // String clusterFileString = options.getInputString(); Path[] paths = new Path[]{new Path(args[0])}; TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri() , true); HadoopToolsUtil.removeFile(args[1]); Job job = TextBytesJobUtil.createJob(paths, new Path(args[1]), new HashMap<String,String>(), this.getConf()); job.setJarByClass(this.getClass()); job.setMapperClass(CounterMapper.class); job.setReducerClass(CounterReducer.class); SequenceFileOutputFormat.setCompressOutput(job, false); long start,end; start = System.currentTimeMillis(); job.waitForCompletion(true); end = System.currentTimeMillis(); System.out.println("Took: " + (end - start) + "ms"); return 0; }
// NOTE(review): fragment of a larger method (its signature is outside this view) —
// opens a read-only view of the first input sequence file to recover its UUID for
// the output metadata. The reader is presumably closed elsewhere — TODO confirm.
final TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri(), true);
final Map<String, String> metadata = new HashMap<String, String>();
if (util.getUUID() != null)
	metadata.put(MetadataConfiguration.UUID_KEY, util.getUUID());
// Content type records the cluster type and feature extension of the quantised data.
metadata.put(MetadataConfiguration.CONTENT_TYPE_KEY, "application/quantised-" + options.getClusterType().toString().toLowerCase() + "-" + options.getExtension());
@Override public int run(String[] args) throws Exception { // String clusterFileString = options.getInputString(); Path[] paths = new Path[]{new Path(args[0])}; TextBytesSequenceFileUtility util = new TextBytesSequenceFileUtility(paths[0].toUri() , true); HadoopToolsUtil.removeFile(args[1]); Job job = TextBytesJobUtil.createJob(paths, new Path(args[1]), new HashMap<String,String>(), this.getConf()); job.setJarByClass(this.getClass()); job.setMapperClass(CounterMapper.class); job.setReducerClass(CounterReducer.class); SequenceFileOutputFormat.setCompressOutput(job, false); long start,end; start = System.currentTimeMillis(); job.waitForCompletion(true); end = System.currentTimeMillis(); System.out.println("Took: " + (end - start) + "ms"); return 0; }
/**
 * Lists the keys (and optionally their byte offsets) of every matching
 * sequence file under the input path, or extracts record data to stdout when
 * list options are configured.
 * 
 * @throws IOException if the sequence files cannot be read
 */
@Override
public void execute() throws IOException {
	// All "part" files under the input path are processed in turn.
	final Path[] sequenceFiles = SequenceFileUtility.getFilePaths(inputPathOrUri, "part");

	for (final Path path : sequenceFiles) {
		System.err.println("Outputting from seqfile: " + path);

		// true == open read-only
		final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(path.toUri(), true);

		if (options == null) {
			// No extraction options: just list keys, optionally with offsets.
			if (printOffsets) {
				for (final Entry<Text, Long> e : utility.listKeysAndOffsets().entrySet())
					System.out.format("%10d %s\n", e.getValue(), e.getKey().toString());
			} else {
				for (final Text t : utility.listKeys())
					System.out.println(t.toString());
			}
		} else {
			// Extraction options present: dump record contents to stdout.
			utility.extract(ListModeOptions.listOptionsToExtractPolicy(options), System.out, delim);
		}
	}
}
// NOTE(review): closing brace of the enclosing class (its opening is outside this view).
}
/**
 * Prints information about a sequence file — UUID, metadata, record count and
 * compression settings — according to the selected info options.
 * 
 * @throws Exception if the sequence file cannot be read
 */
@Override
public void execute() throws Exception {
	// true == open read-only
	final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(inputPathOrUri, true);

	// No explicit options means "print everything".
	if (options == null) {
		options = new ArrayList<InfoModeOptions>();
		for (final InfoModeOptions o : InfoModeOptions.values())
			options.add(o);
	}

	// The UUID is only printed separately when full metadata isn't requested
	// (the metadata dump would include it anyway).
	if (options.contains(InfoModeOptions.GUID) && !options.contains(InfoModeOptions.METADATA)) {
		System.out.println("UUID: " + utility.getUUID());
	}

	if (options.contains(InfoModeOptions.METADATA)) {
		final Map<Text, Text> metadata = utility.getMetadata();
		System.out.println("Metadata:");
		for (final Entry<Text, Text> e : metadata.entrySet()) {
			System.out.println(e.getKey() + ": " + e.getValue());
		}
	}

	if (options.contains(InfoModeOptions.NRECORDS)) {
		System.out.println("NRecords: " + utility.getNumberRecords());
	}

	if (options.contains(InfoModeOptions.COMPRESSION_CODEC)) {
		System.out.println("Compression codec: " + utility.getCompressionCodecClass());
	}

	if (options.contains(InfoModeOptions.COMPRESSION_TYPE)) {
		System.out.println("Compression type: " + utility.getCompressionType());
	}
}
// NOTE(review): closing brace of the enclosing class (its opening is outside this view).
}
// NOTE(review): fragment of a larger method (its signature is outside this view) —
// opens the output sequence file for writing (false == not read-only) and prepares
// an insertion-ordered map, presumably from source paths to the keys they will be
// stored under — TODO confirm against the full method.
final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(outputPathOrUri, false);
final Map<Path, Text> map = new LinkedHashMap<Path, Text>();
// NOTE(review): this span appears to fuse fragments of two different methods — the
// second declaration of `utility` would not compile in a single scope, and the loop
// and trailing `if` are both left open. Verify against the original file; comments
// only, code untouched.
for (final Path path : sequenceFiles) {
	System.out.println("... Counting from file: " + path);
	// Accumulate the record count across all matching sequence files (read-only).
	final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(
			path.toUri(), true);
	totalRecords += utility.getNumberRecords();
	System.out.println("Extracting from " + path.getName());
	// Second fragment: re-opens the file read-only for extraction.
	final SequenceFileUtility<Text, BytesWritable> utility = new TextBytesSequenceFileUtility(path.toUri(), true);
	// Branches on whether a specific key was requested (body outside this view).
	if (queryKey == null) {