/**
 * Flattens the records of a Text/BytesWritable sequence-file set into a
 * single local binary file.
 * <p>
 * For every record the output receives the key, parsed as a decimal
 * {@code long} (written as 8 bytes, high byte first, by
 * {@link DataOutputStream#writeLong(long)}), followed by the first 512 bytes
 * of the value's backing array. Both the input location and the output path
 * are hard-coded.
 * <p>
 * NOTE(review): {@code BytesWritable.getBytes()} returns the backing array,
 * which may be longer than the valid data. A value whose backing array is
 * shorter than 512 bytes makes {@code System.arraycopy} throw; a value with
 * fewer than 512 valid bytes but a larger backing array would be copied
 * including padding. Presumably every record holds exactly 512 bytes;
 * confirm against the job that produced the data.
 *
 * @throws IOException
 *             if the input cannot be read or the output cannot be written
 * @throws NumberFormatException
 *             if a record key is not a parsable {@code long}
 */
public static void extractSequenceFileData() throws IOException {
    // Number of value bytes emitted per record.
    final int recordBytes = 512;

    final URI[] paths = TextBytesSequenceFileUtility.getFiles(
            "hdfs://seurat/data/flickr-all-geo-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.seq", "part-m-");
    final File output = new File("/Volumes/My Book/flickr46m-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.dat");

    final DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
    try {
        final byte[] tmp = new byte[recordBytes];

        for (final URI p : paths) {
            System.out.println("Starting file " + p);

            final TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(p, true);
            for (final Entry<Text, BytesWritable> rec : sf) {
                final long id = Long.parseLong(rec.getKey().toString().trim());

                // Copy before writing anything so that a too-short value
                // fails without leaving a dangling id in the output.
                System.arraycopy(rec.getValue().getBytes(), 0, tmp, 0, tmp.length);

                dos.writeLong(id);
                dos.write(tmp);
            }

            // Push each completed input file to disk before starting the next.
            dos.flush();
        }
    } finally {
        // Always release the file handle, even when reading or parsing fails
        // part-way through; previously the stream leaked on any exception.
        dos.close();
    }
}
/**
 * Flattens the records of a Text/BytesWritable sequence-file set into a
 * single local binary file.
 * <p>
 * For every record the output receives the key, parsed as a decimal
 * {@code long} (written as 8 bytes, high byte first, by
 * {@link DataOutputStream#writeLong(long)}), followed by the first 512 bytes
 * of the value's backing array. Both the input location and the output path
 * are hard-coded.
 * <p>
 * NOTE(review): {@code BytesWritable.getBytes()} returns the backing array,
 * which may be longer than the valid data. A value whose backing array is
 * shorter than 512 bytes makes {@code System.arraycopy} throw; a value with
 * fewer than 512 valid bytes but a larger backing array would be copied
 * including padding. Presumably every record holds exactly 512 bytes;
 * confirm against the job that produced the data.
 *
 * @throws IOException
 *             if the input cannot be read or the output cannot be written
 * @throws NumberFormatException
 *             if a record key is not a parsable {@code long}
 */
public static void extractSequenceFileData() throws IOException {
    // Number of value bytes emitted per record.
    final int recordBytes = 512;

    final URI[] paths = TextBytesSequenceFileUtility.getFiles(
            "hdfs://seurat/data/flickr-all-geo-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.seq", "part-m-");
    final File output = new File("/Volumes/My Book/flickr46m-vlad64-pca128-pq16x8-indexer-mirflickr25k-sift1x.dat");

    final DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(output)));
    try {
        final byte[] tmp = new byte[recordBytes];

        for (final URI p : paths) {
            System.out.println("Starting file " + p);

            final TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(p, true);
            for (final Entry<Text, BytesWritable> rec : sf) {
                final long id = Long.parseLong(rec.getKey().toString().trim());

                // Copy before writing anything so that a too-short value
                // fails without leaving a dangling id in the output.
                System.arraycopy(rec.getValue().getBytes(), 0, tmp, 0, tmp.length);

                dos.writeLong(id);
                dos.write(tmp);
            }

            // Push each completed input file to disk before starting the next.
            dos.flush();
        }
    } finally {
        // Always release the file handle, even when reading or parsing fails
        // part-way through; previously the stream leaked on any exception.
        dos.close();
    }
}