/**
 * Writes the shared sequence-file fixture once before the tests in this class run.
 *
 * <p>The file holds {@code COUNT} records; record {@code i} has the key {@code i}
 * (as a {@code LongWritable}) and the decimal string of {@code i} as its {@code Text} value.
 *
 * @throws IOException if the file system cannot be obtained or the file cannot be written
 */
@BeforeClass
public static void testWriteSequenceFile() throws IOException {
  final Configuration conf = new Configuration();
  final URI target = file().toURI();
  final FileSystem targetFs = FileSystem.get(target, conf);
  final Path targetPath = new Path(target.toString());
  try (SequenceFile.Writer out =
      new SequenceFile.Writer(targetFs, conf, targetPath, LongWritable.class, Text.class)) {
    // A single key/value pair is reused for every record; append() serializes the current state.
    final LongWritable recordKey = new LongWritable();
    final Text recordValue = new Text();
    int index = 0;
    while (index < COUNT) {
      recordKey.set(index);
      recordValue.set(Integer.toString(index));
      out.append(recordKey, recordValue);
      ++index;
    }
  }
}
/**
 * Opens a new sequence file at the given location.
 *
 * <p>The writer is built from this object's file system and job configuration. The
 * "creating file" message is logged only after the writer has been constructed.
 *
 * @param filepath   location of the sequence file to create
 * @param keyClass   class of the keys that will be appended
 * @param valueClass class of the values that will be appended
 * @return the open writer; this method does not close it
 * @throws IOException if the writer cannot be created
 */
protected SequenceFile.Writer createNewFile(
    Path filepath,
    Class<? extends WritableComparable> keyClass,
    Class<? extends Writable> valueClass) throws IOException {
  final SequenceFile.Writer created =
      new SequenceFile.Writer(getFileSystem(), getJobConf(), filepath, keyClass, valueClass);
  final String message = "creating file " + filepath.toString();
  LOG.info(message);
  return created;
}
// Excerpt of statements that set up crawl-db sequence files.
//  - The first statement builds the path <workdir>/<CRAWLDB_DIR_NAME>/current/<name>; in this
//    visible fragment the result is not assigned to anything.
//    NOTE(review): the left-hand side appears to have been cut off - confirm against the full source.
//  - Three SequenceFile.Writer instances are then created (for crawldb, crawl and generate), all
//    with Text keys and CrawlDatum values. Only the first is kept in a variable (crawldbOut).
//  - None of the writers is closed in the visible code.
//    TODO(review): verify the surrounding code closes them.
new Path(new Path (new Path(workdir, CRAWLDB_DIR_NAME), "current"), name); SequenceFile.Writer crawldbOut = new SequenceFile.Writer(fs, job, crawldb, Text.class, CrawlDatum.class); new SequenceFile.Writer(fs, job, crawl, Text.class, CrawlDatum.class); new SequenceFile.Writer(fs, job, generate, Text.class, CrawlDatum.class);
/**
 * Creates {@code srcs} sequence-file writers under {@code testdir}.
 *
 * <p>File {@code i} is named {@code Integer.toString(i + 10, 36)} (so index 0 is {@code "a"},
 * index 1 is {@code "b"}, and so on); the chosen paths are stored into {@code src}. Every
 * writer uses {@code IntWritable} keys and values.
 *
 * <p>If creating any writer fails, the writers opened so far are closed before the
 * failure is rethrown, so no half-built set of open files is left behind.
 *
 * @param testdir directory that will contain the files
 * @param conf    configuration used to resolve the file system and create the writers
 * @param srcs    number of writers to create
 * @param src     output array, filled with the path of each created file; must hold at
 *                least {@code srcs} elements
 * @return the open writers, in the same order as {@code src}; the caller must close them
 * @throws IOException if the file system cannot be resolved or a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir, Configuration conf, int srcs, Path[] src)
    throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  try {
    for (int i = 0; i < srcs; ++i) {
      out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf, src[i],
          IntWritable.class, IntWritable.class);
    }
  } catch (IOException | RuntimeException e) {
    // Release whatever was opened before the failure; keep the original exception primary.
    for (SequenceFile.Writer opened : out) {
      if (opened != null) {
        try {
          opened.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
      }
    }
    throw e;
  }
  return out;
}
/**
 * Materializes a collection and writes it to a sequence file.
 *
 * <p>Each element is converted with the collection type's output map function and appended
 * under a {@code NullWritable} key. The value class of the file comes from the type's converter.
 * The writer is closed even when materializing, mapping or appending fails.
 *
 * @param fs         file system that will hold the file; its configuration is used for the writer
 * @param path       location of the sequence file to create
 * @param collection collection whose materialized elements are written
 * @throws IOException if the file cannot be created, written or closed
 */
private void writeSequenceFileFromPCollection(final FileSystem fs, final Path path, final PCollection collection)
    throws IOException {
  final PType pType = collection.getPType();
  final Converter converter = pType.getConverter();
  final Class valueClass = converter.getValueClass();
  // try-with-resources guarantees close() on every exit path.
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, fs.getConf(), path, NullWritable.class, valueClass)) {
    for (final Object o : collection.materialize()) {
      final Object value = pType.getOutputMapFn().map(o);
      writer.append(NullWritable.get(), value);
    }
  }
}
/**
 * Returns the writer used for vectors of the given cluster, creating it on first use.
 *
 * <p>A newly created writer targets the file {@code part-m-0} inside the cluster's directory,
 * uses {@code LongWritable} keys and {@code VectorWritable} values, and is remembered in
 * {@code writersForClusters} so later calls for the same cluster reuse it.
 *
 * @param clusterId identifier of the cluster
 * @return the cached or newly created writer for that cluster
 * @throws IOException if a new writer cannot be created
 */
private Writer findWriterForVector(String clusterId) throws IOException {
  final Path clusterDirectory =
      PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId);
  final Writer cached = writersForClusters.get(clusterId);
  if (cached != null) {
    return cached;
  }
  final Path partFile = new Path(clusterDirectory, new Path("part-m-0"));
  final Writer created =
      new Writer(fileSystem, conf, partFile, LongWritable.class, VectorWritable.class);
  writersForClusters.put(clusterId, created);
  return created;
}
/**
 * Writes a clustering policy to the policy file inside the given directory.
 *
 * <p>The file is a sequence file named {@code POLICY_FILE_NAME} that contains a single
 * record: an empty {@code Text} key and the policy wrapped in a
 * {@code ClusteringPolicyWritable}. The writer is closed even if the append fails.
 *
 * @param policy policy to persist
 * @param path   directory that will contain the policy file
 * @throws IOException if the file cannot be created, written or closed
 */
public static void writePolicy(ClusteringPolicy policy, Path path) throws IOException {
  Path policyPath = new Path(path, POLICY_FILE_NAME);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(policyPath.toUri(), config);
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, policyPath, Text.class, ClusteringPolicyWritable.class);
  boolean threw = true;
  try {
    writer.append(new Text(), new ClusteringPolicyWritable(policy));
    threw = false;
  } finally {
    // Swallow a close() failure only when append() already threw, so the first error wins.
    Closeables.close(writer, threw);
  }
}
// Unmatched closing brace kept from the original line; it closes a scope opened outside this excerpt.
}
/**
 * Writes a single vector to a sequence file.
 *
 * <p>The file holds one record with the {@code IntWritable} key {@code 0} and the vector
 * wrapped in a {@code VectorWritable}. try-with-resources is used so that, if both the
 * append and the close fail, the append failure is reported and the close failure is
 * attached as suppressed instead of replacing it.
 *
 * @param path location of the sequence file to create
 * @param v    vector to store
 * @throws IOException if the file cannot be created, written or closed
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
    writer.append(new IntWritable(0), new VectorWritable(v));
  }
}
/**
 * Writes a clustering policy to the policy file inside the given directory.
 *
 * <p>The file is a sequence file named {@code POLICY_FILE_NAME} that contains a single
 * record: an empty {@code Text} key and the policy wrapped in a
 * {@code ClusteringPolicyWritable}. The writer is closed even if the append fails.
 *
 * @param policy policy to persist
 * @param path   directory that will contain the policy file
 * @throws IOException if the file cannot be created, written or closed
 */
public static void writePolicy(ClusteringPolicy policy, Path path) throws IOException {
  Path policyPath = new Path(path, POLICY_FILE_NAME);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(policyPath.toUri(), config);
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, policyPath, Text.class, ClusteringPolicyWritable.class);
  boolean threw = true;
  try {
    writer.append(new Text(), new ClusteringPolicyWritable(policy));
    threw = false;
  } finally {
    // Swallow a close() failure only when append() already threw, so the first error wins.
    Closeables.close(writer, threw);
  }
}
// Unmatched closing brace kept from the original line; it closes a scope opened outside this excerpt.
}
public SequenceFileRecordWriter(FileSystem fs, BSPJob job, String name) throws IOException, ClassNotFoundException { Configuration conf = job.getConfiguration(); writer = new SequenceFile.Writer(fs, conf, new Path( conf.get("bsp.output.dir"), name), conf.getClassByName(conf .get("bsp.output.key.class")), conf.getClassByName(conf .get("bsp.output.value.class"))); }
/**
 * Creates {@code srcs} sequence-file writers under {@code testdir}.
 *
 * <p>File {@code i} is named {@code Integer.toString(i + 10, 36)} (so index 0 is {@code "a"},
 * index 1 is {@code "b"}, and so on); the chosen paths are stored into {@code src}. Every
 * writer uses {@code IntWritable} keys and values.
 *
 * <p>If creating any writer fails, the writers opened so far are closed before the
 * failure is rethrown, so no half-built set of open files is left behind.
 *
 * @param testdir directory that will contain the files
 * @param conf    configuration used to resolve the file system and create the writers
 * @param srcs    number of writers to create
 * @param src     output array, filled with the path of each created file; must hold at
 *                least {@code srcs} elements
 * @return the open writers, in the same order as {@code src}; the caller must close them
 * @throws IOException if the file system cannot be resolved or a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir, Configuration conf, int srcs, Path[] src)
    throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  try {
    for (int i = 0; i < srcs; ++i) {
      out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf, src[i],
          IntWritable.class, IntWritable.class);
    }
  } catch (IOException | RuntimeException e) {
    // Release whatever was opened before the failure; keep the original exception primary.
    for (SequenceFile.Writer opened : out) {
      if (opened != null) {
        try {
          opened.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
      }
    }
    throw e;
  }
  return out;
}
/**
 * Create the named file with write-progress reporter for storing {@link ITuple}s with the given schema.
 *
 * <p>The supplied metadata is first passed to {@code fillMetadata} together with the schema
 * (presumably to record the schema in the file header - confirm against {@code fillMetadata}).
 * The underlying {@code SequenceFile.Writer} is then created with {@code UNUSED} as both key
 * and value class, and finally {@code init(conf, schema)} is called.
 *
 * @param fs          file system on which the file is created
 * @param conf        configuration passed to the underlying writer and to {@code init}
 * @param name        path of the file to create
 * @param schema      schema of the tuples that will be stored
 * @param bufferSize  buffer size passed through to the underlying writer
 * @param replication replication factor passed through to the underlying writer
 * @param blockSize   block size passed through to the underlying writer
 * @param progress    progress reporter passed through to the underlying writer
 * @param metadata    metadata object handed to {@code fillMetadata} and then to the underlying writer
 * @throws IOException if the underlying writer cannot be created or initialization fails
 */
public Writer(FileSystem fs, Configuration conf, Path name, Schema schema, int bufferSize, short replication, long blockSize, Progressable progress, SequenceFile.Metadata metadata) throws IOException { fillMetadata(metadata, schema); innerWriter = new SequenceFile.Writer(fs, conf, name, UNUSED, UNUSED, bufferSize, replication, blockSize, progress, metadata); init(conf, schema); }
/**
 * Stores one vector in a sequence file at the given path.
 *
 * <p>The file contains a single record: the {@code IntWritable} key {@code 0} and the vector
 * wrapped in a {@code VectorWritable}. The writer is closed automatically.
 *
 * @param path location of the sequence file to create
 * @param v    vector to store
 * @throws IOException if the file cannot be created, written or closed
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  final FileSystem targetFs = path.getFileSystem(conf);
  try (SequenceFile.Writer out =
      new SequenceFile.Writer(targetFs, conf, path, IntWritable.class, VectorWritable.class)) {
    final IntWritable recordKey = new IntWritable(0);
    final VectorWritable recordValue = new VectorWritable(v);
    out.append(recordKey, recordValue);
  }
}
public ChunkedWriter(Configuration conf, int chunkSizeInMB, Path output) throws IOException { this.output = output; this.conf = conf; if (chunkSizeInMB > 1984) { chunkSizeInMB = 1984; } maxChunkSizeInBytes = chunkSizeInMB * 1024 * 1024; fs = FileSystem.get(conf); currentChunkID = 0; writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID), Text.class, Text.class); }
/**
 * Writes a clustering policy to the policy file inside the given directory.
 *
 * <p>The file is a sequence file named {@code POLICY_FILE_NAME} that contains a single
 * record: an empty {@code Text} key and the policy wrapped in a
 * {@code ClusteringPolicyWritable}. The writer is closed even if the append fails.
 *
 * @param policy policy to persist
 * @param path   directory that will contain the policy file
 * @throws IOException if the file cannot be created, written or closed
 */
public static void writePolicy(ClusteringPolicy policy, Path path) throws IOException {
  Path policyPath = new Path(path, POLICY_FILE_NAME);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(policyPath.toUri(), config);
  // try-with-resources guarantees close() on every exit path.
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, policyPath, Text.class, ClusteringPolicyWritable.class)) {
    writer.append(new Text(), new ClusteringPolicyWritable(policy));
  }
}
// Unmatched closing brace kept from the original line; it closes a scope opened outside this excerpt.
}
/**
 * Creates {@code srcs} sequence-file writers under {@code testdir}.
 *
 * <p>File {@code i} is named {@code Integer.toString(i + 10, 36)} (so index 0 is {@code "a"},
 * index 1 is {@code "b"}, and so on); the chosen paths are stored into {@code src}. Every
 * writer uses {@code IntWritable} keys and values.
 *
 * <p>If creating any writer fails, the writers opened so far are closed before the
 * failure is rethrown, so no half-built set of open files is left behind.
 *
 * @param testdir directory that will contain the files
 * @param conf    configuration used to resolve the file system and create the writers
 * @param srcs    number of writers to create
 * @param src     output array, filled with the path of each created file; must hold at
 *                least {@code srcs} elements
 * @return the open writers, in the same order as {@code src}; the caller must close them
 * @throws IOException if the file system cannot be resolved or a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir, Configuration conf, int srcs, Path[] src)
    throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  try {
    for (int i = 0; i < srcs; ++i) {
      out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf, src[i],
          IntWritable.class, IntWritable.class);
    }
  } catch (IOException | RuntimeException e) {
    // Release whatever was opened before the failure; keep the original exception primary.
    for (SequenceFile.Writer opened : out) {
      if (opened != null) {
        try {
          opened.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
      }
    }
    throw e;
  }
  return out;
}
/**
 * Creates {@code srcs} sequence-file writers under {@code testdir}.
 *
 * <p>File {@code i} is named {@code Integer.toString(i + 10, 36)} (so index 0 is {@code "a"},
 * index 1 is {@code "b"}, and so on); the chosen paths are stored into {@code src}. Every
 * writer uses {@code IntWritable} keys and values.
 *
 * <p>If creating any writer fails, the writers opened so far are closed before the
 * failure is rethrown, so no half-built set of open files is left behind.
 *
 * @param testdir directory that will contain the files
 * @param conf    configuration used to resolve the file system and create the writers
 * @param srcs    number of writers to create
 * @param src     output array, filled with the path of each created file; must hold at
 *                least {@code srcs} elements
 * @return the open writers, in the same order as {@code src}; the caller must close them
 * @throws IOException if the file system cannot be resolved or a writer cannot be created
 */
private static SequenceFile.Writer[] createWriters(Path testdir, Configuration conf, int srcs, Path[] src)
    throws IOException {
  for (int i = 0; i < srcs; ++i) {
    src[i] = new Path(testdir, Integer.toString(i + 10, 36));
  }
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  try {
    for (int i = 0; i < srcs; ++i) {
      out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf, src[i],
          IntWritable.class, IntWritable.class);
    }
  } catch (IOException | RuntimeException e) {
    // Release whatever was opened before the failure; keep the original exception primary.
    for (SequenceFile.Writer opened : out) {
      if (opened != null) {
        try {
          opened.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
      }
    }
    throw e;
  }
  return out;
}
/**
 * Writes a single vector to a sequence file.
 *
 * <p>The file holds one record with the {@code IntWritable} key {@code 0} and the vector
 * wrapped in a {@code VectorWritable}. try-with-resources is used so that, if both the
 * append and the close fail, the append failure is reported and the close failure is
 * attached as suppressed instead of replacing it.
 *
 * @param path location of the sequence file to create
 * @param v    vector to store
 * @throws IOException if the file cannot be created, written or closed
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
    writer.append(new IntWritable(0), new VectorWritable(v));
  }
}
/** * Produce initial cluster centroids and write the centroids directly to file system. * This interface is called after produceSamples is called. * * @param numClusters The number centroids to be generated * @param centroidsPath the path on FileSystem where centroids are to be written to * @return the actual number of centroids produced */ public int produceInitialCentroids(int numClusters, Path centroidsPath) throws Exception { List<Vector> iCentroids = new ArrayList<Vector>(numClusters); produceInitialCentroids(numClusters, iCentroids); SequenceFile.Writer writer = new SequenceFile.Writer(getFileSystem(), getJobConf(), centroidsPath, Text.class, Kluster.class); for (int i = 0; i < iCentroids.size(); i++) { Vector vec = iCentroids.get(i); Kluster cluster = new Kluster(vec, i, dm); // add the center so the centroid will be correct upon output cluster.observe(cluster.getCenter(), 1); writer.append(new Text(cluster.getIdentifier()), cluster); } writer.close(); return iCentroids.size(); }
/**
 * Creates a writer whose concrete type matches the requested compression kind.
 *
 * <p>If {@code opts} carries no compression option, the default compression type from the
 * configuration is used and a matching compression option is prepended to {@code opts}.
 * {@code RECORD} yields a {@code RecordCompressWriter}, {@code BLOCK} a
 * {@code BlockCompressWriter}, and {@code NONE} or any other kind a plain {@code Writer}.
 *
 * @param conf the configuration to use
 * @param opts the options to create the file with
 * @return a new Writer
 * @throws IOException if the writer cannot be created
 */
public static Writer createWriter(Configuration conf, Writer.Option... opts) throws IOException {
  final Writer.CompressionOption requested =
      Options.getOption(Writer.CompressionOption.class, opts);
  final CompressionType kind;
  if (requested == null) {
    kind = getDefaultCompressionType(conf);
    opts = Options.prependOptions(opts, Writer.compression(kind));
  } else {
    kind = requested.getValue();
  }
  switch (kind) {
    case RECORD:
      return new RecordCompressWriter(conf, opts);
    case BLOCK:
      return new BlockCompressWriter(conf, opts);
    case NONE:
    default:
      return new Writer(conf, opts);
  }
}