/**
 * Creates the next task output file, naming it from the base file name, a
 * sequential per-file extension, and the codec's default suffix.
 *
 * @return an open stream for writing the new file
 * @throws IOException if the file cannot be created
 */
public FSDataOutputStream createFile() throws IOException {
  // fileNum is post-incremented so each call yields a distinct file name.
  String outputName = fileName + getFileExtention(fileNum++) + codec.getDefaultExtension();
  Path file = FileOutputFormat.getTaskOutputPath(jobConf, outputName);
  FileSystem fs = file.getFileSystem(jobConf);
  // Null Progressable: no progress reporting while this file is written.
  return fs.create(file, null);
}
/**
 * Opens an Avro container of BYTES datums in the task output directory and
 * returns a text writer that joins key and value with the configured
 * separator.
 */
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  // Separator placed between key and value inside each output record.
  byte[] separator =
      job.get("mapreduce.output.textoutputformat.separator", "\t").getBytes(UTF8);
  DataFileWriter<ByteBuffer> fileWriter = new DataFileWriter<>(new ReflectDatumWriter<>());
  AvroOutputFormat.configureDataFileWriter(fileWriter, job);
  Path outputPath = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  fileWriter.create(Schema.create(Schema.Type.BYTES),
      outputPath.getFileSystem(job).create(outputPath));
  return new AvroTextRecordWriter(fileWriter, separator);
}
/**
 * Returns a writer that appends pre-encoded Avro datum buffers to a data
 * file created under the task's output directory.
 *
 * @param ignore unused file system handle (the path's own FS is used)
 * @param job    job configuration supplying the output schema and codec options
 * @param name   base name for the task output file
 * @param prog   unused progress reporter
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  Schema schema = AvroJob.getOutputSchema(job);
  // Parameterized writer types replace the previous raw types, so the
  // blanket @SuppressWarnings("unchecked") is no longer needed.
  final DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY,
        CodecFactory.DEFAULT_DEFLATE_LEVEL);
    writer.setCodec(CodecFactory.deflateCodec(level));
  }
  Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
  writer.create(schema, path.getFileSystem(job).create(path));
  return new RecordWriter<TetherData, NullWritable>() {
    public void write(TetherData datum, NullWritable ignore) throws IOException {
      // Buffers arrive already Avro-encoded; append the raw bytes directly.
      writer.appendEncoded(datum.buffer());
    }
    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
// Resolve the task-scoped output directory and its file system, then attempt
// to create the directory.
// NOTE(review): fragment — the body of the mkdirs-failure branch continues
// beyond this view; presumably it raises an error. Confirm at the full file.
final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
final FileSystem fs = dir.getFileSystem(job);
if (!fs.mkdirs(dir))
/**
 * Opens an Avro container file in the task output directory and returns a
 * writer that appends each wrapped datum to it.
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  // Map-only jobs write the map output schema; otherwise the reduce output schema.
  Schema schema = job.getNumReduceTasks() == 0
      ? AvroJob.getMapOutputSchema(job)
      : AvroJob.getOutputSchema(job);
  GenericData model = AvroJob.createDataModel(job);
  final DataFileWriter<T> fileWriter = new DataFileWriter<T>(model.createDatumWriter(null));
  configureDataFileWriter(fileWriter, job);
  Path outputPath = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  fileWriter.create(schema, outputPath.getFileSystem(job).create(outputPath));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      fileWriter.append(wrapper.datum());
    }
    public void close(Reporter reporter) throws IOException {
      fileWriter.close();
    }
  };
}
/**
 * Resolves a path for {@code filename} inside this record writer's output
 * location, applying the optional {@code prefix} to the file name.
 *
 * @param filename the bare file name to place in the output directory
 * @return the resolved path — under {@code outDir} when set, otherwise under
 *         the task output path
 * @throws IOException if the task output path cannot be determined
 */
protected Path getFilePath(String filename) throws IOException {
  String effectiveName = (prefix == null) ? filename : prefix + "_" + filename;
  if (outDir != null) {
    return new Path(outDir, effectiveName);
  }
  return FileOutputFormat.getTaskOutputPath(jobConf, effectiveName);
}
/**
 * Builds a {@link BlockRecordWriter} rooted at the parent directory of the
 * task's output path, paired with the first configured input path.
 */
@Override
public RecordWriter<FileBlockIndex, WritableByteArray> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  this.progress = progress;
  // The writer needs the task output *directory*, not the per-task file itself.
  Path outputDir = FileOutputFormat.getTaskOutputPath(job, name).getParent();
  FileSystem fs = outputDir.getFileSystem(job);
  Path firstInput = BlockInputFormat.getInputPaths(job)[0];
  return new BlockRecordWriter(fs, outputDir, firstInput);
}
}
/**
 * Delegates to the underlying ORC output format at the task output path and
 * wraps its writer so {@code Corc} values can be written.
 */
@Override
public RecordWriter<NullWritable, Corc> getRecordWriter(FileSystem fileSystem, JobConf conf,
    String name, Progressable progress) throws IOException {
  Path taskOutput = FileOutputFormat.getTaskOutputPath(conf, name);
  RecordWriter<NullWritable, ?> delegate =
      orcOutputFormat.getRecordWriter(fileSystem, conf, taskOutput.toString(), progress);
  return new CorcRecordWriter(delegate);
}
/**
 * Creates the destination pail (if it does not yet exist) and a task-local
 * pail rooted at the task's work directory.
 */
public PailRecordWriter(JobConf conf, String unique, Progressable p) throws IOException {
  PailSpec spec = (PailSpec) Utils.getObject(conf, SPEC_ARG);
  Path root = getOutputPath(conf);
  FileSystem fs = root.getFileSystem(conf);
  Pail.create(fs, root.toString(), spec, false);
  // Hack: the work output directory is not exposed directly, so derive it
  // from the parent of a per-task output file path.
  String workDir = FileOutputFormat.getTaskOutputPath(conf, unique).getParent().toString();
  _pail = Pail.create(fs, workDir, spec, false);
  _unique = unique;
}
/**
 * Copies the Solr core from HDFS to a local temp directory, prepares the
 * local Solr home and index location, and constructs the {@code SolrWriter}
 * used to index records for this task.
 *
 * @param conf     job configuration holding the core path, sink fields, and tuning keys
 * @param progress progress hook kept alive while Solr works
 * @throws IOException if the core cannot be copied or output paths resolved
 */
public SolrRecordWriter(JobConf conf, String name, Progressable progress) throws IOException {
  // Copy Solr core directory from HDFS to temp local location.
  Path sourcePath = new Path(conf.get(SOLR_CORE_PATH_KEY));
  String coreName = sourcePath.getName();
  String tmpDir = System.getProperty("java.io.tmpdir");
  // Randomized directory name avoids collisions between concurrent tasks on one node.
  File localSolrCore = new File(tmpDir, "cascading.solr-" + UUID.randomUUID() + "/" + coreName);
  FileSystem sourceFS = sourcePath.getFileSystem(conf);
  sourceFS.copyToLocalFile(sourcePath, new Path(localSolrCore.getAbsolutePath()));
  // Figure out where ultimately the results need to wind up.
  _outputPath = new Path(FileOutputFormat.getTaskOutputPath(conf, name), "index");
  _outputFS = _outputPath.getFileSystem(conf);
  // Get the set of fields we're indexing.
  Fields sinkFields = HadoopUtil.deserializeBase64(conf.get(SINK_FIELDS_KEY), conf, Fields.class);
  int maxSegments = conf.getInt(MAX_SEGMENTS_KEY, DEFAULT_MAX_SEGMENTS);
  String dataDirPropertyName = conf.get(DATA_DIR_PROPERTY_NAME_KEY);
  // Set up local Solr home.
  File localSolrHome = SolrSchemeUtil.makeTempSolrHome(localSolrCore);
  // This is where data will wind up, inside of an index subdir.
  _localIndexDir = new File(localSolrHome, "data");
  _keepAliveHook = new HadoopKeepAliveHook(progress);
  _solrWriter = new SolrWriter(_keepAliveHook, sinkFields, dataDirPropertyName,
      _localIndexDir.getAbsolutePath(), localSolrCore, maxSegments) { };
}
/**
 * Returns a line-oriented record writer, wrapping the stream in the
 * configured compression codec when output compression is enabled.
 */
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  if (!getCompressOutput(job)) {
    Path plainFile = FileOutputFormat.getTaskOutputPath(job, name);
    FSDataOutputStream out = plainFile.getFileSystem(job).create(plainFile, progress);
    return new LineRecordWriter<K, V>(out);
  }
  Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
  CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
  // Append the codec's extension so the file name reflects its encoding.
  Path compressedFile =
      FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
  FSDataOutputStream out = compressedFile.getFileSystem(job).create(compressedFile, progress);
  return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(out)));
}
}
/**
 * Returns a line-oriented writer for the task's output file. Compressed
 * output gets the codec's default extension appended to the file name and a
 * codec-wrapped stream; uncompressed output writes directly.
 */
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  boolean compressed = getCompressOutput(job);
  if (compressed) {
    CompressionCodec codec =
        ReflectionUtils.newInstance(getOutputCompressorClass(job, GzipCodec.class), job);
    // File name carries the codec's extension so consumers can detect the encoding.
    Path target = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
    FSDataOutputStream raw = target.getFileSystem(job).create(target, progress);
    return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(raw)));
  } else {
    Path target = FileOutputFormat.getTaskOutputPath(job, name);
    FSDataOutputStream raw = target.getFileSystem(job).create(target, progress);
    return new LineRecordWriter<K, V>(raw);
  }
}
}
/**
 * Opens a WARC file at the task output path, gzip-compressed when output
 * compression is enabled for the job.
 */
public WARCWriter(JobConf job, String filename, Progressable progress) throws IOException {
  Path workFile = FileOutputFormat.getTaskOutputPath(job, filename);
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    codec = WARCFileWriter.getGzipCodec(job);
  }
  this.writer = new WARCFileWriter(job, codec, workFile, progress);
}
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema)); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } // copy metadata from job for (Map.Entry<String, String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1")); } Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new AvroRecordWriter(writer, schema); }
/**
 * Writes the export manifest file for the task. The manifest stream itself is
 * not wrapped in a codec; the codec's extension is only recorded so manifest
 * entries can reference the (possibly compressed) data files.
 */
@Override
public RecordWriter<K, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  Path manifestPath = FileOutputFormat.getTaskOutputPath(job, MANIFEST_FILENAME);
  FSDataOutputStream manifestOut = manifestPath.getFileSystem(job).create(manifestPath, progress);
  String extension = "";
  if (getCompressOutput(job)) {
    CompressionCodec codec =
        ReflectionUtils.newInstance(getOutputCompressorClass(job, GzipCodec.class), job);
    extension = codec.getDefaultExtension();
  }
  return new ExportManifestRecordWriter<>(manifestOut, FileOutputFormat.getOutputPath(job),
      extension);
}
}
/**
 * Opens an Avro container of BYTES datums in the task output directory and
 * wraps it in a text record writer.
 */
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  final DataFileWriter<ByteBuffer> out =
      new DataFileWriter<ByteBuffer>(new ReflectDatumWriter<ByteBuffer>());
  AvroOutputFormat.configureDataFileWriter(out, job);
  Path target = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  Schema bytesSchema = Schema.create(Schema.Type.BYTES);
  out.create(bytesSchema, target.getFileSystem(job).create(target));
  // Key and value are joined by the configured separator within each record.
  final byte[] sep =
      job.get("mapreduce.output.textoutputformat.separator", "\t").getBytes(UTF8);
  return new AvroTextRecordWriter(out, sep);
}
/**
 * Delegates to {@code createRecordWriter}, resolving the task output path and
 * the job's compression, file-extension, and result-encoding settings.
 */
public RecordWriter<NullWritable, Text> getRecordWriter(FileSystem ignored, JobConf job,
    String name, Progressable progress) throws IOException {
  Path outputPath = FileOutputFormat.getTaskOutputPath(job, name);
  // NOTE(review): "getOuptutFileExtn" is misspelled, but it is declared elsewhere;
  // renaming needs a coordinated change at its definition and all call sites.
  return createRecordWriter(job, outputPath, progress, getCompressOutput(job),
      getOuptutFileExtn(job), getResultEncoding(job));
}
/**
 * Resolves the task output path and hands off to {@code createRecordWriter}
 * with the job's compression flag, output extension, and result encoding.
 */
public RecordWriter<NullWritable, Text> getRecordWriter(FileSystem ignored, JobConf job,
    String name, Progressable progress) throws IOException {
  Path taskPath = FileOutputFormat.getTaskOutputPath(job, name);
  boolean compress = getCompressOutput(job);
  String extension = getOuptutFileExtn(job); // NOTE(review): misspelled name defined elsewhere
  return createRecordWriter(job, taskPath, progress, compress, extension,
      getResultEncoding(job));
}
@Override public void close() throws IOException { // Output the result to a file Results in the output dir FileContext fc; try { fc = FileContext.getFileContext(jobConf); } catch (IOException ioe) { System.err.println("Can not initialize the file system: " + ioe.getLocalizedMessage()); return; } FSDataOutputStream o = fc.create(FileOutputFormat.getTaskOutputPath(jobConf, "Results"), EnumSet.of(CreateFlag.CREATE)); PrintStream out = new PrintStream(o); printResults(out); out.close(); o.close(); } }
/**
 * Returns a record writer that appends wrapped Avro datums to a container
 * file under the task output directory.
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
    String name, Progressable prog) throws IOException {
  boolean mapOnly = job.getNumReduceTasks() == 0;
  // Map-only jobs emit the map output schema; jobs with reducers emit the final schema.
  Schema outputSchema = mapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
  GenericData dataModel = AvroJob.createDataModel(job);
  final DataFileWriter<T> avroWriter = new DataFileWriter<T>(dataModel.createDatumWriter(null));
  configureDataFileWriter(avroWriter, job);
  Path destination = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  avroWriter.create(outputSchema, destination.getFileSystem(job).create(destination));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
      avroWriter.append(record.datum());
    }
    public void close(Reporter reporter) throws IOException {
      avroWriter.close();
    }
  };
}