try {
  reader = new BufferedReader(new InputStreamReader(xlearningProcess.getInputStream()));
  List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());
  JobConf jobConf = new JobConf(conf);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setBoolean("mapred.output.compress", true);
  jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
  String xlearningStreamResultLine;
  while ((xlearningStreamResultLine = reader.readLine()) != null) {
    writer.write(null, xlearningStreamResultLine);
  }
  // Close the writer and streams only after the child process output is drained
  writer.close(Reporter.NULL);
  reader.close();
  dfs.close();
} catch (Exception e) {
  LOG.warn("Exception in thread stdoutRedirectThread", e);
}
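The writer used above is created elsewhere in the enclosing thread. As a hedged sketch, such a mapred RecordWriter is typically obtained from the output format configured by this jobConf; the output name is illustrative, and mapred.output.dir is assumed to be set already:

  // Hypothetical setup for the writer used above; "stdout" is an illustrative name.
  TextOutputFormat<Text, Text> outputFormat = new TextOutputFormat<Text, Text>();
  RecordWriter<Text, Text> writer =
      outputFormat.getRecordWriter(dfs, jobConf, "stdout", Reporter.NULL);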
@Override
protected void closeOp(boolean abort) throws HiveException {
  try {
    if (!wroteData) {
      // Send a schema-only batch to signal EOS with no data written
      ArrowWrapperWritable writable = converter.emptyBatch();
      if (recordWriter == null) {
        recordWriter = LlapOutputFormatService.get().getWriter(this.attemptId);
      }
      recordWriter.write(null, writable);
    }
  } catch (Exception e) {
    LOG.error("Failed to write Arrow stream schema");
    throw new RuntimeException(e);
  } finally {
    try {
      // Close the recordWriter with a null Reporter
      recordWriter.close(null);
    } catch (Exception e) {
      LOG.error("Failed to close Arrow stream");
      throw new RuntimeException(e);
    }
  }
}
/**
 * Open and close a TableOutputFormat. Closing the RecordWriter should release
 * HBase Connection (ZK) resources, and will throw an exception if they are exhausted.
 */
static void openCloseTableOutputFormat(int iter) throws IOException {
  LOG.info("Instantiating TableOutputFormat connection " + iter);
  JobConf conf = new JobConf();
  conf.addResource(UTIL.getConfiguration());
  conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
  TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
  TableOutputFormat tof = new TableOutputFormat();
  RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
  rw.close(null);
}
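The javadoc's point is resource exhaustion: if close() leaked connections, repeated calls would eventually fail. A hedged sketch of a driver that exercises this (the iteration count is illustrative, not from the source):

  // Hypothetical driver: a leak in close() would exhaust ZK resources and throw.
  for (int i = 0; i < 1000; i++) {
    openCloseTableOutputFormat(i);
  }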
private static void test2(JobConf job) throws IOException {
  FileSystem fs = FileSystem.getLocal(job);
  String name = "part-00000";
  // pretend that we have an input file with 1/2/3 as the suffix
  job.set(JobContext.MAP_INPUT_FILE, "1/2/3");
  // we use the last two legs of the input file as the output file
  job.set("mapred.outputformat.numOfTrailingLegs", "2");
  MultipleTextOutputFormat<Text, Text> theOutputFormat =
      new MultipleTextOutputFormat<Text, Text>();
  RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null);
  writeData(rw);
  rw.close(null);
}
AbstractSerDe serde = new OrcSerde();
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
conf.setInt("mapred.max.split.size", 50);
RecordWriter writer =
    outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
writer.write(NullWritable.get(), serde.serialize(new SimpleRow(null), inspector));
writer.close(Reporter.NULL);
serde = new OrcSerde();
SearchArgument sarg = ... // builder chain truncated in the search snippet
    .end()
    .build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z");
properties.setProperty("columns", "z");
properties.setProperty("columns.types", "string");
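The SearchArgument builder chain is cut off in this result (and in the similar result further down). For illustration only, a complete chain might look like the following; the predicate and column are assumptions, and older Hive versions omit the PredicateLeaf.Type argument from the builder methods:

  // Illustrative only: a complete SearchArgument pushing down "z IS NOT NULL".
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startNot()
        .isNull("z", PredicateLeaf.Type.STRING)
      .end()
      .build();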
private static void test1(JobConf job) throws IOException {
  FileSystem fs = FileSystem.getLocal(job);
  String name = "part-00000";
  KeyBasedMultipleTextOutputFormat theOutputFormat = new KeyBasedMultipleTextOutputFormat();
  RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null);
  writeData(rw);
  rw.close(null);
}
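Both test1 and test2 call a writeData helper that these results omit. A minimal sketch consistent with the call sites; the keys and values are assumptions, not the original test data:

  // Hypothetical helper: push a few Text key/value pairs through the RecordWriter.
  private static void writeData(RecordWriter<Text, Text> rw) throws IOException {
    for (int i = 10; i < 40; i++) {
      String suffix = String.valueOf(i);
      rw.write(new Text("k" + suffix), new Text("v" + suffix));
    }
  }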
RecordWriter writer =
    outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
writer.close(Reporter.NULL);
serde = new OrcSerde();
properties.setProperty("columns", "z,r");
assertEquals(1, splits.length);
ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
conf.set("columns", "z,r");
conf.set("columns.types", "int:struct<x:int,y:int>");
org.apache.hadoop.mapred.RecordReader reader =
    in.getRecordReader(splits[0], conf, Reporter.NULL);
@Override
public void writeRecord(Tuple2<K, V> record) throws IOException {
  this.recordWriter.write(record.f0, record.f1);
}
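This is the delegation step in Flink's mapred bridge: the Tuple2 fields become the Hadoop key and value. A hedged sketch of how such a wrapper is typically wired up, assuming flink-hadoop-compatibility's HadoopOutputFormat; the types are illustrative:

  // Hypothetical wiring: wrap a mapred OutputFormat so a Flink job can emit
  // Tuple2<Text, IntWritable> records through it.
  JobConf jobConf = new JobConf();
  TextOutputFormat<Text, IntWritable> hadoopFormat = new TextOutputFormat<>();
  HadoopOutputFormat<Text, IntWritable> flinkFormat =
      new HadoopOutputFormat<>(hadoopFormat, jobConf);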
@Override
public void close(boolean abort) throws IOException {
  // close with null reporter
  mWriter.close(null);
}
@Override
public void collect(K key, V value) throws IOException {
  // only set if classes are unset to allow setting higher level class when
  // using multiple subtypes
  if (recordWriter == null) {
    setClassIfUnset("mapred.output.key.class", key.getClass());
    setClassIfUnset("mapred.output.value.class", value.getClass());
    recordWriter = outputFormat.getRecordWriter(
        FileSystem.getLocal(outputFormatConf), outputFormatConf,
        outputFile.getName(), Reporter.NULL);
  }
  recordWriter.write(key, value);
}
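The setClassIfUnset helper is not shown in this result; a minimal sketch consistent with its use at the call sites (the name and semantics are assumed from those calls):

  // Hypothetical helper: set a configuration class only when the key is absent,
  // so an explicitly configured higher-level class is not overwritten.
  private void setClassIfUnset(String key, Class<?> clazz) {
    if (outputFormatConf.get(key) == null) {
      outputFormatConf.setClass(key, clazz, Object.class);
    }
  }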
/**
 * Commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
  this.recordWriter.close(new DummyHadoopReporter());
  if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
      TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
    this.fileOutputCommitterWrapper.commitTask(this.jobConf,
        TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
  }
  // TODO: commit the job when all the tasks are finished
}
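DummyHadoopReporter is a no-op Reporter defined elsewhere in that codebase. Hadoop itself ships an equivalent no-op constant, so the close could just as well read:

  // Equivalent close using Hadoop's built-in no-op reporter.
  this.recordWriter.close(Reporter.NULL);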
conf.setInt("mapred.max.split.size", 50); RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL); writer.write(NullWritable.get(), serde.serialize(new NestedRow(1,2,3), inspector)); writer.write(NullWritable.get(), serde.serialize(new NestedRow(4,5,6), inspector)); writer.write(NullWritable.get(), serde.serialize(new NestedRow(7,8,9), inspector)); writer.close(Reporter.NULL); serde = new OrcSerde(); SearchArgument sarg = .end() .build(); conf.set("sarg.pushdown", toKryo(sarg)); conf.set("hive.io.file.readcolumn.names", "z,r"); SerDeUtils.initializeSerDe(serde, conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector();
@Override
@SuppressWarnings("unchecked")
public void write(Writable r) throws IOException {
  mWriter.write(null, (V) r);
}
@Override
public void close(Reporter reporter) throws IOException {
  this.mWriter.close(reporter);
}
rw = ... // receiver (the outer AvroTextOutputFormat instance) truncated in the search snippet
    .new AvroTextRecordWriter(fileWriter, "\t".getBytes(StandardCharsets.UTF_8));
rw.write(null, null);
rw.write(null, NullWritable.get());
rw.write(NullWritable.get(), null);
rw.write(NullWritable.get(), NullWritable.get());
rw.write("k1", null);
rw.write("k2", NullWritable.get());
rw.write(null, "v1");
rw.write(NullWritable.get(), "v2");
rw.write("k3", "v3");
rw.write(new Text("k4"), new Text("v4"));
rw.close(null);
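Taken together, these results all follow the classic org.apache.hadoop.mapred lifecycle: obtain a RecordWriter from an OutputFormat, write key/value pairs, then close it with a Reporter. A self-contained sketch of that lifecycle; the output path and task attempt ID are illustrative:

  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.RecordWriter;
  import org.apache.hadoop.mapred.Reporter;
  import org.apache.hadoop.mapred.TextOutputFormat;

  public class RecordWriterLifecycle {
    public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf();
      // A task attempt ID is needed so FileOutputFormat can build a task work path.
      conf.set("mapred.task.id", "attempt_200707121733_0001_m_000000_0");
      FileOutputFormat.setOutputPath(conf, new Path("/tmp/rw-demo"));
      FileSystem fs = FileSystem.getLocal(conf);

      TextOutputFormat<Text, Text> format = new TextOutputFormat<>();
      RecordWriter<Text, Text> writer =
          format.getRecordWriter(fs, conf, "part-00000", Reporter.NULL);
      try {
        writer.write(new Text("key"), new Text("value"));
      } finally {
        // Symmetric with the snippets above: always close with a Reporter.
        writer.close(Reporter.NULL);
      }
    }
  }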