@Override
public WriterContext prepareWrite() throws HCatException {
  OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(),
      we.getTableName(), we.getPartitionKVs());
  Job job;
  try {
    job = new Job(conf);
    HCatOutputFormat.setOutput(job, jobInfo);
    HCatOutputFormat.setSchema(job,
        HCatOutputFormat.getTableSchema(job.getConfiguration()));
    HCatOutputFormat outFormat = new HCatOutputFormat();
    outFormat.checkOutputSpecs(job);
    outFormat.getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            job.getConfiguration(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .setupJob(job);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  WriterContextImpl cntxt = new WriterContextImpl();
  cntxt.setConf(job.getConfiguration());
  return cntxt;
}
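For context, prepareWrite() is normally reached through HCatalog's data-transfer API rather than called directly. The following is a minimal master-side sketch, assuming the org.apache.hive.hcatalog.data.transfer classes (WriteEntity.Builder, DataTransferFactory, HCatWriter, WriterContext); the database and table names are placeholders.

import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class MasterSideWriteSketch {
  public static void main(String[] args) throws Exception {
    // "sample_db" and "sample_table" are placeholder names.
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("sample_db")
        .withTable("sample_table")
        .build();

    Map<String, String> config = new HashMap<String, String>();

    // Master side: prepareWrite() runs the setOutput/setSchema/setupJob
    // sequence shown above and returns a serializable WriterContext.
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = master.prepareWrite();

    // The WriterContext is then shipped to the worker processes; see the
    // write/commit sketch after the commit() snippet below.
  }
}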
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
if (getOutputDirMarking(jobContext.getConfiguration())) {
  Path outputPath = new Path(jobInfo.getLocation());
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(conf);
  Map<String, String> partMap = jobInfo.getPartitionValues();
  setPartDetails(jobInfo, schema, partMap);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
}
void runMRCreateFail(String dbName, String tableName,
    Map<String, String> partitionValues, List<HCatFieldSchema> columns) throws Exception {

  Job job = new Job(mrConf, "hcat mapreduce write fail test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(TestHCatPartitionPublish.MapFail.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
  // The write count does not matter, as the map will fail in its first call.
  createInputFile(path, 5);

  TextInputFormat.setInputPaths(job, path);
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(columns));

  boolean success = job.waitForCompletion(true);
  Assert.assertFalse(success);
}
  HCatOutputFormat.setOutput(job, outputJobInfo);
} catch (HCatException he) {
  // The table information passed to HCatOutputFormat was not right;
  // surface the message to the Pig user with the HCatalog exception code.
  throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
}
HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
try {
  doSchemaValidations(pigSchema, hcatTblSchema);
} catch (HCatException he) {
  throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
}
HCatOutputFormat.setSchema(job, computedSchema);
udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));
@Override
public void commit(WriterContext context) throws HCatException {
  WriterContextImpl cntxtImpl = (WriterContextImpl) context;
  try {
    new HCatOutputFormat().getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            cntxtImpl.getConf(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(
            cntxtImpl.getConf(), null));
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
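This commit() is the master-side counterpart of the worker-side record-writer loop shown in the fragment below. A hedged continuation of the earlier master-side sketch (reusing its master and context variables; recordIterator is a hypothetical Iterator&lt;HCatRecord&gt; supplied by the application):

// Worker side: rebuild a writer from the serialized WriterContext and stream records.
HCatWriter slave = DataTransferFactory.getHCatWriter(context);
slave.write(recordIterator);  // recordIterator: hypothetical Iterator<HCatRecord>

// Master side, after all workers finish: commit, or abort on failure.
master.commit(context);   // drives the commitJob path shown above
// master.abort(context); // drives the abortJob path shown further down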
HCatOutputFormat outFormat = new HCatOutputFormat();
TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
    conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
RecordWriter<WritableComparable<?>, HCatRecord> writer;
try {
  committer = outFormat.getOutputCommitter(cntxt);
  committer.setupTask(cntxt);
  writer = outFormat.getRecordWriter(cntxt);
  while (recordItr.hasNext()) {
    HCatRecord rec = recordItr.next();
HCatOutputFormat.setOutput(job, OutputJobInfo.create(
    /* database, table, and partition arguments elided in this excerpt */));

HCatSchema rowSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
assertEquals("Row-schema should have exactly one column.",
    1, rowSchema.getFields().size());
assertEquals(serdeConstants.STRING_TYPE_NAME,
    rowSchema.getFields().get(0).getTypeString());

HCatSchema tableSchema =
    HCatOutputFormat.getTableSchemaWithPartitionColumns(job.getConfiguration());
assertEquals("Table-schema should have exactly 2 columns.",
    2, tableSchema.getFields().size());
public void testSetOutput() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test outputformat");

  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("colname", "p1");
  // null server url means local mode
  OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues);

  HCatOutputFormat.setOutput(job, info);
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job.getConfiguration());

  assertNotNull(jobInfo.getTableInfo());
  assertEquals(1, jobInfo.getPartitionValues().size());
  assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
  assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
  assertEquals("data_column",
      jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());

  publishTest(job);
}
/**
 * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hive.hcatalog.data.schema.HCatSchema)
 */
public static void setSchema(final Job job, final HCatSchema schema) throws IOException {
  setSchema(job.getConfiguration(), schema);
}
/**
 * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo)
 */
public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException {
  setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo);
}
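Taken together, the two Job-level wrappers above are usually all a MapReduce driver needs to touch. The class below is a minimal driver-side sketch, not a definitive recipe: the database, table, and input path are placeholders, the mapper is a deliberately trivial example, and it assumes a Hive metastore reachable through the job's configuration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteDriverSketch {

  // A deliberately simple mapper: each input line becomes a one-column record.
  public static class LineToRecordMapper
      extends Mapper<LongWritable, Text, NullWritable, DefaultHCatRecord> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      DefaultHCatRecord record = new DefaultHCatRecord(1);
      record.set(0, value.toString());
      context.write(NullWritable.get(), record);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "hcat write example");
    job.setJarByClass(HCatWriteDriverSketch.class);
    job.setMapperClass(LineToRecordMapper.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path("/tmp/example-input")); // placeholder path

    // Describe the destination table; a null partition map targets an unpartitioned
    // table (or leaves the partition values to be supplied dynamically).
    OutputJobInfo outputJobInfo = OutputJobInfo.create("default", "example_table", null);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    // Reuse the table's own schema for the outgoing records.
    HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));

    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}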
harRequested = getHarRequested(hiveConf);
outputJobInfo.setHarRequested(harRequested);
maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
@Override
public OutputFormat getOutputFormat() throws IOException {
  return new HCatOutputFormat();
}
protected void configureDynamicStorageHandler(JobContext context, List<String> dynamicPartVals)
    throws IOException {
  HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals);
}
TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);

HCatOutputFormat.setOutput(job, OutputJobInfo.create(
    Warehouse.DEFAULT_DATABASE_NAME, "bad_props_table", null));
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setSchema(job, getSchema());
job.setNumReduceTasks(0);

assertTrue(job.waitForCompletion(true));
@Override
public void abort(WriterContext context) throws HCatException {
  WriterContextImpl cntxtImpl = (WriterContextImpl) context;
  try {
    new HCatOutputFormat().getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            cntxtImpl.getConf(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(
            cntxtImpl.getConf(), null), State.FAILED);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}