@Override
public WriterContext prepareWrite() throws HCatException {
  OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(),
      we.getTableName(), we.getPartitionKVs());
  Job job;
  try {
    job = new Job(conf);
    HCatOutputFormat.setOutput(job, jobInfo);
    HCatOutputFormat.setSchema(job,
        HCatOutputFormat.getTableSchema(job.getConfiguration()));
    HCatOutputFormat outFormat = new HCatOutputFormat();
    outFormat.checkOutputSpecs(job);
    outFormat.getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            job.getConfiguration(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .setupJob(job);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  WriterContextImpl cntxt = new WriterContextImpl();
  cntxt.setConf(job.getConfiguration());
  return cntxt;
}
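For context, prepareWrite() is normally reached through HCatalog's data-transfer API rather than called directly. The following is a minimal master-side sketch, assuming the org.apache.hive.hcatalog.data.transfer classes (WriteEntity.Builder, DataTransferFactory, HCatWriter, WriterContext); the database and table names are placeholders.

import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class MasterSideWriteSketch {
  public static void main(String[] args) throws Exception {
    // "sample_db" and "sample_table" are placeholder names.
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("sample_db")
        .withTable("sample_table")
        .build();

    Map<String, String> config = new HashMap<String, String>();

    // Master side: prepareWrite() runs the setOutput/setSchema/setupJob
    // sequence shown above and returns a serializable WriterContext.
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = master.prepareWrite();

    // The WriterContext is then shipped to the worker processes; see the
    // write/commit sketch after the commit() snippet below.
  }
}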
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
if (getOutputDirMarking(jobContext.getConfiguration())) {
  Path outputPath = new Path(jobInfo.getLocation());
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(conf);
  Map<String, String> partMap = jobInfo.getPartitionValues();
  setPartDetails(jobInfo, schema, partMap);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
}
void runMRCreateFail(String dbName, String tableName,
    Map<String, String> partitionValues, List<HCatFieldSchema> columns) throws Exception {

  Job job = new Job(mrConf, "hcat mapreduce write fail test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(TestHCatPartitionPublish.MapFail.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
  // The write count does not matter, as the map will fail in its first call.
  createInputFile(path, 5);

  TextInputFormat.setInputPaths(job, path);
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(columns));

  boolean success = job.waitForCompletion(true);
  Assert.assertFalse(success);
}
  HCatOutputFormat.setOutput(job, outputJobInfo);
} catch (HCatException he) {
  // The table information passed to HCatOutputFormat was not right;
  // surface the message to the Pig user with the HCatalog exception code.
  throw new PigException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
}
HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
try {
  doSchemaValidations(pigSchema, hcatTblSchema);
} catch (HCatException he) {
  throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
}
HCatOutputFormat.setSchema(job, computedSchema);
udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));
@Override
public void commit(WriterContext context) throws HCatException {
  WriterContextImpl cntxtImpl = (WriterContextImpl) context;
  try {
    new HCatOutputFormat().getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            cntxtImpl.getConf(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(
            cntxtImpl.getConf(), null));
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
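This commit() is the master-side counterpart of the worker-side record-writer loop shown in the fragment below. A hedged continuation of the earlier master-side sketch (reusing its master and context variables; recordIterator is a hypothetical Iterator&lt;HCatRecord&gt; supplied by the application):

// Worker side: rebuild a writer from the serialized WriterContext and stream records.
HCatWriter slave = DataTransferFactory.getHCatWriter(context);
slave.write(recordIterator);  // recordIterator: hypothetical Iterator<HCatRecord>

// Master side, after all workers finish: commit, or abort on failure.
master.commit(context);   // drives the commitJob path shown above
// master.abort(context); // drives the abortJob path shown further down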
HCatOutputFormat outFormat = new HCatOutputFormat();
TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
    conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
RecordWriter<WritableComparable<?>, HCatRecord> writer;
try {
  committer = outFormat.getOutputCommitter(cntxt);
  committer.setupTask(cntxt);
  writer = outFormat.getRecordWriter(cntxt);
  while (recordItr.hasNext()) {
    HCatRecord rec = recordItr.next();
HCatOutputFormat.setOutput(job, OutputJobInfo.create(
    /* database, table, and partition arguments elided in this excerpt */));

HCatSchema rowSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
assertEquals("Row-schema should have exactly one column.",
    1, rowSchema.getFields().size());
assertEquals(serdeConstants.STRING_TYPE_NAME,
    rowSchema.getFields().get(0).getTypeString());

HCatSchema tableSchema =
    HCatOutputFormat.getTableSchemaWithPartitionColumns(job.getConfiguration());
assertEquals("Table-schema should have exactly 2 columns.",
    2, tableSchema.getFields().size());
public void testSetOutput() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test outputformat");

  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("colname", "p1");
  // null server url means local mode
  OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues);

  HCatOutputFormat.setOutput(job, info);
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job.getConfiguration());

  assertNotNull(jobInfo.getTableInfo());
  assertEquals(1, jobInfo.getPartitionValues().size());
  assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
  assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
  assertEquals("data_column",
      jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());

  publishTest(job);
}
/**
 * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hive.hcatalog.data.schema.HCatSchema)
 */
public static void setSchema(final Job job, final HCatSchema schema) throws IOException {
  setSchema(job.getConfiguration(), schema);
}
/**
 * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo)
 */
public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException {
  setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo);
}
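Taken together, the two Job-level wrappers above are usually all a MapReduce driver needs to touch. The class below is a minimal driver-side sketch, not a definitive recipe: the database, table, and input path are placeholders, the mapper is a deliberately trivial example, and it assumes a Hive metastore reachable through the job's configuration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteDriverSketch {

  // A deliberately simple mapper: each input line becomes a one-column record.
  public static class LineToRecordMapper
      extends Mapper<LongWritable, Text, NullWritable, DefaultHCatRecord> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      DefaultHCatRecord record = new DefaultHCatRecord(1);
      record.set(0, value.toString());
      context.write(NullWritable.get(), record);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "hcat write example");
    job.setJarByClass(HCatWriteDriverSketch.class);
    job.setMapperClass(LineToRecordMapper.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path("/tmp/example-input")); // placeholder path

    // Describe the destination table; a null partition map targets an unpartitioned
    // table (or leaves the partition values to be supplied dynamically).
    OutputJobInfo outputJobInfo = OutputJobInfo.create("default", "example_table", null);
    HCatOutputFormat.setOutput(job, outputJobInfo);

    // Reuse the table's own schema for the outgoing records.
    HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));

    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}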
harRequested = getHarRequested(hiveConf);
outputJobInfo.setHarRequested(harRequested);
maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
@Override
public OutputFormat getOutputFormat() throws IOException {
  return new HCatOutputFormat();
}
protected void configureDynamicStorageHandler(JobContext context, List<String> dynamicPartVals)
    throws IOException {
  HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals);
}
TextInputFormat.setInputPaths(job, INPUT_FILE_NAME);

HCatOutputFormat.setOutput(job, OutputJobInfo.create(
    Warehouse.DEFAULT_DATABASE_NAME, "bad_props_table", null));
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setSchema(job, getSchema());
job.setNumReduceTasks(0);

assertTrue(job.waitForCompletion(true));
@Override
public void abort(WriterContext context) throws HCatException {
  WriterContextImpl cntxtImpl = (WriterContextImpl) context;
  try {
    new HCatOutputFormat().getOutputCommitter(
        ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            cntxtImpl.getConf(),
            ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
        .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(
            cntxtImpl.getConf(), null), State.FAILED);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}