/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(conf);
  Map<String, String> partMap = jobInfo.getPartitionValues();
  setPartDetails(jobInfo, schema, partMap);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo));
}
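A minimal driver-side sketch of how setOutput() and setSchema() are typically called before the writer and committer containers below read the serialized OutputJobInfo back via HCatOutputFormat.getJobInfo(); the database, table, and partition column names are hypothetical placeholders, and the imports assume the org.apache.hive.hcatalog package layout.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteDriverSketch {
  public static Job configureOutput() throws Exception {
    Job job = Job.getInstance(new Configuration(), "hcat write sketch");
    // Hypothetical static partition value for a table partitioned on "part".
    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("part", "p1");
    // setOutput() resolves the table in the metastore and serializes an OutputJobInfo
    // into the job Configuration under HCAT_KEY_OUTPUT_INFO.
    HCatOutputFormat.setOutput(job, OutputJobInfo.create("mydb", "mytable", partitionValues));
    // Reuse the table schema for the written data; setSchema() re-serializes the
    // updated OutputJobInfo, as shown in setSchema() above.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job.getConfiguration(), schema);
    job.setOutputFormatClass(HCatOutputFormat.class);
    return job;
  }
}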
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
if (getOutputDirMarking(jobContext.getConfiguration())) {
  Path outputPath = new Path(jobInfo.getLocation());
/**
 * @param context current JobContext
 * @param baseCommitter OutputCommitter to contain
 * @throws IOException
 */
public FileOutputCommitterContainer(JobContext context,
    org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
  super(context, baseCommitter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
  this.partitionsDiscovered = !dynamicPartitioningUsed;
  cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  Table table = new Table(jobInfo.getTableInfo().getTable());
  if (dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getCustomDynamicPath() != null
      && jobInfo.getCustomDynamicPath().length() > 0) {
    customDynamicLocationUsed = true;
  } else {
    customDynamicLocationUsed = false;
  }
  this.maxAppendAttempts = context.getConfiguration().getInt(HCatConstants.HCAT_APPEND_LIMIT,
      APPEND_COUNTER_WARN_THRESHOLD);
}
/**
 * @param baseWriter RecordWriter to contain
 * @param context current TaskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
public FileRecordWriterContainer(
    RecordWriter<? super WritableComparable<?>, ? super Writable> baseWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  super(context, baseWriter);
  this.context = context;
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
  objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
  // If partition columns occur in data, we want to remove them.
  partColsToDel = jobInfo.getPosOfPartCols();
  if (partColsToDel == null) {
    throw new HCatException("It seems that setSchema() is not called on "
        + "HCatOutputFormat. Please make sure that method is called.");
  }
}
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    handleDuplicatePublish(context, jobInfo, client, new Table(jobInfo.getTableInfo().getTable()));
  } catch (MetaException e) {
    throw new IOException(e);
  } catch (TException e) {
    throw new IOException(e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
  if (!jobInfo.isDynamicPartitioningUsed()) {
    JobConf jobConf = new JobConf(context.getConfiguration());
    getBaseOutputFormat().checkOutputSpecs(null, jobConf);
    // checkOutputSpecs might have set some properties; copy them back so the context reflects that
    HCatUtil.copyConf(jobConf, context.getConfiguration());
  }
}
if (!partitionsDiscovered) {
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext.getConfiguration());
Path tblPath = new Path(jobInfo.getTableInfo().getTableLocation());
if (dynamicPartitioningUsed) {
    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
dynamicOutputJobInfo.put(dynKey,
    HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
public void testSetOutput() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test outputformat");
  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("colname", "p1");
  // null server url means local mode
  OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues);
  HCatOutputFormat.setOutput(job, info);
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job.getConfiguration());
  assertNotNull(jobInfo.getTableInfo());
  assertEquals(1, jobInfo.getPartitionValues().size());
  assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
  assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
  assertEquals("data_column", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());
  publishTest(job);
}
discoverPartitions(context);
OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
Configuration conf = context.getConfiguration();
Table table = new Table(jobInfo.getTableInfo().getTable());
/**
 * @param context
 *          current JobContext
 * @param baseCommitter
 *          OutputCommitter to contain
 * @throws IOException
 */
public CrunchFileOutputCommitterContainer(JobContext context, OutputCommitter baseCommitter)
    throws IOException {
  super(context, baseCommitter);
  dynamicPartitioningUsed =
      HCatOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed();
}