private String getCustomPartitionRootLocation(OutputJobInfo jobInfo, Configuration conf) {
  if (ptnRootLocation == null) {
    // We only need to calculate this once; it is the same for all partitions in this job.
    String parentPath = jobInfo.getTableInfo().getTableLocation();
    if (jobInfo.getCustomDynamicRoot() != null
        && jobInfo.getCustomDynamicRoot().length() > 0) {
      parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
    }
    Path ptnRoot = new Path(parentPath, DYNTEMP_DIR_NAME
        + conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID));
    ptnRootLocation = ptnRoot.toString();
  }
  return ptnRootLocation;
}
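For orientation, a minimal sketch of the root path this method computes. The literal "_DYN" stands in for the DYNTEMP_DIR_NAME constant, and all concrete values below are invented for illustration:

import org.apache.hadoop.fs.Path;

public class PartitionRootSketch {
  public static void main(String[] args) {
    // Stand-ins for jobInfo.getTableInfo().getTableLocation(),
    // jobInfo.getCustomDynamicRoot(), and the HCAT_DYNAMIC_PTN_JOBID value.
    String tableLocation = "hdfs://nn/warehouse/db.db/tbl";
    String customRoot = "staging";   // may be null or empty
    String dynJobId = "0001";

    String parent = tableLocation;
    if (customRoot != null && !customRoot.isEmpty()) {
      parent = new Path(parent, customRoot).toString();
    }
    // "_DYN" mirrors the DYNTEMP_DIR_NAME constant used in the method above.
    Path ptnRoot = new Path(parent, "_DYN" + dynJobId);
    System.out.println(ptnRoot);  // hdfs://nn/warehouse/db.db/tbl/staging/_DYN0001
  }
}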
/**
 * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
 * on the specified job context.
 * Note: this is the record schema for the table; it does not include the table's
 * partition columns.
 * @param conf the Configuration object
 * @return the table schema, excluding partition columns
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed
 *                     configuration
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(conf);
  return jobInfo.getTableInfo().getDataColumns();
}
/**
 * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
 * on the specified job context.
 * Note: this is the complete table schema, i.e. the record schema plus the
 * partitioning schema.
 * @param conf the Configuration object
 * @return the table schema, including the record schema and partitioning schema
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed
 *                     configuration
 */
public static HCatSchema getTableSchemaWithPartitionColumns(Configuration conf) throws IOException {
  return getJobInfo(conf).getTableInfo().getAllColumns();
}
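A minimal usage sketch of the two getters, assuming HCatOutputFormat.setOutput(...) has already been called against this Configuration (otherwise both calls throw IOException):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;

public class SchemaLookupSketch {
  // Prints the record-only schema and the full schema side by side.
  static void printSchemas(Configuration conf) throws IOException {
    HCatSchema dataCols = HCatOutputFormat.getTableSchema(conf);
    HCatSchema allCols = HCatOutputFormat.getTableSchemaWithPartitionColumns(conf);
    System.out.println("record columns:   " + dataCols.getFieldNames());
    System.out.println("incl. partitions: " + allCols.getFieldNames());
  }
}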
static void initializeOutputSerDe(AbstractSerDe serDe, Configuration conf, OutputJobInfo jobInfo)
    throws SerDeException {
  SerDeUtils.initializeSerDe(serDe, conf,
      getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema()), null);
}
/**
 * @param context current JobContext
 * @param baseCommitter OutputCommitter to contain
 * @throws IOException
 */
public FileOutputCommitterContainer(JobContext context,
    org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
  super(context, baseCommitter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
  this.partitionsDiscovered = !dynamicPartitioningUsed;
  cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  Table table = new Table(jobInfo.getTableInfo().getTable());
  if (dynamicPartitioningUsed
      && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getCustomDynamicPath() != null
      && jobInfo.getCustomDynamicPath().length() > 0) {
    customDynamicLocationUsed = true;
  } else {
    customDynamicLocationUsed = false;
  }
  this.maxAppendAttempts = context.getConfiguration().getInt(
      HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
}
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
    context.getConfiguration(), storeInfo);
/**
 * Gets the output format instance.
 * @param context the job context
 * @return the output format instance
 * @throws IOException
 */
protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context)
    throws IOException {
  OutputJobInfo jobInfo = getJobInfo(context.getConfiguration());
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  // Always configure the storage handler with jobproperties/jobconf before calling
  // any methods on it.
  configureOutputStorageHandler(context);
  if (storageHandler instanceof FosterStorageHandler) {
    return new FileOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  } else {
    return new DefaultOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  }
}
try {
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
/**
 * @param baseWriter RecordWriter to contain
 * @param context current TaskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
public FileRecordWriterContainer(
    RecordWriter<? super WritableComparable<?>, ? super Writable> baseWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  super(context, baseWriter);
  this.context = context;
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
  // If partition columns occur in the data, we want to remove them.
  partColsToDel = jobInfo.getPosOfPartCols();
  if (partColsToDel == null) {
    throw new HCatException("It seems that setSchema() is not called on "
        + "HCatOutputFormat. Please make sure that method is called.");
  }
}
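A hedged illustration of the idea behind partColsToDel: partition values travel inside each record, and the writer strips them before handing the record to the underlying format. This is not the actual writer code; all names below are illustrative:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class PartColRemovalSketch {
  // Given the positions of partition columns in a record, return a copy
  // with those fields removed (mirroring what partColsToDel enables).
  static List<Object> stripPartitionColumns(List<Object> record, List<Integer> partColsToDel) {
    List<Object> out = new ArrayList<>(record);
    // Remove from the highest index first so earlier positions stay valid.
    partColsToDel.stream()
        .sorted(Comparator.reverseOrder())
        .forEach(i -> out.remove((int) i));
    return out;
  }

  public static void main(String[] args) {
    List<Object> rec = new ArrayList<>(List.of("a", 1, "2024-01-01"));
    System.out.println(stripPartitionColumns(rec, List.of(2))); // [a, 1]
  }
}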
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    handleDuplicatePublish(context, jobInfo, client,
        new Table(jobInfo.getTableInfo().getTable()));
  } catch (MetaException e) {
    throw new IOException(e);
  } catch (TException e) {
    throw new IOException(e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
  if (!jobInfo.isDynamicPartitioningUsed()) {
    JobConf jobConf = new JobConf(context.getConfiguration());
    getBaseOutputFormat().checkOutputSpecs(null, jobConf);
    // checkOutputSpecs might have set some properties; copy them back so the
    // context reflects that.
    HCatUtil.copyConf(jobConf, context.getConfiguration());
  }
}
Properties props = outputJobInfo.getTableInfo().getStorerInfo().getProperties();
props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
Path tblPath = new Path(jobInfo.getTableInfo().getTableLocation());
if (dynamicPartitioningUsed) {
  if (!customDynamicLocationUsed) {
    src = new Path(getPartitionRootLocation(jobInfo.getLocation(),
        jobInfo.getTableInfo().getTable().getPartitionKeysSize()));
  } else {
OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
    tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
String parentPath = jobInfo.getTableInfo().getTableLocation();
String dynHash = tableDesc.getJobProperties().get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
// ...
for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
  String value = jobInfo.getPartitionValues().get(name);
  // ...
}
// ...
if (/* ... == */ jobInfo.getTableInfo().getPartitionColumns().size()) {
  // ...
}
jobProperties.put("mapred.output.dir", jobInfo.getLocation());
// ORC: copy recognized ORC table properties into the job.
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (OrcConf property : OrcConf.values()) {
  String propName = property.getAttribute();

// Avro: copy recognized Avro table properties into the job.
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
  String propName = property.getPropName();

// Parquet: copy recognized Parquet table properties into the job.
Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
for (String key : tableProps.keySet()) {
  if (ParquetTableUtils.isParquetProperty(key)) {
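The three fragments above share one pattern: copy only the table-level properties a given format recognizes into the per-job output properties. A minimal, self-contained sketch of that pattern; the method and variable names here are illustrative, not the actual FosterStorageHandler code:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class TablePropPropagationSketch {
  // Copies only the keys a format recognizes from table parameters into the
  // per-job properties, mirroring the ORC/Avro/Parquet fragments above.
  static void propagate(Map<String, String> tableProps,
                        Iterable<String> recognizedKeys,
                        Map<String, String> jobProperties) {
    for (String key : recognizedKeys) {
      String value = tableProps.get(key);
      if (value != null) {
        jobProperties.put(key, value);
      }
    }
  }

  public static void main(String[] args) {
    Map<String, String> tableProps = new HashMap<>();
    tableProps.put("orc.compress", "ZLIB");
    Map<String, String> jobProps = new HashMap<>();
    propagate(tableProps, List.of("orc.compress"), jobProps);
    System.out.println(jobProps); // {orc.compress=ZLIB}
  }
}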
public void testSetOutput() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test outputformat");
  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("colname", "p1");
  // A null server URL means local mode.
  OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues);
  HCatOutputFormat.setOutput(job, info);
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job.getConfiguration());
  assertNotNull(jobInfo.getTableInfo());
  assertEquals(1, jobInfo.getPartitionValues().size());
  assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
  assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
  assertEquals("data_column",
      jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());
  publishTest(job);
}
    new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts);
jobInfo.setPosOfPartCols(posOfPartCols);
jobInfo.setPosOfDynPartCols(posOfDynPartCols);
Table table = new Table(jobInfo.getTableInfo().getTable());
Path tblPath = new Path(table.getTTable().getSd().getLocation());
FileSystem fs = tblPath.getFileSystem(conf);
IMetaStoreClient client = null;
HCatTableInfo tableInfo = jobInfo.getTableInfo();
List<Partition> partitionsAdded = new ArrayList<Partition>();
try {
externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();