private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);

  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating partinfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
    .deserialize(jobInfoString);
StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
    context.getConfiguration(), storeInfo);
Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
/**
 * Gets the output format instance.
 * @param context the job context
 * @return the output format instance
 * @throws IOException
 */
protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context)
    throws IOException {
  OutputJobInfo jobInfo = getJobInfo(context.getConfiguration());
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
  // Always configure the storage handler with the job properties/jobconf
  // before calling any methods on it.
  configureOutputStorageHandler(context);
  if (storageHandler instanceof FosterStorageHandler) {
    return new FileOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  } else {
    return new DefaultOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  }
}
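For context, a minimal driver-side sketch of the setup that makes getOutputFormat() work: setOutput() serializes the OutputJobInfo that getJobInfo() later reads back, and setSchema() records the output schema the writer containers check. This is a sketch under stated assumptions, not the authoritative API surface; it assumes the org.apache.hive.hcatalog package layout, and getTableSchema(Configuration) is only present in newer HCatalog releases.

// Hypothetical driver-side helper; signatures assume the
// org.apache.hive.hcatalog layout and a recent HCatalog release.
import java.util.Map;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class OutputSetupSketch {
  static void configure(Job job, String db, String table,
      Map<String, String> partitionValues) throws Exception {
    // setOutput() stores the serialized OutputJobInfo that getJobInfo() reads.
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(db, table, partitionValues));
    // setSchema() records the output schema; the writer containers fail
    // without it (see FileRecordWriterContainer below). Here we reuse the
    // table schema unchanged; a real job may prune or reorder columns.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, schema);
    job.setOutputFormatClass(HCatOutputFormat.class);
  }
}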
public static HiveStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo)
    throws IOException {
  return HCatUtil.getStorageHandler(conf,
      partitionInfo.getStorageHandlerClassName(),
      partitionInfo.getSerdeClassName(),
      partitionInfo.getInputFormatClassName(),
      partitionInfo.getOutputFormatClassName());
}
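The read-side call pattern for this overload, mirrored by the fragments further down, is to copy the partition's stored job properties into the JobConf before instantiating the handler. A minimal sketch, assuming the org.apache.hive.hcatalog package layout and a PartInfo obtained from a deserialized InputJobInfo elsewhere in the job:

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.mapreduce.PartInfo;

public class PartInfoHandlerSketch {
  // Hypothetical helper: applies the partition's job properties first, so the
  // handler sees them when it is configured (same ordering as the fragments below).
  static HiveStorageHandler handlerFor(JobConf jobConf, PartInfo partitionInfo)
      throws IOException {
    Map<String, String> jobProperties = partitionInfo.getJobProperties();
    HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
    return HCatUtil.getStorageHandler(jobConf, partitionInfo);
  }
}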
try {
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
/**
 * @param context current JobContext
 * @param baseCommitter OutputCommitter to contain
 * @throws IOException
 */
public FileOutputCommitterContainer(JobContext context,
    org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
  super(context, baseCommitter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
  this.partitionsDiscovered = !dynamicPartitioningUsed;
  cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  Table table = new Table(jobInfo.getTableInfo().getTable());
  if (dynamicPartitioningUsed
      && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getCustomDynamicPath() != null
      && jobInfo.getCustomDynamicPath().length() > 0) {
    customDynamicLocationUsed = true;
  } else {
    customDynamicLocationUsed = false;
  }
  this.maxAppendAttempts = context.getConfiguration().getInt(
      HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
}
/**
 * Create an instance of the storage handler defined in storerInfo. If one cannot be found,
 * FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe.
 * That StorageHandler assumes the other supplied storage artifacts are for a file-based storage system.
 * @param conf the job's configuration, used to configure the StorageHandler if it is Configurable
 * @param storerInfo StorerInfo defining the StorageHandler, InputFormat, OutputFormat and SerDe
 * @return storageHandler instance
 * @throws IOException
 */
public static HiveStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo)
    throws IOException {
  return getStorageHandler(conf,
      storerInfo.getStorageHandlerClass(),
      storerInfo.getSerdeClass(),
      storerInfo.getIfClass(),
      storerInfo.getOfClass());
}
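A minimal usage sketch for this overload, assuming the org.apache.hive.hcatalog package layout; the StorerInfo would normally come from InternalUtil.extractStorerInfo() or jobInfo.getTableInfo().getStorerInfo(), as in the snippets above. Checking for FosterStorageHandler afterwards reproduces the file-based dispatch used by getOutputFormat():

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.mapreduce.FosterStorageHandler;
import org.apache.hive.hcatalog.mapreduce.StorerInfo;

public class StorerInfoHandlerSketch {
  // Hypothetical helper around the overload above.
  static HiveStorageHandler handlerFor(Configuration conf, StorerInfo storerInfo)
      throws IOException {
    HiveStorageHandler handler = HCatUtil.getStorageHandler(conf, storerInfo);
    // A FosterStorageHandler result means no real storage handler was defined:
    // the table is file-based, and output is routed through
    // FileOutputFormatContainer (the dispatch in getOutputFormat above).
    boolean fileBased = handler instanceof FosterStorageHandler;
    return handler;
  }
}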
/**
 * @param baseWriter RecordWriter to contain
 * @param context current TaskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
public FileRecordWriterContainer(
    RecordWriter<? super WritableComparable<?>, ? super Writable> baseWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  super(context, baseWriter);
  this.context = context;
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());

  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }

  // If partition columns occur in the data, we want to remove them.
  partColsToDel = jobInfo.getPosOfPartCols();
  if (partColsToDel == null) {
    throw new HCatException("It seems that setSchema() is not called on "
        + "HCatOutputFormat. Please make sure that method is called.");
  }
}
HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
storageHandler = HCatUtil.getStorageHandler(jobConf, partitionInfo);
try {
  HiveStorageHandler storageHandlerInst = HCatUtil.getStorageHandler(
      context.getConf(), desc.getStorageHandler(), desc.getSerName(),
Configuration conf = jobContext.getConfiguration();
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, partitionInfo);
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);