/**
 * Instantiates a new hcat partition info.
 * @param partitionSchema the partition schema
 * @param storageHandler the storage handler
 * @param location the location
 * @param hcatProperties hcat-specific properties at the partition
 * @param jobProperties the job properties
 * @param tableInfo the table information
 */
public PartInfo(HCatSchema partitionSchema, HiveStorageHandler storageHandler,
                String location, Properties hcatProperties,
                Map<String, String> jobProperties, HCatTableInfo tableInfo) {
  this.partitionSchema = partitionSchema;
  this.location = location;
  this.hcatProperties = hcatProperties;
  this.jobProperties = jobProperties;
  this.tableInfo = tableInfo;

  this.storageHandlerClassName = storageHandler.getClass().getName();
  this.inputFormatClassName = storageHandler.getInputFormatClass().getName();
  this.serdeClassName = storageHandler.getSerDeClass().getName();
  this.outputFormatClassName = storageHandler.getOutputFormatClass().getName();
}
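The constructor records the handler's class names eagerly instead of holding on to the handler instance itself, which keeps PartInfo cheap to serialize. A minimal sketch of that derivation, assuming a handler resolved through HiveUtils.getStorageHandler (the HBase handler class name is an illustrative placeholder only):

// Illustrative sketch; the HBase handler name is a placeholder, not a requirement.
HiveStorageHandler handler =
    HiveUtils.getStorageHandler(conf, "org.apache.hadoop.hive.hbase.HBaseStorageHandler");
String storageHandlerClassName = handler.getClass().getName();
String inputFormatClassName = handler.getInputFormatClass().getName();
String outputFormatClassName = handler.getOutputFormatClass().getName();
String serdeClassName = handler.getSerDeClass().getName();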
/**
 * Setter for StorageHandler class.
 */
public HCatTable storageHandler(String storageHandler) throws HCatException {
  this.tblProps.put(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE,
      storageHandler);

  LOG.warn("HiveStorageHandlers can't be reliably instantiated on the client-side. "
      + "Attempting to derive Input/OutputFormat settings from StorageHandler, on best effort: ");
  try {
    HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler);
    this.sd.setInputFormat(sh.getInputFormatClass().getName());
    this.sd.setOutputFormat(sh.getOutputFormatClass().getName());
    this.sd.getSerdeInfo().setSerializationLib(sh.getSerDeClass().getName());
  } catch (HiveException e) {
    LOG.warn("Could not derive Input/OutputFormat and SerDe settings from storageHandler. "
        + "These values need to be set explicitly.", e);
  }

  return this;
}
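A hedged usage sketch of this fluent setter; the database, table, and handler names are placeholders, and HCatTable is assumed to be the client-side table descriptor from org.apache.hive.hcatalog.api:

// Placeholder names throughout; substitute the actual table and handler.
HCatTable table = new HCatTable("mydb", "mytable")
    .storageHandler("org.apache.hadoop.hive.hbase.HBaseStorageHandler");
// If the handler cannot be instantiated client-side, set the Input/OutputFormat
// and SerDe explicitly, as the logged warning above advises.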
final public Class<? extends InputFormat> getInputFormatClass() {
  if (inputFormatClass == null) {
    try {
      String className = tTable.getSd().getInputFormat();
      if (className == null) {
        // No explicit InputFormat in the storage descriptor: fall back to the
        // table's storage handler, if one is configured.
        if (getStorageHandler() == null) {
          return null;
        }
        inputFormatClass = getStorageHandler().getInputFormatClass();
      } else {
        inputFormatClass = (Class<? extends InputFormat>)
            Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
      }
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  return inputFormatClass;
}
TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    IgnoreKeyTextOutputFormat.class, props);
if (tableDesc.getJobProperties() == null) {
  // Body completed from the fuller variant below: start with an empty map.
  tableDesc.setJobProperties(new HashMap<String, String>());
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
                                                        InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  Properties mytableProperties = tableDesc.getProperties();
  mytableProperties.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    // Append this job's InputJobInfo to any list already serialized into the job
    // properties, so multiple inputs can coexist in one job.
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
        properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));

    // Let the storage handler contribute its own input-side job properties.
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }

  return jobProperties;
}
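A sketch of how the returned map is typically applied, assuming inputJobInfo was built with InputJobInfo.create(...) and the handler was resolved via HCatUtil.getStorageHandler(...) (both HCatalog utilities); the copy step mirrors createBaseRecordReader below:

// Sketch: merge the handler-derived properties into the job configuration.
Map<String, String> jobProps = getInputJobProperties(storageHandler, inputJobInfo);
HCatUtil.copyJobPropertiesToJobConf(jobProps, jobConf);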
Class inputFormatClass = storageHandler.getInputFormatClass();
org.apache.hadoop.mapred.InputFormat inputFormat =
    getMapRedInputFormat(jobConf, inputFormatClass);
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {
  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  // Apply partition-level job properties before instantiating the underlying
  // InputFormat, so handler-specific settings take effect per partition.
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
/**
 * Utility method that gets the table or partition {@link InputFormat} class. First it
 * tries to get the class name from the given {@link StorageDescriptor}. If it isn't set
 * there, it tries the StorageHandler class set in the table properties. If neither is
 * found, it throws an exception.
 * @param job {@link JobConf} instance, needed in case the table is StorageHandler-based.
 * @param sd {@link StorageDescriptor} of the partition being read, or of the table for
 *           non-partitioned tables.
 * @param table Table object
 */
public static Class<? extends InputFormat<?, ?>> getInputFormatClass(final JobConf job,
    final StorageDescriptor sd, final Table table) throws Exception {
  final String inputFormatName = sd.getInputFormat();
  if (Strings.isNullOrEmpty(inputFormatName)) {
    final String storageHandlerClass = table.getParameters().get(META_TABLE_STORAGE);
    if (Strings.isNullOrEmpty(storageHandlerClass)) {
      throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There is neither "
          + "InputFormat class explicitly specified nor StorageHandler class");
    }
    final HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(job, storageHandlerClass);
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(MetaStoreUtils.getTableMetadata(table));
    storageHandler.configureInputJobProperties(tableDesc, table.getParameters());
    return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
  } else {
    return (Class<? extends InputFormat<?, ?>>) Class.forName(inputFormatName);
  }
}
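A hedged usage sketch; job and table are assumed to be in scope from the surrounding read path, and the table's own StorageDescriptor is passed for the non-partitioned case:

// Sketch: resolve the InputFormat for a non-partitioned table.
Class<? extends InputFormat<?, ?>> ifClass = getInputFormatClass(job, table.getSd(), table);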
// Excerpt: inside a conditional (not shown) that checks whether the view's
// storage handler is set.
in_class = storageHandler.getInputFormatClass();
} else {
  in_class = JavaUtils.loadClass(crtViewDesc.getInputFormat());
// Same excerpt for the create-table path, reading from crtTblDesc instead.
in_class = storageHandler.getInputFormatClass();
} else {
  in_class = JavaUtils.loadClass(crtTblDesc.getInputFormat());
final public Class<? extends InputFormat> getInputFormatClass() {
  if (inputFormatClass == null) {
    try {
      String className = tTable.getSd().getInputFormat();
      if (className == null) {
        if (getStorageHandler() == null) {
          return null;
        }
        inputFormatClass = getStorageHandler().getInputFormatClass();
      } else {
        // Variant of the method above that resolves the class with the generic
        // JavaUtils classloader rather than the session-specified one.
        inputFormatClass = (Class<? extends InputFormat>)
            Class.forName(className, true, JavaUtils.getClassLoader());
      }
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  return inputFormatClass;
}
public static Class<? extends InputFormat<?, ?>> getInputFormatClass(final JobConf job,
    final Table table, final Partition partition) throws Exception {
  if (partition != null) {
    // Partition-level settings take precedence over the table's.
    if (partition.getSd().getInputFormat() != null) {
      return (Class<? extends InputFormat<?, ?>>) Class.forName(partition.getSd().getInputFormat());
    }
    if (partition.getParameters().get(META_TABLE_STORAGE) != null) {
      final HiveStorageHandler storageHandler =
          HiveUtils.getStorageHandler(job, partition.getParameters().get(META_TABLE_STORAGE));
      return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
    }
  }
  if (table.getSd().getInputFormat() != null) {
    return (Class<? extends InputFormat<?, ?>>) Class.forName(table.getSd().getInputFormat());
  }
  if (table.getParameters().get(META_TABLE_STORAGE) != null) {
    final HiveStorageHandler storageHandler =
        HiveUtils.getStorageHandler(job, table.getParameters().get(META_TABLE_STORAGE));
    return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
  }
  throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There is neither "
      + "InputFormat class explicitly specified nor a StorageHandler class provided.");
}
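Usage follows the same shape as the StorageDescriptor-based overload above, with partition allowed to be null for non-partitioned reads; partition-level metadata, when present, wins over the table's:

// Sketch: partition may be null; its InputFormat/StorageHandler take precedence.
Class<? extends InputFormat<?, ?>> ifClass = getInputFormatClass(job, table, partition);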