public static HiveOutputFormat<?, ?> getHiveOutputFormat(Configuration conf, TableDesc tableDesc)
    throws HiveException {
  return getHiveOutputFormat(conf, tableDesc.getOutputFileFormatClass());
}
public static HiveOutputFormat<?, ?> getHiveOutputFormat(Configuration conf, PartitionDesc partDesc)
    throws HiveException {
  return getHiveOutputFormat(conf, partDesc.getOutputFileFormatClass());
}
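Both overloads above delegate to a class-keyed overload that instantiates the configured output format and hands back a HiveOutputFormat. A minimal sketch of what that shared helper plausibly looks like follows; the ReflectionUtil and HivePassThroughOutputFormat references are assumptions about the enclosing class, not code taken from the snippets in this section.

// Hypothetical sketch of the shared overload the two methods above delegate to.
// ReflectionUtil and HivePassThroughOutputFormat are assumed helpers, not shown above.
private static HiveOutputFormat<?, ?> getHiveOutputFormat(Configuration conf,
    Class<? extends OutputFormat> outputClass) throws HiveException {
  // Instantiate the configured output format; wrap plain Hadoop OutputFormats
  // so callers always receive a HiveOutputFormat.
  OutputFormat<?, ?> outputFormat = ReflectionUtil.newInstance(outputClass, conf);
  if (!(outputFormat instanceof HiveOutputFormat)) {
    outputFormat = new HivePassThroughOutputFormat(outputFormat);
  }
  return (HiveOutputFormat<?, ?>) outputFormat;
}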
protected void setupWriter() throws HiveException {
  try {
    if (tmpFile != null) {
      return;
    }

    String suffix = ".tmp";
    if (this.keyObject != null) {
      suffix = "." + this.keyObject.toString() + suffix;
    }

    parentDir = FileUtils.createLocalDirsTempFile(spillFileDirs, "hive-rowcontainer", "", true);
    tmpFile = File.createTempFile("RowContainer", suffix, parentDir);
    LOG.info("RowContainer created temp file " + tmpFile.getAbsolutePath());
    // Delete the temp file when the JVM terminates normally, e.g. through the Hadoop job
    // kill command.
    // Caveat: it won't be deleted if the JVM is killed by 'kill -9'.
    parentDir.deleteOnExit();
    tmpFile.deleteOnExit();

    // rFile = new RandomAccessFile(tmpFile, "rw");
    HiveOutputFormat<?, ?> hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(jc, tblDesc);
    tempOutPath = new Path(tmpFile.toString());
    JobConf localJc = getLocalFSJobConfClone(jc);
    rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs, hiveOutputFormat,
        serde.getSerializedClass(), false, tblDesc.getProperties(), tempOutPath, reporter);
  } catch (Exception e) {
    clearRows();
    LOG.error(e.toString(), e);
    throw new HiveException(e);
  }
}
public static RecordUpdater getAcidRecordUpdater(JobConf jc, TableDesc tableInfo, int bucket,
    FileSinkDesc conf, Path outPath, ObjectInspector inspector, Reporter reporter,
    int rowIdColNum) throws HiveException, IOException {
  HiveOutputFormat<?, ?> hiveOutputFormat = getHiveOutputFormat(jc, tableInfo);
  AcidOutputFormat<?, ?> acidOutputFormat = null;
  if (hiveOutputFormat instanceof AcidOutputFormat) {
    acidOutputFormat = (AcidOutputFormat) hiveOutputFormat;
  } else {
    throw new HiveException("Unable to create RecordUpdater for HiveOutputFormat that does not "
        + "implement AcidOutputFormat");
  }
  // TODO not 100% sure about this.  This call doesn't set the compression type in the conf
  // file the way getHiveRecordWriter does, as ORC appears to read the value for itself.  Not
  // sure if this is correct or not.
  return getRecordUpdater(jc, acidOutputFormat, bucket, inspector, tableInfo.getProperties(),
      outPath, reporter, rowIdColNum, conf);
}
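For context, a caller that obtains a RecordUpdater this way drives it roughly as sketched below. The write id, row object, and descriptor variables are hypothetical placeholders, and RecordUpdater method names vary slightly across Hive versions, so treat this as a sketch rather than canonical usage.

// Hypothetical usage sketch; jc, tableInfo, bucket, fsDesc, outPath, inspector,
// rowIdColNum, writeId and rowObject are placeholders, not values from this section.
RecordUpdater updater = HiveFileFormatUtils.getAcidRecordUpdater(
    jc, tableInfo, bucket, fsDesc, outPath, inspector, Reporter.NULL, rowIdColNum);
updater.insert(writeId, rowObject); // append one row under the given write id
updater.flush();                    // push buffered rows to storage
updater.close(false);               // false = normal completion, not an abort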
@SuppressWarnings("rawtypes")
private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
    Path hiveScratchDir, String alias) throws Exception {
  TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
  if (tableDesc.isNonNative()) {
    // if it does not need native storage, we can't create an empty file for it.
    return null;
  }
  Properties props = tableDesc.getProperties();
  HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc);

  Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false);
  LOG.info("Changed input file for alias {} to {}", alias, newPath);

  // update the work
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases();
  ArrayList<String> newList = new ArrayList<String>(1);
  newList.add(alias);
  pathToAliases.put(newPath, newList);
  work.setPathToAliases(pathToAliases);

  PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
  work.addPathToPartitionInfo(newPath, pDesc);

  return newPath;
}
private void createHiveOutputFormat(JobConf job) throws HiveException {
  if (hiveOutputFormat == null) {
    Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job);
  }
  try {
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(job, getConf().getTableInfo());
  } catch (Throwable t) {
    throw (t instanceof HiveException) ? (HiveException) t : new HiveException(t);
  }
}
@SuppressWarnings("rawtypes")
private static Path createDummyFileForEmptyPartition(Path path, JobConf job,
    PartitionDesc partDesc, Path hiveScratchDir) throws Exception {
  String strPath = path.toString();

  // The input file does not exist, so replace it with an empty file.
  if (partDesc.getTableDesc().isNonNative()) {
    // if this isn't a Hive table we can't create an empty file for it.
    return path;
  }

  Properties props = SerDeUtils.createOverlayedProperties(
      partDesc.getTableDesc().getProperties(), partDesc.getProperties());
  HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc);

  boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;

  Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow);
  LOG.info("Changed input file {} to empty file {} ({})", strPath, newPath, oneRow);
  return newPath;
}
@SuppressWarnings("rawtypes")
private static Path createDummyFileForEmptyTable(JobConf job, MapWork work,
    Path hiveScratchDir, String alias) throws Exception {
  TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
  if (tableDesc.isNonNative()) {
    // if it does not need native storage, we can't create an empty file for it.
    return null;
  }
  Properties props = tableDesc.getProperties();
  HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc);

  Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false);
  if (LOG.isInfoEnabled()) {
    LOG.info("Changed input file for alias " + alias + " to " + newPath);
  }

  // update the work
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases();
  ArrayList<String> newList = new ArrayList<String>();
  newList.add(alias);
  pathToAliases.put(newPath, newList);
  work.setPathToAliases(pathToAliases);

  PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
  work.addPathToPartitionInfo(newPath, pDesc);

  return newPath;
}
public static RecordWriter getHiveRecordWriter(JobConf jc, TableDesc tableInfo,
    Class<? extends Writable> outputClass, FileSinkDesc conf, Path outPath,
    Reporter reporter) throws HiveException {
  HiveOutputFormat<?, ?> hiveOutputFormat = getHiveOutputFormat(jc, tableInfo);
  try {
    boolean isCompressed = conf.getCompressed();
    JobConf jc_output = jc;
    if (isCompressed) {
      jc_output = new JobConf(jc);
      String codecStr = conf.getCompressCodec();
      if (codecStr != null && !codecStr.trim().equals("")) {
        Class<? extends CompressionCodec> codec =
            (Class<? extends CompressionCodec>) JavaUtils.loadClass(codecStr);
        FileOutputFormat.setOutputCompressorClass(jc_output, codec);
      }
      String type = conf.getCompressType();
      if (type != null && !type.trim().equals("")) {
        CompressionType style = CompressionType.valueOf(type);
        SequenceFileOutputFormat.setOutputCompressionType(jc, style);
      }
    }
    return getRecordWriter(jc_output, hiveOutputFormat, outputClass, isCompressed,
        tableInfo.getProperties(), outPath, reporter);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
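As a usage illustration, the returned writer (Hive's FileSinkOperator.RecordWriter) accepts serialized rows as Writables and is closed with abort set to false on success. The job conf, table descriptor, file sink descriptor, and output path below are assumed to exist already; this is a sketch, not code from the section above.

// Hypothetical caller sketch; jc, tableInfo, fsDesc and tmpDir are placeholders.
FileSinkOperator.RecordWriter writer = HiveFileFormatUtils.getHiveRecordWriter(
    jc, tableInfo, Text.class, fsDesc, new Path(tmpDir, "000000_0"), Reporter.NULL);
writer.write(new Text("one serialized row"));  // one Writable per row
writer.close(false);                           // false = normal completion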
private void createHiveOutputFormat(Configuration hconf) throws HiveException {
  if (hiveOutputFormat == null) {
    Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), hconf);
  }
  try {
    hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, getConf().getTableInfo());
  } catch (Throwable t) {
    throw (t instanceof HiveException) ? (HiveException) t : new HiveException(t);
  }
}
  serializer.initialize(hconf, tableInfo.getProperties());
  outputClass = serializer.getSerializedClass();
  hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, tableInfo);
} catch (SerDeException e) {
  throw new HiveException(e);
@SuppressWarnings("rawtypes")
private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work,
    Path hiveScratchDir) throws Exception {
  String strPath = path.toString();

  // The input file does not exist, so replace it with an empty file.
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
  if (partDesc.getTableDesc().isNonNative()) {
    // if this isn't a Hive table we can't create an empty file for it.
    return path;
  }

  Properties props = SerDeUtils.createOverlayedProperties(
      partDesc.getTableDesc().getProperties(), partDesc.getProperties());
  HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc);

  boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class;

  Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow);
  if (LOG.isInfoEnabled()) {
    LOG.info("Changed input file " + strPath + " to empty file " + newPath + " (" + oneRow + ")");
  }

  // update the work
  work.addPathToAlias(newPath, work.getPathToAliases().get(path));
  work.removePathToAlias(path);

  work.removePathToPartitionInfo(path);
  work.addPathToPartitionInfo(newPath, partDesc);

  return newPath;
}
  serializer.initialize(null, tableInfo.getProperties());
  outputClass = serializer.getSerializedClass();
  hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo());
} catch (SerDeException e) {
  throw new HiveException(e);