if (tbd.getPartitionSpec() == null || tbd.getPartitionSpec().isEmpty()) {
  // Unpartitioned load: validate the files against the table-level input format.
  flag = HiveFileFormatUtils.checkInputFormat(
      srcFs, conf, tbd.getTable().getInputFileFormatClass(), files);
} else {
  // Partitioned load: validate against the existing partition's input format.
  flag = HiveFileFormatUtils.checkInputFormat(
      srcFs, conf, oldPart.getInputFormatClass(), files);
}
/**
 * Get the list of operators from the operator tree that are needed for the path.
 * @param pathToAliases mapping from path to aliases
 * @param aliasToWork the operator tree to be invoked for a given alias
 * @param dir the path to look for
 **/
public static List<Operator<? extends OperatorDesc>> doGetWorksFromPath(
    Map<Path, ArrayList<String>> pathToAliases,
    Map<String, Operator<? extends OperatorDesc>> aliasToWork, Path dir) {
  List<Operator<? extends OperatorDesc>> opList =
      new ArrayList<Operator<? extends OperatorDesc>>();

  List<String> aliases = doGetAliasesFromPath(pathToAliases, dir);
  for (String alias : aliases) {
    opList.add(aliasToWork.get(alias));
  }
  return opList;
}
private static Path getMatchingPath(Map<Path, ArrayList<String>> pathToAliases,
    Path dir) {
  // First try a full match on the path as given.
  Path path = dir;
  if (foundAlias(pathToAliases, path)) {
    return path;
  }

  // Then try the same path with scheme and authority stripped.
  Path dirPath = Path.getPathWithoutSchemeAndAuthority(dir);
  if (foundAlias(pathToAliases, dirPath)) {
    return dirPath;
  }

  // Walk up the parents, trying the full and the scheme-less form at each level.
  while (path != null && dirPath != null) {
    path = path.getParent();
    dirPath = dirPath.getParent();
    if (foundAlias(pathToAliases, path)) {
      return path;
    }
    if (foundAlias(pathToAliases, dirPath)) {
      return dirPath;
    }
  }
  return null;
}
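// A minimal, self-contained sketch (plain Hadoop Path API only; the map
// contents are hypothetical) of the parent-walk that getMatchingPath performs:
// a leaf file resolves to the nearest ancestor directory registered in the map.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;

public class MatchingPathSketch {
  public static void main(String[] args) {
    Map<Path, ArrayList<String>> pathToAliases = new HashMap<>();
    pathToAliases.put(new Path("hdfs://nn:8020/warehouse/t1"),
        new ArrayList<>(Arrays.asList("t1")));

    // Walking getParent() from the leaf file reaches the registered key;
    // Path equality is URI-based, so the lookup succeeds.
    Path p = new Path("hdfs://nn:8020/warehouse/t1/ds=2021-01-01/000000_0");
    while (p != null && !pathToAliases.containsKey(p)) {
      p = p.getParent();
    }
    System.out.println(p == null ? "no match" : pathToAliases.get(p)); // [t1]
  }
}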
public static PartitionDesc getPartitionDescFromPathRecursively(
    Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
    Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap,
    boolean ignoreSchema) throws IOException {

  PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);

  if (part == null
      && (ignoreSchema
          || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals(""))
          || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) {

    Map<Path, PartitionDesc> newPathToPartitionInfo = null;
    if (cacheMap != null) {
      newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
    }

    if (newPathToPartitionInfo == null) { // still null
      newPathToPartitionInfo = populateNewPartitionDesc(pathToPartitionInfo);

      if (cacheMap != null) {
        cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
      }
    }
    part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);
  }

  if (part != null) {
    return part;
  } else {
    throw new IOException("cannot find dir = " + dir.toString()
        + " in pathToPartitionInfo: " + pathToPartitionInfo.keySet());
  }
}
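// A hedged usage sketch for the lookup above, mirroring the three-argument
// call sites shown later in this section; lookupPartition and splitPath are
// illustrative names, not part of the Hive API.
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.IOPrepareCache;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;

class PartitionLookupSketch {
  // The per-query map from IOPrepareCache caches the scheme-stripped copy of
  // pathToPartitionInfo so it is not rebuilt for every split.
  static PartitionDesc lookupPartition(Map<Path, PartitionDesc> pathToPartitionInfo,
      Path splitPath) throws IOException {
    return HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, splitPath,
        IOPrepareCache.get().allocatePartitionDescMap());
  }
}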
@SuppressWarnings("unchecked")
private static boolean checkTextInputFormat(FileSystem fs, HiveConf conf,
    List<FileStatus> files) throws HiveException {
  // Pipes cannot be sampled, so drop them from the list before probing.
  List<FileStatus> files2 = new LinkedList<>(files);
  Iterator<FileStatus> iter = files2.iterator();
  while (iter.hasNext()) {
    FileStatus file = iter.next();
    if (file == null) continue;
    if (isPipe(fs, file)) {
      LOG.info("Skipping format check for " + file.getPath() + " as it is a pipe");
      iter.remove();
    }
  }
  if (files2.isEmpty()) return true;

  // Negative probe: if any registered checker-backed format recognizes the
  // files as its own, they cannot be read as plain text.
  Set<Class<? extends InputFormat>> inputFormatter =
      FileChecker.getInstance().registeredTextClasses();
  for (Class<? extends InputFormat> reg : inputFormatter) {
    boolean result = checkInputFormat(fs, conf, reg, files2);
    if (result) {
      return false;
    }
  }
  return true;
}
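// A minimal, self-contained sketch of the negative-probe pattern used above
// (all names hypothetical, no Hive APIs): input counts as plain text only when
// no registered binary signature claims it.
import java.util.List;
import java.util.function.Predicate;

class NegativeProbeSketch {
  static boolean looksLikeText(byte[] header, List<Predicate<byte[]>> binarySignatures) {
    for (Predicate<byte[]> signature : binarySignatures) {
      if (signature.test(header)) {
        return false; // a known binary format claimed the file
      }
    }
    return true; // nothing claimed it: treat as text
  }

  public static void main(String[] args) {
    // "SEQ" is the magic prefix of Hadoop SequenceFiles.
    Predicate<byte[]> seqFile =
        h -> h.length >= 3 && h[0] == 'S' && h[1] == 'E' && h[2] == 'Q';
    System.out.println(looksLikeText("hello\n".getBytes(), List.of(seqFile))); // true
    System.out.println(looksLikeText("SEQ6!binary".getBytes(), List.of(seqFile))); // false
  }
}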
public void setOutputFileFormatClass(Class<?> outputFileFormatClass) {
  this.outputFileFormatClass = HiveFileFormatUtils
      .getOutputFormatSubstitute(outputFileFormatClass);
}
LOG.warn("Unable to create directory with inheritPerms: " + outPath);

fsp.outWriters[filesIdx] = HiveFileFormatUtils.getHiveRecordWriter(jc,
    conf.getTableInfo(), outputClass, conf, outPath, reporter);

fsp.updaters[filesIdx] = HiveFileFormatUtils.getAcidRecordUpdater(jc,
    conf.getTableInfo(), acidBucketNum, conf, fsp.outPaths[filesIdx],
    inspector, reporter, -1);
PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
TableDesc tableDesc = part.getTableDesc();

opList = HiveFileFormatUtils.doGetWorksFromPath(
    pathToAliases, aliasToWork, filterPath);

CombinePathInputFormat combinePathInputFormat =
public static boolean checkInputFormat(FileSystem fs, HiveConf conf,
    Class<? extends InputFormat> inputFormatCls, List<FileStatus> files)
    throws HiveException {
  if (files.size() > 0) {
    Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
    if (checkerCls == null
        && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
      // No dedicated checker is registered and the requested format is text:
      // fall back to the text-specific probe.
      return checkTextInputFormat(fs, conf, files);
PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
TableDesc tableDesc = part.getTableDesc();

opList = HiveFileFormatUtils.doGetWorksFromPath(
    pathToAliases, aliasToWork, filterPath);

CombinePathInputFormat combinePathInputFormat =
/**
 * Get the list of aliases from the operator tree that are needed for the path.
 * @param pathToAliases mapping from path to aliases
 * @param dir the path to look for
 **/
public static List<String> doGetAliasesFromPath(
    Map<Path, ArrayList<String>> pathToAliases, Path dir) {
  if (pathToAliases == null) {
    return new ArrayList<String>();
  }
  // Note: if no ancestor of dir is registered, getMatchingPath returns null
  // and the map lookup below yields null.
  Path path = getMatchingPath(pathToAliases, dir);
  return pathToAliases.get(path);
}
PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
TableDesc tableDesc = part.getTableDesc();

opList = HiveFileFormatUtils.doGetWorksFromPath(
    pathToAliases, aliasToWork, filterPath);

f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName));
tempOutPath = new Path(tmpFile.toString());
JobConf localJc = getLocalFSJobConfClone(jc);
rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs,
    hiveOutputFormat, serde.getSerializedClass(), false,
    tblDesc.getProperties(), tempOutPath);
} else if (rw == null) {
fsp.outWriters[filesIdx] = HiveFileFormatUtils.getHiveRecordWriter(
    jc, conf.getTableInfo(), outputClass, conf, fsp.outPaths[filesIdx]);
public void setOutputFileFormatClass(Class<?> outputFileFormatClass) {
  this.outputFileFormatClass = HiveFileFormatUtils
      .getOutputFormatSubstitute(outputFileFormatClass);
}
protected void setupWriter() throws HiveException {
  try {
    if (tmpFile != null) {
      return;
    }

    String suffix = ".tmp";
    if (this.keyObject != null) {
      suffix = "." + this.keyObject.toString() + suffix;
    }

    parentDir = FileUtils.createLocalDirsTempFile(spillFileDirs, "hive-rowcontainer", "", true);

    tmpFile = File.createTempFile("RowContainer", suffix, parentDir);
    LOG.info("RowContainer created temp file " + tmpFile.getAbsolutePath());

    // Delete the temp file if the JVM terminates normally or through the
    // Hadoop job kill command.
    // Caveat: it won't be deleted if the JVM is killed by 'kill -9'.
    parentDir.deleteOnExit();
    tmpFile.deleteOnExit();

    // rFile = new RandomAccessFile(tmpFile, "rw");
    HiveOutputFormat<?, ?> hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(jc, tblDesc);
    tempOutPath = new Path(tmpFile.toString());
    JobConf localJc = getLocalFSJobConfClone(jc);
    rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs,
        hiveOutputFormat, serde.getSerializedClass(), false,
        tblDesc.getProperties(), tempOutPath, reporter);
  } catch (Exception e) {
    clearRows();
    LOG.error(e.toString(), e);
    throw new HiveException(e);
  }
}
public static PartitionDesc getPartitionDescFromPathRecursively(
    Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
    Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
    boolean ignoreSchema) throws IOException {

  PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);

  if (part == null
      && (ignoreSchema
          || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals("")))) {

    Map<String, PartitionDesc> newPathToPartitionInfo = null;
    if (cacheMap != null) {
      newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
    }

    if (newPathToPartitionInfo == null) { // still null
      newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
      populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);

      if (cacheMap != null) {
        cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
      }
    }
    part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);
  }

  if (part != null) {
    return part;
  } else {
    throw new IOException("cannot find dir = " + dir.toString()
        + " in pathToPartitionInfo: " + pathToPartitionInfo.keySet());
  }
}
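// A sketch of what populateNewPartitionDesc plausibly does here (an assumption
// inferred from how the scheme-less fallback above uses its output, not a
// verified implementation): re-key every entry by the path portion only, so
// "hdfs://nn:8020/a/b" and "/a/b" resolve to the same PartitionDesc.
private static void populateNewPartitionDesc(
    Map<String, PartitionDesc> pathToPartitionInfo,
    Map<String, PartitionDesc> newPathToPartitionInfo) {
  for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
    // toUri().getPath() drops the scheme and authority, keeping the path only.
    String schemeless = new Path(entry.getKey()).toUri().getPath(); // assumption
    newPathToPartitionInfo.put(schemeless, entry.getValue());
  }
}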