public static boolean isInputFileFormatSelfDescribing(PartitionDesc pd) {
  Class<?> inputFormatClass = pd.getInputFileFormatClass();
  return SelfDescribingInputFormatInterface.class.isAssignableFrom(inputFormatClass);
}
private boolean checkInputsVectorized(MapWork mapWork) {
  boolean mayWrap = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED);
  Collection<Class<?>> excludedInputFormats = Utilities.getClassNamesFromConfig(conf,
      ConfVars.HIVE_VECTORIZATION_VECTORIZED_INPUT_FILE_FORMAT_EXCLUDES);
  for (PartitionDesc pd : mapWork.getPathToPartitionInfo().values()) {
    if ((Utilities.isInputFileFormatVectorized(pd)
        && !excludedInputFormats.contains(pd.getInputFileFormatClass()))
        || (mayWrap && HiveInputFormat.canWrapForLlap(pd.getInputFileFormatClass(), true))) {
      continue;
    }
    LOG.info("Input format: " + pd.getInputFileFormatClassName()
        + ", doesn't provide vectorized input");
    return false;
  }
  return true;
}
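A minimal usage sketch for the exclude list read above; it assumes the ConfVar HIVE_VECTORIZATION_VECTORIZED_INPUT_FILE_FORMAT_EXCLUDES maps to the key hive.vectorized.input.format.excludes, and the Parquet class is only an example value:

// Hedged sketch: a format listed here is skipped by the vectorized branch even if it
// implements VectorizedInputFormatInterface, and must instead pass the LLAP wrap check.
JobConf conf = new JobConf();
conf.set("hive.vectorized.input.format.excludes",
    "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat");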
public static boolean isInputFileFormatVectorized(PartitionDesc pd) {
  Class<?> inputFormatClass = pd.getInputFileFormatClass();
  return VectorizedInputFormatInterface.class.isAssignableFrom(inputFormatClass);
}
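Both helpers reduce to an isAssignableFrom check against the partition's input format class. A small sketch (not from the source) showing the same test applied directly to two stock formats, on the understanding that ORC implements VectorizedInputFormatInterface while plain text does not:

// The same reflective check isInputFileFormatVectorized() performs, inlined.
boolean orcVectorized = VectorizedInputFormatInterface.class
    .isAssignableFrom(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.class);  // true
boolean textVectorized = VectorizedInputFormatInterface.class
    .isAssignableFrom(org.apache.hadoop.mapred.TextInputFormat.class);         // false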
private boolean checkInputsVectorized(MapWork mapWork) {
  boolean mayWrap = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_NONVECTOR_WRAPPER_ENABLED);
  for (PartitionDesc pd : mapWork.getPathToPartitionInfo().values()) {
    if (Utilities.isInputFileFormatVectorized(pd)
        || (mayWrap && HiveInputFormat.canWrapForLlap(pd.getInputFileFormatClass(), true))) {
      continue;
    }
    LOG.info("Input format: " + pd.getInputFileFormatClassName()
        + ", doesn't provide vectorized input");
    return false;
  }
  return true;
}
@Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); }
@Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) public String getInputFileFormatClassName() { return getInputFileFormatClass().getName(); }
private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles,
    MapWork work) throws IOException {
  boolean retval = false;
  Path path = ((FileSplit) s).getPath();
  PartitionDesc pd = HiveFileFormatUtils.getFromPathRecursively(
      work.getPathToPartitionInfo(), path, cache);
  String currentDeserializerClass = pd.getDeserializerClassName();
  Class<?> currentInputFormatClass = pd.getInputFileFormatClass();
  Class<?> previousInputFormatClass = null;
  String previousDeserializerClass = null;
  if (prevSplit != null) {
    Path prevPath = ((FileSplit) prevSplit).getPath();
    if (!groupAcrossFiles) {
      return !path.equals(prevPath);
    }
    PartitionDesc prevPD = HiveFileFormatUtils.getFromPathRecursively(
        work.getPathToPartitionInfo(), prevPath, cache);
    previousDeserializerClass = prevPD.getDeserializerClassName();
    previousInputFormatClass = prevPD.getInputFileFormatClass();
  }
  if ((currentInputFormatClass != previousInputFormatClass)
      || (!currentDeserializerClass.equals(previousDeserializerClass))) {
    retval = true;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Adding split " + path + " to src new group? " + retval);
  }
  return retval;
}
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, PartitionDesc partDesc, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); LOG.info("Changed input file {} to empty file {} ({})", strPath, newPath, oneRow); return newPath; }
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;
  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }
    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
          this.pathToPartitionInfo, ipaths[0], IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
@Override
public int hashCode() {
  final int prime = 31;
  int result = 1;
  result = result * prime + (getInputFileFormatClass() == null ? 0 : getInputFileFormatClass().hashCode());
  result = result * prime + (getOutputFileFormatClass() == null ? 0 : getOutputFileFormatClass().hashCode());
  result = result * prime + (getProperties() == null ? 0 : getProperties().hashCode());
  result = result * prime + (getTableDesc() == null ? 0 : getTableDesc().hashCode());
  result = result * prime + (getPartSpec() == null ? 0 : getPartSpec().hashCode());
  result = result * prime + (getVectorPartitionDesc() == null ? 0 : getVectorPartitionDesc().hashCode());
  return result;
}
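This hashCode folds six getters into the standard prime-accumulator pattern. A minimal sketch of a matching equals, assumed rather than quoted from the source, comparing the same six fields (uses java.util.Objects):

@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof PartitionDesc)) {
    return false;
  }
  PartitionDesc other = (PartitionDesc) obj;
  // Compare exactly the fields that participate in hashCode() above.
  return Objects.equals(getInputFileFormatClass(), other.getInputFileFormatClass())
      && Objects.equals(getOutputFileFormatClass(), other.getOutputFileFormatClass())
      && Objects.equals(getProperties(), other.getProperties())
      && Objects.equals(getTableDesc(), other.getTableDesc())
      && Objects.equals(getPartSpec(), other.getPartSpec())
      && Objects.equals(getVectorPartitionDesc(), other.getVectorPartitionDesc());
}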
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
        inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
    // create a new InputFormat instance if this is the first time to see
    // this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
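The read side is the mirror image: restore the shim split, then read the class name back. A sketch assuming the same field layout as write() above:

@Override
public void readFields(DataInput in) throws IOException {
  // Restore the wrapped CombineFileSplit first, then the cached format class name.
  inputSplitShim.readFields(in);
  inputFormatClassName = in.readUTF();
}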
for (PartitionDesc part : pathToPartitionInfo.values()) {
  boolean isUsingLlapIo = canWrapAny && HiveInputFormat.canWrapForLlap(
      part.getInputFileFormatClass(), doCheckIfs);
  if (isUsingLlapIo) {
    // The inner condition was truncated in the original snippet; in the Hive source this
    // branch separates transactional (ACID) partitions from plain LLAP-capable ones.
    // The exact predicate below is an assumption restored for illustration.
    if (part.getTableDesc() != null &&
        AcidUtils.isTablePropertyTransactional(part.getTableDesc().getProperties())) {
      hasAcid = true;
    } else {
      hasLlap = true;
    }
  } else if (isLlapOn && HiveInputFormat.canInjectCaches(part.getInputFileFormatClass())) {
    hasCacheOnly = true;
  } else {
    // The trailing branch was cut off mid-snippet; it flags formats LLAP cannot handle.
    hasNonLlap = true;
  }
}
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;
  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }
    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
          this.pathToPartitionInfo, ipaths[0], IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());
    // create a new InputFormat instance if this is the first time to see
    // this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); if (LOG.isInfoEnabled()) { LOG.info("Changed input file " + strPath + " to empty file " + newPath + " (" + oneRow + ")"); } // update the work work.addPathToAlias(newPath, work.getPathToAliases().get(path)); work.removePathToAlias(path); work.removePathToPartitionInfo(path); work.addPathToPartitionInfo(newPath, partDesc); return newPath; }
@Override
public PartitionDesc read(Kryo kryo, Input input, Class<PartitionDesc> type) {
  PartitionDesc partitionDesc = super.read(kryo, input, type);
  // The set methods in PartitionDesc intern any duplicate strings, which is why we call
  // them during de-serialization
  partitionDesc.setBaseFileName(partitionDesc.getBaseFileName());
  partitionDesc.setPartSpec(partitionDesc.getPartSpec());
  partitionDesc.setInputFileFormatClass(partitionDesc.getInputFileFormatClass());
  partitionDesc.setOutputFileFormatClass(partitionDesc.getOutputFileFormatClass());
  return partitionDesc;
}
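A sketch of how such a serializer is typically wired into Kryo; the PartitionDescSerializer constructor shape is assumed (a FieldSerializer subclass taking the Kryo instance and target class), not quoted from the source:

// Hedged sketch: register the interning serializer so every PartitionDesc
// deserialized by this Kryo instance goes through the read() override above.
Kryo kryo = new Kryo();
kryo.register(PartitionDesc.class, new PartitionDescSerializer(kryo, PartitionDesc.class));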
protected FetchInputFormatSplit[] getNextSplits() throws Exception {
  while (getNextPath()) {
    // not using FileInputFormat.setInputPaths() here because it forces a connection to the
    // default file system - which may or may not be online during pure metadata operations
    job.set("mapred.input.dir", StringUtils.escapeString(currPath.toString()));
    // The fetch operator is not vectorized, so turn the vectorization flag off so that a
    // non-vectorized record reader is created below.
    HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
    Class<? extends InputFormat> formatter = currDesc.getInputFileFormatClass();
    Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job);
    InputFormat inputFormat = getInputFormatFromCache(formatter, job);
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      inputSplits[i] = new FetchInputFormatSplit(splits[i], inputFormat);
    }
    if (work.getSplitSample() != null) {
      inputSplits = splitSampling(work.getSplitSample(), inputSplits);
    }
    if (inputSplits.length > 0) {
      return inputSplits;
    }
  }
  return null;
}