org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively java code examples

/**
 * Resolves the {@link PartitionDesc} registered for {@code dir}.
 *
 * <p>A first lookup runs against {@code pathToPartitionInfo} as supplied. When it misses, a
 * second lookup runs against the map built by {@code populateNewPartitionDesc} if any of the
 * following holds: {@code ignoreSchema} is set, {@code dir} has no (or a blank) URI scheme, or
 * {@code FileUtils.pathsContainNoScheme} returns true for the map's keys. The derived map is
 * stored in {@code cacheMap}, keyed by the original map, when a cache is provided.
 *
 * @param pathToPartitionInfo map from path to partition descriptor
 * @param dir path whose partition descriptor is wanted
 * @param cacheMap optional cache of derived maps; may be {@code null}
 * @param ignoreSchema when {@code true}, the second lookup is always attempted after a miss
 * @return the partition descriptor found for {@code dir}; never {@code null}
 * @throws IOException if neither lookup yields a descriptor
 */
public static PartitionDesc getPartitionDescFromPathRecursively(
  Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap, boolean ignoreSchema)
    throws IOException {
 PartitionDesc match = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
 if (match != null) {
  return match;
 }

 // Decide whether the derived map should be consulted; checks run in the original order.
 boolean tryDerivedMap = ignoreSchema;
 if (!tryDerivedMap) {
  String scheme = dir.toUri().getScheme();
  tryDerivedMap = scheme == null || scheme.trim().equals("")
    || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet());
 }

 if (tryDerivedMap) {
  Map<Path, PartitionDesc> derived =
    (cacheMap == null) ? null : cacheMap.get(pathToPartitionInfo);
  if (derived == null) {
   derived = populateNewPartitionDesc(pathToPartitionInfo);
   if (cacheMap != null) {
    cacheMap.put(pathToPartitionInfo, derived);
   }
  }
  match = doGetPartitionDescFromPath(derived, dir);
  if (match != null) {
   return match;
  }
 }

 throw new IOException("cannot find dir = " + dir.toString()
           + " in pathToPartitionInfo: " + pathToPartitionInfo.keySet());
}

/**
 * Returns the single {@link PartitionDesc} shared by every path of {@code hsplit}.
 *
 * <p>Each path is resolved through
 * {@code HiveFileFormatUtils.getPartitionDescFromPathRecursively}. Descriptors are compared by
 * identity: as soon as a path resolves to a different object than the first one, a warning is
 * logged and {@code null} is returned.
 *
 * @param hsplit the combined split whose paths are inspected
 * @return the common descriptor; {@code null} if the split has no paths or the paths resolve
 *         to more than one descriptor object
 * @throws IOException if a path cannot be resolved to a partition descriptor
 */
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
 PartitionDesc part = null;
 Map<Map<Path,PartitionDesc>, Map<Path,PartitionDesc>> cache = new HashMap<>();
 for (Path path : hsplit.getPaths()) {
  PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartInfo, path, cache);
  // The message stringifies the whole path-to-partition map once per path, so only build it
  // when debug logging is actually enabled.
  if (LOG.isDebugEnabled()) {
   LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
  }
  if (part == null) {
   part = otherPart;
  } else if (otherPart != part) { // Assume we should have the exact same object.
   // TODO: we could also compare the schema and SerDe, and pass only those to the call
   //       instead; most of the time these would be the same and LLAP IO can handle that.
   LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
     + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
   return null;
  }
 }
 return part;
}

/**
 * Decides whether split {@code s} must start a new group relative to {@code prevSplit}.
 *
 * <p>When there is a previous split and {@code groupAcrossFiles} is false, the answer is simply
 * whether the two splits have different paths. Otherwise the input format class and the
 * deserializer class name of both splits' partition descriptors are compared; any difference
 * (including the absence of a previous split) yields {@code true}.
 *
 * @param s the current split; cast to {@code FileSplit}
 * @param prevSplit the preceding split, or {@code null} if there is none
 * @param groupAcrossFiles whether splits from different files may share a group
 * @param work supplies the path-to-partition map
 * @return {@code true} if a new group is needed
 * @throws IOException if a partition descriptor cannot be resolved
 */
private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles,
                   MapWork work) throws IOException {
 Path currentPath = ((FileSplit) s).getPath();
 PartitionDesc currentDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
   work.getPathToPartitionInfo(), currentPath, cache);
 String currentSerde = currentDesc.getDeserializerClassName();
 Class<?> currentFormat = currentDesc.getInputFileFormatClass();

 String previousSerde = null;
 Class<?> previousFormat = null;
 if (prevSplit != null) {
  Path previousPath = ((FileSplit) prevSplit).getPath();
  if (!groupAcrossFiles) {
   // Grouping is per file: only the path identity matters.
   return !currentPath.equals(previousPath);
  }
  PartitionDesc previousDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    work.getPathToPartitionInfo(), previousPath, cache);
  previousSerde = previousDesc.getDeserializerClassName();
  previousFormat = previousDesc.getInputFileFormatClass();
 }

 boolean unchanged = currentFormat == previousFormat && currentSerde.equals(previousSerde);
 boolean evolved = !unchanged;
 if (LOG.isDebugEnabled()) {
  LOG.debug("Adding split " + currentPath + " to src new group? " + evolved);
 }
 return evolved;
}

/**
 * Computes the splits for every input path configured on {@code job}.
 *
 * <p>For each input directory the partition descriptor is resolved, the matching input format
 * is obtained via {@code getInputFormatFromCache}, the table's job properties are copied onto a
 * working copy of the job, and the input format is asked for {@code numSplits / dirs.length}
 * splits. Every returned split is wrapped in a {@code HiveInputSplit} tagged with the input
 * format class name.
 *
 * @param job the job configuration
 * @param numSplits total number of splits requested across all directories
 * @return the wrapped splits for all directories
 * @throws IOException if no input paths are configured or a lookup fails
 */
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
 super.init(job);
 Path[] inputDirs = FileInputFormat.getInputPaths(job);
 if (inputDirs.length == 0) {
  throw new IOException("No input paths specified in job");
 }
 JobConf perDirJob = new JobConf(job);
 ArrayList<InputSplit> collected = new ArrayList<InputSplit>();
 for (Path inputDir : inputDirs) {
  PartitionDesc partDesc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, inputDir, IOPrepareCache.get().allocatePartitionDescMap(), true);
  Class formatClass = partDesc.getInputFileFormatClass();
  InputFormat format = getInputFormatFromCache(formatClass, job);
  // Point the working job at this directory only, with this table's properties applied.
  Utilities.copyTableJobPropertiesToConf(partDesc.getTableDesc(), perDirJob);
  FileInputFormat.setInputPaths(perDirJob, inputDir);
  perDirJob.setInputFormat(format.getClass());
  for (InputSplit rawSplit : format.getSplits(perDirJob, numSplits / inputDirs.length)) {
   collected.add(new HiveInputSplit(rawSplit, formatClass.getName()));
  }
 }
 return collected.toArray(new HiveInputSplit[collected.size()]);
}

/**
 * Resolves the partition descriptor for {@code split}'s path and delegates to the
 * {@code getPartitionValues} overload that takes a {@link PartitionDesc}.
 *
 * @param vrbCtx vectorized row batch context passed through to the overload
 * @param mapWork supplies the path-to-partition map
 * @param split the file split whose path is looked up
 * @param partitionValues array passed through to the overload
 * @throws IOException if the split's path cannot be resolved to a partition descriptor
 */
public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx,
  MapWork mapWork, FileSplit split, Object[] partitionValues)
  throws IOException {
 PartitionDesc splitPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
   mapWork.getPathToPartitionInfo(), split.getPath(),
   IOPrepareCache.get().getPartitionDescMap());
 getPartitionValues(vrbCtx, splitPartition, partitionValues);
}

/**
 * Creates a combined split around {@code inputSplitShim}.
 *
 * <p>When {@code job} is non-null, a missing {@code pathToPartitionInfo} is filled in from the
 * job's map work, and, if the shim has at least one path, the input format class name is taken
 * from the partition descriptor of the first path.
 *
 * @param job job configuration; may be {@code null}, in which case nothing is resolved
 * @param inputSplitShim the underlying combined file split
 * @param pathToPartitionInfo path-to-partition map; may be {@code null}
 * @throws IOException if the first path cannot be resolved to a partition descriptor
 */
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
  Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
 this.inputSplitShim = inputSplitShim;
 this.pathToPartitionInfo = pathToPartitionInfo;
 if (job == null) {
  return;
 }
 if (this.pathToPartitionInfo == null) {
  this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
 }
 Path[] splitPaths = inputSplitShim.getPaths();
 if (splitPaths.length == 0) {
  return;
 }
 // The first path's partition determines the input format class name for the whole split.
 PartitionDesc firstPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
   this.pathToPartitionInfo, splitPaths[0], IOPrepareCache.get().getPartitionDescMap());
 inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
}

 /**
  * Writable interface: serializes the underlying split followed by the input format class
  * name. If the class name has not been determined yet, it is resolved from the partition
  * descriptor of the split's first path (loading the path-to-partition map from the job's map
  * work when it is missing) before being written.
  *
  * @param out destination of the serialized form
  * @throws IOException if writing fails or the first path cannot be resolved
  */
 @Override
 public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName != null) {
   out.writeUTF(inputFormatClassName);
   return;
  }
  if (pathToPartitionInfo == null) {
   pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
  }
  // Derive the class name from the partition that owns the first path of the combined split.
  PartitionDesc firstPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, inputSplitShim.getPath(0),
    IOPrepareCache.get().getPartitionDescMap());
  inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
  out.writeUTF(inputFormatClassName);
 }
}

// Look up the partition descriptor for `path` (using a map obtained from
// IOPrepareCache.allocatePartitionDescMap() as the cache) and read its table descriptor.
PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
  pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
TableDesc tableDesc = part.getTableDesc();

PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
  pathToPartitionInfo, hsplit.getPath(), null);
if (LOG.isDebugEnabled()) {

   .getPartitionDescFromPathRecursively(pathToPartitionInfo,
     filePath, IOPrepareCache.get().getPartitionDescMap());
} catch (AssertionError ae) {

/**
 * Resolves the {@link PartitionDesc} registered for {@code dir} (string-keyed map variant).
 *
 * <p>A first lookup runs against {@code pathToPartitionInfo}. When it misses and either
 * {@code ignoreSchema} is set or {@code dir} has no (or a blank) URI scheme, a second lookup
 * runs against a map filled by {@code populateNewPartitionDesc}. That map is stored in
 * {@code cacheMap}, keyed by the original map, when a cache is provided.
 *
 * @param pathToPartitionInfo map from path string to partition descriptor
 * @param dir path whose partition descriptor is wanted
 * @param cacheMap optional cache of derived maps; may be {@code null}
 * @param ignoreSchema when {@code true}, the second lookup is always attempted after a miss
 * @return the partition descriptor found for {@code dir}; never {@code null}
 * @throws IOException if neither lookup yields a descriptor
 */
public static PartitionDesc getPartitionDescFromPathRecursively(
  Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
  Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
  boolean ignoreSchema) throws IOException {
 PartitionDesc hit = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
 if (hit == null) {
  boolean useFallback = ignoreSchema;
  if (!useFallback) {
   String scheme = dir.toUri().getScheme();
   useFallback = scheme == null || scheme.trim().equals("");
  }
  if (useFallback) {
   Map<String, PartitionDesc> rewritten = null;
   if (cacheMap != null) {
    rewritten = cacheMap.get(pathToPartitionInfo);
   }
   if (rewritten == null) {
    // Nothing cached yet: build the derived map and remember it if a cache was given.
    rewritten = new HashMap<String, PartitionDesc>();
    populateNewPartitionDesc(pathToPartitionInfo, rewritten);
    if (cacheMap != null) {
     cacheMap.put(pathToPartitionInfo, rewritten);
    }
   }
   hit = doGetPartitionDescFromPath(rewritten, dir);
  }
 }
 if (hit == null) {
  throw new IOException("cannot find dir = " + dir.toString()
            + " in pathToPartitionInfo: " + pathToPartitionInfo.keySet());
 }
 return hit;
}

public static PartitionDesc getPartitionDescFromPathRecursively(
  Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
  Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
  boolean ignoreSchema) throws IOException {
 PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
 if (part == null
   && (ignoreSchema
     || (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim()
     .equals(""))
     || pathsContainNoScheme(pathToPartitionInfo)
     )
   ) {
  Map<String, PartitionDesc> newPathToPartitionInfo = null;
  if (cacheMap != null) {
   newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
  }
  if (newPathToPartitionInfo == null) { // still null
   newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
   populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);
   if (cacheMap != null) {
    cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
   }
  }
  part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);

/**
 * Builds the list of splits for all input paths of {@code job}.
 *
 * <p>Each input directory is handled independently: its partition descriptor is resolved, the
 * input format is fetched through {@code getInputFormatFromCache}, the table's job properties
 * are copied to a working copy of the job, and {@code numSplits / dirs.length} splits are
 * requested. Results are wrapped in {@code HiveInputSplit} with the input format class name.
 *
 * @param job the job configuration
 * @param numSplits total number of splits requested across all directories
 * @return the wrapped splits for all directories
 * @throws IOException if no input paths are configured or a lookup fails
 */
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
 super.init(job);
 Path[] dirs = FileInputFormat.getInputPaths(job);
 if (dirs.length == 0) {
  throw new IOException("No input paths specified in job");
 }
 JobConf workingJob = new JobConf(job);
 ArrayList<InputSplit> wrapped = new ArrayList<InputSplit>();
 for (int d = 0; d < dirs.length; d++) {
  Path dir = dirs[d];
  PartitionDesc dirPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true);
  Class inputFormatClass = dirPartition.getInputFileFormatClass();
  InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
  // Reconfigure the working job so the input format only sees this directory.
  Utilities.copyTableJobPropertiesToConf(dirPartition.getTableDesc(), workingJob);
  FileInputFormat.setInputPaths(workingJob, dir);
  workingJob.setInputFormat(inputFormat.getClass());
  InputSplit[] dirSplits = inputFormat.getSplits(workingJob, numSplits / dirs.length);
  for (int i = 0; i < dirSplits.length; i++) {
   wrapped.add(new HiveInputSplit(dirSplits[i], inputFormatClass.getName()));
  }
 }
 return wrapped.toArray(new HiveInputSplit[wrapped.size()]);
}

/**
 * Produces the splits for every input path set on {@code job}.
 *
 * <p>Per directory: resolve its partition descriptor, obtain the input format through
 * {@code getInputFormatFromCache}, copy the table's job properties onto a working copy of the
 * job, request {@code numSplits / dirs.length} splits, and wrap each one in a
 * {@code HiveInputSplit} carrying the input format class name.
 *
 * @param job the job configuration
 * @param numSplits total number of splits requested across all directories
 * @return the wrapped splits for all directories
 * @throws IOException if no input paths are configured or a lookup fails
 */
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
 super.init(job);
 Path[] sourceDirs = FileInputFormat.getInputPaths(job);
 if (sourceDirs.length == 0) {
  throw new IOException("No input paths specified in job");
 }
 JobConf scratchJob = new JobConf(job);
 ArrayList<InputSplit> allSplits = new ArrayList<InputSplit>();
 for (Path sourceDir : sourceDirs) {
  PartitionDesc desc = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    pathToPartitionInfo, sourceDir, IOPrepareCache.get().allocatePartitionDescMap(), true);
  Class formatClass = desc.getInputFileFormatClass();
  InputFormat formatInstance = getInputFormatFromCache(formatClass, job);
  Utilities.copyTableJobPropertiesToConf(desc.getTableDesc(), scratchJob);
  FileInputFormat.setInputPaths(scratchJob, sourceDir);
  scratchJob.setInputFormat(formatInstance.getClass());
  InputSplit[] produced = formatInstance.getSplits(scratchJob, numSplits / sourceDirs.length);
  for (InputSplit one : produced) {
   // Tag every split with the format class name.
   allSplits.add(new HiveInputSplit(one, formatClass.getName()));
  }
 }
 return allSplits.toArray(new HiveInputSplit[allSplits.size()]);
}

/**
 * Creates a combined split around {@code inputSplitShim}.
 *
 * <p>When {@code job} is non-null, the path-to-partition map is read from the job's map-reduce
 * work and, if the shim has at least one path, the input format class name is taken from the
 * partition descriptor of the first path.
 *
 * @param job job configuration; may be {@code null}, in which case nothing is resolved
 * @param inputSplitShim the underlying combined split
 * @throws IOException if the first path cannot be resolved to a partition descriptor
 */
public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim)
  throws IOException {
 this.inputSplitShim = inputSplitShim;
 if (job == null) {
  return;
 }
 Map<String, PartitionDesc> partitionsByPath =
   Utilities.getMapRedWork(job).getPathToPartitionInfo();
 Path[] splitPaths = inputSplitShim.getPaths();
 if (splitPaths.length == 0) {
  return;
 }
 // The first path's partition determines the input format class name for the whole split.
 PartitionDesc firstPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
   partitionsByPath, splitPaths[0], IOPrepareCache.get().getPartitionDescMap());
 inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
}

public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
  Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
 this.inputSplitShim = inputSplitShim;
 this.pathToPartitionInfo = pathToPartitionInfo;
 if (job != null) {
  if (this.pathToPartitionInfo == null) {
   this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
  }
  // extract all the inputFormatClass names for each chunk in the
  // CombinedSplit.
  Path[] ipaths = inputSplitShim.getPaths();
  if (ipaths.length > 0) {
   PartitionDesc part = HiveFileFormatUtils
     .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
       ipaths[0], IOPrepareCache.get().getPartitionDescMap());
   inputFormatClassName = part.getInputFileFormatClass().getName();
  }
 }
}

 /**
  * Writable interface: serializes the underlying split followed by the input format class
  * name. If the class name has not been determined yet, it is resolved from the partition
  * descriptor of the split's first path, using the path-to-partition map of the job's
  * map-reduce work, before being written.
  *
  * @param out destination of the serialized form
  * @throws IOException if writing fails or the first path cannot be resolved
  */
 public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName != null) {
   out.writeUTF(inputFormatClassName);
   return;
  }
  // Class name not known yet: derive it from the partition owning the first path.
  PartitionDesc firstPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
    Utilities.getMapRedWork(getJob()).getPathToPartitionInfo(),
    inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
  inputFormatClassName = firstPartition.getInputFileFormatClass().getName();
  out.writeUTF(inputFormatClassName);
 }
}

 /**
  * Writable interface: emits the wrapped split and then the input format class name.
  *
  * <p>The class name is computed on demand: when it is still unset, the path-to-partition map
  * is loaded from the job's map work if necessary, and the name is read from the partition
  * descriptor that matches the split's first path.
  *
  * @param out destination of the serialized form
  * @throws IOException if writing fails or the first path cannot be resolved
  */
 @Override
 public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  boolean classNameMissing = (inputFormatClassName == null);
  if (classNameMissing) {
   if (pathToPartitionInfo == null) {
    pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
   }
   Path leadPath = inputSplitShim.getPath(0);
   PartitionDesc leadPartition = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
     pathToPartitionInfo, leadPath, IOPrepareCache.get().getPartitionDescMap());
   inputFormatClassName = leadPartition.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
 }
}

.getPartitionDescFromPathRecursively(pathToPartitionInfo,
  split.getPath(), IOPrepareCache.get().getPartitionDescMap());

// Resolve the partition descriptor for the split's file path and read the name of its
// deserializer class.
Path path = ((FileSplit) s).getPath();
PartitionDesc pd =
  HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
    path, cache);
String currentDeserializerClass = pd.getDeserializerClassName();
   HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(),
     prevPath, cache);
 previousDeserializerClass = prevPD.getDeserializerClassName();

Popular methods of HiveFileFormatUtils

checkInputFormat
checks if files are in same format as the given input format.
checkTextInputFormat
doGetAliasesFromPath
Get the list of aliases from the operator tree that are needed for the path
foundAlias
getHiveRecordWriter
getMatchingPath
getOutputFormatSubstitute
get a OutputFormat's substitute HiveOutputFormat.
getRecordWriter
doGetPartitionDescFromPath
doGetWorksFromPath
Get the list of operators from the operator tree that are needed for the path
getAcidRecordUpdater
getFromPathRecursively

Popular in Java

Making http requests using okhttp
onCreateOptionsMenu (Activity)
startActivity (Activity)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
Proxy (java.net)
This class represents proxy server settings. A created instance of Proxy stores a type and an addres
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in t
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
ImageIO (javax.imageio)
JPanel (javax.swing)
From CI to AI: The AI layer in your organization

How to use the getPartitionDescFromPathRecursively method in org.apache.hadoop.hive.ql.io.HiveFileFormatUtils

Best Java code snippets using org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.getPartitionDescFromPathRecursively (Showing top 20 results out of 315)

How to use
getPartitionDescFromPathRecursively
method
in
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils