public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException { super.init(job); Path[] dirs = FileInputFormat.getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } JobConf newjob = new JobConf(job); ArrayList<InputSplit> result = new ArrayList<InputSplit>(); // for each dir, get the InputFormat, and do getSplits. PartitionDesc part; for (Path dir : dirs) { part = HiveFileFormatUtils .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true); // create a new InputFormat instance if this is the first time to see this // class Class inputFormatClass = part.getInputFileFormatClass(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob); FileInputFormat.setInputPaths(newjob, dir); newjob.setInputFormat(inputFormat.getClass()); InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length); for (InputSplit is : iss) { result.add(new HiveInputSplit(is, inputFormatClass.getName())); } } return result.toArray(new HiveInputSplit[result.size()]); }
pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap()); TableDesc tableDesc = part.getTableDesc(); if ((tableDesc != null) && tableDesc.isNonNative()) {
pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap()); TableDesc tableDesc = part.getTableDesc(); if ((tableDesc != null) && tableDesc.isNonNative()) {
IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret); IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part3 not found.", partDesc_3, ret); ret = HiveFileFormatUtils.getFromPathRecursively( pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"), IOPrepareCache.get().allocatePartitionDescMap()); } catch (IOException e) { exception = true; IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret); IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret); IOPrepareCache.get().allocatePartitionDescMap()); IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("file:///tbl/par1/part2/part5 not found.", partDesc_5, ret); IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("/tbl/par1/part2/part5 not found.", partDesc_5, ret); IOPrepareCache.get().allocatePartitionDescMap()); assertEquals("hdfs:///tbl/par1/part2/part5 not found", partDesc_5, ret);
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException { super.init(job); Path[] dirs = FileInputFormat.getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } JobConf newjob = new JobConf(job); ArrayList<InputSplit> result = new ArrayList<InputSplit>(); // for each dir, get the InputFormat, and do getSplits. for (Path dir : dirs) { PartitionDesc part = HiveFileFormatUtils .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true); // create a new InputFormat instance if this is the first time to see this // class Class inputFormatClass = part.getInputFileFormatClass(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob); FileInputFormat.setInputPaths(newjob, dir); newjob.setInputFormat(inputFormat.getClass()); InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length); for (InputSplit is : iss) { result.add(new HiveInputSplit(is, inputFormatClass.getName())); } } return result.toArray(new HiveInputSplit[result.size()]); }
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException { super.init(job); Path[] dirs = FileInputFormat.getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } JobConf newjob = new JobConf(job); ArrayList<InputSplit> result = new ArrayList<InputSplit>(); // for each dir, get the InputFormat, and do getSplits. PartitionDesc part; for (Path dir : dirs) { part = HiveFileFormatUtils .getPartitionDescFromPathRecursively(pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true); // create a new InputFormat instance if this is the first time to see this // class Class inputFormatClass = part.getInputFileFormatClass(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob); FileInputFormat.setInputPaths(newjob, dir); newjob.setInputFormat(inputFormat.getClass()); InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length); for (InputSplit is : iss) { result.add(new HiveInputSplit(is, inputFormatClass.getName())); } } return result.toArray(new HiveInputSplit[result.size()]); }
pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap()); TableDesc tableDesc = part.getTableDesc(); if ((tableDesc != null) && tableDesc.isNonNative()) {
pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap()); TableDesc tableDesc = part.getTableDesc(); if ((tableDesc != null) && tableDesc.isNonNative()) {