/**
 * Computes the input splits for the given job using the old (mapred) API.
 *
 * <p>When {@code isTaskSideMetaData(job)} is true the work is delegated to the
 * superclass. Otherwise the footers are read, handed to {@code realInputFormat},
 * and every resulting {@code ParquetInputSplit} is wrapped in a
 * {@code ParquetInputSplitWrapper}.
 *
 * @param job the job configuration
 * @param numSplits split-count hint; only forwarded on the superclass path
 * @return the wrapped splits, or {@code null} if {@code realInputFormat} returned {@code null}
 * @throws IOException if computing the footers or splits fails
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    return super.getSplits(job, numSplits);
  }

  List<Footer> jobFooters = getFooters(job);
  List<ParquetInputSplit> parquetSplits = realInputFormat.getSplits(job, jobFooters);
  if (parquetSplits == null) {
    return null;
  }

  // Wrap each split in iteration order so the array mirrors the list.
  InputSplit[] wrapped = new InputSplit[parquetSplits.size()];
  int next = 0;
  for (ParquetInputSplit parquetSplit : parquetSplits) {
    wrapped[next] = new ParquetInputSplitWrapper(parquetSplit);
    next++;
  }
  return wrapped;
}
/** * {@inheritDoc} */ @Override public List<InputSplit> getSplits(JobContext jobContext) throws IOException { Configuration configuration = ContextUtil.getConfiguration(jobContext); List<InputSplit> splits = new ArrayList<InputSplit>(); if (isTaskSideMetaData(configuration)) { // Although not required by the API, some clients may depend on always // receiving ParquetInputSplit. Translation is required at some point. for (InputSplit split : super.getSplits(jobContext)) { Preconditions.checkArgument(split instanceof FileSplit, "Cannot wrap non-FileSplit: " + split); splits.add(ParquetInputSplit.from((FileSplit) split)); } return splits; } else { splits.addAll(getSplits(configuration, getFooters(jobContext))); } return splits; }
@Override public ResourceStatistics getStatistics(String location, Job job) throws IOException { if (DEBUG) LOG.debug("LoadMetadata.getStatistics(" + location + ", " + job + ")"); /* We need to call setInput since setLocation is not guaranteed to be called before this */ setInput(location, job); long length = 0; try { for (InputSplit split : getParquetInputFormat().getSplits(job)) { length += split.getLength(); } } catch (InterruptedException e) { LOG.warn("Interrupted: ", e); return null; } ResourceStatistics stats = new ResourceStatistics(); // TODO use pig-0.12 setBytes api when its available stats.setmBytes(length / 1024 / 1024); return stats; }