/**
 * Instantiates the underlying mapred InputFormat for this split's storage
 * handler and returns its RecordReader, after copying any partition-level
 * job properties into the JobConf.
 */
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {
  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
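// Illustrative only: a minimal sketch of the old-API (org.apache.hadoop.mapred)
// RecordReader lifecycle that createBaseRecordReader drives through the storage
// handler's InputFormat. TextInputFormat and the file path are assumptions for
// the example, not part of HCatalog's code.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class MapredReaderSketch {
  public static void main(String[] args) throws IOException {
    JobConf jobConf = new JobConf();
    // ReflectionUtils.newInstance also calls configure(jobConf) on
    // JobConfigurable input formats, mirroring what getMapRedInputFormat
    // is expected to do for the storage handler's InputFormat class.
    TextInputFormat inputFormat =
        ReflectionUtils.newInstance(TextInputFormat.class, jobConf);
    FileSplit split =
        new FileSplit(new Path("/tmp/example.txt"), 0L, 1024L, new String[0]); // hypothetical file
    RecordReader<LongWritable, Text> reader =
        inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      System.out.println(key + "\t" + value);
    }
    reader.close();
  }
}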
/**
 * Restores this object's state: first the serialized Configuration, then the
 * split count, then each HCatSplit in order (mirroring writeExternal).
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  conf.readFields(in);
  int numOfSplits = in.readInt();
  for (int i = 0; i < numOfSplits; i++) {
    HCatSplit split = new HCatSplit();
    split.readFields(in);
    splits.add(split);
  }
}
}
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split);
  baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext);
  createDeserializer(hcatSplit, storageHandler, taskContext);

  // Pull the output schema out of the TaskAttemptContext; fall back to the
  // full table schema when no projection was set on the job.
  outputSchema = (HCatSchema) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA));
  if (outputSchema == null) {
    outputSchema = hcatSplit.getTableSchema();
  }

  // Pull the data schema out of the split info
  // TODO This should be passed in the TaskAttemptContext instead
  dataSchema = hcatSplit.getDataSchema();

  errorTracker = new InputErrorTracker(taskContext.getConfiguration());
}
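// Illustrative only: how a client job would typically populate
// HCAT_KEY_OUTPUT_SCHEMA before initialize() runs on the task side. The
// database, table, and column names are assumptions for the example, and the
// exact setInput/getTableSchema signatures vary slightly across HCatalog
// releases.
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class ProjectionSketch {
  public static void configure(Job job) throws Exception {
    HCatInputFormat.setInput(job, "default", "web_logs"); // hypothetical db/table
    HCatSchema tableSchema = HCatInputFormat.getTableSchema(job.getConfiguration());
    // Project down to a single column; readers fall back to the full table
    // schema only when no projection like this was registered.
    List<HCatFieldSchema> projected = new ArrayList<HCatFieldSchema>();
    projected.add(tableSchema.get("user_id")); // hypothetical column
    HCatInputFormat.setOutputSchema(job, new HCatSchema(projected));
  }
}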
/**
 * Instantiates and initializes the storage handler's SerDe so that records
 * returned by the base RecordReader can be deserialized.
 */
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {
  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());
  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
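// Illustrative only: the bare SerDe lifecycle that
// InternalUtil.initializeDeserializer wraps (it derives column names and types
// from the table/partition info rather than hard-coding them as done here).
// The column names, types, and sample row are assumptions; the two-argument
// initialize() is deprecated in newer Hive releases.
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class SerDeLifecycleSketch {
  public static void main(String[] args) throws SerDeException {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // Fields are separated by \001, LazySimpleSerDe's default delimiter.
    Object row = serde.deserialize(new Text("1\u0001alice"));
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    System.out.println(oi.getStructFieldsDataAsList(row));
  }
}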
/**
 * Counterpart to readExternal: writes the Configuration, then the split
 * count, then each HCatSplit, so the two methods stay in lockstep.
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  conf.write(out);
  out.writeInt(splits.size());
  for (InputSplit split : splits) {
    ((HCatSplit) split).write(out);
  }
}
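// Illustrative only: the Java-serialization round trip that exercises the
// writeExternal/readExternal pair above. The fragment does not show the
// enclosing Externalizable class, so these helpers take a plain Object.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

public class ExternalizableRoundTripSketch {
  public static byte[] serialize(Object o) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(o); // invokes writeExternal on Externalizable objects
    }
    return bytes.toByteArray();
  }

  public static Object deserialize(byte[] data) throws Exception {
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(data))) {
      return in.readObject(); // no-arg constructor first, then readExternal
    }
  }
}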
// Wrap each underlying split, pairing it with its partition's metadata.
splits.add(new HCatSplit(partitionInfo, split));
/**
 * Builds a stable identifier for a split from its file name (for the
 * intermediate table), start position, and length.
 */
@Override
public String getInputSplitSignature(InputSplit inputSplit) {
  FileSplit baseSplit = (FileSplit) ((HCatSplit) inputSplit).getBaseSplit();
  return baseSplit.getPath().getName() + "_" + baseSplit.getStart() + "_" + baseSplit.getLength();
}
}
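// Illustrative only: what the signature above evaluates to for a concrete
// FileSplit. The path, offset, and length are made-up example values.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class SplitSignatureSketch {
  public static void main(String[] args) {
    FileSplit baseSplit =
        new FileSplit(new Path("/warehouse/tmp/part-00000"), 0L, 67108864L, new String[0]);
    // Prints "part-00000_0_67108864": two splits collide only if they cover
    // the same byte range of a file with the same name.
    System.out.println(baseSplit.getPath().getName() + "_"
        + baseSplit.getStart() + "_" + baseSplit.getLength());
  }
}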
PartInfo partitionInfo = hcatSplit.getPartitionInfo();