/**
 * Instantiates the underlying mapred InputFormat for this split's storage
 * handler and returns its RecordReader, after copying any partition-level
 * job properties into the JobConf.
 */
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {
  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
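// Illustrative only: a minimal sketch of the old-API (org.apache.hadoop.mapred)
// RecordReader lifecycle that createBaseRecordReader drives through the storage
// handler's InputFormat. TextInputFormat and the file path are assumptions for
// the example, not part of HCatalog's code.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class MapredReaderSketch {
  public static void main(String[] args) throws IOException {
    JobConf jobConf = new JobConf();
    // ReflectionUtils.newInstance also calls configure(jobConf) on
    // JobConfigurable input formats, mirroring what getMapRedInputFormat
    // is expected to do for the storage handler's InputFormat class.
    TextInputFormat inputFormat =
        ReflectionUtils.newInstance(TextInputFormat.class, jobConf);
    FileSplit split =
        new FileSplit(new Path("/tmp/example.txt"), 0L, 1024L, new String[0]); // hypothetical file
    RecordReader<LongWritable, Text> reader =
        inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    LongWritable key = reader.createKey();
    Text value = reader.createValue();
    while (reader.next(key, value)) {
      System.out.println(key + "\t" + value);
    }
    reader.close();
  }
}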
/**
 * Restores this object's state: first the serialized Configuration, then the
 * split count, then each HCatSplit in order (mirroring writeExternal).
 */
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  conf.readFields(in);
  int numOfSplits = in.readInt();
  for (int i = 0; i < numOfSplits; i++) {
    HCatSplit split = new HCatSplit();
    split.readFields(in);
    splits.add(split);
  }
}
}
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split);
  baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext);
  createDeserializer(hcatSplit, storageHandler, taskContext);

  // Pull the output schema out of the TaskAttemptContext; fall back to the
  // full table schema when no projection was set on the job.
  outputSchema = (HCatSchema) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA));
  if (outputSchema == null) {
    outputSchema = hcatSplit.getTableSchema();
  }

  // Pull the data schema out of the split info
  // TODO This should be passed in the TaskAttemptContext instead
  dataSchema = hcatSplit.getDataSchema();

  errorTracker = new InputErrorTracker(taskContext.getConfiguration());
}
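// Illustrative only: how a client job would typically populate
// HCAT_KEY_OUTPUT_SCHEMA before initialize() runs on the task side. The
// database, table, and column names are assumptions for the example, and the
// exact setInput/getTableSchema signatures vary slightly across HCatalog
// releases.
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class ProjectionSketch {
  public static void configure(Job job) throws Exception {
    HCatInputFormat.setInput(job, "default", "web_logs"); // hypothetical db/table
    HCatSchema tableSchema = HCatInputFormat.getTableSchema(job.getConfiguration());
    // Project down to a single column; readers fall back to the full table
    // schema only when no projection like this was registered.
    List<HCatFieldSchema> projected = new ArrayList<HCatFieldSchema>();
    projected.add(tableSchema.get("user_id")); // hypothetical column
    HCatInputFormat.setOutputSchema(job, new HCatSchema(projected));
  }
}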
/**
 * Instantiates and initializes the storage handler's SerDe so that records
 * returned by the base RecordReader can be deserialized.
 */
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {
  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());
  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
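// Illustrative only: the bare SerDe lifecycle that
// InternalUtil.initializeDeserializer wraps (it derives column names and types
// from the table/partition info rather than hard-coding them as done here).
// The column names, types, and sample row are assumptions; the two-argument
// initialize() is deprecated in newer Hive releases.
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class SerDeLifecycleSketch {
  public static void main(String[] args) throws SerDeException {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // Fields are separated by \001, LazySimpleSerDe's default delimiter.
    Object row = serde.deserialize(new Text("1\u0001alice"));
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    System.out.println(oi.getStructFieldsDataAsList(row));
  }
}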
/**
 * Counterpart to readExternal: writes the Configuration, then the split
 * count, then each HCatSplit, so the two methods stay in lockstep.
 */
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  conf.write(out);
  out.writeInt(splits.size());
  for (InputSplit split : splits) {
    ((HCatSplit) split).write(out);
  }
}
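// Illustrative only: the Java-serialization round trip that exercises the
// writeExternal/readExternal pair above. The fragment does not show the
// enclosing Externalizable class, so these helpers take a plain Object.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

public class ExternalizableRoundTripSketch {
  public static byte[] serialize(Object o) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(o); // invokes writeExternal on Externalizable objects
    }
    return bytes.toByteArray();
  }

  public static Object deserialize(byte[] data) throws Exception {
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(data))) {
      return in.readObject(); // no-arg constructor first, then readExternal
    }
  }
}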
// Wrap each underlying split, pairing it with its partition's metadata.
splits.add(new HCatSplit(partitionInfo, split));
/**
 * Builds a stable identifier for a split from its file name (for the
 * intermediate table), start position, and length.
 */
@Override
public String getInputSplitSignature(InputSplit inputSplit) {
  FileSplit baseSplit = (FileSplit) ((HCatSplit) inputSplit).getBaseSplit();
  return baseSplit.getPath().getName() + "_" + baseSplit.getStart() + "_" + baseSplit.getLength();
}
}
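// Illustrative only: what the signature above evaluates to for a concrete
// FileSplit. The path, offset, and length are made-up example values.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class SplitSignatureSketch {
  public static void main(String[] args) {
    FileSplit baseSplit =
        new FileSplit(new Path("/warehouse/tmp/part-00000"), 0L, 67108864L, new String[0]);
    // Prints "part-00000_0_67108864": two splits collide only if they cover
    // the same byte range of a file with the same name.
    System.out.println(baseSplit.getPath().getName() + "_"
        + baseSplit.getStart() + "_" + baseSplit.getLength());
  }
}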
PartInfo partitionInfo = hcatSplit.getPartitionInfo();