/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName) throws IOException {
  return setInput(conf, dbName, tableName, null);
}
/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Job job, String dbName, String tableName) throws IOException {
  return setInput(job.getConfiguration(), dbName, tableName, null);
}
/**
 * Initializes the input with a provided filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Job job, String dbName, String tableName, String filter) throws IOException {
  return setInput(job.getConfiguration(), dbName, tableName, filter);
}
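Taken together, these overloads let a driver either read a whole table (null filter) or restrict the read to matching partitions. The following is a minimal driver sketch, not taken from any of the sources above: the database, table, and the string-typed partition column "ds" are assumptions for illustration; string partition values are double-quoted inside the filter expression.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class FilteredReadExample {
  // Sketch only: configures an HCatalog read limited to one partition.
  // The database ("default"), table ("my_table"), and partition column ("ds")
  // are hypothetical.
  public static Job configure() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat filtered read");
    job.setInputFormatClass(HCatInputFormat.class);
    // The filter is a partition predicate; string values are double-quoted.
    HCatInputFormat.setInput(job, "default", "my_table", "ds=\"2015-01-01\"");
    return job;
  }
}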
@Override
public void configureJob(Job job) {
  try {
    job.getConfiguration().addResource("hive-site.xml");
    HCatInputFormat.setInput(job, dbName, tableName);
    job.setInputFormatClass(HCatInputFormat.class);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
protected HCatSchema getTableSchema() throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce read schema test");
  job.setJarByClass(this.getClass());

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName);

  return HCatInputFormat.getTableSchema(job.getConfiguration());
}
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
  super();
  this.configuration = config;
  HadoopUtils.mergeHadoopConf(this.configuration);

  this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
  this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

  // configure output schema of HCatFormat
  configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
  // set type information
  this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
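For context, a minimal sketch of how this constructor is typically reached from a Flink program is shown below, assuming the org.apache.flink.hcatalog.java.HCatInputFormat subclass and the DataSet API; the database and table names are hypothetical, and the records come back as HCatRecord, the default described in the Javadoc above.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class FlinkHCatReadExample {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Reads "default.my_table" (hypothetical names) as HCatRecords,
    // the constructor's default return type.
    DataSet<HCatRecord> rows = env.createInput(
        new HCatInputFormat<HCatRecord>("default", "my_table", new Configuration()));
    rows.print();
  }
}

Calling asFlinkTuples() on the format before handing it to createInput() switches the produced type to Flink-native tuples, as the Javadoc notes.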
private boolean runJob(float badRecordThreshold) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, "default", "test_bad_records");

  job.setMapOutputKeyClass(HCatRecord.class);
  job.setMapOutputValueClass(HCatRecord.class);
  job.setNumReduceTasks(0);

  Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
  if (path.getFileSystem(conf).exists(path)) {
    path.getFileSystem(conf).delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);

  return job.waitForCompletion(true);
}
job.setOutputFormatClass(TextOutputFormat.class);
HCatInputFormat.setInput(job, dbName, tableName, filter);
HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif = HCatInputFormat.setInput(
        job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(hcif.getSplits(
        ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
@Test
public void testGetPartitionAndDataColumns() throws Exception {
  Configuration conf = new Configuration();
  Job myJob = new Job(conf, "hcatTest");
  HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");

  HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
  Assert.assertTrue(cols.getFields() != null);
  Assert.assertEquals(cols.getFields().size(), 2);
  Assert.assertTrue(cols.getFields().get(0).getName().equals("a"));
  Assert.assertTrue(cols.getFields().get(1).getName().equals("b"));
  Assert.assertTrue(cols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(cols.getFields().get(1).getType().equals(HCatFieldSchema.Type.INT));

  HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
  Assert.assertTrue(pcols.getFields() != null);
  Assert.assertEquals(pcols.getFields().size(), 2);
  Assert.assertTrue(pcols.getFields().get(0).getName().equals("x"));
  Assert.assertTrue(pcols.getFields().get(1).getName().equals("y"));
  Assert.assertTrue(pcols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(pcols.getFields().get(1).getType().equals(HCatFieldSchema.Type.STRING));
}
private void setupMapper() throws IOException {
  String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
  String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

  log.info("setting hcat input format, db name {} , table name {}", dbTableNames[0], dbTableNames[1]);

  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(IIDistinctColumnsMapper.class);
  job.setCombinerClass(IIDistinctColumnsCombiner.class);
  job.setMapOutputKeyClass(ShortWritable.class);
  job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
  // FileInputFormat.setInputPaths(job, input);
  String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(FactDistinctColumnsMapper.class);
  job.setCombinerClass(FactDistinctColumnsCombiner.class);
  job.setMapOutputKeyClass(ShortWritable.class);
  job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
  String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(InvertedIndexMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setPartitionerClass(InvertedIndexPartitioner.class);
}
HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);