/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName) throws IOException {
  return setInput(conf, dbName, tableName, null);
}
/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Job job, String dbName, String tableName) throws IOException {
  return setInput(job.getConfiguration(), dbName, tableName, null);
}
/**
 * Initializes the input with a provided filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Job job, String dbName, String tableName, String filter) throws IOException {
  return setInput(job.getConfiguration(), dbName, tableName, filter);
}
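Taken together, these overloads let a driver either read a whole table (null filter) or restrict the read to matching partitions. The following is a minimal driver sketch, not taken from any of the sources above: the database, table, and the string-typed partition column "ds" are assumptions for illustration; string partition values are double-quoted inside the filter expression.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class FilteredReadExample {
  // Sketch only: configures an HCatalog read limited to one partition.
  // The database ("default"), table ("my_table"), and partition column ("ds")
  // are hypothetical.
  public static Job configure() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat filtered read");
    job.setInputFormatClass(HCatInputFormat.class);
    // The filter is a partition predicate; string values are double-quoted.
    HCatInputFormat.setInput(job, "default", "my_table", "ds=\"2015-01-01\"");
    return job;
  }
}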
@Override
public void configureJob(Job job) {
  try {
    job.getConfiguration().addResource("hive-site.xml");
    HCatInputFormat.setInput(job, dbName, tableName);
    job.setInputFormatClass(HCatInputFormat.class);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
protected HCatSchema getTableSchema() throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce read schema test");
  job.setJarByClass(this.getClass());

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName);

  return HCatInputFormat.getTableSchema(job.getConfiguration());
}
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
  super();
  this.configuration = config;
  HadoopUtils.mergeHadoopConf(this.configuration);

  this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
  this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

  // configure output schema of HCatFormat
  configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
  // set type information
  this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
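For context, a minimal sketch of how this constructor is typically reached from a Flink program is shown below, assuming the org.apache.flink.hcatalog.java.HCatInputFormat subclass and the DataSet API; the database and table names are hypothetical, and the records come back as HCatRecord, the default described in the Javadoc above.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class FlinkHCatReadExample {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // Reads "default.my_table" (hypothetical names) as HCatRecords,
    // the constructor's default return type.
    DataSet<HCatRecord> rows = env.createInput(
        new HCatInputFormat<HCatRecord>("default", "my_table", new Configuration()));
    rows.print();
  }
}

Calling asFlinkTuples() on the format before handing it to createInput() switches the produced type to Flink-native tuples, as the Javadoc notes.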
private boolean runJob(float badRecordThreshold) throws Exception {
  Configuration conf = new Configuration();
  conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, "default", "test_bad_records");

  job.setMapOutputKeyClass(HCatRecord.class);
  job.setMapOutputValueClass(HCatRecord.class);
  job.setNumReduceTasks(0);

  Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
  if (path.getFileSystem(conf).exists(path)) {
    path.getFileSystem(conf).delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, path);

  return job.waitForCompletion(true);
}
job.setOutputFormatClass(TextOutputFormat.class);
HCatInputFormat.setInput(job, dbName, tableName, filter);
HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif = HCatInputFormat.setInput(
        job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(hcif.getSplits(
        ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
@Test
public void testGetPartitionAndDataColumns() throws Exception {
  Configuration conf = new Configuration();
  Job myJob = new Job(conf, "hcatTest");
  HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");

  HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
  Assert.assertTrue(cols.getFields() != null);
  Assert.assertEquals(cols.getFields().size(), 2);
  Assert.assertTrue(cols.getFields().get(0).getName().equals("a"));
  Assert.assertTrue(cols.getFields().get(1).getName().equals("b"));
  Assert.assertTrue(cols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(cols.getFields().get(1).getType().equals(HCatFieldSchema.Type.INT));

  HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
  Assert.assertTrue(pcols.getFields() != null);
  Assert.assertEquals(pcols.getFields().size(), 2);
  Assert.assertTrue(pcols.getFields().get(0).getName().equals("x"));
  Assert.assertTrue(pcols.getFields().get(1).getName().equals("y"));
  Assert.assertTrue(pcols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(pcols.getFields().get(1).getType().equals(HCatFieldSchema.Type.STRING));
}
private void setupMapper() throws IOException {
  String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
  String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

  log.info("setting hcat input format, db name {} , table name {}", dbTableNames[0], dbTableNames[1]);

  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(IIDistinctColumnsMapper.class);
  job.setCombinerClass(IIDistinctColumnsCombiner.class);
  job.setMapOutputKeyClass(ShortWritable.class);
  job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
  // FileInputFormat.setInputPaths(job, input);
  String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(FactDistinctColumnsMapper.class);
  job.setCombinerClass(FactDistinctColumnsCombiner.class);
  job.setMapOutputKeyClass(ShortWritable.class);
  job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
  String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
  HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

  job.setInputFormatClass(HCatInputFormat.class);
  job.setMapperClass(InvertedIndexMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setPartitionerClass(InvertedIndexPartitioner.class);
}
HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);