private static void setHiveConf(HiveConf conf, String var) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Overriding HiveConf setting : " + var + " = " + true);
  }
  conf.setBoolean(var, true);
}
public static HiveConf storePropertiesToHiveConf(Properties properties, HiveConf hiveConf)
    throws IOException {
  for (Map.Entry<Object, Object> prop : properties.entrySet()) {
    if (prop.getValue() instanceof String) {
      hiveConf.set((String) prop.getKey(), (String) prop.getValue());
    } else if (prop.getValue() instanceof Integer) {
      hiveConf.setInt((String) prop.getKey(), (Integer) prop.getValue());
    } else if (prop.getValue() instanceof Boolean) {
      hiveConf.setBoolean((String) prop.getKey(), (Boolean) prop.getValue());
    } else if (prop.getValue() instanceof Long) {
      hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue());
    } else if (prop.getValue() instanceof Float) {
      hiveConf.setFloat((String) prop.getKey(), (Float) prop.getValue());
    } else {
      LOG.warn("Unsupported type: key=" + prop.getKey() + " value=" + prop.getValue());
    }
  }
  return hiveConf;
}
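A minimal usage sketch for the helper above. The property keys are real Hive configuration names, but the values are illustrative only; the point is how each value type is routed to the matching typed HiveConf setter:

// Hypothetical caller; values are illustrative only.
Properties props = new Properties();
props.put("hive.exec.parallel", Boolean.TRUE);                 // routed to setBoolean()
props.put("hive.exec.reducers.max", Integer.valueOf(99));      // routed to setInt()
props.put("hive.metastore.uris", "thrift://localhost:9083");   // routed to set()

HiveConf conf = storePropertiesToHiveConf(props, new HiveConf());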
@Override
public void init(QueryState queryState, LogHelper console, Hive db) {
  super.init(queryState, console, db);
  // Tez requires us to use RPC for the query plan
  HiveConf.setBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN, true);
  // We require the use of recursive input dirs for union processing
  conf.setBoolean("mapred.input.dir.recursive", true);
}
@Override
public void init(QueryState queryState, LogHelper console, Hive db) {
  super.init(queryState, console, db);
  // Recursive input dirs are required when hive.optimize.union.remove = true
  if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)) {
    conf.setBoolean("mapred.input.dir.recursive", true);
  }
}
/**
 * Creates and initializes a JobConf object that can be used to execute
 * the DAG. The configuration object will contain configurations from mapred-site
 * overlaid with key/value pairs from the hiveConf object. Finally it will also
 * contain some Hive-specific configurations that do not change from DAG to DAG.
 *
 * @param hiveConf Current hiveConf for the execution
 * @return JobConf base configuration for job execution
 * @throws IOException
 */
public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
  hiveConf.setBoolean("mapred.mapper.new-api", false);

  JobConf conf = new JobConf(new TezConfiguration(hiveConf));

  conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());
  conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
  conf.setBoolean("mapred.committer.job.task.cleanup.needed", false);

  conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class);
  conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName());
  conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName());

  conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER));
  conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName());

  // Remove the job credential entry; it cannot be set on the tasks
  conf.unset("mapreduce.job.credentials.binary");

  hiveConf.stripHiddenConfigurations(conf);
  return conf;
}
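A hedged usage sketch for the method above. The receiver name dagUtils is an assumption standing in for whatever object exposes createConfiguration(); in Hive this logic lives in the Tez DagUtils class:

// `dagUtils` is a placeholder for the enclosing utility instance.
HiveConf hiveConf = new HiveConf();
JobConf baseConf = dagUtils.createConfiguration(hiveConf);
// baseConf is now the per-DAG base configuration; hidden configurations
// (e.g. credentials) have already been stripped from it.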
private static IMetaStoreClient getMetaStoreClient(HiveConf conf, String metastoreUri,
    boolean secureMode, String owner) throws ConnectionError {
  if (metastoreUri != null) {
    conf.set(MetastoreConf.ConfVars.THRIFT_URIS.getHiveName(), metastoreUri);
  }
  if (secureMode) {
    conf.setBoolean(MetastoreConf.ConfVars.USE_THRIFT_SASL.getHiveName(), true);
  }
  try {
    LOG.info("Creating metastore client for {}", owner);
    return HiveMetaStoreUtils.getHiveMetastoreClient(conf);
  } catch (MetaException | IOException e) {
    throw new ConnectionError("Error connecting to Hive Metastore URI: "
        + metastoreUri + ". " + e.getMessage(), e);
  }
}
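A minimal sketch of how this helper might be invoked. Since it is private static, the call site would be in the same class; the URI and owner strings below are placeholders:

// Illustrative values only.
HiveConf conf = new HiveConf();
IMetaStoreClient client = getMetaStoreClient(
    conf,
    "thrift://metastore-host:9083",  // placeholder metastore URI
    true,                            // enable SASL for a secure (Kerberized) cluster
    "streaming-agent");              // placeholder owner, used only for logging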
private IMetaStoreClient prepareParallelTest(String tableName, int val)
    throws Exception, MetaException, TException, NoSuchObjectException {
  hiveConf.setBoolean("hive.stats.autogather", true);
  hiveConf.setBoolean("hive.stats.column.autogather", true);
  runStatementOnDriver("drop table if exists " + tableName);
  runStatementOnDriver(String.format("create table %s (a int) stored as orc "
      + "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')",
      tableName));
  runStatementOnDriver(String.format("insert into %s (a) values (" + val + ")", tableName));
  runStatementOnDriver(String.format("insert into %s (a) values (" + val + ")", tableName));

  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  // Stats should be valid after serial inserts.
  List<ColumnStatisticsObj> stats = getTxnTableStats(msClient, tableName);
  Assert.assertEquals(1, stats.size());
  return msClient;
}
@Test
public void testGetScratchDirectoriesForPaths() throws IOException {
  Context spyContext = spy(context);

  // When object store paths are used, getMRTmpPath() is called to get a temporary
  // directory on the default scratch directory location (usually /tmp)
  Path mrTmpPath = new Path("hdfs://hostname/tmp/scratch");
  doReturn(mrTmpPath).when(spyContext).getMRTmpPath();
  assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));

  // When local filesystem paths are used, getMRTmpPath() should be called to
  // get a temporary directory
  assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:/user")));
  assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));

  // When non-object store paths are used, getExtTmpPathRelTo is called to get a temporary
  // directory on the same path passed as a parameter
  Path tmpPathRelTo = new Path("hdfs://hostname/user");
  doReturn(tmpPathRelTo).when(spyContext).getExtTmpPathRelTo(any(Path.class));
  assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("/user")));

  conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, false);
  assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));
  assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));
  conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, true);
}
@Before
public void init() {
  conf.setBoolean("dummyBoolean", true);
  conf.set("dummy", "aaa");
  conf.set("dummy2", "aaa");
  conf.set("3dummy", "aaa");
}
@Test
public void testTxnStatsOnOff() throws Exception {
  String tableName = "mm_table";
  hiveConf.setBoolean("hive.stats.autogather", true);
  hiveConf.setBoolean("hive.stats.column.autogather", true);
  runStatementOnDriver("drop table if exists " + tableName);
  runStatementOnDriver(String.format("create table %s (a int) stored as orc "
      + "TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')",
      tableName));
  // ... (snippet truncated in the original: the inserts and the initial
  // getTxnTableStats(msClient, tableName) call are elided here)
  Assert.assertEquals(1, stats.size());
  msClient.close();

  hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), false);
  msClient = new HiveMetaStoreClient(hiveConf);
  // ... (elided in the original)
  Assert.assertEquals(0, stats.size());
  msClient.close();

  hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), true);
  msClient = new HiveMetaStoreClient(hiveConf);
  stats = getTxnTableStats(msClient, tableName);

  hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), false);
  hiveConf.setBoolean(MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED.getVarname(), true);
  msClient = new HiveMetaStoreClient(hiveConf);
  stats = getTxnTableStats(msClient, tableName);
  // ... (remainder of the test elided in the original)
hiveConfig.setBoolean(CLIENT_CACHE_DISABLED_PROPERTY, true);
/**
 * Set the query plan to use the cache entry passed in to return the query results.
 * @param cacheEntry The results cache entry that will be used to resolve the query.
 */
private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean needsReset) {
  if (needsReset) {
    reset(true);
    inputs.clear();
  }

  // Change query FetchTask to use new location specified in results cache.
  FetchTask fetchTask = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
  setFetchTask(fetchTask);

  queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation());
  resultSchema = cacheEntry.getQueryInfo().getResultSchema();
  setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo());
  setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo());
  inputs.addAll(cacheEntry.getQueryInfo().getInputs());

  // Set recursive traversal in case the cached query was a UNION generated by Tez.
  conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);

  // Indicate that the query will use a cached result.
  setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
HiveConf.setBoolVar(conf, ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS, true);
HiveConf.setBoolVar(conf, ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS, true);
conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_NODE_LOCAL_ONLY, true);
hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
hiveConf.setBoolean("mapred.input.dir.recursive", true);
TxnDbUtil.setConfValues(hiveConf);
TxnDbUtil.prepDb(hiveConf);
private void testMM(boolean existingTable, boolean isSourceMM) throws Exception {
  HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY, true);
  hiveConf.setBoolean("mapred.input.dir.recursive", true);

  int[][] data = {{1, 2}, {3, 4}, {5, 6}};
  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("drop table if exists Tstage");
  if (existingTable) {
    runStatementOnDriver("create table T (a int, b int)");
  }
  runStatementOnDriver("create table Tstage (a int, b int)"
      + (isSourceMM ? "" : " tblproperties('transactional'='false')"));
  runStatementOnDriver("insert into Tstage" + TestTxnCommands2.makeValuesClause(data));
  runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'");
  runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'");

  // verify data
  List<String> rs = runStatementOnDriver("select a, b from T order by a, b");
  Assert.assertEquals("reading imported data", TestTxnCommands2.stringifyValues(data), rs);

  // verify that we are indeed doing an ACID write (import)
  rs = runStatementOnDriver("select INPUT__FILE__NAME from T order by INPUT__FILE__NAME");
  Assert.assertEquals(3, rs.size());
  for (String s : rs) {
    Assert.assertTrue(s, s.contains("/delta_0000001_0000001_0000/"));
    Assert.assertTrue(s, s.endsWith("/000000_0"));
  }
}
/**
 * This test will fail - MM export doesn't filter out aborted transaction data.
 */
@Ignore
@Test
public void testMMExportAborted() throws Exception {
  HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY, true);
  hiveConf.setBoolean("mapred.input.dir.recursive", true);

  int[][] data = {{1, 2}, {3, 4}, {5, 6}};
  int[][] dataAbort = {{10, 2}};
  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("drop table if exists Tstage");
  runStatementOnDriver("create table T (a int, b int)");
  runStatementOnDriver("create table Tstage (a int, b int)");

  HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true);
  runStatementOnDriver("insert into Tstage" + TestTxnCommands2.makeValuesClause(dataAbort));
  HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false);
  runStatementOnDriver("insert into Tstage" + TestTxnCommands2.makeValuesClause(data));

  runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'");
  runStatementOnDriver("import table T from '" + getWarehouseDir() + "/1'");

  // verify data
  List<String> rs = runStatementOnDriver("select a, b from T order by a, b");
  Assert.assertEquals("reading imported data", TestTxnCommands2.stringifyValues(data), rs);
}