/**
 * Main loop of the stats updater thread.
 *
 * <p>Starts the workers and then calls {@code runOneIteration()} repeatedly until the
 * {@code stop} flag is set. When an iteration reports no updates, the thread sleeps for
 * {@code noUpdatesWaitMs} before polling again; when it reports updates, the next
 * iteration follows without a delay.
 *
 * <p>If the thread is interrupted while sleeping, the workers are stopped, the interrupt
 * status is restored and the method returns.
 *
 * <p>NOTE(review): the normal exit path (the {@code stop} flag being set) does not call
 * {@code stopWorkers()}; confirm that whoever sets the flag is responsible for that.
 */
@Override
public void run() {
  startWorkers();
  while (!stop.get()) {
    boolean hadUpdates = runOneIteration();
    try {
      // No back-off after a productive iteration; otherwise wait before polling again.
      Thread.sleep(hadUpdates ? 0 : noUpdatesWaitMs);
    } catch (InterruptedException e) {
      LOG.info("Stats updater thread was interrupted and will now exit");
      stopWorkers();
      // Catching InterruptedException clears the interrupt status; restore it so that
      // any code observing this thread after run() returns can still see the interrupt.
      Thread.currentThread().interrupt();
      return;
    }
  }
  LOG.info("Stats updater thread was stopped and will now exit");
}
@Test(timeout=40000) public void testAllPartitions() throws Exception { final int PART_COUNT = 3; StatsUpdaterThread su = createUpdater(); IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); executeQuery("create table simple_stats (s string) partitioned by (i int)"); for (int i = 0; i < PART_COUNT; ++i) { executeQuery("insert into simple_stats partition(i='" + i + "') values ('test')"); } verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", false); assertTrue(su.runOneIteration()); drainWorkQueue(su, 1); // All the partitions need to be updated; a single command can be used. verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); // Nothing else is updated after the first update. msClient.close(); }
// Two snapshots of the valid write IDs for fqName are taken; each one is passed to
// verifyStatsUpToDate with the expectation that the stats for cols are up to date, and each
// is followed by an updater iteration that is asserted to report no updates.
// The write-id string is then captured once more, and the remaining iterations are all
// asserted to report updates before the queue is drained.
// NOTE(review): drainWorkQueue(su, 0) presumably also checks that nothing was queued;
// confirm against the helper.
ValidWriteIdList initialWriteIds = msClient.getValidWriteIds(fqName); verifyStatsUpToDate(tblName, cols, msClient, initialWriteIds.toString(), true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); ValidWriteIdList nextWriteIds = msClient.getValidWriteIds(fqName); verifyStatsUpToDate(tblName, cols, msClient, nextWriteIds.toString(), true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); String currentWriteIds = msClient.getValidWriteIds(fqName).toString(); assertTrue(su.runOneIteration()); drainWorkQueue(su); assertTrue(su.runOneIteration()); drainWorkQueue(su); assertTrue(su.runOneIteration()); drainWorkQueue(su); assertTrue(su.runOneIteration()); drainWorkQueue(su);
/**
 * Same scenario as a simple single-table update, but with the updater's workers started:
 * column stats for {@code i} are unset, one iteration is run, and after waiting for the
 * queued commands the stats for {@code i} must be up to date again.
 */
@Test(timeout=40000)
public void testSimpleUpdateWithThreads() throws Exception {
  StatsUpdaterThread su = createUpdater();
  su.startWorkers();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  try {
    executeQuery("create table simple_stats (i int, s string)");
    executeQuery("insert into simple_stats (i, s) values (1, 'test')");
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("i"), msClient);

    assertTrue(su.runOneIteration());
    // The workers process the queue here, so wait for them instead of draining it manually.
    su.waitForQueuedCommands();
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("i"), msClient, true);
  } finally {
    // Release the metastore client even when an assertion or query above fails.
    msClient.close();
  }
}
/**
 * Runs the updater against two tables at once.
 *
 * <p>First pass: stats for column {@code s} are unset on both tables and one iteration must
 * restore them on both. Second pass: stats are unset again and {@code simple_stats} is
 * marked to be skipped; after the iteration only {@code simple_stats2} may have been
 * updated.
 */
@Test(timeout=40000)
public void testMultipleTables() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  try {
    executeQuery("create table simple_stats (s string)");
    executeQuery("insert into simple_stats (s) values ('test')");
    executeQuery("create table simple_stats2 (s string)");
    executeQuery("insert into simple_stats2 (s) values ('test2')");
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

    assertTrue(su.runOneIteration());
    drainWorkQueue(su);
    // Both tables were updated; unset the stats again for the second pass.
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

    setTableSkipProperty(msClient, "simple_stats", "true");
    assertTrue(su.runOneIteration());
    drainWorkQueue(su);
    // The skipped table must still be stale. Check column "s", the only column the table
    // has; the previous check named a non-existent column "i" and so never looked at the
    // column whose stats were unset.
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
    verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);
  } finally {
    // Release the metastore client even when an assertion or query above fails.
    msClient.close();
  }
}
@Test(timeout=80000) public void testQueueingWithThreads() throws Exception { final int PART_COUNT = 12; hiveConf.setInt(MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX.getVarname(), 5); hiveConf.setInt(MetastoreConf.ConfVars.STATS_AUTO_UPDATE_WORKER_COUNT.getVarname(), 2); StatsUpdaterThread su = createUpdater(); su.startWorkers(); IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); executeQuery("create table simple_stats (s string) partitioned by (i int)"); for (int i = 0; i < PART_COUNT; ++i) { executeQuery("insert into simple_stats partition(i='" + i + "') values ('test')"); } verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", false); // Set one of the partitions to be skipped, so that a command is created for every other one. setPartitionSkipProperty(msClient, "simple_stats", "i=0", "true"); assertTrue(su.runOneIteration()); su.waitForQueuedCommands(); verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, false); verifyPartStatsUpToDate(PART_COUNT, 1, msClient, "simple_stats", true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); // Nothing else is updated after the first update. msClient.close(); }
// Inserts one row each into partitions p=2 and p=3 of simple_stats. The next two updater
// iterations are asserted to report no updates, `stats` is asserted to contain exactly one
// element, and the final iteration is asserted to report updates.
// NOTE(review): the second argument of drainWorkQueue looks like the expected number of
// queued commands (0, then 2); confirm against the helper.
executeQuery("insert into simple_stats partition(p=2) values ('test2')"); executeQuery("insert into simple_stats partition(p=3) values ('test3')"); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); assertEquals(1, stats.size()); assertTrue(su.runOneIteration()); drainWorkQueue(su, 2);
// Queue-length checks around inserts into simple_stats3. The queue is not drained between
// iterations here: its length is asserted to be 3 after the insert into i=1, to stay at 3
// after an iteration that reports no updates, and to become 4 after the insert into i=2.
// A last iteration is then asserted to report no updates.
executeQuery("insert into simple_stats3 partition(i=1) values ('test')"); assertTrue(su.runOneIteration()); assertEquals(3, su.getQueueLength()); verifyStatsUpToDate("simple_stats3", "i=1", Lists.newArrayList("s"), msClient, true); assertFalse(su.runOneIteration()); assertEquals(3, su.getQueueLength()); // Nothing new added to the queue while analyze runs. executeQuery("insert into simple_stats3 partition(i=2) values ('test')"); assertTrue(su.runOneIteration()); assertEquals(4, su.getQueueLength()); // An item for new partition is queued now. verifyPartStatsUpToDate(3, 0, msClient, "simple_stats3", true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); // Nothing else is updated after the first update.
@Test(timeout=40000) public void testPartitionsWithDifferentColsAll() throws Exception { StatsUpdaterThread su = createUpdater(); IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)"); executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')"); executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')"); executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s"); executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns s, u"); verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, true); verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("t", "u"), msClient, false); verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "u"), msClient, true); verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("t"), msClient, false); assertTrue(su.runOneIteration()); // Different columns means different commands have to be run. drainWorkQueue(su, 2); verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t", "u"), msClient, true); verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); // Nothing else is updated after the first update. msClient.close(); }
/**
 * Runs the updater with {@code STATS_AUTO_UPDATE} set to {@code "existing"}. Column
 * {@code i} is analyzed and then has its stats unset, column {@code s} never gets stats;
 * after one iteration {@code i} must be up to date again while {@code s} must still have
 * no up-to-date stats.
 */
@Test(timeout=40000)
public void testExistingOnly() throws Exception {
  // The mode is configured before the updater is created.
  hiveConf.set(MetastoreConf.ConfVars.STATS_AUTO_UPDATE.getVarname(), "existing");
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  try {
    executeQuery("create table simple_stats (i int, s string)");
    // Disable stats auto-gathering so that only the explicit analyze below produces
    // column stats.
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
    executeQuery("insert into simple_stats (i, s) values (1, 'test')");
    executeQuery("analyze table simple_stats compute statistics for columns i");
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
    verifyAndUnsetColStats("simple_stats", Lists.newArrayList("i"), msClient);

    assertTrue(su.runOneIteration());
    drainWorkQueue(su);
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("i"), msClient, true);
    verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
  } finally {
    // Release the metastore client even when an assertion or query above fails.
    msClient.close();
  }
}
// Sets the skip property to "true" on partition i=EXCLUDED_PART of simple_stats; the
// following updater iteration is still asserted to report updates.
setPartitionSkipProperty(msClient, "simple_stats", "i=" + EXCLUDED_PART, "true"); assertTrue(su.runOneIteration());
// Verifies and unsets the stats for column t on partition i=1 of simple_stats, then asserts
// that the next updater iteration reports updates.
// NOTE(review): the 2 passed to drainWorkQueue looks like the expected number of queued
// commands; confirm against the helper.
verifyAndUnsetColStats("simple_stats", "i=1", Lists.newArrayList("t"), msClient); assertTrue(su.runOneIteration()); drainWorkQueue(su, 2);