@Override
public void run() {
  startWorkers();
  while (!stop.get()) {
    boolean hadUpdates = runOneIteration();
    try {
      Thread.sleep(hadUpdates ? 0 : noUpdatesWaitMs);
    } catch (InterruptedException e) {
      LOG.info("Stats updater thread was interrupted and will now exit");
      stopWorkers();
      return;
    }
  }
  LOG.info("Stats updater thread was stopped and will now exit");
}
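For context, the loop above has two exit paths: the `stop` flag (checked once per iteration) and interruption (which breaks out of the sleep immediately). A minimal sketch of how an owner might drive both paths, assuming `StatsUpdaterThread` extends `Thread` (consistent with the `@Override` on `run()`); the driver code itself is hypothetical, but the `init(stop, null)` call matches `createUpdater()` below:

// Hypothetical driver, not part of StatsUpdaterThread itself.
AtomicBoolean stop = new AtomicBoolean(false);
StatsUpdaterThread su = new StatsUpdaterThread();
su.setConf(hiveConf);
su.init(stop, null);   // same init call as createUpdater() uses below
su.start();
// ... later, during shutdown, either path works:
stop.set(true);        // the loop exits after finishing the current iteration
su.interrupt();        // or: wake it from Thread.sleep() for an immediate exit
su.join();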
@Override
public void run() {
  while (true) {
    // This should not start the actual Tez AM.
    SessionState ss = DriverUtils.setUpSessionState(conf, user, false);
    // Wait for the first item to arrive at the queue and process it.
    try {
      runOneWorkerIteration(ss, user, conf, true);
    } catch (InterruptedException e) {
      closeSession(ss);
      LOG.info("Worker thread was interrupted and will now exit");
      return;
    }
    // Keep draining the queue in the same session.
    try {
      while (runOneWorkerIteration(ss, user, conf, false)) {}
    } catch (InterruptedException e) {
      closeSession(ss);
      LOG.info("Worker thread was interrupted unexpectedly and will now exit");
      return;
    }
    // Close the session before we have to wait again.
    closeSession(ss);
    SessionState.detachSession();
  }
}
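The `startWorkers()` called from the scheduler loop is not shown; presumably it just starts the threads whose `run()` appears above. A minimal sketch, under the assumption that `init()` already created a `workers` array of un-started threads (one per `STATS_AUTO_UPDATE_WORKER_COUNT`):

// Sketch: assumes init() built the workers array without starting the threads.
void startWorkers() {
  for (Thread worker : workers) {
    LOG.info("Starting stats updater worker {}", worker.getName());
    worker.start();
  }
}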
private StatsUpdaterThread createUpdater() throws MetaException {
  StatsUpdaterThread su = new StatsUpdaterThread();
  su.setConf(hiveConf);
  su.init(new AtomicBoolean(false), null);
  return su;
}
private List<AnalyzeWork> processOneTable(TableName fullTableName)
    throws MetaException, NoSuchTxnException, NoSuchObjectException {
  if (isAnalyzeTableInProgress(fullTableName)) return null;
  String cat = fullTableName.getCat(), db = fullTableName.getDb(), tbl = fullTableName.getTable();
  Table table = rs.getTable(cat, db, tbl);

  boolean isTxn = TxnUtils.isTransactionalTable(table);
  String writeIdString = null;
  if (isTxn) {
    if (!areTxnStatsEnabled) return null; // Skip transactional tables.
    ValidReaderWriteIdList writeIds = getWriteIds(fullTableName);
    if (writeIds == null) {
      LOG.error("Cannot get writeIds for transactional table " + fullTableName + "; skipping");
      return null;
    }
    writeIdString = writeIds.writeToString();
  }

  List<String> allCols = new ArrayList<>(table.getSd().getColsSize());
  for (FieldSchema fs : table.getSd().getCols()) {
    allCols.add(fs.getName());
  }
  Collections.sort(allCols);

  if (table.getPartitionKeysSize() == 0) {
    Map<String, String> params = table.getParameters();
    long writeId = isTxn ? table.getWriteId() : -1;
    List<String> colsToUpdate;
    if (isExistingOnly) {
      colsToUpdate = getExistingNonPartTableStatsToUpdate(
          fullTableName, cat, db, tbl, params, writeId, allCols, writeIdString);
    } else {
      colsToUpdate = getAnyStatsToUpdate(db, tbl, allCols, params, writeId, writeIdString);
    }
    if (colsToUpdate == null || colsToUpdate.isEmpty()) return null;
    return Lists.newArrayList(new AnalyzeWork(fullTableName, null, null,
        allCols.size() == colsToUpdate.size() ? null : colsToUpdate));
  } else {
    Map<String, List<String>> partsToAnalyze = new HashMap<>();
    List<String> colsForAllParts = findPartitionsToAnalyze(
        fullTableName, cat, db, tbl, allCols, partsToAnalyze, writeIdString);
    LOG.debug("Columns to update are {} for all partitions; {} individual partitions.",
        colsForAllParts, partsToAnalyze.size());
    if (colsForAllParts != null) {
      // All the partitions need the same columns; one analyze command covers them all.
      return Lists.newArrayList(new AnalyzeWork(
          fullTableName, null, buildPartColStr(table), colsForAllParts));
    }
    List<AnalyzeWork> result = new ArrayList<>(partsToAnalyze.size());
    for (Map.Entry<String, List<String>> e : partsToAnalyze.entrySet()) {
      result.add(new AnalyzeWork(fullTableName, e.getKey(), null,
          allCols.size() == e.getValue().size() ? null : e.getValue()));
    }
    return result;
  }
}
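The `AnalyzeWork` items returned above go onto the worker queue; the class itself is not shown. Judging by the four-argument constructor used here, it is roughly a value holder along these lines (a sketch; the field names are assumptions inferred from the call sites):

// Assumed shape of a queued work item: a whole-table analyze (partName == null),
// all partitions at once (allParts != null), or one partition, with an optional
// column subset (cols == null means "all columns").
private static final class AnalyzeWork {
  final TableName tableName;
  final String partName;   // specific partition spec, or null
  final String allParts;   // partition-column spec covering all partitions, or null
  final List<String> cols; // columns to analyze, or null for all

  AnalyzeWork(TableName tableName, String partName, String allParts, List<String> cols) {
    this.tableName = tableName;
    this.partName = partName;
    this.allParts = allParts;
    this.cols = cols;
  }
}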
@Test(timeout=40000)
public void testSimpleUpdateWithThreads() throws Exception {
  StatsUpdaterThread su = createUpdater();
  su.startWorkers();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  executeQuery("create table simple_stats (i int, s string)");
  executeQuery("insert into simple_stats (i, s) values (1, 'test')");
  verifyAndUnsetColStats("simple_stats", Lists.newArrayList("i"), msClient);

  assertTrue(su.runOneIteration());
  su.waitForQueuedCommands();
  verifyStatsUpToDate("simple_stats", Lists.newArrayList("i"), msClient, true);

  msClient.close();
}
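The `verifyStatsUpToDate` helper used throughout these tests is not shown. What it most likely checks is the per-column accuracy that Hive records in the `COLUMN_STATS_ACCURATE` table property, via `StatsSetupConst`. A minimal sketch of the table-level variant (the partition-level overload used elsewhere presumably reads the partition's parameters instead):

// Sketch: asserts whether each column's stats are marked accurate in table params.
private void verifyStatsUpToDate(String tbl, List<String> cols,
    IMetaStoreClient msClient, boolean expected) throws Exception {
  Table table = msClient.getTable(ss.getCurrentDatabase(), tbl);
  Map<String, String> params = table.getParameters();
  for (String col : cols) {
    assertEquals(expected, StatsSetupConst.areColumnStatsUptoDate(params, col));
  }
}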
@Test(timeout=40000)
public void testAllPartitions() throws Exception {
  final int PART_COUNT = 3;
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
  executeQuery("create table simple_stats (s string) partitioned by (i int)");
  for (int i = 0; i < PART_COUNT; ++i) {
    executeQuery("insert into simple_stats partition(i='" + i + "') values ('test')");
  }
  verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", false);

  assertTrue(su.runOneIteration());
  drainWorkQueue(su, 1); // All the partitions need to be updated; a single command can be used.
  verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", true);

  assertFalse(su.runOneIteration());
  drainWorkQueue(su, 0); // Nothing else is updated after the first update.

  msClient.close();
}
if (isAnalyzePartInProgress(fullTableName, partName) || "true".equalsIgnoreCase(skipParam)) {
  if (isAllParts) {
    addPreviousPartitions(t, partNames, currentBatchStart, currentBatch,
        currentIxInBatch, colsToUpdateForAll, partsToAnalyze);
  }
  isAllParts = false;
  continue;
}

Collections.sort(colsToMaybeUpdate);
List<String> colsToUpdate = getAnyStatsToUpdate(db, tbl, colsToMaybeUpdate, params,
    writeIdString == null ? -1 : part.getWriteId(), writeIdString);
if (colsToUpdate == null || colsToUpdate.isEmpty()) {
  if (isAllParts) {
    addPreviousPartitions(t, partNames, currentBatchStart, currentBatch,
        currentIxInBatch, colsToUpdateForAll, partsToAnalyze);
  }
  isAllParts = false;
  continue;
}

if (isAllParts) {
  List<String> newCols = verifySameColumnsForAllParts(colsToUpdateForAll, colsToUpdate);
  if (newCols == null) {
    // The columns diverged; stop batching and fall back to per-partition commands.
    isAllParts = false;
    addPreviousPartitions(t, partNames, currentBatchStart, currentBatch,
        currentIxInBatch, colsToUpdateForAll, partsToAnalyze);
  } else if (colsToUpdateForAll == null) {
    colsToUpdateForAll = newCols;
  }
}
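`verifySameColumnsForAllParts` decides whether a single all-partitions command is still possible; its body is not shown above. A plausible sketch, assuming both lists are kept sorted (which the `Collections.sort` calls suggest): it returns the common column list, or null once the partitions diverge.

// Sketch: a null result means the partitions need different columns, so the
// caller falls back to queueing per-partition analyze commands.
private List<String> verifySameColumnsForAllParts(
    List<String> colsToUpdateForAll, List<String> colsToUpdate) {
  if (colsToUpdateForAll == null) return colsToUpdate;       // first partition seen
  if (!colsToUpdate.equals(colsToUpdateForAll)) return null; // sorted lists differ
  return colsToUpdateForAll;
}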
executeQuery("insert into simple_stats3 partition(i=1) values ('test')"); assertTrue(su.runOneIteration()); assertEquals(3, su.getQueueLength()); verifyStatsUpToDate("simple_stats3", "i=1", Lists.newArrayList("s"), msClient, true); assertFalse(su.runOneIteration()); assertEquals(3, su.getQueueLength()); // Nothing new added to the queue while analyze runs. executeQuery("insert into simple_stats3 partition(i=2) values ('test')"); assertTrue(su.runOneIteration()); assertEquals(4, su.getQueueLength()); // An item for new partition is queued now. verifyPartStatsUpToDate(3, 0, msClient, "simple_stats3", true); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); // Nothing else is updated after the first update.
private List<String> getExistingNonPartTableStatsToUpdate(TableName fullTableName,
    String cat, String db, String tbl, Map<String, String> params, long statsWriteId,
    List<String> allCols, String writeIdString) throws MetaException {
  ColumnStatistics existingStats = null;
  try {
    // Note: this should NOT do txn verification - we want to get outdated stats, to
    //       see if we need to update anything.
    existingStats = rs.getTableColumnStatistics(cat, db, tbl, allCols);
  } catch (NoSuchObjectException e) {
    LOG.error("Cannot retrieve existing stats, skipping " + fullTableName, e);
    return null;
  }
  // TODO: we should probably skip updating if writeId is from an active txn
  boolean isTxnValid = (writeIdString == null) || ObjectStore.isCurrentStatsValidForTheQuery(
      conf, params, statsWriteId, writeIdString, false);
  return getExistingStatsToUpdate(existingStats, params, isTxnValid);
}
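The `getExistingStatsToUpdate` call at the end resolves which of the already-present stats are stale. A minimal sketch of that logic, assuming `StatsSetupConst` tracks basic and per-column accuracy in the table parameters: if the stats are invalid for the transaction state, every existing column needs a refresh; otherwise only the columns no longer marked accurate do.

// Sketch of the helper: collect stale columns among those that already have stats.
private List<String> getExistingStatsToUpdate(
    ColumnStatistics existingStats, Map<String, String> params, boolean isTxnValid) {
  boolean hasAnyAccurate = isTxnValid && StatsSetupConst.areBasicStatsUptoDate(params);
  List<String> colsToUpdate = new ArrayList<>();
  for (ColumnStatisticsObj obj : existingStats.getStatsObj()) {
    String col = obj.getColName();
    if (!hasAnyAccurate || !StatsSetupConst.areColumnStatsUptoDate(params, col)) {
      colsToUpdate.add(col);
    }
  }
  return colsToUpdate;
}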
@Test(timeout=80000)
public void testQueueingWithThreads() throws Exception {
  final int PART_COUNT = 12;
  hiveConf.setInt(MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX.getVarname(), 5);
  hiveConf.setInt(MetastoreConf.ConfVars.STATS_AUTO_UPDATE_WORKER_COUNT.getVarname(), 2);
  StatsUpdaterThread su = createUpdater();
  su.startWorkers();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
  executeQuery("create table simple_stats (s string) partitioned by (i int)");
  for (int i = 0; i < PART_COUNT; ++i) {
    executeQuery("insert into simple_stats partition(i='" + i + "') values ('test')");
  }
  verifyPartStatsUpToDate(PART_COUNT, 0, msClient, "simple_stats", false);

  // Set one of the partitions to be skipped, so that a command is created for every other one.
  setPartitionSkipProperty(msClient, "simple_stats", "i=0", "true");

  assertTrue(su.runOneIteration());
  su.waitForQueuedCommands();
  verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, false);
  verifyPartStatsUpToDate(PART_COUNT, 1, msClient, "simple_stats", true);

  assertFalse(su.runOneIteration());
  drainWorkQueue(su, 0); // Nothing else is updated after the first update.

  msClient.close();
}
ValidWriteIdList initialWriteIds = msClient.getValidWriteIds(fqName);
verifyStatsUpToDate(tblName, cols, msClient, initialWriteIds.toString(), true);

assertFalse(su.runOneIteration());
drainWorkQueue(su, 0);
ValidWriteIdList nextWriteIds = msClient.getValidWriteIds(fqName);
verifyStatsUpToDate(tblName, cols, msClient, nextWriteIds.toString(), true);

assertFalse(su.runOneIteration());
drainWorkQueue(su, 0);

String currentWriteIds = msClient.getValidWriteIds(fqName).toString();
assertTrue(su.runOneIteration());
drainWorkQueue(su);
assertTrue(su.runOneIteration());
drainWorkQueue(su);
assertTrue(su.runOneIteration());
drainWorkQueue(su);
assertTrue(su.runOneIteration());
drainWorkQueue(su);
executeQuery("insert into simple_stats partition(p=2) values ('test2')"); executeQuery("insert into simple_stats partition(p=3) values ('test3')"); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); assertFalse(su.runOneIteration()); drainWorkQueue(su, 0); assertEquals(1, stats.size()); assertTrue(su.runOneIteration()); drainWorkQueue(su, 2);
private void drainWorkQueue(StatsUpdaterThread su) throws InterruptedException {
  while (su.runOneWorkerIteration(ss, ss.getUserName(), ss.getConf(), false)) {}
}
@Test(timeout=40000)
public void testMultipleTables() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  executeQuery("create table simple_stats (s string)");
  executeQuery("insert into simple_stats (s) values ('test')");
  executeQuery("create table simple_stats2 (s string)");
  executeQuery("insert into simple_stats2 (s) values ('test2')");
  verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
  verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

  assertTrue(su.runOneIteration());
  drainWorkQueue(su);
  verifyAndUnsetColStats("simple_stats", Lists.newArrayList("s"), msClient);
  verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

  setTableSkipProperty(msClient, "simple_stats", "true");
  assertTrue(su.runOneIteration());
  drainWorkQueue(su);
  // The skipped table stays stale; the other table is still updated.
  verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
  verifyAndUnsetColStats("simple_stats2", Lists.newArrayList("s"), msClient);

  msClient.close();
}
private void drainWorkQueue(StatsUpdaterThread su, int expectedReqs) throws InterruptedException {
  int actualReqs = 0;
  while (su.runOneWorkerIteration(ss, ss.getUserName(), ss.getConf(), false)) {
    ++actualReqs;
  }
  assertEquals(expectedReqs, actualReqs);
}
@Test(timeout=40000)
public void testPartitionsWithDifferentColsAll() throws Exception {
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
  executeQuery("create table simple_stats (s string, t string, u string) partitioned by (i int)");
  executeQuery("insert into simple_stats partition(i=0) values ('test', '0', 'foo')");
  executeQuery("insert into simple_stats partition(i=1) values ('test', '1', 'bar')");
  executeQuery("analyze table simple_stats partition(i=0) compute statistics for columns s");
  executeQuery("analyze table simple_stats partition(i=1) compute statistics for columns s, u");
  verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s"), msClient, true);
  verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("t", "u"), msClient, false);
  verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "u"), msClient, true);
  verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("t"), msClient, false);

  assertTrue(su.runOneIteration());
  drainWorkQueue(su, 2); // Different columns mean different commands have to be run.
  verifyStatsUpToDate("simple_stats", "i=0", Lists.newArrayList("s", "t", "u"), msClient, true);
  verifyStatsUpToDate("simple_stats", "i=1", Lists.newArrayList("s", "t", "u"), msClient, true);

  assertFalse(su.runOneIteration());
  drainWorkQueue(su, 0); // Nothing else is updated after the first update.

  msClient.close();
}
@Test(timeout=40000)
public void testExistingOnly() throws Exception {
  hiveConf.set(MetastoreConf.ConfVars.STATS_AUTO_UPDATE.getVarname(), "existing");
  StatsUpdaterThread su = createUpdater();
  IMetaStoreClient msClient = new HiveMetaStoreClient(hiveConf);
  executeQuery("create table simple_stats (i int, s string)");
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
  hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
  executeQuery("insert into simple_stats (i, s) values (1, 'test')");
  executeQuery("analyze table simple_stats compute statistics for columns i");
  verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);
  verifyAndUnsetColStats("simple_stats", Lists.newArrayList("i"), msClient);

  assertTrue(su.runOneIteration());
  drainWorkQueue(su);
  // In "existing" mode, only the column that already had stats ("i") is re-analyzed.
  verifyStatsUpToDate("simple_stats", Lists.newArrayList("i"), msClient, true);
  verifyStatsUpToDate("simple_stats", Lists.newArrayList("s"), msClient, false);

  msClient.close();
}
setPartitionSkipProperty(msClient, "simple_stats", "i=" + EXCLUDED_PART, "true"); assertTrue(su.runOneIteration());
verifyAndUnsetColStats("simple_stats", "i=1", Lists.newArrayList("t"), msClient); assertTrue(su.runOneIteration()); drainWorkQueue(su, 2);