/**
 * Computes the bulk insert parallelism implied by the number of records, the average record
 * size reported by the sink stat manager, and the configured target file size.
 *
 * <p>The result is {@code ceil(numRecords * avgRecordSize / max(1, targetFileSize))}. The
 * currently configured bulk insert parallelism is read only so it can be logged next to the new
 * value; it does not influence the returned result.
 *
 * @param numRecords number of records used for the estimate
 * @return the computed parallelism; this is {@code 0} when the product of {@code numRecords} and
 *         the average record size is zero
 */
@VisibleForTesting
protected int calculateNewBulkInsertParallelism(final long numRecords) {
    final long avgRecordSize = this.sinkStatMgr.getAvgRecordSize();
    final long targetFileSize = this.hoodieConf.getTargetFileSize();
    // Multiply in double precision: a long * long product can silently wrap around for very
    // large inputs, which would turn the estimate negative or otherwise meaningless.
    final double estimatedTotalSize = (double) numRecords * avgRecordSize;
    // max(1, ...) guards against division by a zero or negative target file size. The narrowing
    // cast from double to int saturates at Integer.MAX_VALUE rather than wrapping.
    final int newParallelism = (int) Math.ceil(estimatedTotalSize / Math.max(1, targetFileSize));
    final int currentParallelism = this.hoodieConf.getBulkInsertParallelism();
    log.info(
        "StatsManager:targetFileSize:{}:avgRecordSize:{}:numRecords:{}:"
            + "newBulkInsertParallelism:{}:currentBulkInsertParallelism:{}",
        targetFileSize, avgRecordSize, numRecords, newParallelism, currentParallelism);
    return newParallelism;
}
@Test public void testSerDser() { final String tableName = "testTable"; final IMetadataManager metadataManager = new MemoryMetadataManager(); final SinkStatManager sinkStatManager1 = new SinkStatManager(tableName, metadataManager); // Initially nothing will be found; it should not crash. sinkStatManager1.init(); Assert.assertEquals(0, sinkStatManager1.getAvgRecordSize()); Assert.assertFalse(sinkStatManager1.isStatHistoryAvailable()); final int avgRecordSize1 = 30; sinkStatManager1.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(avgRecordSize1)); // nothing is saved to metadata manager before persist. Assert.assertEquals(0, metadataManager.getAllKeys().size()); sinkStatManager1.persist(); Assert.assertEquals(1, metadataManager.getAllKeys().size()); final SinkStatManager sinkStatManager2 = new SinkStatManager(tableName, metadataManager); sinkStatManager2.init(); Assert.assertEquals(avgRecordSize1, sinkStatManager2.getAvgRecordSize()); final int avgRecordSize2 = 20; sinkStatManager2.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(avgRecordSize2)); sinkStatManager2.persist(); final SinkStatManager sinkStatManager3 = new SinkStatManager(tableName, metadataManager); sinkStatManager3.init(); Assert.assertEquals((avgRecordSize1 + avgRecordSize2) / 2, sinkStatManager3.getAvgRecordSize()); }
@Test public void testMaxStatHistory() { final String tableName = "testTable"; final IMetadataManager metadataManager = new MemoryMetadataManager(); final SinkStatManager sinkStatManager1 = new SinkStatManager(tableName, metadataManager); sinkStatManager1.init(); final int initialValue = SinkStatManager.MAX_HISTORY_SIZE * 2; sinkStatManager1.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(initialValue)); sinkStatManager1.persist(); final int targetValue = 1; for (int i = 0; i < SinkStatManager.MAX_HISTORY_SIZE; i++) { final SinkStatManager tempSinkStatManager = new SinkStatManager(tableName, metadataManager); tempSinkStatManager.init(); tempSinkStatManager.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(targetValue)); Assert.assertNotEquals(targetValue, tempSinkStatManager.getAvgRecordSize()); tempSinkStatManager.persist(); } // After SinkStatManager.MAX_HISTORY_SIZE runs very first stat should get dropped. final SinkStatManager sinkStatManager2 = new SinkStatManager(tableName, metadataManager); sinkStatManager2.init(); Assert.assertEquals(targetValue, sinkStatManager2.getAvgRecordSize()); } }