/**
 * {@link #updateSinkStat(Optional)} computes the {@link SinkStat} for the current run and persists it
 * through the sink stat manager (documented as being backed by {@link IMetadataManager}).
 *
 * <p>As part of the {@link SinkStat} computation it derives an average record size for the current run:
 * every {@link WriteStatus} that wrote bytes and inserted at least one record contributes
 * {@code totalWriteBytes / numInserts} (integer division) to an accumulator, and the accumulator's average
 * is stored under {@link SinkStat#AVG_RECORD_SIZE}. Note that this is the mean of the per-status averages,
 * not an average weighted by record count.
 *
 * <p>{@code sinkStatMgr.persist()} is always invoked, whether or not write statuses were supplied.
 *
 * @param writesStatuses write statuses produced by the current run; when absent no average is computed
 *                       and only the persist step runs.
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (writesStatuses.isPresent()) {
        final JavaRDD<WriteStatus> statuses = writesStatuses.get();
        final LongAccumulator avgRecordSizeCounter = statuses.rdd().sparkContext().longAccumulator();
        statuses.foreach(
            writeStatus -> {
                final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
                // Inserts are all writes minus the writes that were updates.
                final long numInserts =
                    writeStatus.getStat().getNumWrites() - writeStatus.getStat().getNumUpdateWrites();
                // Skip statuses without inserts or bytes; this also guards the division below.
                if (writeBytes > 0 && numInserts > 0) {
                    avgRecordSizeCounter.add(writeBytes / numInserts);
                }
            }
        );
        // Narrow to long (not int): the value is stored in a long, and an int cast would clamp
        // averages above Integer.MAX_VALUE.
        final long avgRecordSize = (long) avgRecordSizeCounter.avg();
        // Only a positive average is recorded; otherwise the current stat is left untouched.
        if (avgRecordSize > 0) {
            log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
            this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
        }
    }
    this.sinkStatMgr.persist();
}
@Test public void testMaxStatHistory() { final String tableName = "testTable"; final IMetadataManager metadataManager = new MemoryMetadataManager(); final SinkStatManager sinkStatManager1 = new SinkStatManager(tableName, metadataManager); sinkStatManager1.init(); final int initialValue = SinkStatManager.MAX_HISTORY_SIZE * 2; sinkStatManager1.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(initialValue)); sinkStatManager1.persist(); final int targetValue = 1; for (int i = 0; i < SinkStatManager.MAX_HISTORY_SIZE; i++) { final SinkStatManager tempSinkStatManager = new SinkStatManager(tableName, metadataManager); tempSinkStatManager.init(); tempSinkStatManager.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(targetValue)); Assert.assertNotEquals(targetValue, tempSinkStatManager.getAvgRecordSize()); tempSinkStatManager.persist(); } // After SinkStatManager.MAX_HISTORY_SIZE runs very first stat should get dropped. final SinkStatManager sinkStatManager2 = new SinkStatManager(tableName, metadataManager); sinkStatManager2.init(); Assert.assertEquals(targetValue, sinkStatManager2.getAvgRecordSize()); } }
@Test public void testSerDser() { final String tableName = "testTable"; final IMetadataManager metadataManager = new MemoryMetadataManager(); final SinkStatManager sinkStatManager1 = new SinkStatManager(tableName, metadataManager); // Initially nothing will be found; it should not crash. sinkStatManager1.init(); Assert.assertEquals(0, sinkStatManager1.getAvgRecordSize()); Assert.assertFalse(sinkStatManager1.isStatHistoryAvailable()); final int avgRecordSize1 = 30; sinkStatManager1.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(avgRecordSize1)); // nothing is saved to metadata manager before persist. Assert.assertEquals(0, metadataManager.getAllKeys().size()); sinkStatManager1.persist(); Assert.assertEquals(1, metadataManager.getAllKeys().size()); final SinkStatManager sinkStatManager2 = new SinkStatManager(tableName, metadataManager); sinkStatManager2.init(); Assert.assertEquals(avgRecordSize1, sinkStatManager2.getAvgRecordSize()); final int avgRecordSize2 = 20; sinkStatManager2.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Integer.toString(avgRecordSize2)); sinkStatManager2.persist(); final SinkStatManager sinkStatManager3 = new SinkStatManager(tableName, metadataManager); sinkStatManager3.init(); Assert.assertEquals((avgRecordSize1 + avgRecordSize2) / 2, sinkStatManager3.getAvgRecordSize()); }