bytesWritten.add(countSizeInBytes(row));
/**
 * {@link #updateSinkStat(Optional)} will compute {@link SinkStat} and persist changes into
 * {@link IMetadataManager}. As part of the {@link SinkStat} computation, it will compute the
 * average record size for the current run.
 *
 * @param writesStatuses write statuses of the current run, if any
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (writesStatuses.isPresent()) {
        final LongAccumulator avgRecordSizeCounter = writesStatuses.get().rdd().sparkContext().longAccumulator();
        writesStatuses.get().foreach(
            writeStatus -> {
                final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
                final long numInserts = writeStatus.getStat().getNumWrites()
                    - writeStatus.getStat().getNumUpdateWrites();
                if (writeBytes > 0 && numInserts > 0) {
                    avgRecordSizeCounter.add(writeBytes / numInserts);
                }
            }
        );
        final long avgRecordSize = (long) avgRecordSizeCounter.avg();
        if (avgRecordSize > 0) {
            log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
            this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
        }
    }
    this.sinkStatMgr.persist();
}
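// A minimal, self-contained sketch of the same driver-side averaging pattern used by
// updateSinkStat above: executors add one value per record into a LongAccumulator and the
// driver reads avg() once the action has finished. The class name, dataset values, and
// accumulator name below are illustrative assumptions, not taken from the original project.
import java.util.Arrays;

import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;

public class AvgRecordSizeSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .master("local[*]")
            .appName("avg-record-size-sketch")
            .getOrCreate();

        // longAccumulator(name) both creates and registers the accumulator with the SparkContext.
        LongAccumulator recordSizeBytes = spark.sparkContext().longAccumulator("recordSizeBytes");

        Dataset<Long> sizes = spark.createDataset(Arrays.asList(120L, 80L, 100L), Encoders.LONG());

        // Executor-side adds; avg() is only meaningful on the driver after the action completes.
        sizes.foreach((ForeachFunction<Long>) v -> recordSizeBytes.add(v));

        long avgRecordSize = (long) recordSizeBytes.avg(); // 100
        System.out.println("avg record size = " + avgRecordSize);

        spark.stop();
    }
}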
/**
 * Instantiates a new SparkMLongAccumulator.
 *
 * @param name the name of the accumulator
 */
public SparkMLongAccumulator(String name) {
    super(new LongAccumulator());
    this.name = name;
}
private void verifyKeys(final List<Integer> keys, final DI di) {
    keys.stream().forEach(key -> {
        if (!this.registeredKeys.containsKey(key)) {
            log.error("Invalid key:{}: in keys:{}:for record:{}", key, keys, di);
            throw new ForkOperationException("Using unregistered key :" + key);
        }
        this.registeredKeys.get(key).get().add(1);
    });
}
        Set<String> fileIdsWithPendingCompactions) throws IOException {
    totalLogFiles = new LongAccumulator();
    totalFileSlices = new LongAccumulator();
    jsc.sc().register(totalLogFiles);
    jsc.sc().register(totalFileSlices);
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
    List<Row> filtersList = Lists.newArrayList(filters);

    if (filtersList.size() == 0) {
        throw new RuntimeException("Kudu existing filter was not provided.");
    }

    if (filtersList.get(0).schema() == null) {
        throw new RuntimeException("Kudu existing filter did not contain a schema.");
    }

    if (hasAccumulators()) {
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filtersList.size());
    }

    KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);

    for (String fieldName : filtersList.get(0).schema().fieldNames()) {
        ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);

        List<Object> columnValues = Lists.newArrayList();
        for (Row filter : filtersList) {
            Object columnValue = RowUtils.get(filter, fieldName);
            columnValues.add(columnValue);
        }

        KuduPredicate predicate = KuduPredicate.newInListPredicate(columnSchema, columnValues);
        builder = builder.addPredicate(predicate);
    }

    KuduScanner scanner = builder.build();

    return scanner;
}
        .lookForAccumulatorByName("numRowGroups");
if (accu.isDefined()) {
    ((LongAccumulator) accu.get()).add((long) blocks.size());
@Test
public void testForeach() {
    LongAccumulator accum = jsc.sc().longAccumulator();
    List<String> data = Arrays.asList("a", "b", "c");
    Dataset<String> ds = spark.createDataset(data, Encoders.STRING());

    ds.foreach((ForeachFunction<String>) s -> accum.add(1));
    Assert.assertEquals(3, accum.value().intValue());
}