bytesWritten.add(countSizeInBytes(row));
/** Verifies foreachPartition visits every element: 2 inputs yield an accumulator value of 2. */
@Test
public void foreachPartition() {
  final LongAccumulator partitionCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  words.foreachPartition(elements -> {
    // Drain the partition iterator, bumping the driver-side accumulator once per element.
    while (elements.hasNext()) {
      elements.next();
      partitionCounter.add(1);
    }
  });
  assertEquals(2, partitionCounter.value().intValue());
}
/** Verifies foreachPartition visits every element: 2 inputs yield an accumulator value of 2. */
@Test
public void foreachPartition() {
  final LongAccumulator partitionCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  words.foreachPartition(elements -> {
    // Drain the partition iterator, bumping the driver-side accumulator once per element.
    while (elements.hasNext()) {
      elements.next();
      partitionCounter.add(1);
    }
  });
  assertEquals(2, partitionCounter.value().intValue());
}
/** Verifies foreachPartition visits every element: 2 inputs yield an accumulator value of 2. */
@Test
public void foreachPartition() {
  final LongAccumulator partitionCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  words.foreachPartition(elements -> {
    // Drain the partition iterator, bumping the driver-side accumulator once per element.
    while (elements.hasNext()) {
      elements.next();
      partitionCounter.add(1);
    }
  });
  assertEquals(2, partitionCounter.value().intValue());
}
/** Verifies RDD.foreach applies the function once per element via a driver-side accumulator. */
@Test
public void foreach() {
  final LongAccumulator elementCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Each of the two elements should bump the accumulator exactly once.
  words.foreach(word -> elementCounter.add(1));
  assertEquals(2, elementCounter.value().intValue());
}
/** Verifies RDD.foreach applies the function once per element via a driver-side accumulator. */
@Test
public void foreach() {
  final LongAccumulator elementCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Each of the two elements should bump the accumulator exactly once.
  words.foreach(word -> elementCounter.add(1));
  assertEquals(2, elementCounter.value().intValue());
}
/** Verifies RDD.foreach applies the function once per element via a driver-side accumulator. */
@Test
public void foreach() {
  final LongAccumulator elementCounter = sc.sc().longAccumulator();
  final JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Each of the two elements should bump the accumulator exactly once.
  words.foreach(word -> elementCounter.add(1));
  assertEquals(2, elementCounter.value().intValue());
}
/** Verifies Dataset.foreach visits every element: 3 inputs yield an accumulator value of 3. */
@Test
public void testForeach() {
  final LongAccumulator counter = jsc.sc().longAccumulator();
  final List<String> letters = Arrays.asList("a", "b", "c");
  final Dataset<String> letterDs = spark.createDataset(letters, Encoders.STRING());
  // The cast disambiguates the overload so the Java lambda resolves to ForeachFunction.
  letterDs.foreach((ForeachFunction<String>) letter -> counter.add(1));
  Assert.assertEquals(3, counter.value().intValue());
}
/** Verifies Dataset.foreach visits every element: 3 inputs yield an accumulator value of 3. */
@Test
public void testForeach() {
  final LongAccumulator counter = jsc.sc().longAccumulator();
  final List<String> letters = Arrays.asList("a", "b", "c");
  final Dataset<String> letterDs = spark.createDataset(letters, Encoders.STRING());
  // The cast disambiguates the overload so the Java lambda resolves to ForeachFunction.
  letterDs.foreach((ForeachFunction<String>) letter -> counter.add(1));
  Assert.assertEquals(3, counter.value().intValue());
}
/** Verifies Dataset.foreach visits every element: 3 inputs yield an accumulator value of 3. */
@Test
public void testForeach() {
  final LongAccumulator counter = jsc.sc().longAccumulator();
  final List<String> letters = Arrays.asList("a", "b", "c");
  final Dataset<String> letterDs = spark.createDataset(letters, Encoders.STRING());
  // The cast disambiguates the overload so the Java lambda resolves to ForeachFunction.
  letterDs.foreach((ForeachFunction<String>) letter -> counter.add(1));
  Assert.assertEquals(3, counter.value().intValue());
}
/**
 * Validates that every key in {@code keys} is registered and increments the per-key
 * accumulator once per occurrence.
 *
 * @param keys keys to validate against {@code registeredKeys}
 * @param di   record being processed; included in the error log for context
 * @throws ForkOperationException if any key has not been registered
 */
private void verifyKeys(final List<Integer> keys, final DI di) {
    // A plain enhanced-for loop replaces keys.stream().forEach(...): streams add no value
    // for a purely side-effecting scan, and the loop reads more clearly.
    for (final Integer key : keys) {
        if (!this.registeredKeys.containsKey(key)) {
            log.error("Invalid key:{}: in keys:{}:for record:{}", key, keys, di);
            throw new ForkOperationException("Using unregistered key :" + key);
        }
        this.registeredKeys.get(key).get().add(1);
    }
}
/**
 * Publishes error and output row-count metrics for the given write statuses.
 * Does nothing unless both the statuses and the metrics sink are present.
 *
 * @param writesStatuses write statuses of the current run, if any
 */
private void logWriteMetrics(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    // Guard clause: metrics can only be reported when both pieces are available.
    if (!writesStatuses.isPresent() || !this.dataFeedMetrics.isPresent()) {
        return;
    }
    final JavaRDD<WriteStatus> statuses = writesStatuses.get();
    final LongAccumulator totalCount = statuses.rdd().sparkContext().longAccumulator();
    final LongAccumulator errorCount = statuses.rdd().sparkContext().longAccumulator();
    // foreach is a Spark action: it evaluates the RDD and populates both accumulators.
    statuses.foreach(writeStatus -> {
        errorCount.add(writeStatus.getFailedRecords().size());
        totalCount.add(writeStatus.getTotalRecords());
    });
    this.dataFeedMetrics.get().createLongMetric(
        DataFeedMetricNames.ERROR_ROWCOUNT, errorCount.value(), this.dataFeedMetricsTags);
    this.dataFeedMetrics.get().createLongMetric(
        DataFeedMetricNames.OUTPUT_ROWCOUNT,
        totalCount.value() - errorCount.value(),
        this.dataFeedMetricsTags);
}
/**
 * Builds a Kudu scanner constrained by the given filter rows.
 * For each field of the first filter's schema, an IN-list predicate is added containing
 * that field's value from every filter row.
 *
 * @param filters filter rows; must be non-empty and carry a schema on the first row
 * @param table   the Kudu table to scan
 * @return a scanner with one IN-list predicate per filter field
 * @throws KuduException   if the scanner cannot be created
 * @throws RuntimeException if no filters are provided or the first filter has no schema
 */
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
    List<Row> filtersList = Lists.newArrayList(filters);
    // isEmpty() is the idiomatic emptiness check (was: size() == 0).
    if (filtersList.isEmpty()) {
        throw new RuntimeException("Kudu existing filter was not provided.");
    }
    if (filtersList.get(0).schema() == null) {
        throw new RuntimeException("Kudu existing filter did not contain a schema.");
    }
    if (hasAccumulators()) {
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filtersList.size());
    }
    KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);
    // One IN-list predicate per field: the column must equal one of the filters' values.
    for (String fieldName : filtersList.get(0).schema().fieldNames()) {
        ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);
        List<Object> columnValues = Lists.newArrayList();
        for (Row filter : filtersList) {
            columnValues.add(RowUtils.get(filter, fieldName));
        }
        builder = builder.addPredicate(KuduPredicate.newInListPredicate(columnSchema, columnValues));
    }
    // Return directly; the intermediate local added nothing.
    return builder.build();
}
bytesWritten.add(countSizeInBytes(row));
/**
 * Computes {@link SinkStat} and persists changes into {@link IMetadataManager}.
 * As part of the {@link SinkStat} computation it derives the average record size
 * (write bytes / inserts) for the current run and stores it when positive.
 *
 * @param writesStatuses write statuses of the current run, if any
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (writesStatuses.isPresent()) {
        final LongAccumulator avgRecordSizeCounter =
            writesStatuses.get().rdd().sparkContext().longAccumulator();
        writesStatuses.get().foreach(writeStatus -> {
            final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
            final long numInserts =
                writeStatus.getStat().getNumWrites() - writeStatus.getStat().getNumUpdateWrites();
            // Only statuses that actually inserted data contribute to the average.
            if (writeBytes > 0 && numInserts > 0) {
                avgRecordSizeCounter.add(writeBytes / numInserts);
            }
        });
        // FIX: the previous (int) cast truncated the double average through int before the
        // long assignment, overflowing for averages above Integer.MAX_VALUE; cast to long.
        final long avgRecordSize = (long) avgRecordSizeCounter.avg();
        if (avgRecordSize > 0) {
            log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
            this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
        }
    }
    // Persist unconditionally so a run without statuses still flushes current state.
    this.sinkStatMgr.persist();
}
.lookForAccumulatorByName("numRowGroups"); if (accu.isDefined()) { ((LongAccumulator)accu.get()).add((long)blocks.size());
recordCounter.add(records); System.out.println("This RDD: " + records + " running total: " + recordCounter.value()); });
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }