bytesWritten.add(countSizeInBytes(row));
/**
 * {@link #updateSinkStat(Optional)} will compute {@link SinkStat} and persist changes into
 * {@link IMetadataManager}. As part of the {@link SinkStat} computation, it will compute the
 * average record size for the current run.
 *
 * @param writesStatuses write statuses of the current run, if any
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (writesStatuses.isPresent()) {
        final LongAccumulator avgRecordSizeCounter = writesStatuses.get().rdd().sparkContext().longAccumulator();
        writesStatuses.get().foreach(
            writeStatus -> {
                final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
                final long numInserts = writeStatus.getStat().getNumWrites()
                    - writeStatus.getStat().getNumUpdateWrites();
                if (writeBytes > 0 && numInserts > 0) {
                    avgRecordSizeCounter.add(writeBytes / numInserts);
                }
            }
        );
        final long avgRecordSize = (long) avgRecordSizeCounter.avg();
        if (avgRecordSize > 0) {
            log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
            this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
        }
    }
    this.sinkStatMgr.persist();
}
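// A minimal, self-contained sketch of the same driver-side averaging pattern used by
// updateSinkStat above: executors add one value per record into a LongAccumulator and the
// driver reads avg() once the action has finished. The class name, dataset values, and
// accumulator name below are illustrative assumptions, not taken from the original project.
import java.util.Arrays;

import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;

public class AvgRecordSizeSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .master("local[*]")
            .appName("avg-record-size-sketch")
            .getOrCreate();

        // longAccumulator(name) both creates and registers the accumulator with the SparkContext.
        LongAccumulator recordSizeBytes = spark.sparkContext().longAccumulator("recordSizeBytes");

        Dataset<Long> sizes = spark.createDataset(Arrays.asList(120L, 80L, 100L), Encoders.LONG());

        // Executor-side adds; avg() is only meaningful on the driver after the action completes.
        sizes.foreach((ForeachFunction<Long>) v -> recordSizeBytes.add(v));

        long avgRecordSize = (long) recordSizeBytes.avg(); // 100
        System.out.println("avg record size = " + avgRecordSize);

        spark.stop();
    }
}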
/**
 * Instantiates a new SparkMLongAccumulator.
 *
 * @param name the name of the accumulator
 */
public SparkMLongAccumulator(String name) {
    super(new LongAccumulator());
    this.name = name;
}
private void verifyKeys(final List<Integer> keys, final DI di) {
    keys.stream().forEach(key -> {
        if (!this.registeredKeys.containsKey(key)) {
            log.error("Invalid key:{}: in keys:{}:for record:{}", key, keys, di);
            throw new ForkOperationException("Using unregistered key :" + key);
        }
        this.registeredKeys.get(key).get().add(1);
    });
}
        Set<String> fileIdsWithPendingCompactions) throws IOException {
    totalLogFiles = new LongAccumulator();
    totalFileSlices = new LongAccumulator();
    jsc.sc().register(totalLogFiles);
    jsc.sc().register(totalFileSlices);
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
    List<Row> filtersList = Lists.newArrayList(filters);

    if (filtersList.size() == 0) {
        throw new RuntimeException("Kudu existing filter was not provided.");
    }

    if (filtersList.get(0).schema() == null) {
        throw new RuntimeException("Kudu existing filter did not contain a schema.");
    }

    if (hasAccumulators()) {
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
        accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filtersList.size());
    }

    KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);

    for (String fieldName : filtersList.get(0).schema().fieldNames()) {
        ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);

        List<Object> columnValues = Lists.newArrayList();
        for (Row filter : filtersList) {
            Object columnValue = RowUtils.get(filter, fieldName);
            columnValues.add(columnValue);
        }

        KuduPredicate predicate = KuduPredicate.newInListPredicate(columnSchema, columnValues);
        builder = builder.addPredicate(predicate);
    }

    KuduScanner scanner = builder.build();

    return scanner;
}
        .lookForAccumulatorByName("numRowGroups");
if (accu.isDefined()) {
    ((LongAccumulator) accu.get()).add((long) blocks.size());
@Test
public void testForeach() {
    LongAccumulator accum = jsc.sc().longAccumulator();
    List<String> data = Arrays.asList("a", "b", "c");
    Dataset<String> ds = spark.createDataset(data, Encoders.STRING());

    ds.foreach((ForeachFunction<String>) s -> accum.add(1));
    Assert.assertEquals(3, accum.value().intValue());
}