public Void call(JavaRDD<String> rdd) {
    // Pull at most the first 100 IP addresses back to the driver.
    List<String> currentIPAddresses = rdd.take(100);
    return null;
}});
@Test
public void take() {
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
    assertEquals(1, rdd.first().intValue());
    assertEquals(Arrays.asList(1, 1), rdd.take(2));
    assertEquals(2, rdd.takeSample(false, 2, 42).size());
}
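For context, a minimal self-contained sketch (assuming a local master; class and app names are illustrative, not from the original source) contrasting the three driver-side actions this test exercises:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TakeDemo {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "take-demo");
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
        List<Integer> firstTwo = rdd.take(2);                 // deterministic: [1, 1]
        Integer head = rdd.first();                           // equivalent to take(1).get(0)
        List<Integer> sample = rdd.takeSample(false, 2, 42);  // seeded random draw, no replacement
        System.out.println(firstTwo + " " + head + " " + sample);
        sc.stop();
    }
}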
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> contentSizeStats = Functions.contentSizeStats(accessLogs);
    List<Tuple2<Integer, Long>> responseCodeToCount =
        Functions.responseCodeCount(accessLogs).take(100);
    JavaPairRDD<String, Long> ipAddressCounts = Functions.ipAddressCount(accessLogs);
    List<String> ip = Functions.filterIPAddress(ipAddressCounts).take(100);
    // Guava's Ordering<Long> already implements Comparator<Long>; no raw cast needed.
    Comparator<Long> cmp = Ordering.natural();
    List<Tuple2<String, Long>> topEndpoints = Functions.endpointCount(accessLogs)
        .top(10, new Functions.ValueComparator<String, Long>(cmp));
    logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount, ip, topEndpoints);
    return null;
}});
}
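The top(10, ...) call above relies on the project-specific Functions.ValueComparator. As a hedged generic sketch (names here are illustrative, not from the original source), any serializable comparator over pairs works the same way:

import java.io.Serializable;
import java.util.Comparator;
import scala.Tuple2;

// Orders pairs by their Long value; top(n, cmp) returns the n LARGEST pairs
// under cmp, whereas take(n) returns the first n in partition order.
class ValueAscending implements Comparator<Tuple2<String, Long>>, Serializable {
    @Override
    public int compare(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        return Long.compare(a._2(), b._2());
    }
}
// Usage (illustrative): endpointCounts.top(10, new ValueAscending());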
static List<Vector> getInitialCentroids(JavaRDD<Vector> data, final int K) {
    // take(K) returns the first K vectors in partition order (deterministic, not random).
    List<Vector> centroidTuples = data.take(K);
    final List<Vector> centroids = new ArrayList<Vector>(centroidTuples);
    return centroids;
}
List<List<Writable>> sample = processed.take(10);
@Override
public Iterator<String> head(final String location, final int totalLines) {
    return IteratorUtils.map(
        Spark.getRDD(location).toJavaRDD().take(totalLines).iterator(),
        Object::toString);
}
static List<Vector> getInitialCentroids(JavaRDD<Vector> data, final int K) {
    List<Vector> centroidTuples = data.take(K);
    final List<Vector> centroids = new ArrayList<>();
    centroids.addAll(centroidTuples);
    return centroids;
}
}
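Both getInitialCentroids variants above are deterministic, since take(K) reads the first K vectors in partition order. A hedged alternative sketch (method name, seed parameter, and the mllib Vector import are illustrative assumptions) that draws a random, usually better-spread initial set via takeSample:

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vector;  // assumed Vector type

static List<Vector> getRandomInitialCentroids(JavaRDD<Vector> data, final int K, long seed) {
    // takeSample(false, K, seed): K distinct vectors, reproducible for a fixed seed.
    return new ArrayList<>(data.takeSample(false, K, seed));
}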
@Override
public Void call(JavaRDD<Long> rdd) throws Exception {
    // Assumes the micro-batch is non-empty: take(1) on an empty RDD returns an
    // empty list, so get(0) would throw IndexOutOfBoundsException.
    Long num = rdd.take(1).get(0);
    String ts = sdf.format(new Date());
    Put put = new Put(Bytes.toBytes(ts));
    put.add(Bytes.toBytes("f"), Bytes.toBytes("nums"), Bytes.toBytes(num));
    table.put(put);
    return null;
}
});
@Override
public List<T> take(int n) {
    Preconditions.checkArgument(n >= 0, "n must be non-negative.");
    if (n == 0) {
        return Collections.emptyList();  // short-circuit: no Spark job for an empty request
    }
    return rdd.take(n);
}
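A brief usage sketch of the wrapper above (the collection variable is illustrative); the n == 0 short-circuit means no Spark job is launched for an empty request:

List<String> none = collection.take(0);   // empty list, no Spark job launched
List<String> five = collection.take(5);   // delegates to rdd.take(5)
// collection.take(-1) throws IllegalArgumentException via the precondition.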
@Override
public Void call(JavaRDD<Long> rdd) throws Exception {
    Long num = rdd.take(1).get(0);
    String ts = sdf.format(new Date());
    Put put = new Put(Bytes.toBytes(ts));
    put.add(Bytes.toBytes("f1"), Bytes.toBytes("nums"), Bytes.toBytes(num));
    table.put(put);
    return null;
}
});
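Both HBase writers above assume a non-empty micro-batch. A hedged guard sketch for the same take(1) pattern, skipping the write when the batch is empty:

// Defensive variant of the take(1).get(0) pattern used above.
List<Long> head = rdd.take(1);
if (head.isEmpty()) {
    return null;  // empty micro-batch: nothing to write this interval
}
Long num = head.get(0);
// ... build and issue the Put as in the original snippets ...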
public static SpatialRDD<Geometry> createSpatialRDD(JavaRDD rawTextRDD, FormatMapper<Geometry> formatMapper) {
    SpatialRDD<Geometry> spatialRDD = new SpatialRDD<>();
    spatialRDD.rawSpatialRDD = rawTextRDD.mapPartitions(formatMapper);
    // Property names are read from the first raw record only.
    spatialRDD.fieldNames = formatMapper.readPropertyNames(rawTextRDD.take(1).get(0).toString());
    return spatialRDD;
}
}
@Override
public SpashCollection<String> head(Path file, int lines) {
    URI uri = SpashFileSystem.get().getURI(file.normalize().toString());
    JavaRDD<String> rdd = sc.textFile(uri.toString());
    List<String> stream = rdd.take(lines);
    return new SpashCollectionListAdapter<>(stream);
}
/**
 * Instantiates a new point RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param Offset the offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter,
        boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (Offset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(Offset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
/**
 * Instantiates a new line string RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param startOffset the start offset
 * @param endOffset the end offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset,
        FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (startOffset != null && endOffset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(startOffset, endOffset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
/**
 * Instantiates a new polygon RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param startOffset the start offset
 * @param endOffset the end offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset,
        FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (startOffset != null && endOffset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(startOffset, endOffset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
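A hypothetical construction call for the GeoJSON branch of these constructors (the input path, partition count, and CRS codes are illustrative, not from the original source):

PointRDD points = new PointRDD(sc, "hdfs:///data/points.json", null,
        FileDataSplitter.GEOJSON, true, 4, StorageLevel.MEMORY_ONLY(),
        "epsg:4326", "epsg:3857");
// With splitter == GEOJSON, fieldNames is populated from the first raw record.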
/**
 * Build a ResultSet from an RDD, depending on the context.
 *
 * @param rdd RDD which corresponds to the Spark result.
 * @param isRoot Indicates if this node is the root of this plan.
 * @param isCount Indicates if this query has a COUNT clause.
 * @param selectedCols List of columns selected in the current SelectStatement.
 * @return ResultSet containing the built result.
 */
private ResultSet returnResult(JavaRDD<Cells> rdd, boolean isRoot, boolean isCount, List<String> selectedCols) {
    if (isRoot) {
        if (isCount) {
            return DeepUtils.buildCountResult(rdd);
        }
        return DeepUtils.buildResultSet(rdd.take(DEFAULT_RESULT_SIZE), selectedCols);
    } else {
        CassandraResultSet crs = new CassandraResultSet();
        crs.add(new Row("RDD", new Cell(rdd)));
        List<ColumnMetadata> columns = new ArrayList<>();
        ColumnMetadata metadata = new ColumnMetadata("RDD", "RDD");
        ColumnType type = ColumnType.VARCHAR;
        type.setDBMapping("class", JavaRDD.class);
        metadata.setType(type);
        columns.add(metadata);  // was missing: metadata was built but never attached to the list
        crs.setColumnMetadata(columns);
        LOG.info("LEAF: rdd.count=" + ((int) rdd.count()));
        return crs;
    }
}
// First write status from the result RDD; derive the original and hidden paths from it.
WriteStatus status = result.take(1).get(0);
Path origPath = new Path(basePath + "/" + status.getStat().getPath());
Path hidePath = new Path(basePath + "/" + status.getStat().getPath() + "_hide");