public Void call(JavaRDD<String> rdd) {
    // Pull at most the first 100 IP addresses back to the driver.
    List<String> currentIPAddresses = rdd.take(100);
    return null;
}});
@Test
public void take() {
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
    assertEquals(1, rdd.first().intValue());
    assertEquals(Arrays.asList(1, 1), rdd.take(2));
    assertEquals(2, rdd.takeSample(false, 2, 42).size());
}
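For context, a minimal self-contained sketch (assuming a local master; class and app names are illustrative, not from the original source) contrasting the three driver-side actions this test exercises:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TakeDemo {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[2]", "take-demo");
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));
        List<Integer> firstTwo = rdd.take(2);                 // deterministic: [1, 1]
        Integer head = rdd.first();                           // equivalent to take(1).get(0)
        List<Integer> sample = rdd.takeSample(false, 2, 42);  // seeded random draw, no replacement
        System.out.println(firstTwo + " " + head + " " + sample);
        sc.stop();
    }
}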
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> contentSizeStats = Functions.contentSizeStats(accessLogs);
    List<Tuple2<Integer, Long>> responseCodeToCount =
        Functions.responseCodeCount(accessLogs).take(100);
    JavaPairRDD<String, Long> ipAddressCounts = Functions.ipAddressCount(accessLogs);
    List<String> ip = Functions.filterIPAddress(ipAddressCounts).take(100);
    // Guava's Ordering<Long> already implements Comparator<Long>; no raw cast needed.
    Comparator<Long> cmp = Ordering.natural();
    List<Tuple2<String, Long>> topEndpoints = Functions.endpointCount(accessLogs)
        .top(10, new Functions.ValueComparator<String, Long>(cmp));
    logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount, ip, topEndpoints);
    return null;
}});
}
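The top(10, ...) call above relies on the project-specific Functions.ValueComparator. As a hedged generic sketch (names here are illustrative, not from the original source), any serializable comparator over pairs works the same way:

import java.io.Serializable;
import java.util.Comparator;
import scala.Tuple2;

// Orders pairs by their Long value; top(n, cmp) returns the n LARGEST pairs
// under cmp, whereas take(n) returns the first n in partition order.
class ValueAscending implements Comparator<Tuple2<String, Long>>, Serializable {
    @Override
    public int compare(Tuple2<String, Long> a, Tuple2<String, Long> b) {
        return Long.compare(a._2(), b._2());
    }
}
// Usage (illustrative): endpointCounts.top(10, new ValueAscending());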
static List<Vector> getInitialCentroids(JavaRDD<Vector> data, final int K) {
    // take(K) returns the first K vectors in partition order (deterministic, not random).
    List<Vector> centroidTuples = data.take(K);
    final List<Vector> centroids = new ArrayList<Vector>(centroidTuples);
    return centroids;
}
List<List<Writable>> sample = processed.take(10);
@Override
public Iterator<String> head(final String location, final int totalLines) {
    return IteratorUtils.map(
        Spark.getRDD(location).toJavaRDD().take(totalLines).iterator(),
        Object::toString);
}
static List<Vector> getInitialCentroids(JavaRDD<Vector> data, final int K) {
    List<Vector> centroidTuples = data.take(K);
    final List<Vector> centroids = new ArrayList<>();
    centroids.addAll(centroidTuples);
    return centroids;
}
}
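Both getInitialCentroids variants above are deterministic, since take(K) reads the first K vectors in partition order. A hedged alternative sketch (method name, seed parameter, and the mllib Vector import are illustrative assumptions) that draws a random, usually better-spread initial set via takeSample:

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vector;  // assumed Vector type

static List<Vector> getRandomInitialCentroids(JavaRDD<Vector> data, final int K, long seed) {
    // takeSample(false, K, seed): K distinct vectors, reproducible for a fixed seed.
    return new ArrayList<>(data.takeSample(false, K, seed));
}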
@Override
public Void call(JavaRDD<Long> rdd) throws Exception {
    // Assumes the micro-batch is non-empty: take(1) on an empty RDD returns an
    // empty list, so get(0) would throw IndexOutOfBoundsException.
    Long num = rdd.take(1).get(0);
    String ts = sdf.format(new Date());
    Put put = new Put(Bytes.toBytes(ts));
    put.add(Bytes.toBytes("f"), Bytes.toBytes("nums"), Bytes.toBytes(num));
    table.put(put);
    return null;
}
});
@Override
public List<T> take(int n) {
    Preconditions.checkArgument(n >= 0, "n must be non-negative.");
    if (n == 0) {
        return Collections.emptyList();  // short-circuit: no Spark job for an empty request
    }
    return rdd.take(n);
}
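A brief usage sketch of the wrapper above (the collection variable is illustrative); the n == 0 short-circuit means no Spark job is launched for an empty request:

List<String> none = collection.take(0);   // empty list, no Spark job launched
List<String> five = collection.take(5);   // delegates to rdd.take(5)
// collection.take(-1) throws IllegalArgumentException via the precondition.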
@Override
public Void call(JavaRDD<Long> rdd) throws Exception {
    Long num = rdd.take(1).get(0);
    String ts = sdf.format(new Date());
    Put put = new Put(Bytes.toBytes(ts));
    put.add(Bytes.toBytes("f1"), Bytes.toBytes("nums"), Bytes.toBytes(num));
    table.put(put);
    return null;
}
});
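Both HBase writers above assume a non-empty micro-batch. A hedged guard sketch for the same take(1) pattern, skipping the write when the batch is empty:

// Defensive variant of the take(1).get(0) pattern used above.
List<Long> head = rdd.take(1);
if (head.isEmpty()) {
    return null;  // empty micro-batch: nothing to write this interval
}
Long num = head.get(0);
// ... build and issue the Put as in the original snippets ...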
public static SpatialRDD<Geometry> createSpatialRDD(JavaRDD rawTextRDD, FormatMapper<Geometry> formatMapper) {
    SpatialRDD<Geometry> spatialRDD = new SpatialRDD<>();
    spatialRDD.rawSpatialRDD = rawTextRDD.mapPartitions(formatMapper);
    // Property names are read from the first raw record only.
    spatialRDD.fieldNames = formatMapper.readPropertyNames(rawTextRDD.take(1).get(0).toString());
    return spatialRDD;
}
}
@Override
public SpashCollection<String> head(Path file, int lines) {
    URI uri = SpashFileSystem.get().getURI(file.normalize().toString());
    JavaRDD<String> rdd = sc.textFile(uri.toString());
    List<String> stream = rdd.take(lines);
    return new SpashCollectionListAdapter<>(stream);
}
/**
 * Instantiates a new point RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param Offset the offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public PointRDD(JavaSparkContext sparkContext, String InputLocation, Integer Offset, FileDataSplitter splitter,
        boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (Offset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(Offset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PointFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
/**
 * Instantiates a new line string RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param startOffset the start offset
 * @param endOffset the end offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public LineStringRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset,
        FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (startOffset != null && endOffset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(startOffset, endOffset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new LineStringFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
/**
 * Instantiates a new polygon RDD.
 *
 * @param sparkContext the spark context
 * @param InputLocation the input location
 * @param startOffset the start offset
 * @param endOffset the end offset
 * @param splitter the splitter
 * @param carryInputData the carry input data
 * @param partitions the partitions
 * @param newLevel the new level
 * @param sourceEpsgCRSCode the source EPSG CRS code
 * @param targetEpsgCode the target EPSG code
 */
public PolygonRDD(JavaSparkContext sparkContext, String InputLocation, Integer startOffset, Integer endOffset,
        FileDataSplitter splitter, boolean carryInputData, Integer partitions, StorageLevel newLevel,
        String sourceEpsgCRSCode, String targetEpsgCode) {
    JavaRDD rawTextRDD = partitions != null
        ? sparkContext.textFile(InputLocation, partitions)
        : sparkContext.textFile(InputLocation);
    if (startOffset != null && endOffset != null) {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(startOffset, endOffset, splitter, carryInputData)));
    } else {
        this.setRawSpatialRDD(rawTextRDD.mapPartitions(new PolygonFormatMapper(splitter, carryInputData)));
    }
    if (sourceEpsgCRSCode != null && targetEpsgCode != null) {
        this.CRSTransform(sourceEpsgCRSCode, targetEpsgCode);
    }
    if (newLevel != null) {
        this.analyze(newLevel);
    }
    if (splitter.equals(FileDataSplitter.GEOJSON)) {
        this.fieldNames = FormatMapper.readGeoJsonPropertyNames(rawTextRDD.take(1).get(0).toString());
    }
}
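A hypothetical construction call for the GeoJSON branch of these constructors (the input path, partition count, and CRS codes are illustrative, not from the original source):

PointRDD points = new PointRDD(sc, "hdfs:///data/points.json", null,
        FileDataSplitter.GEOJSON, true, 4, StorageLevel.MEMORY_ONLY(),
        "epsg:4326", "epsg:3857");
// With splitter == GEOJSON, fieldNames is populated from the first raw record.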
/**
 * Build a ResultSet from an RDD, depending on the context.
 *
 * @param rdd RDD which corresponds to the Spark result.
 * @param isRoot Indicates if this node is the root of this plan.
 * @param isCount Indicates if this query has a COUNT clause.
 * @param selectedCols List of columns selected in the current SelectStatement.
 * @return ResultSet containing the built result.
 */
private ResultSet returnResult(JavaRDD<Cells> rdd, boolean isRoot, boolean isCount, List<String> selectedCols) {
    if (isRoot) {
        if (isCount) {
            return DeepUtils.buildCountResult(rdd);
        }
        return DeepUtils.buildResultSet(rdd.take(DEFAULT_RESULT_SIZE), selectedCols);
    } else {
        CassandraResultSet crs = new CassandraResultSet();
        crs.add(new Row("RDD", new Cell(rdd)));
        List<ColumnMetadata> columns = new ArrayList<>();
        ColumnMetadata metadata = new ColumnMetadata("RDD", "RDD");
        ColumnType type = ColumnType.VARCHAR;
        type.setDBMapping("class", JavaRDD.class);
        metadata.setType(type);
        columns.add(metadata);  // was missing: metadata was built but never attached to the list
        crs.setColumnMetadata(columns);
        LOG.info("LEAF: rdd.count=" + ((int) rdd.count()));
        return crs;
    }
}
// First write status from the result RDD; derive the original and hidden paths from it.
WriteStatus status = result.take(1).get(0);
Path origPath = new Path(basePath + "/" + status.getStat().getPath());
Path hidePath = new Path(basePath + "/" + status.getStat().getPath() + "_hide");