// Write out the PMML model, then release the cached inputs and broadcast data.
pmmlToMFModel(sparkContext, model, modelParentPath, bUserIDToIndex, bItemIDToIndex);
parsedTestRDD.unpersist();
bItemIDToIndex.unpersist();
parsedRDD.unpersist();
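Broadcast variables have their own unpersist(): it drops the copies cached on the executors while keeping the value on the driver, so the variable can be re-broadcast if it is read again (destroy() removes it everywhere and is final). A minimal sketch with illustrative names:

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

JavaSparkContext jsc = new JavaSparkContext("local[*]", "broadcast-demo");
Map<String, Integer> itemIDToIndex = new HashMap<>();
itemIDToIndex.put("item-1", 0);

Broadcast<Map<String, Integer>> bItemIDToIndex = jsc.broadcast(itemIDToIndex);
// ... run jobs that read bItemIDToIndex.value() on the executors ...
bItemIDToIndex.unpersist(); // free executor-side copies once the jobs are done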
public void close() {
  if (this.groupRDD.isPresent()) {
    this.groupRDD.get().unpersist();
    this.groupRDD = Optional.absent(); // Guava Optional; clearing the field makes close() idempotent
  }
}
}
@Override
public void close() throws IOException {
  this.rdd.unpersist();
  if (onClose != null) {
    onClose.run();
  }
}
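A Closeable hook like this pairs naturally with try-with-resources, so the RDD is unpersisted even if processing throws. A minimal sketch of such a wrapper; UnpersistOnClose and its fields are hypothetical, modeled on the snippet above:

import java.io.Closeable;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.storage.StorageLevel;

// Hypothetical wrapper: caches an RDD and unpersists it on close().
final class UnpersistOnClose<T> implements Closeable {
  private final JavaRDD<T> rdd;
  private final Runnable onClose; // optional extra cleanup, as in the snippet above

  UnpersistOnClose(JavaRDD<T> rdd, Runnable onClose) {
    this.rdd = rdd.persist(StorageLevel.MEMORY_ONLY());
    this.onClose = onClose;
  }

  JavaRDD<T> rdd() {
    return rdd;
  }

  @Override
  public void close() {
    rdd.unpersist(); // release the cached blocks
    if (onClose != null) {
      onClose.run();
    }
  }
}

// Usage:
// try (UnpersistOnClose<String> cached = new UnpersistOnClose<>(lines, null)) {
//   long n = cached.rdd().count();
// }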
train_samples.unpersist();
System.out.println("Start Learning...");
Date startTime = new Date();
@Override
public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc,
    HoodieTable<T> hoodieTable) {
  // Step 0: cache the input record RDD
  if (config.getBloomIndexUseCaching()) {
    recordRDD.persist(config.getBloomIndexInputStorageLevel());
  }

  // Step 1: extract a thinner JavaPairRDD of (partitionPath, recordKey)
  JavaPairRDD<String, String> partitionRecordKeyPairRDD = recordRDD
      .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey()));

  // Look up the index for every (partitionPath, recordKey) pair
  // (steps 2 and 3 happen inside lookupIndex)
  JavaPairRDD<String, String> rowKeyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable);

  // Cache the result for the subsequent stages.
  if (config.getBloomIndexUseCaching()) {
    rowKeyFilenamePairRDD.persist(StorageLevel.MEMORY_AND_DISK_SER());
  }
  if (logger.isDebugEnabled()) {
    long totalTaggedRecords = rowKeyFilenamePairRDD.count();
    logger.debug("Number of update records (ones tagged with a fileID): " + totalTaggedRecords);
  }

  // Step 4: tag the incoming records as inserts or updates by joining with existing record keys
  // Cost: 4 sec.
  JavaRDD<HoodieRecord<T>> taggedRecordRDD = tagLocationBacktoRecords(rowKeyFilenamePairRDD, recordRDD);

  if (config.getBloomIndexUseCaching()) {
    recordRDD.unpersist(); // unpersist the input record RDD
    rowKeyFilenamePairRDD.unpersist();
  }
  return taggedRecordRDD;
}
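The method above illustrates a common lifecycle: persist the input behind a config flag, persist an intermediate result that several stages consume, then unpersist both once the final RDD exists. A minimal, self-contained sketch of the same lifecycle; the names and the useCaching flag are illustrative stand-ins, not Hudi's API:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public final class CacheLifecycleDemo {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[*]", "cache-lifecycle");
    boolean useCaching = true; // stands in for a config flag like getBloomIndexUseCaching()

    JavaRDD<String> input = jsc.parallelize(Arrays.asList("a", "b", "c"));
    if (useCaching) {
      input.persist(StorageLevel.MEMORY_AND_DISK_SER());
    }

    // Derive an intermediate result that will be consumed more than once.
    JavaRDD<String> intermediate = input.map(s -> s + "!");
    if (useCaching) {
      intermediate.persist(StorageLevel.MEMORY_AND_DISK_SER());
    }

    // Each action below re-reads 'intermediate'; the persist above keeps
    // it from being recomputed from 'input'.
    long n = intermediate.count();
    JavaRDD<String> result = intermediate.filter(s -> !s.isEmpty());
    result.count(); // materialize the final result before dropping the caches

    if (useCaching) {
      input.unpersist();
      intermediate.unpersist();
    }
    jsc.close();
  }
}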
}).cache(); // cache the derived RDD, then release the parent
samples.unpersist();
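One caveat with this handoff: cache() is lazy, so if the parent is unpersisted before the derived RDD has been materialized by an action, the first action on it recomputes the parent's lineage from scratch. A defensive variant (illustrative names) forces materialization first:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

JavaSparkContext jsc = new JavaSparkContext("local[*]", "handoff-demo");
JavaRDD<Integer> samples = jsc.parallelize(Arrays.asList(1, 2, 3)).cache();

// Cache the derived RDD and materialize it while the parent is still cached.
JavaRDD<Integer> scaled = samples.map(x -> x * 10).cache();
scaled.count();      // forces computation; 'scaled' is now in the block store
samples.unpersist(); // safe: 'scaled' no longer needs to recompute its parent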
hist = drdd.histogram(maxHistogramBuckets);
seqLengths.unpersist();
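JavaDoubleRDD.histogram(int bucketCount) returns a Tuple2<double[], long[]> of bucket boundaries and per-bucket counts; the buckets are evenly spaced between the RDD's min and max, so the data is scanned twice, which is why the RDD is typically cached first. A small sketch with illustrative data:

import java.util.Arrays;
import org.apache.spark.api.java.JavaDoubleRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

JavaSparkContext jsc = new JavaSparkContext("local[*]", "histogram-demo");
JavaDoubleRDD seqLengths = jsc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 2.0, 5.0, 9.0)).cache();

// One pass finds min/max for the bucket boundaries, a second pass counts.
Tuple2<double[], long[]> hist = seqLengths.histogram(4);

seqLengths.unpersist(); // done with the cached lengths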