private void buildProfile() { Map<Tuple2<String, Option<HoodieRecordLocation>>, Long> partitionLocationCounts = taggedRecords .mapToPair(record -> new Tuple2<>( new Tuple2<>(record.getPartitionPath(), Option.apply(record.getCurrentLocation())), record)).countByKey(); for (Map.Entry<Tuple2<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts .entrySet()) { String partitionPath = e.getKey()._1(); Long count = e.getValue(); Option<HoodieRecordLocation> locOption = e.getKey()._2(); if (!partitionPathStatMap.containsKey(partitionPath)) { partitionPathStatMap.put(partitionPath, new WorkloadStat()); } if (locOption.isDefined()) { // update partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count); globalStat.addUpdates(locOption.get(), count); } else { // insert partitionPathStatMap.get(partitionPath).addInserts(count); globalStat.addInserts(count); } } }
/**
 * Creates a profile over the given records and builds it immediately.
 *
 * @param taggedRecords records to profile; stored on the instance and read by
 *     {@code buildProfile()}
 */
public WorkloadProfile(JavaRDD<HoodieRecord<T>> taggedRecords) {
  // Start from empty statistics; buildProfile() fills them in.
  this.globalStat = new WorkloadStat();
  this.partitionPathStatMap = new HashMap<>();
  this.taggedRecords = taggedRecords;

  buildProfile();
}
profile.getPartitionPaths().stream().forEach(path -> { WorkloadStat partitionStat = profile.getWorkloadStat(path.toString()); partitionStat.getUpdateLocationToCount().entrySet().stream().forEach(entry -> { HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(entry.getKey());
/**
 * Builds a workload profile for the supplied records.
 *
 * <p>The per-partition map and the global stat are initialised empty, then
 * {@code buildProfile()} is invoked to fill them.
 *
 * @param taggedRecords the records this profile is computed from
 */
public WorkloadProfile(JavaRDD<HoodieRecord<T>> taggedRecords) {
  this.partitionPathStatMap = new HashMap<>();
  this.globalStat = new WorkloadStat();
  this.taggedRecords = taggedRecords;

  // All fields are set at this point, so the profile can be computed.
  buildProfile();
}
profile.getPartitionPaths().stream().forEach(path -> { WorkloadStat partitionStat = profile.getWorkloadStat(path.toString()); partitionStat.getUpdateLocationToCount().entrySet().stream().forEach(entry -> { HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(entry.getKey());
private void buildProfile() { Map<Tuple2<String, Option<HoodieRecordLocation>>, Long> partitionLocationCounts = taggedRecords .mapToPair(record -> new Tuple2<>( new Tuple2<>(record.getPartitionPath(), Option.apply(record.getCurrentLocation())), record)).countByKey(); for (Map.Entry<Tuple2<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts .entrySet()) { String partitionPath = e.getKey()._1(); Long count = e.getValue(); Option<HoodieRecordLocation> locOption = e.getKey()._2(); if (!partitionPathStatMap.containsKey(partitionPath)) { partitionPathStatMap.put(partitionPath, new WorkloadStat()); } if (locOption.isDefined()) { // update partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count); globalStat.addUpdates(locOption.get(), count); } else { // insert partitionPathStatMap.get(partitionPath).addInserts(count); globalStat.addInserts(count); } } }