/**
 * Decides whether the given fork data matches this filter.
 *
 * <p>The result is {@code true} exactly when the keys reported by
 * {@code forkData.getKeys()} contain this function's {@code filterKey}. The keys are
 * expected to be the ones assigned by the corresponding ForkFunction.
 *
 * <p>NOTE(review): whether a {@code true} result retains or drops the record depends on
 * how the caller applies this predicate — confirm against the caller.
 *
 * @param forkData fork data whose keys are checked against {@code filterKey}.
 * @return {@code true} if {@code filterKey} is among the keys of {@code forkData},
 *         {@code false} otherwise.
 */
protected Boolean execute(final ForkData<DI> forkData) {
    final boolean hasFilterKey = forkData.getKeys().contains(this.filterKey);
    return hasFilterKey;
}
}
/**
 * Returns the records of the grouped RDD that pass the given filter function.
 *
 * <p>The grouped RDD is filtered with {@code filterFunction} and every surviving element
 * is unwrapped through {@code getRecord()}.
 *
 * @param filterFunction predicate applied to each element of the grouped RDD.
 * @return RDD of the unwrapped records that passed the filter.
 * @throws ForkOperationException if no grouped RDD is present.
 */
public JavaRDD<DI> getRDD(final FilterFunction<DI> filterFunction) {
    if (this.groupRDD.isPresent()) {
        return this.groupRDD.get()
            .filter(filterFunction)
            .map(forkData -> forkData.getRecord());
    }
    // Nothing to filter: the grouped RDD is absent.
    throw new ForkOperationException("No RDD is found");
}
@Override protected List<ForkData<String>> process(final String record) { final List<Integer> keys = new LinkedList<>(); if (record.toLowerCase().contains("error")) { // errors. keys.add(INVALID_KEY); } else { if (record.toLowerCase().contains("delete")) { // deleted records. keys.add(DELETE_KEY); } // all no error records are valid. keys.add(VALID_KEY); } return Collections.singletonList(new ForkData<>(keys, record)); } }
/**
 * Processes one input element and returns an iterator over the resulting fork data.
 *
 * <p>The input is handed to {@code process}; the keys of every produced element are then
 * passed to {@code verifyKeys} together with the original input before the iterator is
 * returned.
 *
 * @param di input element to process.
 * @return iterator over the fork data produced by {@code process}.
 */
@Override
public final Iterator<ForkData<DI>> call(final DI di) {
    final List<ForkData<DI>> results = process(di);
    for (final ForkData<DI> result : results) {
        verifyKeys(result.getKeys(), di);
    }
    return results.iterator();
}
@Override protected List<ForkData<String>> process(final String record) { final List<ForkData<String>> forkData = super.process(record); // Add invalid keys. forkData.stream().forEach(fd -> fd.getKeys().add(UNREGISTERED_KEY)); return forkData; } }