/**
 * {@inheritDoc}
 *
 * <p>Delegates to {@code underlying().union(other)} and wraps the result with {@code factory()}.
 */
default LTable<K, V> union(PTable<K, V> other) {
  return factory().wrap(underlying().union(other));
}
}
/**
 * Generates one {@link BloomFilter} for every entry listed under {@code inputPath}. Each entry
 * is read as a text file and passed through {@code filterFn}; the per-file tables are unioned
 * together and returned as a {@link PObject} containing a {@link Map} having file names as keys
 * and filters as values.
 *
 * @param inputPath the path whose entries are read as text input
 * @param filterFn the function used to build the filter for each file
 * @return a {@link PObject} wrapping the map of filters
 * @throws IOException if the file system cannot be accessed or the path cannot be listed
 * @throws IllegalArgumentException if {@code inputPath} contains no entries to build filters from
 */
public static PObject<Map<String, BloomFilter>> createFilter(Path inputPath, BloomFilterFn<String> filterFn)
    throws IOException {
  MRPipeline pipeline = new MRPipeline(BloomFilterFactory.class);
  // Resolve the file system from the path itself so that paths qualified for a
  // non-default file system are listed correctly.
  FileSystem fileSystem = inputPath.getFileSystem(pipeline.getConfiguration());
  FileStatus[] listStatus = fileSystem.listStatus(inputPath);
  if (listStatus == null || listStatus.length == 0) {
    // Without this guard the final asMap() call would fail with a bare NullPointerException.
    throw new IllegalArgumentException("No input files found under " + inputPath);
  }

  PTable<String, BloomFilter> filterTable = null;
  for (FileStatus fileStatus : listStatus) {
    Path path = fileStatus.getPath();
    PCollection<String> readTextFile = pipeline.readTextFile(path.toString());
    // NOTE(review): the filter name is written to the shared pipeline configuration on every
    // iteration, so later files overwrite earlier values; confirm that BloomFilterFn captures
    // the per-file name rather than only the last value set.
    pipeline.getConfiguration().set(BloomFilterFn.CRUNCH_FILTER_NAME, path.getName());
    PTable<String, BloomFilter> currentTable = createFilterTable(readTextFile, filterFn);
    filterTable = (filterTable == null) ? currentTable : filterTable.union(currentTable);
  }
  return filterTable.asMap();
}
/**
 * {@inheritDoc}
 *
 * <p>Unions the two underlying tables ({@code underlying().union(other.underlying())}) and
 * wraps the result with {@code factory()}.
 */
default LTable<K, V> union(LTable<K, V> other) {
  return factory().wrap(underlying().union(other.underlying()));
}
PTable<K, Union> union = firstInter.union(inter); PGroupedTable<K, Union> grouped; if (numReducers > 0) {
/**
 * Prepares two tables for joining: every record is re-keyed as {@code (key, tag)}, where the
 * tag is {@code 0} for records from {@code left} and {@code 1} for records from {@code right},
 * and its value becomes a pair with only the slot of its own side populated. The two tagged
 * tables are unioned and grouped using the partitioner supplied by {@code JoinUtils}.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @return the grouped union of the tagged tables
 */
private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right) {
  PTypeFamily typeFamily = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = typeFamily.tableOf(
      typeFamily.pairs(left.getKeyType(), typeFamily.ints()),
      typeFamily.pairs(left.getValueType(), right.getValueType()));

  // Left records: tag 0, right-hand value slot left null.
  MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>> tagLeftFn =
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> record) {
          Pair<K, Integer> taggedKey = Pair.of(record.first(), 0);
          Pair<U, V> leftOnly = Pair.of(record.second(), (V) null);
          return Pair.of(taggedKey, leftOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> leftTagged = left.parallelDo("joinTagLeft", tagLeftFn, taggedType);

  // Right records: tag 1, left-hand value slot left null.
  MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>> tagRightFn =
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> record) {
          Pair<K, Integer> taggedKey = Pair.of(record.first(), 1);
          Pair<U, V> rightOnly = Pair.of((U) null, record.second());
          return Pair.of(taggedKey, rightOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> rightTagged = right.parallelDo("joinTagRight", tagRightFn, taggedType);

  GroupingOptions.Builder grouping = GroupingOptions.builder();
  grouping.partitionerClass(JoinUtils.getPartitionerClass(typeFamily));

  PTable<Pair<K, Integer>, Pair<U, V>> taggedUnion = leftTagged.union(rightTagged);
  return taggedUnion.groupByKey(grouping.build());
}
}
/**
 * Tags and groups the two sides of a join. Records from {@code left} are emitted under the key
 * {@code (key, 0)} with a value pair whose second element is {@code null}; records from
 * {@code right} are emitted under {@code (key, 1)} with a value pair whose first element is
 * {@code null}. The union of both tagged tables is then grouped with the partitioner class
 * returned by {@code JoinUtils.getPartitionerClass}.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @return the grouped union of the tagged tables
 */
private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(
    PTable<K, U> left, PTable<K, V> right) {
  PTypeFamily family = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> joinType = family.tableOf(
      family.pairs(left.getKeyType(), family.ints()),
      family.pairs(left.getValueType(), right.getValueType()));

  MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>> markLeft =
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> entry) {
          Pair<K, Integer> keyWithTag = Pair.of(entry.first(), 0);
          Pair<U, V> valueSlots = Pair.of(entry.second(), (V) null);
          return Pair.of(keyWithTag, valueSlots);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> markedLeft = left.parallelDo("joinTagLeft", markLeft, joinType);

  MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>> markRight =
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> entry) {
          Pair<K, Integer> keyWithTag = Pair.of(entry.first(), 1);
          Pair<U, V> valueSlots = Pair.of((U) null, entry.second());
          return Pair.of(keyWithTag, valueSlots);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> markedRight = right.parallelDo("joinTagRight", markRight, joinType);

  GroupingOptions.Builder groupingBuilder = GroupingOptions.builder();
  groupingBuilder.partitionerClass(JoinUtils.getPartitionerClass(family));

  PTable<Pair<K, Integer>, Pair<U, V>> combined = markedLeft.union(markedRight);
  return combined.groupByKey(groupingBuilder.build());
}
}
/**
 * Tags and groups the two sides of a join. Records from {@code left} are re-keyed as
 * {@code (key, 0)} with the right-hand value slot set to {@code null}; records from
 * {@code right} are re-keyed as {@code (key, 1)} with the left-hand value slot set to
 * {@code null}. The union of both is grouped with sorted keys required and with the
 * partitioner class returned by {@code JoinUtils.getPartitionerClass}.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @param numReducers reducer count applied to the grouping; ignored unless greater than zero
 * @return the grouped union of the tagged tables
 */
static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left, PTable<K, V> right,
    int numReducers) {
  PTypeFamily typeFamily = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = typeFamily.tableOf(
      typeFamily.pairs(left.getKeyType(), typeFamily.ints()),
      typeFamily.pairs(left.getValueType(), right.getValueType()));

  // Left records: tag 0, right-hand value slot left null.
  MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>> tagLeftFn =
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> record) {
          Pair<K, Integer> taggedKey = Pair.of(record.first(), 0);
          Pair<U, V> leftOnly = Pair.of(record.second(), (V) null);
          return Pair.of(taggedKey, leftOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> leftTagged = left.parallelDo("joinTagLeft", tagLeftFn, taggedType);

  // Right records: tag 1, left-hand value slot left null.
  MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>> tagRightFn =
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> record) {
          Pair<K, Integer> taggedKey = Pair.of(record.first(), 1);
          Pair<U, V> rightOnly = Pair.of((U) null, record.second());
          return Pair.of(taggedKey, rightOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> rightTagged = right.parallelDo("joinTagRight", tagRightFn, taggedType);

  GroupingOptions.Builder grouping = GroupingOptions.builder();
  grouping.requireSortedKeys();
  grouping.partitionerClass(JoinUtils.getPartitionerClass(typeFamily));
  if (numReducers > 0) {
    grouping.numReducers(numReducers);
  }

  PTable<Pair<K, Integer>, Pair<U, V>> taggedUnion = leftTagged.union(rightTagged);
  return taggedUnion.groupByKey(grouping.build());
}
/**
 * Co-groups the two {@link PTable} arguments.
 *
 * <p>Each input is first passed through a tagging function ({@code CogroupFn1} for
 * {@code left}, {@code CogroupFn2} for {@code right}) that produces values of a common pair
 * type. The tagged tables are unioned, grouped by key, and post-processed by
 * {@code PostGroupFn} into a pair of collections per key.
 *
 * @param left the first table to co-group
 * @param right the second table to co-group
 * @return a {@code PTable} representing the co-grouped tables.
 */
public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(PTable<K, U> left, PTable<K, V> right) {
  PTypeFamily typeFamily = left.getTypeFamily();
  PType<K> keyType = left.getPTableType().getKeyType();
  PType<U> leftValueType = left.getPTableType().getValueType();
  PType<V> rightValueType = right.getPTableType().getValueType();
  PType<Pair<U, V>> taggedValueType = typeFamily.pairs(leftValueType, rightValueType);

  CogroupFn1<K, U, V> tagLeftFn = new CogroupFn1<K, U, V>();
  PTable<K, Pair<U, V>> leftTagged =
      left.parallelDo("coGroupTag1", tagLeftFn, typeFamily.tableOf(keyType, taggedValueType));

  CogroupFn2<K, U, V> tagRightFn = new CogroupFn2<K, U, V>();
  PTable<K, Pair<U, V>> rightTagged =
      right.parallelDo("coGroupTag2", tagRightFn, typeFamily.tableOf(keyType, taggedValueType));

  PTable<K, Pair<U, V>> merged = leftTagged.union(rightTagged);

  PType<Pair<Collection<U>, Collection<V>>> resultValueType =
      typeFamily.pairs(typeFamily.collections(leftValueType), typeFamily.collections(rightValueType));
  return merged.groupByKey()
      .parallelDo("cogroup", new PostGroupFn<K, U, V>(), typeFamily.tableOf(keyType, resultValueType));
}
/**
 * Co-groups the two {@link PTable} arguments.
 *
 * <p>Both inputs are mapped to a shared {@code Pair<U, V>} value type (via {@code CogroupFn1}
 * and {@code CogroupFn2} respectively), unioned, grouped by key, and finally converted by
 * {@code PostGroupFn} into one pair of collections per key.
 *
 * @param left the first table to co-group
 * @param right the second table to co-group
 * @return a {@code PTable} representing the co-grouped tables.
 */
public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(
    PTable<K, U> left, PTable<K, V> right) {
  PTypeFamily family = left.getTypeFamily();
  PType<K> keyPType = left.getPTableType().getKeyType();
  PType<U> leftPType = left.getPTableType().getValueType();
  PType<V> rightPType = right.getPTableType().getValueType();
  PType<Pair<U, V>> pairPType = family.pairs(leftPType, rightPType);

  CogroupFn1<K, U, V> leftTagger = new CogroupFn1<K, U, V>();
  PTable<K, Pair<U, V>> taggedLeft =
      left.parallelDo("coGroupTag1", leftTagger, family.tableOf(keyPType, pairPType));

  CogroupFn2<K, U, V> rightTagger = new CogroupFn2<K, U, V>();
  PTable<K, Pair<U, V>> taggedRight =
      right.parallelDo("coGroupTag2", rightTagger, family.tableOf(keyPType, pairPType));

  PTable<K, Pair<U, V>> combined = taggedLeft.union(taggedRight);

  PType<Pair<Collection<U>, Collection<V>>> outputPType =
      family.pairs(family.collections(leftPType), family.collections(rightPType));
  return combined.groupByKey()
      .parallelDo("cogroup", new PostGroupFn<K, U, V>(), family.tableOf(keyPType, outputPType));
}
return leftJoinedWithFilteredRight.union( right .parallelDo(