/**
 * Keys every trace by a tuple of integer header values and groups the traces by that tuple.
 *
 * <p>Each entry of {@code keys} names a sort field that is resolved through
 * {@code Fields.getSortField}. A leading {@code '-'} sets the negate flag for that position;
 * the prefix is stripped before the lookup. The resolved fields and the negate flags are handed
 * to {@code HeaderExtractor}, which produces the grouping key for each trace.
 *
 * @param traces the serialized traces to key and group
 * @param keys the sort key names, each optionally prefixed with {@code '-'}
 * @return the traces grouped by a tuple that has one int component per entry of {@code keys}
 * @throws IllegalArgumentException if a key is empty or is not resolved by
 *     {@code Fields.getSortField}
 */
public static PGroupedTable<TupleN, ByteBuffer> apply(PCollection<ByteBuffer> traces,
    List<String> keys) {
  Field[] fields = new Field[keys.size()];
  boolean[] negate = new boolean[keys.size()];
  for (int i = 0; i < keys.size(); i++) {
    String key = keys.get(i);
    if (key.isEmpty()) {
      // Without this guard charAt(0) fails with an opaque StringIndexOutOfBoundsException.
      throw new IllegalArgumentException("Empty susort key at position " + i);
    }
    if (key.charAt(0) == '-') {
      negate[i] = true;
      key = key.substring(1);
    }
    fields[i] = Fields.getSortField(key);
    if (fields[i] == null) {
      // Report the key exactly as the caller supplied it, including any '-' prefix.
      throw new IllegalArgumentException("Unrecognized susort key: " + keys.get(i));
    }
  }

  // Every component of the grouping key is typed as an int.
  PTypeFamily tf = traces.getTypeFamily();
  PType[] headerTypes = new PType[keys.size()];
  for (int i = 0; i < keys.size(); i++) {
    headerTypes[i] = tf.ints();
  }

  GroupingOptions options = GroupingOptions.builder()
      .partitionerClass(JoinUtils.getPartitionerClass(tf))
      .build();
  return traces
      .parallelDo("gethw", new HeaderExtractor(fields, negate),
          tf.tableOf(tf.tuples(headerTypes), tf.bytes()))
      .groupByKey(options);
}
} // closes the enclosing scope opened outside this excerpt
writePartitionInfo(conf, partitionFile, splitPoints); GroupingOptions options = GroupingOptions.builder() .partitionerClass(TotalOrderPartitioner.class) .sortComparatorClass(KeyValueComparator.class) .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString())
writePartitionInfo(conf, partitionFile, splitPoints); GroupingOptions options = GroupingOptions.builder() .partitionerClass(TotalOrderPartitioner.class) .sortComparatorClass(KeyValueComparator.class) .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString())
/**
 * Tags both sides of a join, unions them, and groups the result by the tagged key.
 *
 * <p>Records from {@code left} get the key {@code (k, 0)} and the value {@code (u, null)};
 * records from {@code right} get the key {@code (k, 1)} and the value {@code (null, v)}.
 * The grouping requires sorted keys and uses the partitioner that {@code JoinUtils} selects
 * for the left table's type family.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @param numReducers the number of reducers to request; ignored unless greater than zero
 * @return the union of both tagged tables, grouped by the tagged key
 */
static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left,
    PTable<K, V> right, int numReducers) {
  PTypeFamily typeFamily = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = typeFamily.tableOf(
      typeFamily.pairs(left.getKeyType(), typeFamily.ints()),
      typeFamily.pairs(left.getValueType(), right.getValueType()));

  // Left side: tag 0, right-hand value slot left empty.
  MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>> tagLeftFn =
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 0);
          Pair<U, V> leftOnly = Pair.of(input.second(), (V) null);
          return Pair.of(taggedKey, leftOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> taggedLeft =
      left.parallelDo("joinTagLeft", tagLeftFn, taggedType);

  // Right side: tag 1, left-hand value slot left empty.
  MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>> tagRightFn =
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 1);
          Pair<U, V> rightOnly = Pair.of((U) null, input.second());
          return Pair.of(taggedKey, rightOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> taggedRight =
      right.parallelDo("joinTagRight", tagRightFn, taggedType);

  GroupingOptions.Builder grouping = GroupingOptions.builder();
  grouping.requireSortedKeys();
  grouping.partitionerClass(JoinUtils.getPartitionerClass(typeFamily));
  if (numReducers > 0) {
    grouping.numReducers(numReducers);
  }

  PTable<Pair<K, Integer>, Pair<U, V>> combined = taggedLeft.union(taggedRight);
  return combined.groupByKey(grouping.build());
}
/**
 * Tags both sides of a join, unions them, and groups the result by the tagged key.
 *
 * <p>Records from {@code left} get the key {@code (k, 0)} and the value {@code (u, null)};
 * records from {@code right} get the key {@code (k, 1)} and the value {@code (null, v)}.
 * The grouping uses the partitioner that {@code JoinUtils} selects for the left table's
 * type family.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @return the union of both tagged tables, grouped by the tagged key
 */
private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left,
    PTable<K, V> right) {
  PTypeFamily typeFamily = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = typeFamily.tableOf(
      typeFamily.pairs(left.getKeyType(), typeFamily.ints()),
      typeFamily.pairs(left.getValueType(), right.getValueType()));

  // Left side: tag 0, right-hand value slot left empty.
  MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>> tagLeftFn =
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 0);
          Pair<U, V> leftOnly = Pair.of(input.second(), (V) null);
          return Pair.of(taggedKey, leftOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> taggedLeft =
      left.parallelDo("joinTagLeft", tagLeftFn, taggedType);

  // Right side: tag 1, left-hand value slot left empty.
  MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>> tagRightFn =
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 1);
          Pair<U, V> rightOnly = Pair.of((U) null, input.second());
          return Pair.of(taggedKey, rightOnly);
        }
      };
  PTable<Pair<K, Integer>, Pair<U, V>> taggedRight =
      right.parallelDo("joinTagRight", tagRightFn, taggedType);

  GroupingOptions.Builder grouping = GroupingOptions.builder();
  grouping.partitionerClass(JoinUtils.getPartitionerClass(typeFamily));

  PTable<Pair<K, Integer>, Pair<U, V>> combined = taggedLeft.union(taggedRight);
  return combined.groupByKey(grouping.build());
}
} // closes the enclosing scope opened outside this excerpt
/**
 * Builds the grouped input for a join: both tables are re-keyed with a side tag, merged,
 * and grouped.
 *
 * <p>A {@code left} record {@code (k, u)} becomes {@code ((k, 0), (u, null))} and a
 * {@code right} record {@code (k, v)} becomes {@code ((k, 1), (null, v))}. The partitioner
 * is the one {@code JoinUtils} returns for the left table's type family.
 *
 * @param left the left-hand table
 * @param right the right-hand table
 * @return the merged, tagged records grouped by their tagged key
 */
private static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(
    PTable<K, U> left, PTable<K, V> right) {
  final PTypeFamily family = left.getTypeFamily();
  final PTableType<Pair<K, Integer>, Pair<U, V>> tableType = family.tableOf(
      family.pairs(left.getKeyType(), family.ints()),
      family.pairs(left.getValueType(), right.getValueType()));

  final PTable<Pair<K, Integer>, Pair<U, V>> leftTagged = left.parallelDo(
      "joinTagLeft",
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
          // Tag 0 marks the left side; the right-hand value slot stays null.
          Pair<K, Integer> key = Pair.of(input.first(), 0);
          Pair<U, V> value = Pair.of(input.second(), (V) null);
          return Pair.of(key, value);
        }
      },
      tableType);

  final PTable<Pair<K, Integer>, Pair<U, V>> rightTagged = right.parallelDo(
      "joinTagRight",
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
          // Tag 1 marks the right side; the left-hand value slot stays null.
          Pair<K, Integer> key = Pair.of(input.first(), 1);
          Pair<U, V> value = Pair.of((U) null, input.second());
          return Pair.of(key, value);
        }
      },
      tableType);

  final GroupingOptions.Builder groupingBuilder = GroupingOptions.builder();
  groupingBuilder.partitionerClass(JoinUtils.getPartitionerClass(family));
  final GroupingOptions grouping = groupingBuilder.build();

  return leftTagged.union(rightTagged).groupByKey(grouping);
}
} // closes the enclosing scope opened outside this excerpt
private static <K, V> void configureReducers(GroupingOptions.Builder builder, PTable<K, V> ptable, Configuration conf, int numReducers) { if (numReducers <= 0) { numReducers = PartitionUtils.getRecommendedPartitions(ptable, conf); if (numReducers < 5) { // Not worth the overhead, force it to 1 numReducers = 1; } } builder.numReducers(numReducers); if (numReducers > 1) { Iterable<K> iter = Sample.reservoirSample(ptable.keys(), numReducers - 1).materialize(); MaterializableIterable<K> mi = (MaterializableIterable<K>) iter; if (mi.isSourceTarget()) { builder.sourceTargets((SourceTarget) mi.getSource()); } builder.partitionerClass(TotalOrderPartitioner.class); builder.conf(TotalOrderPartitioner.PARTITIONER_PATH, mi.getPath().toString()); //TODO: distcache handling } }
/**
 * Re-keys the input for a secondary sort and groups it.
 *
 * <p>The intermediate table is keyed by a pair of the original key type and the first
 * sub-type of the value pair; the values keep the input's value type. Records are converted
 * by {@code SSFormatFn} and then grouped with the grouping comparator and partitioner that
 * {@code JoinUtils} returns for the input's type family.
 *
 * @param input the table whose values are {@code (V1, V2)} pairs
 * @return the re-keyed records, grouped
 */
private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
    PTable<K, Pair<V1, V2>> input) {
  PTypeFamily ptf = input.getTypeFamily();
  PType<Pair<V1, V2>> valueType = input.getValueType();
  // Composite key: (original key, first element type of the value pair).
  PTableType<Pair<K, V1>, Pair<V1, V2>> inter = ptf.tableOf(
      ptf.pairs(input.getKeyType(), valueType.getSubTypes().get(0)),
      valueType);
  GroupingOptions options = GroupingOptions.builder()
      .groupingComparatorClass(JoinUtils.getGroupingComparator(ptf))
      .partitionerClass(JoinUtils.getPartitionerClass(ptf))
      .build();
  return input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), inter)
      .groupByKey(options);
}
/**
 * Re-keys the input for a secondary sort and groups it, optionally with a fixed reducer count.
 *
 * <p>The intermediate table is keyed by a pair of the original key type and the first
 * sub-type of the value pair; the values keep the input's value type. The grouping requires
 * sorted keys and uses the grouping comparator and partitioner that {@code JoinUtils} returns
 * for the input's type family.
 *
 * @param input the table whose values are {@code (V1, V2)} pairs
 * @param numReducers the number of reducers to request; ignored unless greater than zero
 * @return the re-keyed records, grouped
 */
private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
    PTable<K, Pair<V1, V2>> input, int numReducers) {
  PTypeFamily typeFamily = input.getTypeFamily();
  PType<Pair<V1, V2>> pairType = input.getValueType();
  // Composite key: (original key, first element type of the value pair).
  PTableType<Pair<K, V1>, Pair<V1, V2>> compositeType = typeFamily.tableOf(
      typeFamily.pairs(input.getKeyType(), pairType.getSubTypes().get(0)),
      pairType);

  GroupingOptions.Builder grouping = GroupingOptions.builder()
      .requireSortedKeys()
      .groupingComparatorClass(JoinUtils.getGroupingComparator(typeFamily))
      .partitionerClass(JoinUtils.getPartitionerClass(typeFamily));
  boolean reducersRequested = numReducers > 0;
  if (reducersRequested) {
    grouping.numReducers(numReducers);
  }

  PTable<Pair<K, V1>, Pair<V1, V2>> formatted =
      input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), compositeType);
  return formatted.groupByKey(grouping.build());
}