/**
 * Builds the {@link GroupingOptions} for sorting a table by its key in one direction.
 *
 * <p>When {@code order} is {@code Order.DESCENDING}, a reversing sort comparator is
 * installed for the table's type family. Whenever the table uses the Avro type family,
 * the key schema is also stored under the {@code "crunch.schema"} configuration entry.
 * For any other order, tables of non-Avro families receive no comparator or schema
 * configuration here.
 *
 * @param ptable the table whose key type and type family drive the configuration
 * @param conf forwarded to {@code configureReducers}
 * @param numReducers forwarded to {@code configureReducers}
 * @param order the requested sort direction
 * @return the built options, with sorted keys required
 * @throws RuntimeException if a descending sort is requested and the type family is
 *         neither the Writable nor the Avro family
 */
private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
    int numReducers, Order order) {
  PType<K> keyPType = ptable.getKeyType();
  PTypeFamily family = ptable.getTypeFamily();
  GroupingOptions.Builder options = GroupingOptions.builder();

  if (order != Order.DESCENDING) {
    // Only Avro needs extra configuration when no reversing comparator is installed.
    if (family == AvroTypeFamily.getInstance()) {
      options.conf("crunch.schema", ((AvroType<K>) keyPType).getSchema().toString());
    }
  } else if (family == WritableTypeFamily.getInstance()) {
    options.sortComparatorClass(ReverseWritableComparator.class);
  } else if (family == AvroTypeFamily.getInstance()) {
    Schema keySchema = ((AvroType<K>) keyPType).getSchema();
    options.conf("crunch.schema", keySchema.toString());
    options.sortComparatorClass(ReverseAvroComparator.class);
  } else {
    throw new RuntimeException("Unrecognized type family: " + family);
  }

  options.requireSortedKeys();
  configureReducers(options, ptable, conf, numReducers);
  return options.build();
}
/**
 * Builds the {@link GroupingOptions} for sorting a table by per-column orderings of its key.
 *
 * <p>Writable family: a single descending column selects
 * {@code ReverseWritableComparator}; every other combination registers the column
 * orderings through {@code TupleWritableComparator.configureOrdering} and selects
 * {@code TupleWritableComparator}.
 *
 * <p>Avro family: the key schema is stored under the {@code "crunch.schema"}
 * configuration entry, and {@code ReverseAvroComparator} is selected only for a single
 * descending column.
 *
 * @param ptable the table whose key type and type family drive the configuration
 * @param conf receives the tuple ordering for Writable keys and is forwarded to
 *        {@code configureReducers}
 * @param numReducers forwarded to {@code configureReducers}
 * @param columnOrders the ordering requested for each key column
 * @return the built options, with sorted keys required
 * @throws RuntimeException if the type family is neither the Writable nor the Avro family
 */
private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
    int numReducers, ColumnOrder[] columnOrders) {
  PTypeFamily family = ptable.getTypeFamily();
  PType<K> keyPType = ptable.getKeyType();
  GroupingOptions.Builder options = GroupingOptions.builder();

  // Reject unsupported families up front; the Avro check only runs for non-Writable tables.
  final boolean writableBacked = family == WritableTypeFamily.getInstance();
  if (!writableBacked && family != AvroTypeFamily.getInstance()) {
    throw new RuntimeException("Unrecognized type family: " + family);
  }

  if (writableBacked) {
    if (columnOrders.length != 1 || columnOrders[0].order != Order.DESCENDING) {
      // General case: collect one Writable sub-type per ordered column.
      WritableType[] columnTypes = new WritableType[columnOrders.length];
      for (int col = 0; col < columnTypes.length; col++) {
        columnTypes[col] = (WritableType) keyPType.getSubTypes().get(col);
      }
      TupleWritableComparator.configureOrdering(conf, columnTypes, columnOrders);
      options.sortComparatorClass(TupleWritableComparator.class);
    } else {
      options.sortComparatorClass(ReverseWritableComparator.class);
    }
  } else {
    Schema keySchema = ((AvroType<K>) keyPType).getSchema();
    options.conf("crunch.schema", keySchema.toString());
    boolean singleDescending =
        columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING;
    if (singleDescending) {
      options.sortComparatorClass(ReverseAvroComparator.class);
    }
  }

  options.requireSortedKeys();
  configureReducers(options, ptable, conf, numReducers);
  return options.build();
}
/**
 * Reshapes the input into a table keyed by {@code Pair<K, V1>} and groups it.
 *
 * <p>The input is passed through {@code SSFormatFn} under the name
 * {@code "SecondarySort.format"}. The result is grouped with options that require sorted
 * keys and use the grouping comparator and partitioner that {@code JoinUtils} supplies
 * for the input's type family.
 *
 * @param input the table whose values are {@code Pair<V1, V2>}
 * @param numReducers the reducer count to request; only applied when greater than zero
 * @return the grouped table keyed by {@code Pair<K, V1>}
 */
private static <K, V1, V2> PGroupedTable<Pair<K, V1>, Pair<V1, V2>> prepare(
    PTable<K, Pair<V1, V2>> input, int numReducers) {
  PTypeFamily family = input.getTypeFamily();
  PType<Pair<V1, V2>> pairValueType = input.getValueType();

  // Composite key type: the original key paired with the first component of the value.
  PTableType<Pair<K, V1>, Pair<V1, V2>> formattedType = family.tableOf(
      family.pairs(input.getKeyType(), pairValueType.getSubTypes().get(0)),
      pairValueType);

  GroupingOptions.Builder options = GroupingOptions.builder();
  options = options.requireSortedKeys();
  options = options.groupingComparatorClass(JoinUtils.getGroupingComparator(family));
  options = options.partitionerClass(JoinUtils.getPartitionerClass(family));
  if (numReducers > 0) {
    options.numReducers(numReducers);
  }

  PTable<Pair<K, V1>, Pair<V1, V2>> formatted =
      input.parallelDo("SecondarySort.format", new SSFormatFn<K, V1, V2>(), formattedType);
  return formatted.groupByKey(options.build());
}
/**
 * Tags, unions and groups two tables in preparation for a join.
 *
 * <p>Each record of {@code left} becomes {@code ((key, 0), (value, null))} and each
 * record of {@code right} becomes {@code ((key, 1), (null, value))}. The two tagged
 * tables are unioned and grouped with options that require sorted keys and use the
 * partitioner that {@code JoinUtils} supplies for the left table's type family.
 *
 * @param left the left side of the join; its type family and key type are used for the
 *        intermediate table type
 * @param right the right side of the join
 * @param numReducers the reducer count to request; only applied when greater than zero
 * @return the grouped union of the tagged tables
 */
static <K, U, V> PGroupedTable<Pair<K, Integer>, Pair<U, V>> preJoin(PTable<K, U> left,
    PTable<K, V> right, int numReducers) {
  PTypeFamily family = left.getTypeFamily();
  PTableType<Pair<K, Integer>, Pair<U, V>> taggedType = family.tableOf(
      family.pairs(left.getKeyType(), family.ints()),
      family.pairs(left.getValueType(), right.getValueType()));

  PTable<Pair<K, Integer>, Pair<U, V>> taggedLeft = left.parallelDo("joinTagLeft",
      new MapFn<Pair<K, U>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, U> input) {
          // Left records carry tag 0 and fill only the first value slot.
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 0);
          Pair<U, V> paddedValue = Pair.of(input.second(), (V) null);
          return Pair.of(taggedKey, paddedValue);
        }
      }, taggedType);

  PTable<Pair<K, Integer>, Pair<U, V>> taggedRight = right.parallelDo("joinTagRight",
      new MapFn<Pair<K, V>, Pair<Pair<K, Integer>, Pair<U, V>>>() {
        @Override
        public Pair<Pair<K, Integer>, Pair<U, V>> map(Pair<K, V> input) {
          // Right records carry tag 1 and fill only the second value slot.
          Pair<K, Integer> taggedKey = Pair.of(input.first(), 1);
          Pair<U, V> paddedValue = Pair.of((U) null, input.second());
          return Pair.of(taggedKey, paddedValue);
        }
      }, taggedType);

  GroupingOptions.Builder options = GroupingOptions.builder()
      .requireSortedKeys()
      .partitionerClass(JoinUtils.getPartitionerClass(family));
  if (numReducers > 0) {
    options.numReducers(numReducers);
  }

  PTable<Pair<K, Integer>, Pair<U, V>> combined = taggedLeft.union(taggedRight);
  return combined.groupByKey(options.build());
}