/**
 * Assembles the {@link GroupingOptions} used to sort {@code ptable} by key in one direction.
 *
 * <p>For Avro-backed tables the key schema is always published under the
 * {@code "crunch.schema"} configuration entry. A reversing sort comparator is installed only
 * when {@code order} is {@link Order#DESCENDING}; any other order leaves the comparator unset.
 * Sorted keys are always required, and reducer/partitioner setup is delegated to
 * {@code configureReducers}.
 *
 * @param ptable the table whose key type and type family drive the configuration
 * @param conf job configuration, forwarded to {@code configureReducers}
 * @param numReducers requested reducer count, forwarded to {@code configureReducers}
 * @param order requested sort direction
 * @return the built grouping options
 * @throws RuntimeException if a descending sort is requested for a type family that is neither
 *         the Writable nor the Avro family
 */
private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
    int numReducers, Order order) {
  PType<K> keyType = ptable.getKeyType();
  PTypeFamily family = ptable.getTypeFamily();
  GroupingOptions.Builder options = GroupingOptions.builder();

  boolean avroKeys = family == AvroTypeFamily.getInstance();
  if (avroKeys) {
    // Both sort directions publish the key schema for Avro tables.
    Schema keySchema = ((AvroType<K>) keyType).getSchema();
    options.conf("crunch.schema", keySchema.toString());
  }

  if (order == Order.DESCENDING) {
    if (avroKeys) {
      options.sortComparatorClass(ReverseAvroComparator.class);
    } else if (family == WritableTypeFamily.getInstance()) {
      options.sortComparatorClass(ReverseWritableComparator.class);
    } else {
      throw new RuntimeException("Unrecognized type family: " + family);
    }
  }

  options.requireSortedKeys();
  configureReducers(options, ptable, conf, numReducers);
  return options.build();
}
/**
 * Assembles the {@link GroupingOptions} used to sort {@code ptable} by key according to
 * per-column orderings.
 *
 * <p>Writable tables: a single descending column installs {@code ReverseWritableComparator};
 * every other case records the column ordering through
 * {@code TupleWritableComparator.configureOrdering} (which receives {@code conf}) and installs
 * {@code TupleWritableComparator}.
 *
 * <p>Avro tables: the key schema is published under {@code "crunch.schema"}, and
 * {@code ReverseAvroComparator} is installed only for a single descending column; no comparator
 * is set here otherwise.
 *
 * <p>Sorted keys are always required, and reducer/partitioner setup is delegated to
 * {@code configureReducers}.
 *
 * @param ptable the table whose key type and type family drive the configuration
 * @param conf job configuration; passed to the tuple comparator setup and to
 *        {@code configureReducers}
 * @param numReducers requested reducer count, forwarded to {@code configureReducers}
 * @param columnOrders the ordering requested for each sort column
 * @return the built grouping options
 * @throws RuntimeException if the table's type family is neither the Writable nor the Avro family
 */
private static <K, V> GroupingOptions buildGroupingOptions(PTable<K, V> ptable, Configuration conf,
    int numReducers, ColumnOrder[] columnOrders) {
  PTypeFamily family = ptable.getTypeFamily();
  PType<K> keyPType = ptable.getKeyType();
  GroupingOptions.Builder options = GroupingOptions.builder();

  boolean writableKeys = family == WritableTypeFamily.getInstance();
  if (!writableKeys && family != AvroTypeFamily.getInstance()) {
    throw new RuntimeException("Unrecognized type family: " + family);
  }

  if (writableKeys) {
    if (columnOrders.length != 1 || columnOrders[0].order != Order.DESCENDING) {
      // General case: one Writable type per ordered column, taken from the key's sub-types.
      WritableType[] columnTypes = new WritableType[columnOrders.length];
      for (int col = 0; col < columnTypes.length; col++) {
        columnTypes[col] = (WritableType) keyPType.getSubTypes().get(col);
      }
      TupleWritableComparator.configureOrdering(conf, columnTypes, columnOrders);
      options.sortComparatorClass(TupleWritableComparator.class);
    } else {
      // Exactly one column, sorted descending.
      options.sortComparatorClass(ReverseWritableComparator.class);
    }
  } else {
    Schema keySchema = ((AvroType<K>) keyPType).getSchema();
    options.conf("crunch.schema", keySchema.toString());
    if (columnOrders.length == 1 && columnOrders[0].order == Order.DESCENDING) {
      options.sortComparatorClass(ReverseAvroComparator.class);
    }
  }

  options.requireSortedKeys();
  configureReducers(options, ptable, conf, numReducers);
  return options.build();
}
.partitionerClass(TotalOrderPartitioner.class) .sortComparatorClass(KeyValueComparator.class) .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()) .numReducers(splitPoints.size() + 1) .build();
.partitionerClass(TotalOrderPartitioner.class) .sortComparatorClass(KeyValueComparator.class) .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()) .numReducers(splitPoints.size() + 1) .build();
private static <K, V> void configureReducers(GroupingOptions.Builder builder, PTable<K, V> ptable, Configuration conf, int numReducers) { if (numReducers <= 0) { numReducers = PartitionUtils.getRecommendedPartitions(ptable, conf); if (numReducers < 5) { // Not worth the overhead, force it to 1 numReducers = 1; } } builder.numReducers(numReducers); if (numReducers > 1) { Iterable<K> iter = Sample.reservoirSample(ptable.keys(), numReducers - 1).materialize(); MaterializableIterable<K> mi = (MaterializableIterable<K>) iter; if (mi.isSourceTarget()) { builder.sourceTargets((SourceTarget) mi.getSource()); } builder.partitionerClass(TotalOrderPartitioner.class); builder.conf(TotalOrderPartitioner.PARTITIONER_PATH, mi.getPath().toString()); //TODO: distcache handling } }