/**
 * Picks the map implementation used to collect values per key.
 *
 * <ul>
 *   <li>If {@code options} is non-null and names a sort comparator class, that class is
 *       instantiated through {@code ReflectionUtils.newInstance} with the pipeline's
 *       configuration and used to order a {@link TreeMap}.</li>
 *   <li>Otherwise, if {@code keyType} is non-null and its type class is assignable to
 *       {@link Comparable}, a naturally ordered {@link TreeMap} is returned.</li>
 *   <li>Otherwise the result of {@code Maps.newHashMap()} is returned.</li>
 * </ul>
 *
 * @param keyType  type of the map keys; may be null
 * @param options  grouping options; may be null
 * @param pipeline source of the configuration handed to the comparator instance; only
 *                 dereferenced when a sort comparator class is present
 * @return a new, empty map from key to collected values
 */
private static <S, T> Map<S, Collection<T>> createMapFor(PType<S> keyType, GroupingOptions options, Pipeline pipeline) {
  boolean hasSortComparator = options != null && options.getSortComparatorClass() != null;
  if (hasSortComparator) {
    RawComparator<S> keyOrdering =
        ReflectionUtils.newInstance(options.getSortComparatorClass(), pipeline.getConfiguration());
    return new TreeMap<S, Collection<T>>(keyOrdering);
  }

  boolean keysAreComparable =
      keyType != null && Comparable.class.isAssignableFrom(keyType.getTypeClass());
  if (keysAreComparable) {
    return new TreeMap<S, Collection<T>>();
  }

  return Maps.newHashMap();
}
/**
 * Creates a comparator bound to the given grouping options, grouped table type and
 * runtime context, and selects the comparator class to use.
 *
 * <p>The comparator class is the sort comparator class from {@code options} when one is
 * set; otherwise {@code AvroKeyComparator} when the family of {@code ptype} equals the
 * {@code AvroTypeFamily} instance; otherwise {@code null}.
 *
 * @param options grouping options; dereferenced unconditionally, so must not be null
 * @param ptype   grouped table type whose family is inspected when no sort comparator
 *                class is set
 * @param ctxt    runtime context stored on this instance
 */
public SparkComparator(GroupingOptions options, PGroupedTableType ptype, SparkRuntimeContext ctxt) {
  this.options = options;
  this.ptype = ptype;
  this.ctxt = ctxt;

  if (options.getSortComparatorClass() == null) {
    // Nothing requested explicitly: only Avro-family types get a comparator class.
    boolean avroFamily = AvroTypeFamily.getInstance().equals(ptype.getFamily());
    this.cmpClass = avroFamily ? AvroKeyComparator.class : null;
  } else {
    this.cmpClass = options.getSortComparatorClass();
  }
}
/**
 * Creates a comparator bound to the given grouping options, grouped table type and
 * runtime context, and selects the comparator class to use.
 *
 * <p>The comparator class is the sort comparator class from {@code options} when one is
 * set; otherwise {@code AvroKeyComparator} when the family of {@code ptype} equals the
 * {@code AvroTypeFamily} instance; otherwise {@code null}.
 *
 * @param options grouping options; dereferenced unconditionally, so must not be null
 * @param ptype   grouped table type whose family is inspected when no sort comparator
 *                class is set
 * @param ctxt    runtime context stored on this instance
 */
public SparkComparator(GroupingOptions options, PGroupedTableType ptype, SparkRuntimeContext ctxt) {
  this.options = options;
  this.ptype = ptype;
  this.ctxt = ctxt;

  if (options.getSortComparatorClass() == null) {
    // Nothing requested explicitly: only Avro-family types get a comparator class.
    boolean avroFamily = AvroTypeFamily.getInstance().equals(ptype.getFamily());
    this.cmpClass = avroFamily ? AvroKeyComparator.class : null;
  } else {
    this.cmpClass = options.getSortComparatorClass();
  }
}
/**
 * Builds the shuffler for the given key type and grouping options.
 *
 * <p>A map is first obtained from {@code getMapForKeyType(keyType)}. Then:
 * <ul>
 *   <li>with null {@code options}, a {@code MapShuffler} over that map is returned;</li>
 *   <li>if the key type class is {@code Pair} and a grouping comparator class is set, a
 *       {@code SecondarySortShuffler} is returned over the map for the first sub-type of
 *       the key;</li>
 *   <li>if a sort comparator class is set, it is instantiated through
 *       {@code ReflectionUtils.newInstance} with the pipeline's configuration and a
 *       {@code MapShuffler} over a {@link TreeMap} ordered by it is returned;</li>
 *   <li>otherwise a {@code MapShuffler} over the initial map is returned.</li>
 * </ul>
 *
 * @param keyType  type of the keys being shuffled
 * @param options  grouping options; may be null
 * @param pipeline source of the configuration handed to the sort comparator instance
 * @return the shuffler selected by the rules above
 */
public static <S, T> Shuffler<S, T> create(PType<S> keyType, GroupingOptions options, Pipeline pipeline) {
  Map<S, Collection<T>> defaultMap = getMapForKeyType(keyType);
  if (options == null) {
    return new MapShuffler<S, T>(defaultMap);
  }

  boolean groupsOnPairKey =
      Pair.class.equals(keyType.getTypeClass()) && options.getGroupingComparatorClass() != null;
  if (groupsOnPairKey) {
    PType<?> firstComponent = keyType.getSubTypes().get(0);
    return new SecondarySortShuffler(getMapForKeyType(firstComponent));
  }

  if (options.getSortComparatorClass() == null) {
    return new MapShuffler<S, T>(defaultMap);
  }

  RawComparator<S> keyOrdering =
      ReflectionUtils.newInstance(options.getSortComparatorClass(), pipeline.getConfiguration());
  Map<S, Collection<T>> sortedMap = new TreeMap<S, Collection<T>>(keyOrdering);
  return new MapShuffler<S, T>(sortedMap);
}
/**
 * Builds the shuffler for the given key type and grouping options.
 *
 * <p>A map is first obtained from {@code getMapForKeyType(keyType)}. With null
 * {@code options}, a {@code MapShuffler} over that map is returned. Otherwise a
 * {@code Job} is created from the pipeline's configuration and handed to
 * {@code options.configure(job)}, after which:
 * <ul>
 *   <li>if the key type class is {@code Pair} and a grouping comparator class is set, a
 *       {@code SecondarySortShuffler} is returned over the map for the first sub-type of
 *       the key;</li>
 *   <li>if a sort comparator class is set, it is instantiated through
 *       {@code ReflectionUtils.newInstance} with the job's configuration and a
 *       {@code MapShuffler} over a {@link TreeMap} ordered by it is returned, together
 *       with {@code keyType};</li>
 *   <li>otherwise a {@code MapShuffler} over the initial map is returned.</li>
 * </ul>
 *
 * @param keyType  type of the keys being shuffled
 * @param options  grouping options; may be null
 * @param pipeline source of the configuration used to create the {@code Job}
 * @return the shuffler selected by the rules above
 * @throws IllegalStateException if creating the {@code Job} raises an {@link IOException}
 */
public static <S, T> Shuffler<S, T> create(PType<S> keyType, GroupingOptions options, Pipeline pipeline) {
  Map<Object, Collection<T>> defaultMap = getMapForKeyType(keyType);
  if (options == null) {
    return new MapShuffler<S, T>(defaultMap);
  }

  Job job;
  try {
    job = new Job(pipeline.getConfiguration());
  } catch (IOException ioe) {
    throw new IllegalStateException("Could not create Job instance", ioe);
  }
  // The comparator below is instantiated with the job's configuration, i.e. after the
  // options have had the chance to configure the job.
  options.configure(job);

  boolean groupsOnPairKey =
      Pair.class.equals(keyType.getTypeClass()) && options.getGroupingComparatorClass() != null;
  if (groupsOnPairKey) {
    PType<?> firstComponent = keyType.getSubTypes().get(0);
    return new SecondarySortShuffler(getMapForKeyType(firstComponent));
  }

  if (options.getSortComparatorClass() == null) {
    return new MapShuffler<S, T>(defaultMap);
  }

  RawComparator keyOrdering =
      ReflectionUtils.newInstance(options.getSortComparatorClass(), job.getConfiguration());
  Map<Object, Collection<T>> sortedMap = new TreeMap<Object, Collection<T>>(keyOrdering);
  return new MapShuffler<S, T>(sortedMap, keyType);
}
/**
 * Configures the shuffle settings of {@code job} for this table type.
 *
 * <p>Non-null {@code options} are applied to the job first. The key and value types of
 * {@code tableType} are then cast to {@code WritableType} and their serialization classes
 * are set as the map output key and value classes. Finally, when {@code options} supplies
 * no sort comparator class and the key serialization class is {@code TupleWritable},
 * {@code TupleWritable.Comparator} is set as the job's sort comparator.
 *
 * @param job     the job to configure
 * @param options grouping options; may be null
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  if (options != null) {
    options.configure(job);
  }

  WritableType keyWritableType = (WritableType) tableType.getKeyType();
  WritableType valueWritableType = (WritableType) tableType.getValueType();
  job.setMapOutputKeyClass(keyWritableType.getSerializationClass());
  job.setMapOutputValueClass(valueWritableType.getSerializationClass());

  boolean sortComparatorSupplied = options != null && options.getSortComparatorClass() != null;
  if (sortComparatorSupplied) {
    return;
  }
  if (TupleWritable.class.equals(keyWritableType.getSerializationClass())) {
    job.setSortComparatorClass(TupleWritable.Comparator.class);
  }
}
}
if (groupingOptions.requireSortedKeys() || groupingOptions.getSortComparatorClass() != null) { SparkComparator scmp = new SparkComparator(groupingOptions, ptype, runtime.getRuntimeContext()); groupedRDD = groupedRDD.sortByKey(scmp);
if (groupingOptions.requireSortedKeys() || groupingOptions.getSortComparatorClass() != null) { SparkComparator scmp = new SparkComparator(groupingOptions, ptype, runtime.getRuntimeContext()); groupedRDD = groupedRDD.sortByKey(scmp);