/**
 * Builds one {@link FieldSchema} column per field partitioner of the given
 * strategy, in the order the partitioners are returned.
 *
 * @param strategy the partition strategy whose partitioners are described
 * @param schema the entity schema used to resolve each partition type
 * @return a new list holding one column per field partitioner
 */
@SuppressWarnings("deprecation")
static List<FieldSchema> partitionColumns(PartitionStrategy strategy, Schema schema) {
  List<FieldSchema> partitionCols = Lists.newArrayList();
  for (FieldPartitioner<?, ?> partitioner
      : Accessor.getDefault().getFieldPartitioners(strategy)) {
    // the column is named after the partition field, not its source field
    String columnName = partitioner.getName();
    String hiveType = getHiveType(SchemaUtil.getPartitionType(partitioner, schema));
    String comment = "Partition column derived from '" + partitioner.getSourceName()
        + "' column, " + "generated by Kite.";
    partitionCols.add(new FieldSchema(columnName, hiveType, comment));
  }
  return partitionCols;
}
/**
 * Looks up the partition value for a field partitioner in {@code values}.
 * A value stored under the partition name is converted to the partitioner's
 * type; otherwise a value stored under the source name is converted to the
 * source type and passed through the partitioner.
 *
 * @param fp the field partitioner to resolve a value for
 * @return the partition value
 * @throws IllegalStateException if neither name is present in {@code values}
 */
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  final String partitionName = fp.getName();
  if (values.containsKey(partitionName)) {
    // already a partition value: convert only
    return Conversions.convert(values.get(partitionName), fp.getType());
  }

  final String sourceName = fp.getSourceName();
  if (values.containsKey(sourceName)) {
    // source value: convert, then apply the partition function
    return fp.apply(
        Conversions.convert(values.get(sourceName), fp.getSourceType()));
  }

  throw new IllegalStateException(
      "Cannot create StorageKey, missing data for field:" + partitionName);
}
}
return String.format("hash(\"%s\", \"%s\", %s)", fieldPartitioner.getSourceName(), fieldPartitioner.getName(), fieldPartitioner.getCardinality()); } else if (fieldPartitioner instanceof IdentityFieldPartitioner) { return String.format("identity(\"%s\", \"%s\", \"%s\", %s)", fieldPartitioner.getSourceName(), fieldPartitioner.getName(), fieldPartitioner.getType().getName(), fieldPartitioner.getCardinality()); } else if (fieldPartitioner instanceof RangeFieldPartitioner) { List<String> upperBounds = ((RangeFieldPartitioner) fieldPartitioner) fieldPartitioner.getSourceName(), fieldPartitioner.getName(), builder.toString()); } else if (fieldPartitioner instanceof IntRangeFieldPartitioner) { fieldPartitioner.getSourceName(), fieldPartitioner.getName(), builder.toString()); } else if (fieldPartitioner instanceof LongFixedSizeRangeFieldPartitioner) { return String.format("fixedSizeRange(\"%s\", \"%s\", %s)", fieldPartitioner.getSourceName(), fieldPartitioner.getName(), ((LongFixedSizeRangeFieldPartitioner) fieldPartitioner).getSize()); } else if (fieldPartitioner instanceof DateFormatPartitioner) { return String.format("dateFormat(\"%s\", \"%s\", \"%s\")", fieldPartitioner.getSourceName(), fieldPartitioner.getName(), ((DateFormatPartitioner) fieldPartitioner).getPattern()); } else if (fieldPartitioner instanceof YearFieldPartitioner) { return String.format("year(\"%s\", \"%s\")", fieldPartitioner.getSourceName(), fieldPartitioner.getName()); } else if (fieldPartitioner instanceof MonthFieldPartitioner) {
@SuppressWarnings("unchecked") public static <S, T> Class<? extends T> getPartitionType(FieldPartitioner<S, T> fp, Schema schema) { if (fp instanceof ProvidedFieldPartitioner) { // provided partitioners have no source field schema return fp.getType(); } Class<? extends S> inputType = (Class<S>) getClassForType( fieldSchema(schema, fp.getSourceName()).getType()); return fp.getType(inputType); }
/**
 * An identity partitioner must return a range predicate unchanged from both
 * {@code project} and {@code projectStrict}.
 */
@Test
public void testIdentityFieldPartitionerRangePredicate() {
  FieldPartitioner<String, String> identity =
      new IdentityFieldPartitioner<String>("str", "str_copy", String.class, 50);
  Range<String> sourceRange = Ranges.openClosed("a", "b");

  Assert.assertEquals(sourceRange, identity.project(sourceRange));
  Assert.assertEquals(sourceRange, identity.projectStrict(sourceRange));
}
/**
 * Looks up the partition value for a field partitioner in {@code values}.
 * A value stored under the partition name is converted to the partition
 * type resolved against {@code schema}; otherwise a value stored under the
 * source name is converted to the source type and passed through the
 * partitioner.
 *
 * @param fp the field partitioner to resolve a value for
 * @return the partition value
 * @throws IllegalStateException if neither name is present in {@code values}
 */
@SuppressWarnings("unchecked")
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  final String partitionName = fp.getName();
  if (values.containsKey(partitionName)) {
    // already a partition value: convert only
    return Conversions.convert(
        values.get(partitionName), SchemaUtil.getPartitionType(fp, schema));
  }

  final String sourceName = fp.getSourceName();
  if (values.containsKey(sourceName)) {
    // source value: convert, then apply the partition function
    return fp.apply(Conversions.convert(
        values.get(sourceName), SchemaUtil.getSourceType(fp, schema)));
  }

  throw new IllegalStateException(
      "Cannot create Key, missing data for field:" + partitionName);
}
}
for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { ObjectNode partitioner = JsonNodeFactory.instance.objectNode(); partitioner.set(NAME, TextNode.valueOf(fp.getName())); if (fp instanceof IdentityFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("identity")); } else if (fp instanceof HashFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("hash")); partitioner.set(BUCKETS, LongNode.valueOf(fp.getCardinality())); } else if (fp instanceof LongFixedSizeRangeFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("range")); partitioner.set(SIZE, LongNode.valueOf(((LongFixedSizeRangeFieldPartitioner) fp).getSize())); } else if (fp instanceof YearFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("year")); } else if (fp instanceof MonthFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("month")); } else if (fp instanceof DayOfMonthFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("day")); } else if (fp instanceof HourFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("hour")); } else if (fp instanceof MinuteFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName()));
Set<String> partitionFields = Sets.newHashSet(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { partitioners.put(fp.getSourceName(), fp); partitionFields.add(fp.getName()); break; } else { Predicate strict = fp.projectStrict(predicate); Predicate permissive = fp.project(predicate); if (strict != null && strict.equals(permissive)) { satisfied = true;
@SuppressWarnings("deprecation") private static Collection<String[]> requiredFields(@Nullable PartitionStrategy strategy) { if (strategy == null) { return NO_REQUIRED_FIELDS; } List<String[]> requiredFields = Lists.newArrayList(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { // source name is not present for provided partitioners if (fp.getSourceName() != null) { requiredFields.add(fp.getSourceName().split("\\.")); } } return requiredFields; } }
for (int i = 0; i < preds.length; i += 1) { FieldPartitioner fp = partitioners.get(i); Predicate sourcePredicate = predicates.get(fp.getSourceName()); if (sourcePredicate != null) { Predicate projectedPredicate = fp.project(sourcePredicate); if (projectedPredicate != null) { preds[i] = projectedPredicate; timeFields.put(fp.getSourceName(), sourcePredicate); Predicate partitionPredicate = predicates.get(fp.getName()); if (preds[i] != null) { if (partitionPredicate != null) {
int i = 0; for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { String partition = fp.getName(); Predicate partitionPredicate = unsatisfied.get(partition); if (partitionPredicate != null && partitionPredicate.apply(key.get(i))) { String source = fp.getSourceName(); if (fp instanceof CalendarFieldPartitioner) { Predicate isSatisfiedBy = fp.projectStrict(original); LOG.debug("original: " + original + ", strict: " + isSatisfiedBy); if ((isSatisfiedBy != null) && isSatisfiedBy.apply(key.get(i))) {
/** * Builds a Schema for the FieldPartitioner using the given Schema to * determine types not fixed by the FieldPartitioner. * * @param fp a FieldPartitioner * @param schema an entity Schema that will be partitioned * @return a Schema for the field partitioner */ public static Schema partitionFieldSchema(FieldPartitioner<?, ?> fp, Schema schema) { if (fp instanceof IdentityFieldPartitioner) { // copy the schema directly from the entity to preserve annotations return fieldSchema(schema, fp.getSourceName()); } else { Class<?> fieldType = getPartitionType(fp, schema); if (fieldType == Integer.class) { return Schema.create(Schema.Type.INT); } else if (fieldType == Long.class) { return Schema.create(Schema.Type.LONG); } else if (fieldType == String.class) { return Schema.create(Schema.Type.STRING); } else { throw new ValidationException( "Cannot encode partition " + fp.getName() + " with type " + fp.getSourceType() ); } } }
@Test public void test() throws Exception { final PartitionStrategy p = new PartitionStrategy.Builder() .identity("month", "month_ordinal", 12) .hash("userId", 7) .build(); List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners(); Assert.assertEquals(2, fieldPartitioners.size()); FieldPartitioner fp0 = fieldPartitioners.get(0); assertEquals("month_ordinal", fp0.getName()); assertEquals(12, fp0.getCardinality()); FieldPartitioner fp1 = fieldPartitioners.get(1); assertEquals("userId_hash", fp1.getName()); assertEquals(7, fp1.getCardinality()); assertEquals(12 * 7, p.getCardinality()); // useful for writers }
List<FieldPartitioner> compatible = Lists.newArrayList(); for (FieldPartitioner fp : fps) { Predicate<?> projected = fp.project(constraint); if (projected instanceof Range) { current = addProjected(projected, fp.getName(), current); } else if (projected instanceof In) { compatible.add(fp); } else if (compatible.size() == 1) { FieldPartitioner fp = compatible.get(0); return addProjected(fp.project(constraint), fp.getName(), current); } else { return new SetGroupIterator(constraint, compatible, current);
/**
 * <p>
 * Builds the partition key for an entity by applying each field partitioner
 * of the strategy to the entity's value for that partitioner's source field.
 * When {@code reuseKey} is not null it is filled in and returned instead of
 * allocating a new key.
 * </p>
 * <p>
 * Use this to find the partition an entity is written to, or to locate a
 * partition using objects from the entity domain.
 * </p>
 */
@SuppressWarnings("unchecked")
public static <E> PartitionKey partitionKeyForEntity(PartitionStrategy strategy,
    E entity, EntityAccessor<E> accessor, @Nullable PartitionKey reuseKey) {
  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);

  PartitionKey key;
  if (reuseKey != null) {
    key = reuseKey;
  } else {
    // one slot per field partitioner
    key = new PartitionKey(new Object[partitioners.size()]);
  }

  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    Object sourceValue = accessor.get(entity, partitioner.getSourceName());
    key.set(position, partitioner.apply(sourceValue));
    position += 1;
  }
  return key;
}
@Test @SuppressWarnings("unchecked") public void testHashFieldPartitionerSetPredicate() { FieldPartitioner<Object, Integer> fp = new HashFieldPartitioner("name", 50); Assert.assertEquals(Predicates.in(fp.apply("a"), fp.apply("b")), fp.project((Predicate)Predicates.in("a", "b"))); // the set of inputs that result in a particular value is not closed Assert.assertNull(fp.projectStrict((Predicate) Predicates.in("a"))); }
/**
 * Validates that {@code other} may replace {@code existing}. Both strategies
 * must have the same number of field partitioners. At each position the
 * partitioners must either be equal, or the existing one must be a provided
 * partitioner whose name is kept and whose type is compatible with the
 * replacement's partition type.
 *
 * @param existing the current partition strategy
 * @param other the proposed replacement strategy
 * @param schema the entity schema used to resolve the replacement's types
 * @throws ValidationException if any of the checks above fails
 */
public static void checkStrategyUpdate(PartitionStrategy existing,
                                       PartitionStrategy other,
                                       Schema schema) {
  List<FieldPartitioner> currentPartitioners =
      Accessor.getDefault().getFieldPartitioners(existing);
  List<FieldPartitioner> proposedPartitioners =
      Accessor.getDefault().getFieldPartitioners(other);

  int currentCount = currentPartitioners.size();
  int proposedCount = proposedPartitioners.size();
  ValidationException.check(currentCount == proposedCount,
      "Not compatible: cannot replace %s partitioners with %s partitioners",
      currentCount, proposedCount);

  for (int index = 0; index < currentPartitioners.size(); index++) {
    FieldPartitioner current = currentPartitioners.get(index);
    FieldPartitioner proposed = proposedPartitioners.get(index);
    if (!current.equals(proposed)) {
      // only provided partitioners may be swapped for a different one
      String currentName = current.getName();
      ValidationException.check(current instanceof ProvidedFieldPartitioner,
          "Cannot replace partition %s: not a provided partitioner",
          currentName);

      String proposedName = proposed.getName();
      ValidationException.check(currentName.equals(proposedName),
          "Cannot change the name of partition %s (to %s)",
          currentName, proposedName);

      Class<?> proposedType = SchemaUtil.getPartitionType(proposed, schema);
      ValidationException.check(
          isCompatibleWithProvidedType(current.getType(), proposedType),
          "Cannot change the data type of partition %s",
          currentName);
    }
  }
}
private static void checkPartitionStrategy( Schema schema, @Nullable PartitionStrategy strategy) { if (strategy == null) { return; } for (FieldPartitioner fp : strategy.getFieldPartitioners()) { if (fp instanceof ProvidedFieldPartitioner) { // provided partitioners are not based on the entity fields continue; } // check the entity is a record if there is a non-provided partitioner ValidationException.check(schema.getType() == Schema.Type.RECORD, "Cannot partition non-records: %s", schema); // the source name should be a field in the schema, but not necessarily // the record. Schema fieldSchema; try { fieldSchema = SchemaUtil.fieldSchema(schema, fp.getSourceName()); } catch (IllegalArgumentException e) { throw new ValidationException( "Cannot partition on " + fp.getSourceName(), e); } ValidationException.check( SchemaUtil.isConsistentWithExpectedType( fieldSchema.getType(), fp.getSourceType()), "Field type %s does not match partitioner %s", fieldSchema.getType(), fp); } } }