/**
 * Looks up the {@link SchemaElementDefinition} for the supplied group.
 *
 * @param group the element group to resolve
 * @return the schema definition for the group, never {@code null}
 * @throws AccumuloElementConversionException if the group is not present in the schema
 */
private SchemaElementDefinition getSchemaElementDefinition(final String group) {
    final SchemaElementDefinition definition = schema.getElement(group);
    if (definition == null) {
        // A missing definition usually means stale table iterators or a schema mismatch.
        throw new AccumuloElementConversionException("No SchemaElementDefinition found for group " + group
                + ", is this group in your schema or do your table iterators need updating?");
    }
    return definition;
}
/**
 * Caches per-group aggregation metadata: the group-by properties, the set of
 * non-group-by properties, and whether the group has aggregation disabled.
 *
 * @param group the element group to index
 */
private void addToGroupByMap(final String group) {
    final SchemaElementDefinition elementDef = schema.getElement(group);
    // Groups not listed as aggregated have aggregation disabled.
    if (!aggregatedGroups.contains(group)) {
        groupsWithNoAggregation.add(group);
    }
    groupToGroupByProperties.put(group, elementDef.getGroupBy());
    // Everything that is not a group-by property is a value property.
    final Set<String> remainingProperties = new HashSet<>(elementDef.getProperties());
    remainingProperties.removeAll(elementDef.getGroupBy());
    groupToNonGroupByProperties.put(group, remainingProperties);
}
}
/**
 * Folds an iterator of {@link Properties} for one group/key into a single
 * Properties object using the schema's query-time aggregator.
 * Returns empty Properties for an empty iterator, and the sole element
 * unchanged when there is nothing to aggregate.
 */
@Override
public Properties reduce(final String group, final Key key, final Iterator<Properties> iter,
                         final Set<String> groupBy, final ElementAggregator viewAggregator) {
    if (!iter.hasNext()) {
        return new Properties();
    }
    final Properties first = iter.next();
    if (!iter.hasNext()) {
        // Only one value - no aggregation needed, so skip the aggregator lookup.
        return first;
    }
    final ElementAggregator aggregator = schema.getElement(group).getQueryAggregator(groupBy, viewAggregator);
    Properties state = first;
    while (iter.hasNext()) {
        state = aggregator.apply(state, iter.next());
    }
    return state;
}
/**
 * Initialises this iterator from its configured options: deserialises the
 * schema, reflectively constructs the configured element converter, and
 * resolves the full aggregator for the configured column-family group.
 *
 * @throws IOException if the underlying iterator fails to initialise
 */
@Override
public void init(final SortedKeyValueIterator<Key, Value> source, final Map<String, String> options,
                 final IteratorEnvironment env) throws IOException {
    super.init(source, options, env);
    this.source = source;
    try {
        schema = Schema.fromJson(options.get(AccumuloStoreConstants.SCHEMA).getBytes(CommonConstants.UTF_8));
    } catch (final UnsupportedEncodingException e) {
        throw new SchemaException("Unable to deserialise the schema", e);
    }
    LOGGER.debug("Initialising RowIDAggregator with schema {}", schema);
    final String converterClassName = options.get(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS);
    try {
        // The converter class is supplied by name and must expose a (Schema) constructor.
        elementConverter = Class.forName(converterClassName)
                .asSubclass(AccumuloElementConverter.class)
                .getConstructor(Schema.class)
                .newInstance(schema);
        LOGGER.debug("Creating AccumuloElementConverter of class {}", converterClassName);
    } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException
            | IllegalArgumentException | InvocationTargetException | NoSuchMethodException
            | SecurityException e) {
        throw new AggregationException("Failed to create element converter of the class name provided ("
                + converterClassName + ")", e);
    }
    group = options.get(AccumuloStoreConstants.COLUMN_FAMILY);
    LOGGER.debug("group is set to {}", group);
    aggregator = schema.getElement(group).getFullAggregator();
    LOGGER.debug("Aggregator is set to {}", aggregator);
}
protected void reduceMultiValue(final KEY key, final Iterator<VALUE> iter, final VALUE firstValue, final Context context) throws IOException, InterruptedException { String group = null; try { group = getGroup(key, firstValue); final SchemaElementDefinition elementDef = schema.getElement(group); if (elementDef.isAggregate()) { Properties state; final ElementAggregator aggregator = elementDef.getIngestAggregator(); state = getValueProperties(key, firstValue, group); while (iter.hasNext()) { state = aggregator.apply(state, getValueProperties(key, iter.next(), group)); } context.write(key, createValue(key, firstValue, state, group)); } else { // The group has aggregation disabled - so write all values out. context.write(key, firstValue); while (iter.hasNext()) { context.write(key, iter.next()); } } } catch (final Exception e) { if (null == group) { group = "UNKNOWN"; } throw new RuntimeException("Failed to reduce values for group: " + group, e); } }
/**
 * Builds the grouping key for an element: the group name, then the vertex
 * (for entities) or source/destination/directed-type (for edges), followed by
 * the group-by property values from the schema.
 */
@Override
public Tuple2<List<Object>, Element> call(final Element element) throws Exception {
    if (gafferSchema == null) {
        // Lazily rebuild the schema from JSON after Spark task deserialisation.
        gafferSchema = Schema.fromJson(jsonGafferSchema);
    }
    final String group = element.getGroup();
    final List<Object> groupingKey = new ArrayList<>();
    groupingKey.add(group);
    if (gafferSchema.getEntityGroups().contains(group)) {
        groupingKey.add(((Entity) element).getVertex());
    } else {
        final Edge edge = (Edge) element;
        groupingKey.add(edge.getSource());
        groupingKey.add(edge.getDestination());
        groupingKey.add(edge.getDirectedType());
    }
    for (final String property : gafferSchema.getElement(group).getGroupBy()) {
        groupingKey.add(element.getProperty(property));
    }
    return new Tuple2<>(groupingKey, element);
}
}
/**
 * Merges two elements of the same group using the schema's ingest aggregator.
 */
@Override
public Element call(final Element v1, final Element v2) {
    if (gafferSchema == null) {
        // Lazily rebuild the schema from JSON after Spark task deserialisation.
        gafferSchema = Schema.fromJson(jsonGafferSchema);
    }
    return gafferSchema.getElement(v2.getGroup())
            .getIngestAggregator()
            .apply(v1, v2);
}
}
private Element addNonAggElement(final Element element, final Schema schema, final MapImpl mapImpl) { final Element elementClone = element.emptyClone(); // Copy properties that exist in the schema final SchemaElementDefinition elementDef = schema.getElement(element.getGroup()); for (final String property : elementDef.getProperties()) { elementClone.putProperty(property, element.getProperty(property)); } mapImpl.addNonAggElement(elementClone); return elementClone; }
// NOTE(review): incomplete fragment - this method is truncated after the
// SchemaEntityDefinition check, so only the visible portion is documented.
// Builds a Spark SQL schema per element group: for each group it fetches the
// Gaffer SchemaElementDefinition and starts collecting StructFields; entity vs
// edge handling presumably follows in the missing remainder - TODO confirm.
private void buildSchema() { LOGGER.info("Building Spark SQL schema for groups {}", StringUtils.join(groups, ',')); for (final String group : groups) { final SchemaElementDefinition elementDefn = schema.getElement(group); final List<StructField> structFieldList = new ArrayList<>(); if (elementDefn instanceof SchemaEntityDefinition) {
/**
 * Constructs an iterator over {@link Value}s whose {@link Key}s are
 * versions of the current topKey of the source
 * {@link SortedKeyValueIterator}.
 *
 * @param source The {@link SortedKeyValueIterator} of {@link Key},
 * {@link Value} pairs from which to read data.
 * @param group the element group
 * @param elementConverter the elementConverter to use
 * @param schema the schema
 * @param groupBy the groupBy properties
 */
public KeyValueIterator(final SortedKeyValueIterator<Key, Value> source, final String group,
                        final AccumuloElementConverter elementConverter, final Schema schema,
                        final Set<String> groupBy) {
    this.source = source;
    this.group = group;
    this.elementConverter = elementConverter;
    // Deep-copy the source's top key with the Key copy constructor so we hold a
    // stable snapshot after the source advances. The previous code rebuilt the key
    // from Text.getBytes() arrays, but Text.getBytes() returns the backing array,
    // which may be longer than the valid data (Text.getLength()) - copying the
    // whole array could append garbage bytes to the row/CF/CQ/visibility.
    topKey = new Key(source.getTopKey());
    schemaGroupBy = schema.getElement(this.group).getGroupBy();
    this.groupBy = groupBy;
    hasNext = _hasNext();
}
// NOTE(review): incomplete fragment - the enclosing method and the remainder of
// the try block are not visible, so this is documented only as far as shown.
// Resolves the ingest-time aggregator for the group, then decodes the element's
// Properties from the Accumulo Value; the catch/continuation presumably follows
// in the missing remainder - confirm against the full source.
final ElementAggregator aggregator = schema.getElement(group).getIngestAggregator(); try { properties = elementConverter.getPropertiesFromValue(group, value);
@Override public Object getProperty(final String name, final Properties lazyProperties) { if (null == eDef) { eDef = schema.getElement(group); if (null == eDef) { throw new IllegalArgumentException("Element definition for " + group + " could not be found in the schema"); } } final Properties props; if (eDef.getGroupBy().contains(name)) { props = elementConverter.getPropertiesFromColumnQualifier(group, key.getColumnQualifierData().getBackingArray()); } else if (name.equals(schema.getVisibilityProperty())) { props = elementConverter.getPropertiesFromColumnVisibility(group, key.getColumnVisibilityData().getBackingArray()); } else if (name.equals(timestampProperty)) { props = elementConverter.getPropertiesFromTimestamp(group, key.getTimestamp()); } else { props = elementConverter.getPropertiesFromValue(group, value); } lazyProperties.putAll(props); return props.get(name); } }
/**
 * Builds the Parquet {@link FilterPredicate} enforcing the schema's validation
 * predicates for a group, AND-ing the per-predicate filters together.
 *
 * @param group the element group
 * @return the combined filter and its paths, or {@code null} if there are no validators
 * @throws SerialisationException if a predicate cannot be converted to a filter
 */
protected Pair<FilterPredicate, Set<Path>> buildGroupValidatorFilter(final String group) throws SerialisationException {
    final SchemaElementDefinition elementDef = schemaUtils.getGafferSchema().getElement(group);
    final List<TupleAdaptedPredicate<String, ?>> validators = elementDef.getValidator(false).getComponents();
    Pair<FilterPredicate, Set<Path>> combined = null;
    if (validators != null) {
        for (final TupleAdaptedPredicate<String, ?> validator : validators) {
            final Pair<FilterPredicate, Set<Path>> filter =
                    buildFilter(validator.getPredicate(), validator.getSelection(), group);
            // Multi-selection predicates need the multi-column AND variant.
            combined = andFilter(combined, filter, validator.getSelection().length > 1);
        }
    }
    return combined;
}
// NOTE(review): incomplete fragment - opening/closing braces fall outside this
// view. The validator lookup and the two filter checks below appear TWICE,
// nested inside each other; the inner redeclaration of "validatorFilter" would
// not even compile in Java (duplicate local variable in an enclosing scope).
// This looks like a bad merge - confirm against the full file and remove the
// duplicated inner block.
if (needsValidatorsAndFiltersApplying) { final String group = e.getGroup(); final ElementFilter validatorFilter = gafferSchema.getElement(group).getValidator(false); if (skipValidation || validatorFilter == null || validatorFilter.test(e)) { if (elementFilter == null || elementFilter.test(e)) { final ElementFilter validatorFilter = gafferSchema.getElement(group).getValidator(false); if (skipValidation || validatorFilter == null || validatorFilter.test(e)) { if (elementFilter == null || elementFilter.test(e)) {
/**
 * Sets up a sort task over one split of a group's aggregated output: resolves
 * whether the group is an entity or edge group and derives the input/output
 * directories for the given split number.
 *
 * @throws SerialisationException if the group's column paths cannot be resolved
 */
public SortGroupSplit(final String group, final String column, final ParquetStore store,
                      final SparkSession spark, final int splitNumber) throws SerialisationException {
    this.column = column;
    this.spark = spark;
    this.columnToPaths = store.getSchemaUtils().getColumnToPaths(group);
    final SchemaElementDefinition groupSchema = store.getSchemaUtils().getGafferSchema().getElement(group);
    this.isEntity = groupSchema instanceof SchemaEntityDefinition;
    final String tempFileDir = store.getTempFilesDir();
    if (isEntity) {
        // Entities are stored under the vertex directory for both input and output.
        this.inputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.VERTEX, tempFileDir) + AGGREGATED + SPLIT + splitNumber;
        this.outputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.VERTEX, tempFileDir) + SORTED + SPLIT + splitNumber;
    } else {
        // Edges read from the source-ordered directory and write under the sort column.
        this.inputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.SOURCE, tempFileDir) + AGGREGATED + SPLIT + splitNumber;
        this.outputDir = ParquetStore.getGroupDirectory(group, column, tempFileDir) + SORTED + SPLIT + splitNumber;
    }
}
// NOTE(review): incomplete fragment - part of a constructor/initialiser whose
// signature and remainder fall outside this view.
// Caches the temp-file directory, decides whether the group is an entity group
// (vs an edge group) from the Gaffer schema, and reads the aggregate-on-ingest
// store property (presumably consumed later to decide whether to aggregate on
// ingest - confirm against the full source).
this.tempFileDir = store.getTempFilesDir(); final Schema gafferSchema = store.getSchemaUtils().getGafferSchema(); final SchemaElementDefinition groupGafferSchema = gafferSchema.getElement(group); this.isEntity = groupGafferSchema instanceof SchemaEntityDefinition; final String aggregateOnIngest = store.getProperties().get(ParquetStoreProperties.PARQUET_AGGREGATE_ON_INGEST, null);
/**
 * Sets up a sort task over the full aggregated output of a group, writing the
 * result as {@code numberOfOutputFiles} sorted files.
 *
 * @throws SerialisationException if the group's column paths cannot be resolved
 */
public SortFullGroup(final String group, final String column, final ParquetStore store,
                     final SparkSession spark, final int numberOfOutputFiles) throws SerialisationException {
    this.numberOfOutputFiles = numberOfOutputFiles;
    this.column = column;
    this.spark = spark;
    this.columnToPaths = store.getSchemaUtils().getColumnToPaths(group);
    final SchemaElementDefinition groupSchema = store.getSchemaUtils().getGafferSchema().getElement(group);
    this.isEntity = groupSchema instanceof SchemaEntityDefinition;
    final String tempFileDir = store.getTempFilesDir();
    // NOTE(review): SORTED is appended to the tempFileDir argument (i.e. inside
    // the base directory passed to getGroupDirectory) rather than to the returned
    // path as for the input - confirm this asymmetry is intentional.
    if (isEntity) {
        this.inputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.VERTEX, tempFileDir) + AGGREGATED + SPLIT;
        this.outputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.VERTEX, tempFileDir + SORTED);
    } else {
        this.inputDir = ParquetStore.getGroupDirectory(group, ParquetStoreConstants.SOURCE, tempFileDir) + AGGREGATED + SPLIT;
        this.outputDir = ParquetStore.getGroupDirectory(group, column, tempFileDir + SORTED);
    }
}
/** * Clone an {@link Element}, based on a target {@link Schema}. * * @param element the element to clone * @param schema the schema * @return the cloned element */ public Element cloneElement(final Element element, final Schema schema) { try { final Element clone = element.emptyClone(); final SchemaElementDefinition sed = schema.getElement(clone.getGroup()); for (final String propertyName : element.getProperties().keySet()) { final Object property = element.getProperty(propertyName); if (null == sed.getPropertyTypeDef(propertyName) || null == sed.getPropertyTypeDef(propertyName).getSerialiser()) { // This can happen if transient properties are derived - they will not have serialisers. LOGGER.warn("Can't find Serialisation for {}, returning uncloned property", propertyName); clone.putProperty(propertyName, property); } else if (null != property) { final Serialiser serialiser = sed.getPropertyTypeDef(propertyName).getSerialiser(); clone.putProperty(propertyName, serialiser.deserialise(serialiser.serialise(property))); } else { clone.putProperty(propertyName, null); } } return clone; } catch (final SerialisationException e) { throw new RuntimeException("SerialisationException converting elements", e); } } }
// NOTE(review): incomplete fragment - the enclosing method is not visible.
// Falls back to the schema-level group-by properties when the (presumably
// view-supplied) element definition does not specify its own - confirm where
// elementDef comes from in the full source.
Set<String> groupBy = elementDef.getGroupBy(); if (null == groupBy) { groupBy = schema.getElement(group).getGroupBy();