/**
 * Indexes the supplied group's schema information: caches its group-by
 * properties, records it if aggregation is disabled for it, and caches the
 * remaining (non-group-by) property names.
 *
 * @param group the element group to index
 */
private void addToGroupByMap(final String group) {
    final SchemaElementDefinition elementDef = schema.getElement(group);
    groupToGroupByProperties.put(group, elementDef.getGroupBy());
    if (!aggregatedGroups.contains(group)) {
        groupsWithNoAggregation.add(group);
    }
    // Everything that is not a group-by property is a non-group-by property.
    final Set<String> remainingProperties = new HashSet<>(elementDef.getProperties());
    remainingProperties.removeAll(elementDef.getGroupBy());
    groupToNonGroupByProperties.put(group, remainingProperties);
}
}
protected void reduceMultiValue(final KEY key, final Iterator<VALUE> iter, final VALUE firstValue, final Context context) throws IOException, InterruptedException { String group = null; try { group = getGroup(key, firstValue); final SchemaElementDefinition elementDef = schema.getElement(group); if (elementDef.isAggregate()) { Properties state; final ElementAggregator aggregator = elementDef.getIngestAggregator(); state = getValueProperties(key, firstValue, group); while (iter.hasNext()) { state = aggregator.apply(state, getValueProperties(key, iter.next(), group)); } context.write(key, createValue(key, firstValue, state, group)); } else { // The group has aggregation disabled - so write all values out. context.write(key, firstValue); while (iter.hasNext()) { context.write(key, iter.next()); } } } catch (final Exception e) { if (null == group) { group = "UNKNOWN"; } throw new RuntimeException("Failed to reduce values for group: " + group, e); } }
/** * Get the properties for a given group defined in the Schema as being * stored in the Accumulo timestamp column. * * @param group The {@link Element} type to be queried * @param timestamp the element timestamp property * @return The Properties stored within the Timestamp part of the * {@link Key} */ @Override public Properties getPropertiesFromTimestamp(final String group, final long timestamp) { final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group); final Properties properties = new Properties(); // If the element group requires a timestamp property then add it. if (null != timestampProperty && elementDefinition.containsProperty(timestampProperty)) { properties.put(timestampProperty, timestamp); } return properties; }
// Decide whether to aggregate on ingest: an explicit store property wins,
// otherwise fall back to the group schema's own aggregation setting.
final String aggregateOnIngest = store.getProperties().get(ParquetStoreProperties.PARQUET_AGGREGATE_ON_INGEST, null);
if (null == aggregateOnIngest) {
    this.aggregate = groupGafferSchema.isAggregate();
} else {
    // parseBoolean yields the primitive directly - Boolean.valueOf would box
    // a Boolean only to auto-unbox it into this boolean field.
    this.aggregate = Boolean.parseBoolean(aggregateOnIngest);
    this.aggregatorJson = JSONSerialiser.serialise(groupGafferSchema.getIngestAggregator());
    this.gafferProperties = new String[groupGafferSchema.getProperties().size()];
    groupGafferSchema.getProperties().toArray(this.gafferProperties);
    this.spark = spark;
    this.columnToPaths = store.getSchemaUtils().getColumnToPaths(group);
/**
 * Serialises every property of the group that is stored in the Accumulo
 * value (i.e. not held in the key) into a single {@link Value}.
 *
 * @param group      the element group
 * @param properties the properties to serialise
 * @return the serialised value bytes
 */
@SuppressWarnings("Convert2streamapi")
@Override
public Value getValueFromProperties(final String group, final Properties properties) {
    final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group);
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (final String propertyName : elementDefinition.getProperties()) {
        if (!isStoredInValue(propertyName, elementDefinition)) {
            continue;
        }
        serialiseSizeAndPropertyValue(propertyName, elementDefinition, properties, out);
    }
    return new Value(out.toByteArray());
}
/**
 * Determines whether a property is persisted in the Accumulo value. A
 * property lives in the key instead when it is a group-by property, the
 * visibility property, or the timestamp property.
 *
 * @param propertyName the property to check
 * @param elementDef   the schema definition for the property's group
 * @return true if the property is stored in the value
 */
protected boolean isStoredInValue(final String propertyName, final SchemaElementDefinition elementDef) {
    final boolean heldInKey = elementDef.getGroupBy().contains(propertyName)
            || propertyName.equals(schema.getVisibilityProperty())
            || propertyName.equals(timestampProperty);
    return !heldInKey;
}
// Add the matched-vertex column as a nullable string field.
structFieldList.add(new StructField(MATCHED_VERTEX_COL_NAME, DataTypes.StringType, true, Metadata.empty()));
final Set<String> properties = elementDefn.getProperties();
// Map each non-reserved schema property to a Spark DataType.
for (final String property : properties) {
    if (!ReservedPropertyNames.contains(property)) {
        final String propertyClass = elementDefn.getPropertyClass(property).getCanonicalName();
        DataType propertyType = getType(propertyClass);
        if (null != propertyType) {
            // NOTE(review): if the converter can handle this class, the column is
            // flagged for conversion and uses the converter's target type instead.
            if (converter.canHandle(elementDefn.getPropertyClass(property))) {
                propertyNeedsConversion.put(property, true);
                propertyType = converter.convertedType();
/** * Clone an {@link Element}, based on a target {@link Schema}. * * @param element the element to clone * @param schema the schema * @return the cloned element */ public Element cloneElement(final Element element, final Schema schema) { try { final Element clone = element.emptyClone(); final SchemaElementDefinition sed = schema.getElement(clone.getGroup()); for (final String propertyName : element.getProperties().keySet()) { final Object property = element.getProperty(propertyName); if (null == sed.getPropertyTypeDef(propertyName) || null == sed.getPropertyTypeDef(propertyName).getSerialiser()) { // This can happen if transient properties are derived - they will not have serialisers. LOGGER.warn("Can't find Serialisation for {}, returning uncloned property", propertyName); clone.putProperty(propertyName, property); } else if (null != property) { final Serialiser serialiser = sed.getPropertyTypeDef(propertyName).getSerialiser(); clone.putProperty(propertyName, serialiser.deserialise(serialiser.serialise(property))); } else { clone.putProperty(propertyName, null); } } return clone; } catch (final SerialisationException e) { throw new RuntimeException("SerialisationException converting elements", e); } } }
/**
 * Merges two elements of the same group using the group's ingest aggregator.
 * The schema is rebuilt lazily from its JSON form the first time this runs
 * on an executor.
 */
@Override
public Element call(final Element v1, final Element v2) {
    if (null == gafferSchema) {
        gafferSchema = Schema.fromJson(jsonGafferSchema);
    }
    final String group = v2.getGroup();
    final ElementAggregator aggregator = gafferSchema.getElement(group).getIngestAggregator();
    return aggregator.apply(v1, v2);
}
}
/**
 * Initialises the iterator: deserialises the Gaffer schema from the
 * options, reflectively constructs the configured element converter, and
 * resolves the full aggregator for the configured column family (group).
 *
 * @throws IOException if the underlying iterator fails to initialise
 */
@Override
public void init(final SortedKeyValueIterator<Key, Value> source, final Map<String, String> options, final IteratorEnvironment env) throws IOException {
    super.init(source, options, env);
    this.source = source;

    try {
        schema = Schema.fromJson(options.get(AccumuloStoreConstants.SCHEMA).getBytes(CommonConstants.UTF_8));
    } catch (final UnsupportedEncodingException e) {
        throw new SchemaException("Unable to deserialise the schema", e);
    }
    LOGGER.debug("Initialising RowIDAggregator with schema {}", schema);

    // Build the element converter via its (Schema) constructor.
    final String converterClassName = options.get(AccumuloStoreConstants.ACCUMULO_ELEMENT_CONVERTER_CLASS);
    try {
        elementConverter = Class.forName(converterClassName)
                .asSubclass(AccumuloElementConverter.class)
                .getConstructor(Schema.class)
                .newInstance(schema);
        LOGGER.debug("Creating AccumuloElementConverter of class {}", converterClassName);
    } catch (final ClassNotFoundException | InstantiationException | IllegalAccessException
            | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
        throw new AggregationException("Failed to create element converter of the class name provided ("
                + converterClassName + ")", e);
    }

    group = options.get(AccumuloStoreConstants.COLUMN_FAMILY);
    LOGGER.debug("group is set to {}", group);
    aggregator = schema.getElement(group).getFullAggregator();
    LOGGER.debug("Aggregator is set to {}", aggregator);
}
/**
 * Deserialises the properties held in an Accumulo {@link Value} for the
 * given group. Stops early if the value's bytes are exhausted before all
 * schema properties have been visited.
 *
 * @param group the element group
 * @param value the Accumulo value to decode
 * @return the deserialised properties (empty if the value is empty)
 */
@Override
public Properties getPropertiesFromValue(final String group, final Value value) {
    final Properties properties = new Properties();
    if (!isNotEmpty(value)) {
        return properties;
    }
    final byte[] bytes = value.get();
    final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group);
    int position = 0;
    for (final String propertyName : elementDefinition.getProperties()) {
        if (position >= bytes.length) {
            break;
        }
        // Only value-resident properties are encoded here; key-resident ones are skipped.
        if (isStoredInValue(propertyName, elementDefinition)) {
            try {
                position = addDeserialisedProperty(bytes, position, properties, elementDefinition, propertyName);
            } catch (final SerialisationException e) {
                throw new AccumuloElementConversionException("Failed to deserialise property " + propertyName, e);
            }
        }
    }
    return properties;
}
/**
 * Serialises the group-by property values, in schema order, into the bytes
 * used as the Accumulo column qualifier.
 *
 * @param group      the element group
 * @param properties the properties to serialise
 * @return the column qualifier bytes
 */
@Override
public byte[] buildColumnQualifier(final String group, final Properties properties) {
    final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group);
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (final String groupByProperty : elementDefinition.getGroupBy()) {
        serialiseSizeAndPropertyValue(groupByProperty, elementDefinition, properties, out);
    }
    return out.toByteArray();
}
/**
 * Deserialises one length-prefixed property from {@code bytes}, starting at
 * {@code carriage}, into {@code properties}.
 *
 * Each property is encoded as a variable-length int (the value's byte
 * length) followed by the value bytes themselves.
 *
 * @param carriage the offset of the property's length prefix
 * @return the offset of the first byte after the consumed property, or the
 *         unchanged carriage when the property has no serialiser (nothing
 *         is stored for it)
 * @throws SerialisationException if the value bytes cannot be deserialised
 */
private int addDeserialisedProperty(final byte[] bytes, final int carriage, final Properties properties, final SchemaElementDefinition elementDefinition, final String propertyName) throws SerialisationException {
    final TypeDefinition typeDefinition = elementDefinition.getPropertyTypeDef(propertyName);
    final ToBytesSerialiser serialiser = (null != typeDefinition) ? (ToBytesSerialiser) typeDefinition.getSerialiser() : null;
    if (null == serialiser) {
        return carriage;
    }
    final int numBytesForLength = CompactRawSerialisationUtils.decodeVIntSize(bytes[carriage]);
    final int currentPropLength = getCurrentPropLength(bytes, carriage);
    // The value starts immediately after its variable-length size prefix.
    final int valueStart = carriage + numBytesForLength;
    final Object deserialisedObject = getDeserialisedObject(serialiser, bytes, valueStart, currentPropLength);
    properties.put(propertyName, deserialisedObject);
    return valueStart + currentPropLength;
}
// Resolve the ingest aggregator for this group from the schema.
final ElementAggregator aggregator = schema.getElement(group).getIngestAggregator();
try {
    // Decode the properties held in the Accumulo value for this group.
    properties = elementConverter.getPropertiesFromValue(group, value);
private Element addNonAggElement(final Element element, final Schema schema, final MapImpl mapImpl) { final Element elementClone = element.emptyClone(); // Copy properties that exist in the schema final SchemaElementDefinition elementDef = schema.getElement(element.getGroup()); for (final String property : elementDef.getProperties()) { elementClone.putProperty(property, element.getProperty(property)); } mapImpl.addNonAggElement(elementClone); return elementClone; }
/**
 * Deserialises the group-by properties encoded in a column qualifier's
 * bytes for the given group. Stops early if the bytes are exhausted before
 * all group-by properties have been visited.
 *
 * @param group the element group
 * @param bytes the column qualifier bytes (may be null or empty)
 * @return the deserialised group-by properties
 */
@Override
public Properties getPropertiesFromColumnQualifier(final String group, final byte[] bytes) {
    final Properties properties = new Properties();
    if (null == bytes || 0 == bytes.length) {
        return properties;
    }
    final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group);
    int position = 0;
    for (final String propertyName : elementDefinition.getGroupBy()) {
        if (position >= bytes.length) {
            break;
        }
        try {
            position = addDeserialisedProperty(bytes, position, properties, elementDefinition, propertyName);
        } catch (final SerialisationException e) {
            throw new AccumuloElementConversionException("Failed to deserialise property " + propertyName, e);
        }
    }
    return properties;
}
/**
 * Validates the schema, additionally enforcing that any property mapped to
 * the Accumulo timestamp column aggregates with {@code Max} - the only
 * aggregation Accumulo's timestamp handling supports here.
 */
@Override
protected void validateSchema(final ValidationResult validationResult, final Serialiser serialiser) {
    super.validateSchema(validationResult, serialiser);
    final String timestampProperty = getSchema().getConfig(AccumuloStoreConstants.TIMESTAMP_PROPERTY);
    if (null == timestampProperty) {
        return;
    }
    final Iterable<SchemaElementDefinition> defs =
            new ChainedIterable<>(getSchema().getEntities().values(), getSchema().getEdges().values());
    for (final SchemaElementDefinition def : defs) {
        final TypeDefinition typeDef = def.getPropertyTypeDef(timestampProperty);
        // Skip groups without the property, without an aggregator, or already using Max.
        if (null == typeDef || null == typeDef.getAggregateFunction() || typeDef.getAggregateFunction() instanceof Max) {
            continue;
        }
        validationResult.addError("The aggregator for the " + timestampProperty + " property must be set to: " + Max.class.getName() + " this cannot be overridden for this Accumulo Store, as you have told Accumulo to store this property in the timestamp column.");
    }
}
/**
 * Returns the byte range of the column qualifier covering the first
 * {@code numProps} serialised properties, without deserialising them.
 *
 * @param group    the element group
 * @param bytes    the column qualifier bytes
 * @param numProps how many leading properties to include
 * @return the range of bytes holding those properties (empty if invalid)
 */
@Override
public BytesAndRange getPropertiesAsBytesFromColumnQualifier(final String group, final byte[] bytes, final int numProps) {
    if (!isColumnQualifierBytesValid(bytes, numProps)) {
        return new BytesAndRange(bytes, 0, 0);
    }
    final SchemaElementDefinition elementDefinition = getSchemaElementDefinition(group);
    if (numProps == elementDefinition.getProperties().size()) {
        // All properties requested - the whole qualifier is the answer.
        return new BytesAndRange(bytes, 0, bytes.length);
    }
    // Walk numProps length-prefixed values to find where the prefix ends.
    int end = 0;
    for (int propIndex = 0; propIndex < numProps && end < bytes.length; propIndex++) {
        final int numBytesForLength = CompactRawSerialisationUtils.decodeVIntSize(bytes[end]);
        final int currentPropLength = getCurrentPropLength(bytes, end);
        end += currentPropLength + numBytesForLength;
    }
    return new BytesAndRange(bytes, 0, end);
}
/**
 * Builds the Spark pair (grouping key, element). The key is the group name,
 * the identifying fields (vertex for entities; source, destination and
 * directed type for edges), then the group-by property values in schema
 * order.
 */
@Override
public Tuple2<List<Object>, Element> call(final Element element) throws Exception {
    if (null == gafferSchema) {
        // Lazily rebuild the schema from its JSON form.
        gafferSchema = Schema.fromJson(jsonGafferSchema);
    }
    final String group = element.getGroup();
    final List<Object> groupingKey = new ArrayList<>();
    groupingKey.add(group);
    if (gafferSchema.getEntityGroups().contains(group)) {
        groupingKey.add(((Entity) element).getVertex());
    } else {
        final Edge edge = (Edge) element;
        groupingKey.add(edge.getSource());
        groupingKey.add(edge.getDestination());
        groupingKey.add(edge.getDirectedType());
    }
    for (final String groupByProperty : gafferSchema.getElement(group).getGroupBy()) {
        groupingKey.add(element.getProperty(groupByProperty));
    }
    return new Tuple2<>(groupingKey, element);
}
}
protected void serialiseSizeAndPropertyValue(final String propertyName, final SchemaElementDefinition elementDefinition, final Properties properties, final ByteArrayOutputStream stream) { try { final TypeDefinition typeDefinition = elementDefinition.getPropertyTypeDef(propertyName); final ToBytesSerialiser serialiser = (null == typeDefinition) ? null : (ToBytesSerialiser) typeDefinition.getSerialiser(); byte[] bytes; if (null == serialiser) { bytes = AccumuloStoreConstants.EMPTY_BYTES; } else { Object value = properties.get(propertyName); //serialiseNull could be different to AccumuloStoreConstants.EMPTY_BYTES bytes = (null == value) ? serialiser.serialiseNull() : serialiser.serialise(value); } writeBytes(bytes, stream); } catch (final IOException e) { throw new AccumuloElementConversionException("Failed to write serialised property to ByteArrayOutputStream" + propertyName, e); } }