public LinkIndex(final IndexManager indexManager, RepositoryManager repositoryManager) throws IndexNotFoundException, IOException, InterruptedException { metrics = new LinkIndexMetrics("linkIndex"); this.repositoryManager = repositoryManager; // About the structure of these indexes: // - the vtag comes after the recordid because this way we can delete all // entries for a record without having to know the vtags under which they occur // - the sourcefield will often by optional in queries, that's why it comes last final int schemaIdByteLength = 16; // see SchemaIdImpl { IndexDefinition indexDef = new IndexDefinition("links-forward"); // For the record ID we use a variable length byte array field of which the first two bytes are fixed length // The first byte is actually the record identifier byte. // The second byte really is the first byte of the record id. We put this in the fixed length part // (safely because a record id should at least be a single byte long) because this prevents BCD encoding // on the first byte, thus making it easier to configure table splitting based on the original input. indexDef.addVariableLengthByteField("source", 2); indexDef.addByteField("vtag", schemaIdByteLength); indexDef.addByteField("sourcefield", schemaIdByteLength); forwardIndex = indexManager.getIndex(indexDef); } { IndexDefinition indexDef = new IndexDefinition("links-backward"); // Same remark as in the forwardIndex. indexDef.addVariableLengthByteField("target", 2); indexDef.addByteField("vtag", schemaIdByteLength); indexDef.addByteField("sourcefield", schemaIdByteLength); backwardIndex = indexManager.getIndex(indexDef); } }
/**
 * Collects the values that make up the serialized form of this entry.
 *
 * <p>The values appear in index definition order, one slot per defined field; a field that has no
 * value in this entry contributes a {@code null} slot. The identifier always occupies the last slot.
 *
 * @return the values to be serialized into a byte array
 */
Object[] getFieldValuesInSerializationOrder() {
    final int fieldCount = definition.getFields().size();
    final Object[] ordered = new Object[fieldCount + 1];

    int slot = 0;
    for (IndexFieldDefinition fieldDef : definition.getFields()) {
        // a missing field yields null, which is stored as-is
        ordered[slot++] = fields.get(fieldDef.getName());
    }
    ordered[slot] = identifier;

    return ordered;
}
/**
 * Adds a variable length byte field with a fixed length prefix to this index definition.
 *
 * @param name              name of the new field; checked by {@code validateName} first
 * @param fixedPrefixLength passed unchanged to the field definition's constructor
 * @return the field definition that was created and added
 */
public VariableLengthByteIndexFieldDefinition addVariableLengthByteField(String name, int fixedPrefixLength) {
    validateName(name);

    final VariableLengthByteIndexFieldDefinition byteField =
            new VariableLengthByteIndexFieldDefinition(name, fixedPrefixLength);
    add(byteField);

    return byteField;
}
IndexNotFoundException { if (indexDef.getFields().size() == 0) { throw new IllegalArgumentException("An IndexDefinition should contain at least one field."); HTableDescriptor tableDescr = new HTableDescriptor(indexDef.getName()); HColumnDescriptor family = new HColumnDescriptor(IndexDefinition.DATA_FAMILY, 1, HColumnDescriptor.DEFAULT_COMPRESSION, throw new RuntimeException("Index " + indexDef.getName() + " exists but its definition does not match the supplied definition."); return instantiateIndex(indexDef.getName(), table);
@Override public boolean filterRowKey(byte[] buffer, int offset, int length) { final StructRowKey structRowKey = indexDefinition.asStructRowKey(); structRowKey.iterateOver(buffer, offset); final StructIterator fieldsIterator = structRowKey.iterator(); final List<IndexFieldDefinition> fieldDefinitions = indexDefinition.getFields(); // for all defined field definitions for (IndexFieldDefinition field : fieldDefinitions) { // check if the field should be filtered if (indexFilter.getFields().contains(field.getName())) { final Object nextField = fieldsIterator.next(); if (indexFilter.filterField(field.getName(), nextField)) { return true; // this result is ignored } } else { try { fieldsIterator.skip(); } catch (IOException e) { throw new RuntimeException("failed to skip, index inconsistency?", e); } } } return false; // nothing was skipped }
/**
 * Parses the given JSON bytes into an object node and builds an index definition from it.
 *
 * @param name     name given to the resulting index definition
 * @param jsonData JSON data; the complete array is parsed
 * @return the index definition constructed from the parsed JSON
 * @throws IOException if reading the JSON fails
 */
private IndexDefinition deserialize(String name, byte[] jsonData) throws IOException {
    final ObjectMapper jsonMapper = new ObjectMapper();
    final ObjectNode definitionJson = jsonMapper.readValue(jsonData, 0, jsonData.length, ObjectNode.class);
    return new IndexDefinition(name, definitionJson);
}
private Object decodeIndexFieldFrom(String fieldName, byte[] rowKey) throws IOException { final StructRowKey structRowKey = definition.asStructRowKey(); structRowKey.iterateOver(rowKey); final StructIterator iterator = structRowKey.iterator(); int fieldPosition = definition.getFieldPosition(fieldName); if (fieldPosition == -1) { throw new MalformedQueryException("field [" + fieldName + "] is not part of the index"); } // skip all fields up to fieldPosition for (int i = 0; i < fieldPosition; i++) { iterator.skip(); } // return the requested field return iterator.next(); }
/**
 * Builds the row key under which the given entry is stored in the index.
 *
 * <p>The format is as follows:
 *
 * <pre>
 * ([encoded value][terminator for variable length fields])*[identifier]
 * </pre>
 *
 * @param entry entry whose field values (in serialization order) are serialized
 * @return the serialized row key
 */
private byte[] buildRowKey(IndexEntry entry) throws IOException {
    final Object[] orderedValues = entry.getFieldValuesInSerializationOrder();
    return definition.asStructRowKey().serialize(orderedValues);
}
/**
 * Reconstructs an index definition from its JSON representation.
 *
 * <p>The optional {@code "identifierOrder"} property selects the identifier order; when it is absent
 * {@code Order.ASCENDING} is used. Every entry of the {@code "fields"} object describes one field:
 * the entry key is the field name and the entry's {@code "class"} property names the field
 * definition class. That class is loaded through this class' class loader and instantiated via its
 * {@code (String, ObjectNode)} constructor.
 *
 * @param name       name of the index
 * @param jsonObject JSON representation of the index definition
 * @throws RuntimeException if any field definition cannot be loaded or instantiated (this includes
 *                          a missing {@code "fields"} object and a class that is not an
 *                          {@code IndexFieldDefinition}); the original exception is the cause
 */
public IndexDefinition(String name, ObjectNode jsonObject) {
    this.name = name;

    final JsonNode identifierOrderNode = jsonObject.get("identifierOrder");
    if (identifierOrderNode != null) {
        setIdentifierOrder(Order.valueOf(identifierOrderNode.getTextValue()));
    } else {
        setIdentifierOrder(Order.ASCENDING);
    }

    try {
        ObjectNode fields = (ObjectNode) jsonObject.get("fields");
        Iterator<Map.Entry<String, JsonNode>> fieldsIt = fields.getFields();
        while (fieldsIt.hasNext()) {
            Map.Entry<String, JsonNode> entry = fieldsIt.next();
            String className = entry.getValue().get("class").getTextValue();

            // The class name comes from data: verify it really is a field definition class BEFORE
            // looking up and invoking a constructor on it. asSubclass throws a ClassCastException
            // otherwise, which ends up wrapped like every other failure below.
            Class<? extends IndexFieldDefinition> clazz =
                    getClass().getClassLoader().loadClass(className).asSubclass(IndexFieldDefinition.class);
            Constructor<? extends IndexFieldDefinition> constructor =
                    clazz.getConstructor(String.class, ObjectNode.class);
            IndexFieldDefinition field = constructor.newInstance(entry.getKey(), entry.getValue());
            add(field);
        }
    } catch (Exception e) {
        throw new RuntimeException("Error instantiating IndexDefinition.", e);
    }
}
/** * Find the set of record ids (and corresponding version tags) on which a given record (in a given version tag) * depends. * * @param parentRecordId record id of the record to find dependencies for * @param vtag vtag of the record to find dependencies for * @return the record ids and vtags on which the given record depends */ Set<DependencyEntry> findDependencies(AbsoluteRecordId parentRecordId, SchemaId vtag) throws IOException { final Query query = new Query(); query.addEqualsCondition("dependant_recordid", parentRecordId.toBytes()); query.addEqualsCondition("dependant_vtag", vtag.getBytes()); final Set<DependencyEntry> result; final QueryResult queryResult = forwardDerefIndex.performQuery(query); if (queryResult.next() != null) { final byte[] serializedEntries = queryResult.getData(DEPENDENCIES_KEY); result = this.serializationUtil.deserializeDependenciesForward(serializedEntries); if (queryResult.next() != null) { throw new IllegalStateException( "Expected only a single matching entry in " + forwardDerefIndex.getDefinition().getName()); } } else { result = new HashSet<DependencyEntry>(); } // Not closed in finally block: avoid HBase contact when there could be connection problems. Closer.close(queryResult); return result; }
/**
 * Sets the value of a field in this entry.
 *
 * <p>The name/value pair is first handed to the definition's {@code checkFieldSupport}; the value
 * is only stored when that call returns normally.
 *
 * @param name  name of the field
 * @param value value to store under that name
 */
public void addField(String name, Object value) {
    this.definition.checkFieldSupport(name, value);
    this.fields.put(name, value);
}
/**
 * Checks that every field referenced by the query, be it through an equals condition or through the
 * range condition, is known to the index definition.
 *
 * TODO: shouldn't we also validate that the requested sort order corresponds with the indexed order etc?
 *
 * @param query query to validate
 * @throws MalformedQueryException if the query refers to a field that is not in the index definition
 */
private void validateQuery(Query query) {
    for (Query.EqualsCondition equalsCondition : query.getEqConditions()) {
        final boolean known = definition.getField(equalsCondition.getName()) != null;
        if (!known) {
            throw new MalformedQueryException(
                    String.format("The query refers to a field which does not exist in this index: %1$s",
                            equalsCondition.getName()));
        }
    }

    // the range condition is optional
    if (query.getRangeCondition() == null) {
        return;
    }

    if (definition.getField(query.getRangeCondition().getName()) == null) {
        throw new MalformedQueryException(
                String.format("The query refers to a field which does not exist in this index: %1$s",
                        query.getRangeCondition().getName()));
    }
}
private byte[] decodeIdentifierFrom(byte[] rowKey) throws IOException { final StructRowKey structRowKey = definition.asStructRowKey(); structRowKey.iterateOver(rowKey); final StructIterator iterator = structRowKey.iterator(); int nbrFields = structRowKey.getFields().length; // ignore all but last field (i.e. the identifier) for (int i = 0; i < nbrFields - 1; i++) { iterator.skip(); } // read the last field (i.e. the identifier) return (byte[]) iterator.next(); }
/** * Private constructor. Clients should use static factory methods {@link #delete(String, * org.apache.hadoop.conf.Configuration)} and {@link #create(String, Configuration, HBaseTableFactory, * IdGenerator)} */ private DerefMapHbaseImpl(final String owningRepoName, final String indexName, final Configuration hbaseConfiguration, final HBaseTableFactory tableFactory, final IdGenerator idGenerator) throws IndexNotFoundException, IOException, InterruptedException { this.serializationUtil = new DerefMapSerializationUtil(idGenerator); final IndexManager indexManager = new IndexManager(hbaseConfiguration, tableFactory); IndexDefinition forwardIndexDef = new IndexDefinition(forwardIndexName(indexName)); // For the record ID we use a variable length byte array field of which the first two bytes are fixed length // The first byte is actually the record identifier byte. // The second byte really is the first byte of the record id. We put this in the fixed length part // (safely because a record id should at least be a single byte long) because this prevents BCD encoding // on the first byte, thus making it easier to configure table splitting based on the original input. forwardIndexDef.addVariableLengthByteField("dependant_recordid", 2); forwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH); forwardDerefIndex = indexManager.getIndex(owningRepoName, forwardIndexDef); IndexDefinition backwardIndexDef = new IndexDefinition(backwardIndexName(indexName)); // Same remark as in the forward index. backwardIndexDef.addVariableLengthByteField("dependency_masterrecordid", 2); backwardIndexDef.addByteField("dependant_vtag", DerefMapSerializationUtil.SCHEMA_ID_BYTE_LENGTH); backwardIndexDef.addVariableLengthByteField("variant_properties_pattern"); backwardDerefIndex = indexManager.getIndex(owningRepoName, backwardIndexDef); }
/**
 * Adds a string field to this index definition.
 *
 * @param name name of the new field; checked by {@code validateName} first
 * @return the field definition that was created and added
 */
public StringIndexFieldDefinition addStringField(String name) {
    validateName(name);

    final StringIndexFieldDefinition stringField = new StringIndexFieldDefinition(name);
    add(stringField);

    return stringField;
}
final List<Object> fromKeyComponents = new ArrayList<Object>(definition.getFields().size()); byte[] fromKey = null; byte[] toKey = null; for (; definedFieldsIndex < definition.getFields().size(); definedFieldsIndex++) { final IndexFieldDefinition fieldDef = definition.getFields().get(definedFieldsIndex); if (definedFieldsIndex < definition.getFields().size() && usedConditionsCount < query.getEqConditions().size() + (rangeCond != null ? 1 : 0)) { StringBuilder message = new StringBuilder(); message.append( "the previous equals condition or followed after a range condition on a field. The fields are: "); for (; definedFieldsIndex < definition.getFields().size(); definedFieldsIndex++) { IndexFieldDefinition fieldDef = definition.getFields().get(definedFieldsIndex); if (query.getCondition(fieldDef.getName()) != null) { message.append(fieldDef.getName());
/**
 * Adds an integer field to this index definition.
 *
 * @param name name of the new field; checked by {@code validateName} first
 * @return the field definition that was created and added
 */
public IntegerIndexFieldDefinition addIntegerField(String name) {
    validateName(name);

    final IntegerIndexFieldDefinition integerField = new IntegerIndexFieldDefinition(name);
    add(integerField);

    return integerField;
}
/**
 * Adds a variable length byte field to this index definition, using the field definition's
 * single-argument constructor (no explicit fixed prefix length is supplied).
 *
 * @param name name of the new field; checked by {@code validateName} first
 * @return the field definition that was created and added
 */
public VariableLengthByteIndexFieldDefinition addVariableLengthByteField(String name) {
    validateName(name);

    final VariableLengthByteIndexFieldDefinition byteField = new VariableLengthByteIndexFieldDefinition(name);
    add(byteField);

    return byteField;
}
/**
 * Adds a float field to this index definition.
 *
 * @param name name of the new field; checked by {@code validateName} first
 * @return the field definition that was created and added
 */
public FloatIndexFieldDefinition addFloatField(String name) {
    validateName(name);

    final FloatIndexFieldDefinition floatField = new FloatIndexFieldDefinition(name);
    add(floatField);

    return floatField;
}
/**
 * Adds a long field to this index definition.
 *
 * @param name name of the new field; checked by {@code validateName} first
 * @return the field definition that was created and added
 */
public LongIndexFieldDefinition addLongField(String name) {
    validateName(name);

    final LongIndexFieldDefinition longField = new LongIndexFieldDefinition(name);
    add(longField);

    return longField;
}