@Override public Observable<IndexOperationMessage> indexEdge( final ApplicationScope applicationScope, final Entity entity, final Edge edge ) { final Observable<IndexOperationMessage> batches = Observable.just( edge ).map( observableEdge -> { //if the node is the target node, generate our scope correctly if ( edge.getTargetNode().equals( entity.getId() ) ) { return generateScopeFromSource( edge ); } throw new IllegalArgumentException("target not equal to entity + "+entity.getId()); } ).map( indexEdge -> { final EntityIndex ei = entityIndexFactory.createEntityIndex(indexLocationStrategyFactory.getIndexLocationStrategy(applicationScope) ); final EntityIndexBatch batch = ei.createBatch(); if (logger.isDebugEnabled()) { logger.debug("adding edge {} to batch for entity {}", indexEdge, entity); } Optional<Set<String>> fieldsToIndex = getFilteredStringObjectMap( indexEdge ); batch.index( indexEdge, entity ,fieldsToIndex); return batch.build(); } ); return ObservableTimer.time( batches, addTimer ); }
/**
 * Builds the de-index operation for one version of an entity on the given edge.
 *
 * A batch is created on the entity index resolved for the application scope, a single deindex
 * request for (generateScopeFromSource(edge), entityId, entityVersion) is added, and the built
 * message is emitted.
 *
 * @param applicationScope scope used to look up the index location strategy
 * @param edge the edge the entity was indexed on
 * @param entityId id of the entity to de-index
 * @param entityVersion version of the entity to de-index
 * @return an observable emitting the single built index operation message
 */
@Override
public Observable<IndexOperationMessage> deIndexEdge( final ApplicationScope applicationScope, final Edge edge,
                                                      final Id entityId, final UUID entityVersion ) {

    if ( logger.isTraceEnabled() ) {
        // pass the objects themselves: an explicit toString() here would throw on a null
        // argument, and only when trace logging happens to be enabled
        logger.trace("deIndexEdge edge={} entityId={} entityVersion={}", edge, entityId, entityVersion);
    }

    final EntityIndex ei = entityIndexFactory.createEntityIndex(
        indexLocationStrategyFactory.getIndexLocationStrategy( applicationScope ) );

    final EntityIndexBatch entityBatch = ei.createBatch();
    entityBatch.deindex( generateScopeFromSource( edge ), entityId, entityVersion );

    return Observable.just( entityBatch.build() );
}
/**
 * Indexes one entity, checks that a search finds it, de-indexes it and checks that the same
 * search no longer returns anything.
 */
@Test
public void testDeindex() {

    IndexEdge searchEdge = new IndexEdgeImpl( appId, "fastcars", SearchEdge.NodeType.SOURCE, 1 );

    // typed map filled with plain puts: no raw types and no anonymous HashMap subclass
    Map<String, Object> entityMap = new HashMap<>();
    entityMap.put( "name", "Ferrari 212 Inter" );
    entityMap.put( "introduced", 1952 );
    entityMap.put( "topspeed", 215 );

    Entity entity = EntityIndexMapUtils.fromMap( entityMap );
    EntityUtils.setId( entity, new SimpleId( "fastcar" ) );
    EntityUtils.setVersion( entity, UUIDGenerator.newTimeUUID() );
    entity.setField( new UUIDField( IndexingUtils.ENTITY_ID_FIELDNAME, UUID.randomUUID() ) );

    // index the entity and refresh before searching
    indexProducer.put( entityIndex.createBatch().index( searchEdge, entity ).build() ).subscribe();
    entityIndex.refreshAsync().toBlocking().first();

    CandidateResults candidateResults = entityIndex
        .search( searchEdge, SearchTypes.fromTypes( entity.getId().getType() ), "name contains 'Ferrari*'", 10, 0,
            false );
    assertEquals( 1, candidateResults.size() );

    // de-index the entity and refresh again
    EntityIndexBatch batch = entityIndex.createBatch();
    batch.deindex( searchEdge, entity );
    indexProducer.put( batch.build() ).subscribe();
    entityIndex.refreshAsync().toBlocking().first();

    candidateResults = entityIndex
        .search( searchEdge, SearchTypes.fromTypes( entity.getId().getType() ), "name contains 'Ferrari*'", 10, 0,
            false );
    assertEquals( 0, candidateResults.size() );
}
/**
 * Builds the given batch and hands the result to the put overload that accepts the built value.
 *
 * @param message the batch to build and submit
 * @return whatever the delegated put call returns
 */
@Override
public Observable<IndexOperationMessage> put( EntityIndexBatch message ) {
    return this.put( message.build() );
}
IndexEdge edge = new IndexEdgeImpl( indexEdge.getNodeId(), indexEdge.getEdgeName(), SearchEdge.NodeType.SOURCE, edgeCounter.incrementAndGet() ); entityIndexBatch.index( edge, entity ); } ).doOnNext( entityIndexBatch -> { log.info( "Indexing next {} in batch", entityIndexBatch.size() ); IndexOperationMessage message = entityIndexBatch.build(); indexProducer.put(message);
/**
 * Adds one deindex request to the batch for every candidate result supplied.
 *
 * @param edge the edge every candidate is de-indexed from
 * @param edgesToBeDeindexed the candidate results to walk
 * @param batch the batch receiving the deindex requests
 * @return the same batch instance that was passed in
 */
public EntityIndexBatch deindexBatchIteratorResolver( IndexEdge edge, CandidateResults edgesToBeDeindexed,
                                                      EntityIndexBatch batch ) {

    for ( Iterator<?> candidates = edgesToBeDeindexed.iterator(); candidates.hasNext(); ) {
        final CandidateResult candidate = ( CandidateResult ) candidates.next();
        batch.deindex( edge, candidate );
    }

    return batch;
}
/**
 * Converts entries of the sample JSON list into entities and adds them to the batch.
 *
 * The first startIndex entries are removed from sampleJson (the list is modified in place).
 * Every remaining entry is then cast to a map, turned into an entity of the given type with a
 * new time UUID version and a random entity-id field, and indexed on indexEdge. The loop stops
 * once the number of indexed entries exceeds max, so up to max + 1 entries are indexed.
 *
 * @param sampleJson entries to index; each entry is cast to Map&lt;String, Object&gt;
 * @param batch batch receiving the index requests
 * @param entityType type given to every created entity
 * @param indexEdge edge the entities are indexed on
 * @param max the loop ends as soon as the indexed count is greater than this value
 * @param startIndex number of leading entries to drop before indexing
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private void insertJsonBlob( List<Object> sampleJson, EntityIndexBatch batch, String entityType,
                             IndexEdge indexEdge, final int max, final int startIndex ) throws IOException {

    int count = 0;
    StopWatch timer = new StopWatch();
    timer.start();

    // drop the leading entries the caller asked to skip
    for ( int i = 0; i < startIndex; i++ ) {
        sampleJson.remove( 0 );
    }

    for ( Object o : sampleJson ) {

        Map<String, Object> item = ( Map<String, Object> ) o;

        Entity entity = new Entity( entityType );
        entity = EntityIndexMapUtils.fromMap( entity, item );
        EntityUtils.setVersion( entity, UUIDGenerator.newTimeUUID() );
        entity.setField( new UUIDField( IndexingUtils.ENTITY_ID_FIELDNAME, UUID.randomUUID() ) );
        batch.index( indexEdge, entity );

        if ( ++count > max ) {
            break;
        }
    }

    timer.stop();

    final long elapsedMs = timer.getTime();
    // nothing indexed (empty or fully skipped input): report 0 instead of dividing by zero
    final long averageMs = count == 0 ? 0 : elapsedMs / count;

    logger.info( "Total time to index {} entries {}ms, average {}ms/entry",
        new Object[] { count, elapsedMs, averageMs } );
}
@Test public void deleteVerification() throws Throwable { Id ownerId = new SimpleId( "owner" ); IndexEdge indexSCope = new IndexEdgeImpl( ownerId, "user", SearchEdge.NodeType.SOURCE, 10 ); final String middleName = "middleName" + UUIDUtils.newTimeUUID(); Map entityMap = new HashMap() {{ put( "username", "edanuff" ); put( "email", "ed@anuff.com" ); put( "middlename", middleName ); }}; Entity user = EntityIndexMapUtils.fromMap( entityMap ); EntityUtils.setId( user, new SimpleId( "edanuff" ) ); EntityUtils.setVersion( user, UUIDGenerator.newTimeUUID() ); EntityIndexBatch batch = entityIndex.createBatch(); batch.index( indexSCope, user ); indexProducer.put(batch.build()).subscribe();; entityIndex.refreshAsync().toBlocking().first(); final String query = "where username = 'edanuff'"; CandidateResults r = entityIndex.search( indexSCope, SearchTypes.fromTypes( "edanuff" ), query, 10, 0, false); assertEquals( user.getId(), r.get( 0 ).getId()); batch.deindex( indexSCope, user.getId(), user.getVersion() ); indexProducer.put(batch.build()).subscribe();; entityIndex.refreshAsync().toBlocking().first(); // EntityRef r = entityIndex.search( indexSCope, SearchTypes.fromTypes( "edanuff" ), query, 10, 0, false ); assertFalse( r.iterator().hasNext() ); }
/** * Merge our candidates and our entity set into results */ public void merge() { for ( final FilterResult<Candidate> candidateResult : candidateResults ) { validate( candidateResult ); } indexProducer.put( batch.build()).toBlocking().lastOrDefault(null);//want to rethrow if batch fails }
batch.deindex(searchEdge, entity.getId(), candidateVersion);
@Override public Observable<IndexOperationMessage> indexEntity( final ApplicationScope applicationScope, final Entity entity ) { //bootstrap the lower modules from their caches final GraphManager gm = graphManagerFactory.createEdgeManager( applicationScope ); final EntityIndex ei = entityIndexFactory.createEntityIndex(indexLocationStrategyFactory.getIndexLocationStrategy(applicationScope)); final Id entityId = entity.getId(); //we always index in the target scope final Observable<Edge> edgesToTarget = edgesObservable.edgesToTarget( gm, entityId, true); //we may have to index we're indexing from source->target here final Observable<IndexEdge> sourceEdgesToIndex = edgesToTarget.map( edge -> generateScopeFromSource( edge ) ); //do our observable for batching //try to send a whole batch if we can final Observable<IndexOperationMessage> batches = sourceEdgesToIndex .buffer(indexFig.getIndexBatchSize() ) //map into batches based on our buffer size .flatMap( buffer -> Observable.from( buffer ) //collect results into a single batch .collect( () -> ei.createBatch(), ( batch, indexEdge ) -> { if (logger.isDebugEnabled()) { logger.debug("adding edge {} to batch for entity {}", indexEdge, entity); } final Optional<Set<String>> fieldsToIndex = getFilteredStringObjectMap( indexEdge ); batch.index( indexEdge, entity ,fieldsToIndex); } ) //return the future from the batch execution .map( batch -> batch.build() ) ); return ObservableTimer.time( batches, indexTimer ); }
@Override public Observable<IndexOperationMessage> deIndexOldVersions(final ApplicationScope applicationScope, final Id entityId, final List<UUID> versions) { final EntityIndex ei = entityIndexFactory. createEntityIndex(indexLocationStrategyFactory.getIndexLocationStrategy(applicationScope) ); // use LONG.MAX_VALUE in search edge because this value is not used elsewhere in lower code for de-indexing // previously .timsetamp() was used on entityId, but some entities do not have type-1 UUIDS ( legacy data) final SearchEdge searchEdgeFromSource = createSearchEdgeFromSource( new SimpleEdge( applicationScope.getApplication(), CpNamingUtils.getEdgeTypeFromCollectionName( InflectionUtils.pluralize( entityId.getType() ) ), entityId, Long.MAX_VALUE ) ); final EntityIndexBatch batch = ei.createBatch(); versions.forEach( version -> { batch.deindex(searchEdgeFromSource, entityId, version); }); return Observable.just(batch.build()); }
/** * Merge our candidates and our entity set into results */ public void merge(boolean keepStaleEntries, String query, boolean isDirectQuery) { if (!isDirectQuery) { filterDuplicateCandidates(query); } else { // remove direct query duplicates or missing entities (names that don't exist will have null ids) Set<UUID> foundUUIDs = new HashSet<>(); for (FilterResult<Candidate> candidateFilterResult : candidateResults) { Id id = candidateFilterResult.getValue().getCandidateResult().getId(); if (id != null) { UUID uuid = id.getUuid(); if (!foundUUIDs.contains(uuid)) { dedupedCandidateResults.add(candidateFilterResult); foundUUIDs.add(uuid); } } } } for (final FilterResult<Candidate> candidateResult : dedupedCandidateResults) { validate(candidateResult, keepStaleEntries, query, isDirectQuery); } // no index requests made for direct query, so no need to modify index if (!isDirectQuery) { indexProducer.put(batch.build()).toBlocking().lastOrDefault(null); // want to rethrow if batch fails } }
/** * Validate each candidate results vs the data loaded from cass */ private void validate( final FilterResult<Candidate> filterCandidate ) { final CandidateResult candidateResult = filterCandidate.getValue().getCandidateResult(); final SearchEdge searchEdge = filterCandidate.getValue().getSearchEdge(); final MvccLogEntry logEntry = versionSet.getMaxVersion( candidateResult.getId() ); final UUID candidateVersion = candidateResult.getVersion(); final UUID entityVersion = logEntry.getVersion(); final Id entityId = logEntry.getEntityId(); //entity is newer than ES version if ( UUIDComparator.staticCompare( entityVersion, candidateVersion ) > 0 ) { logger.warn( "Deindexing stale entity on edge {} for entityId {} and version {}", searchEdge, entityId, entityVersion ); batch.deindex( searchEdge, entityId, entityVersion ); return; } //ES is newer than cass, it means we haven't repaired the record in Cass, we don't want to //remove the ES record, since the read in cass should cause a read repair, just ignore if ( UUIDComparator.staticCompare( candidateVersion, entityVersion ) > 0 ) { logger.warn( "Found a newer version in ES over cassandra for edge {} for entityId {} and version {}. Repair should be run", searchEdge, entityId, entityVersion ); } //they're the same add it final FilterResult<Id> result = new FilterResult<>( entityId, filterCandidate.getPath() ); results.add( result ); } }
@Test public void testSizeByEdge(){ final String type = UUID.randomUUID().toString(); Id ownerId = new SimpleId( "owner" ); final Entity first = new Entity( type ); first.setField( new StringField( "string", "I ate a sammich" ) ); first.setSize(100); EntityUtils.setVersion( first, UUIDGenerator.newTimeUUID() ); final Entity second = new Entity( type ); second.setSize(100); second.setField( new StringField( "string", "I drank a beer" ) ); EntityUtils.setVersion( second, UUIDGenerator.newTimeUUID() ); EntityIndexBatch batch = entityIndex.createBatch(); //get ordering, so 2 is before 1 when both match IndexEdge indexScope1 = new IndexEdgeImpl( ownerId,type , SearchEdge.NodeType.SOURCE, 10 ); batch.index( indexScope1, first ); IndexEdge indexScope2 = new IndexEdgeImpl( ownerId, type+"er", SearchEdge.NodeType.SOURCE, 11 ); batch.index( indexScope2, second); indexProducer.put(batch.build()).subscribe();; entityIndex.refreshAsync().toBlocking().first(); long size = entityIndex.getTotalEntitySizeInBytes(new SearchEdgeImpl(ownerId,type, SearchEdge.NodeType.SOURCE)); assertTrue( size == 100 ); }
@Test public void testDeleteWithAlias() throws IOException { final String entityType = "thing"; IndexEdge searchEdge = new IndexEdgeImpl( appId, "things", SearchEdge.NodeType.SOURCE, 1 ); final SearchTypes searchTypes = SearchTypes.fromTypes( entityType ); insertJsonBlob( entityType, searchEdge, "/sample-large.json", 1, 0 ); entityIndex.addIndex(UUID.randomUUID() + "v2", 1, 0, "one"); entityIndex.refreshAsync().toBlocking().first(); insertJsonBlob( entityType, searchEdge, "/sample-large.json", 1, 1 ); CandidateResults crs = testQuery( searchEdge, searchTypes, "name = 'Bowers Oneil'", 1 ); EntityIndexBatch entityIndexBatch = entityIndex.createBatch(); entityIndexBatch.deindex(searchEdge, crs.get(0)); indexProducer.put(entityIndexBatch.build()).subscribe(); entityIndex.refreshAsync().toBlocking().first(); //Hilda Youn testQuery(searchEdge, searchTypes, "name = 'Bowers Oneil'", 0); }
/**
 * Reads a JSON list from a classpath resource, adds its entries to a new batch through the
 * list-based insertJsonBlob overload, submits the batch and refreshes the index.
 *
 * @param entityType type given to the created entities
 * @param indexEdge edge the entities are indexed on
 * @param filePath resource path resolved with getResourceAsStream on this class
 * @param max passed through to the list-based overload
 * @param startIndex passed through to the list-based overload
 * @throws IOException if the resource cannot be read or closed
 */
private void insertJsonBlob( String entityType, IndexEdge indexEdge, String filePath, final int max,
                             final int startIndex ) throws IOException {

    final List<Object> sampleJson;

    // close the resource stream explicitly instead of relying on the parser to do it
    try ( InputStream is = this.getClass().getResourceAsStream( filePath ) ) {
        ObjectMapper mapper = new ObjectMapper();
        sampleJson = mapper.readValue( is, new TypeReference<List<Object>>() { } );
    }

    EntityIndexBatch batch = entityIndex.createBatch();
    insertJsonBlob( sampleJson, batch, entityType, indexEdge, max, startIndex );
    indexProducer.put( batch.build() ).subscribe();

    EntityIndex.IndexRefreshCommandInfo info = entityIndex.refreshAsync().toBlocking().first();
    long time = info.getExecutionTime();
    logger.info("refresh took ms:" + time);
}
indexProducer.put(batch.build()).subscribe();;