/**
 * A utility method to return the list of hosts a Batcher should use when talking
 * to this database. The list is retrieved by calling getPreferredHost() on each
 * Forest.
 *
 * @return the list of hosts a Batcher should use
 */
public default String[] getPreferredHosts() {
  return Stream.of(listForests())
    .map(Forest::getPreferredHost)
    .distinct()
    .toArray(String[]::new);
}
}
/** If less than minHosts are left, calls stopJob. * * @param numHosts the minimum number of hosts before this will call dataMovementMangaer.stopJob(batcher) * * @return this instance (for method chaining) */ public HostAvailabilityListener withMinHosts(int numHosts) { if (moveMgr.getConnectionType() == DatabaseClient.ConnectionType.GATEWAY) { if (numHosts != 1) { throw new IllegalArgumentException("numHosts must be 1 when using only the primary host for the connection"); } } else { if (numHosts <= 0) throw new IllegalArgumentException("numHosts must be > 0"); // TODO: use existing forest configuration instead of refreshing? int numConfigHosts = moveMgr.readForestConfig().getPreferredHosts().length; if (numHosts > numConfigHosts) throw new IllegalArgumentException ("numHosts must be less than or equal to the number of hosts in the cluster"); } this.minHosts = numHosts; return this; }
/**
 * Installs a new forest configuration: refreshes the per-forest tracking state,
 * rebuilds the client list (one client per distinct preferred host), and — if the
 * job is already running — reconciles the old and new forest sets.
 */
@Override
public synchronized QueryBatcher withForestConfig(ForestConfiguration forestConfig) {
  super.withForestConfig(forestConfig);
  Forest[] forests = forestConfig.listForests();
  // snapshot the forests we tracked before this update so deltas can be computed below
  Set<Forest> oldForests = new HashSet<>(forestResults.keySet());
  Map<String,Forest> hosts = new HashMap<>();
  for ( Forest forest : forests ) {
    String preferredHost = forest.getPreferredHost();
    if ( preferredHost == null ) {
      throw new IllegalStateException("Hostname must not be null for any forest");
    }
    hosts.put(preferredHost, forest);
    // lazily create tracking state for forests we haven't seen before
    forestResults.computeIfAbsent(forest, key -> new AtomicLong());
    forestIsDone.computeIfAbsent(forest, key -> new AtomicBoolean(false));
    retryForestMap.computeIfAbsent(forest, key -> new AtomicInteger(0));
  }
  logger.info("(withForestConfig) Using forests on {} hosts for \"{}\"",
    hosts.keySet(), forests[0].getDatabaseName());
  List<DatabaseClient> newClientList = new ArrayList<>();
  for ( Forest forest : hosts.values() ) {
    newClientList.add(getMoveMgr().getForestClient(forest));
  }
  clientList.set(newClientList);
  boolean started = (threadPool != null);
  if ( started && !oldForests.isEmpty() ) {
    calucluateDeltas(oldForests, forests);
  }
  return this;
}
private void scheduleForestResynch(Batcher batcher, String host) { // cancel any previously scheduled re-sync if ( future != null ) future.cancel(false); // schedule a re-sync with the server forest config future = Executors.newScheduledThreadPool(1) .schedule( () -> { if ( batcher.isStopped() ) { logger.debug("Job \"{}\" is stopped, so cancelling re-sync with the server forest config", batcher.getJobName()); } else { ForestConfiguration updatedForestConfig = moveMgr.readForestConfig(); logger.info("it's been {} since host {} failed, opening communication to all server hosts [{}]", suspendTimeForHostUnavailable.toString(), host, Arrays.asList(updatedForestConfig.getPreferredHosts())); // set the forestConfig back to whatever the server says it is batcher.withForestConfig(updatedForestConfig); } } , suspendTimeForHostUnavailable.toMillis(), TimeUnit.MILLISECONDS); }
private synchronized void initialize() { if ( threadCountSet == false ) { if ( query != null ) { Forest[] forests = getForestConfig().listForests(); logger.warn("threadCount not set--defaulting to number of forests ({})", forests.length); withThreadCount(forests.length); } else { int hostCount = clientList.get().size(); logger.warn("threadCount not set--defaulting to number of hosts ({})", hostCount); withThreadCount( hostCount ); } // now we've set the threadCount threadCountSet = true; } // If we are iterating and if we have the thread count to 1, we have a single thread acting as both // consumer and producer of the ThreadPoolExecutor queue. Hence, we produce till the maximum and start // consuming and produce again. Since the thread count is 1, there is no worry about thread utilization. if(getThreadCount() == 1) { isSingleThreaded = true; } logger.info("Starting job batchSize={}, threadCount={}, onUrisReady listeners={}, failure listeners={}", getBatchSize(), getThreadCount(), urisReadyListeners.size(), failureListeners.size()); threadPool = new QueryThreadPoolExecutor(getThreadCount(), this); }
if ( isHostUnavailableException == true ) { ForestConfiguration existingForestConfig = batcher.getForestConfig(); Set<String> preferredHostsList = new HashSet<String>(Arrays.asList(existingForestConfig.getPreferredHosts())); if(existingForestConfig instanceof FilteredForestConfiguration) { FilteredForestConfiguration existingFilteredForestConfiguration = (FilteredForestConfiguration) existingForestConfig;
// Re-runs the query for the forest in the given queryEvent after a failure.
// Looks the forest up in the *current* forest config — which may carry updated,
// now-available host info — and resumes from the results already retrieved.
// NOTE(review): runs the retry in-line (not via the thread pool), so this call
// blocks until the retried batch completes.
private void retry(QueryEvent queryEvent, boolean callFailListeners) {
  if ( isStopped() == true ) {
    logger.warn("Job is now stopped, aborting the retry");
    return;
  }
  // find the forest matching the event's forest in the current config
  Forest retryForest = null;
  for ( Forest forest : getForestConfig().listForests() ) {
    if ( forest.equals(queryEvent.getForest()) ) {
      // while forest and queryEvent.getForest() have equivalent forest id,
      // we expect forest to have the currently available host info
      retryForest = forest;
      break;
    }
  }
  if ( retryForest == null ) {
    throw new IllegalStateException("Forest for queryEvent (" + queryEvent.getForest().getForestName() +
      ") is not in current getForestConfig()");
  }
  // we're obviously not done with this forest
  forestIsDone.get(retryForest).set(false);
  retryForestMap.get(retryForest).incrementAndGet();
  // resume just past the results this forest has already produced
  long start = queryEvent.getForestResultsSoFar() + 1;
  logger.trace("retryForest {} on retryHost {} at start {}",
    retryForest.getForestName(), retryForest.getPreferredHost(), start);
  QueryTask runnable = new QueryTask(getMoveMgr(), this, retryForest, query,
    queryEvent.getForestBatchNumber(), start, queryEvent.getJobBatchNumber(), callFailListeners);
  runnable.run();
}
/*
private synchronized void startQuerying() { boolean consistentSnapshotFirstQueryHasRun = false; for ( Forest forest : getForestConfig().listForests() ) { QueryTask runnable = new QueryTask(getMoveMgr(), this, forest, query, 1, 1); if ( consistentSnapshot == true && consistentSnapshotFirstQueryHasRun == false ) { // let's run this first time in-line so we'll have the serverTimestamp set // before we launch all the parallel threads runnable.run(); consistentSnapshotFirstQueryHasRun = true; } else { threadPool.execute(runnable); } } }
Forest[] renamedForests = Stream.of(wrappedForestConfig.listForests()).map( forest -> { String openReplicaHost = forest.getOpenReplicaHost(); if ( openReplicaHost != null ) openReplicaHost = openReplicaHost.toLowerCase();
if (forestConfig == null) throw new IllegalArgumentException("forestConfig must not be null"); Forest[] forests = forestConfig.listForests(); if ( forests.length == 0 ) { throw new IllegalStateException("WriteBatcher requires at least one writeable forest");
@Test public void testWithWriteAndQueryBatcher() throws Exception{ if (moveMgr.getConnectionType() == DatabaseClient.ConnectionType.GATEWAY) return; ForestConfiguration forestConfig = moveMgr.readForestConfig(); long hostNum = Stream.of(forestConfig.listForests()).map(forest->forest.getPreferredHost()).distinct().count(); if ( hostNum <= 1 ) return; // we're not in a cluster, so this test isn't valid String host1 = forestConfig.listForests()[0].getPreferredHost(); FilteredForestConfiguration ffg = new FilteredForestConfiguration(forestConfig) .withRenamedHost(host1, Inet4Address.getByName(host1).getHostAddress()); runWithWriteAndQueryBatcher(ffg); ffg = new FilteredForestConfiguration(forestConfig) .withWhiteList(host1); runWithWriteAndQueryBatcher(ffg); ffg = new FilteredForestConfiguration(forestConfig) .withBlackList(host1); runWithWriteAndQueryBatcher(ffg); }
assertEquals(numExpected, searchResults.getTotalResults()); if ( queryBatcher.getForestConfig().listForests().length == 3 ) { for ( String forest : matchesByForest.keySet() ) { String[] expected = matchesByForest.get(forest);
@Test public void testBadQueryAndThrowException() { RawStructuredQueryDefinition query = client.newQueryManager().newRawStructuredQueryDefinition( new StringHandle("<this is not a valid structured query>").withFormat(JSON)); // we'll see one failure per forest List<String> urisIterator = testQueryExceptions(query, 0, moveMgr.readForestConfig().listForests().length); // without any matching uris, there will be no success or failure batches testIteratorExceptions(urisIterator, 0, 0); }
@Override public void retryListener(QueryBatch batch, QueryBatchListener queryBatchListener) { // We get the batch and modify the client alone in order to make use // of the new forest client in case if the original host is unavailable. DatabaseClient client = null; Forest[] forests = batch.getBatcher().getForestConfig().listForests(); for(Forest forest : forests) { if(forest.equals(batch.getForest())) client = getMoveMgr().getForestClient(forest); } QueryBatchImpl retryBatch = new QueryBatchImpl() .withClient( client ) .withBatcher( batch.getBatcher() ) .withTimestamp( batch.getTimestamp() ) .withServerTimestamp( batch.getServerTimestamp() ) .withItems( batch.getItems() ) .withJobTicket( batch.getJobTicket() ) .withJobBatchNumber( batch.getJobBatchNumber() ) .withJobResultsSoFar( batch.getJobResultsSoFar() ) .withForestBatchNumber( batch.getForestBatchNumber() ) .withForestResultsSoFar( batch.getForestResultsSoFar() ) .withForest( batch.getForest() ) .withJobTicket( batch.getJobTicket() ); queryBatchListener.processEvent(retryBatch); }
@Test public void testArgs() throws Exception { if (moveMgr.getConnectionType() == DatabaseClient.ConnectionType.GATEWAY) return; int defaultPort = client.getPort(); Class<?> defaultAuthContext = client.getSecurityContext().getClass(); ForestConfiguration forestConfig = moveMgr.readForestConfig(); Forest[] forests = forestConfig.listForests(); String defaultDatabase = forests[0].getDatabaseName(); // expect three forests per node assertTrue(forests.length % 3 == 0); for ( Forest forest : forests ) { DatabaseClient forestClient = ((DataMovementManagerImpl) moveMgr).getForestClient(forest); // not all forests for a database are on the same host, so all we // can check is that the hostname is not null assertNotNull(forest.getHost()); // not all hosts have the original REST server, but all hosts have the uber port assertEquals(defaultPort, forestClient.getPort()); assertEquals(defaultDatabase, forest.getDatabaseName()); assertEquals(defaultAuthContext, forestClient.getSecurityContext().getClass()); assertEquals(true, forest.isUpdateable()); if ( forest.getForestName() == null || ! forest.getForestName().startsWith("java-unittest-") ) { fail("Unexpected forestName \"" + forest.getForestName() + "\""); } } } }
@Test
public void testWithInvalidHosts() throws Exception {
  if (moveMgr.getConnectionType() == DatabaseClient.ConnectionType.GATEWAY) return;
  ForestConfiguration forestConfig = moveMgr.readForestConfig();
  String host1 = forestConfig.listForests()[0].getPreferredHost();

  // rename between two hosts that don't exist
  runWithWriteAndQueryBatcher(new FilteredForestConfiguration(forestConfig)
    .withRenamedHost("someInvalidHostName", "anotherInvalidHostName"));

  // black-list a host that doesn't exist
  runWithWriteAndQueryBatcher(new FilteredForestConfiguration(forestConfig)
    .withBlackList("someInvalidHostName"));

  // white-list an invalid host alongside one valid host
  runWithWriteAndQueryBatcher(new FilteredForestConfiguration(forestConfig)
    .withWhiteList("someInvalidHostName")
    .withWhiteList(host1));
}