// NOTE(review): truncated/garbled test fragment — the TestingYarnFlinkResourceManager Props
// arguments and several statements are missing (unbalanced parens), so this cannot compile
// as-is. Visible intent: notify a leader twice, await TaskManager registration, read the
// registered-resource count, then poison-pill the resource manager. Left byte-identical.
new JavaTestKit(system) {{ final Deadline deadline = new FiniteDuration(3, TimeUnit.MINUTES).fromNow(); Configuration flinkConfig = new Configuration(); YarnConfiguration yarnConfig = new YarnConfiguration(); SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService( resourceManager = system.actorOf( Props.create( TestingYarnFlinkResourceManager.class, )); leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID); Await.ready(taskManagerRegisteredFuture, deadline.timeLeft()); leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID); int numberOfRegisteredResources = (Integer) Await.result(numberOfRegisteredResourcesFuture, deadline.timeLeft()); resourceManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
/**
 * Shuts down the given actor system and blocks until it has terminated.
 * On failure, debug output about the remaining actors is logged by the
 * underlying {@code TestKit} helper.
 *
 * @param actorSystem          the actor system to shut down
 * @param duration             maximum time to wait for termination; {@code null}
 *                             falls back to a 10 second default
 * @param verifySystemShutdown if {@code true}, an exception is thrown when the
 *                             shutdown fails; {@code null} is treated as {@code false}
 */
public static void shutdownActorSystem(ActorSystem actorSystem, Duration duration, Boolean verifySystemShutdown) {
    // Normalize the nullable parameters to concrete defaults before delegating.
    final Duration effectiveTimeout =
        (duration == null) ? FiniteDuration.create(10, TimeUnit.SECONDS) : duration;
    final boolean verify = (verifySystemShutdown == null) ? false : verifySystemShutdown;

    TestKit.shutdownActorSystem(actorSystem, effectiveTimeout, verify);
}
// NOTE(review): truncated fragment of a YARN application-master startup path — a Props
// construction is cut off (dangling "LOG);") so this cannot compile as-is. Visible intent:
// push Kerberos keytab settings into the config, resolve the Akka host/port, start the
// resource master actor, and attach ProcessReaper watchdogs to both the resource master
// and the job manager (exit with ACTOR_DIED_EXIT_CODE if either dies). Left byte-identical.
LOG.debug("keytabPath: {}", keytabPath); config.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, keytabPath); config.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, remoteKeytabPrincipal); final String amPortRange = config.getString( YarnConfigOptions.APPLICATION_MASTER_PORT); final String akkaHostname = AkkaUtils.getAddress(actorSystem).host().get(); final int akkaPort = (Integer) AkkaUtils.getAddress(actorSystem).port().get(); LOG); ActorRef resourceMaster = actorSystem.actorOf(resourceMasterProps); actorSystem.actorOf( Props.create(ProcessReaper.class, resourceMaster, LOG, ACTOR_DIED_EXIT_CODE), "YARN_Resource_Master_Process_Reaper"); actorSystem.actorOf( Props.create(ProcessReaper.class, jobManager, LOG, ACTOR_DIED_EXIT_CODE), "JobManager_Process_Reaper"); AkkaUtils.getTimeout(config).toMillis(), TimeUnit.MILLISECONDS, futureExecutor,
// NOTE(review): truncated test fragment — the retry loop around triggerSavepoint is cut off
// (unbalanced braces), so this cannot compile as-is. Visible intent: list jobs within a
// 100s deadline, trigger a savepoint for the RUNNING job (retrying on failure), then join
// the invoking thread and assert it has finished. Left byte-identical.
ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster()); Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow(); Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); for (JobStatusMessage job : jobs) { if (job.getJobState() == JobStatus.RUNNING) { try { savepointPath = clusterClient.triggerSavepoint(jobId, null) .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); } catch (Exception cause) { LOG.info("Failed to trigger savepoint. Retrying...", cause); invokeThread.join(deadline.timeLeft().toMillis()); assertFalse("Program invoke thread still running", invokeThread.isAlive());
/**
 * Submits the given plan as a detached job, waits for it to reach RUNNING,
 * then cancels it and waits for it to reach CANCELED. Fails the test if
 * either state transition does not happen within its deadline.
 *
 * @param plan                the program plan to compile into a JobGraph and submit
 * @param msecsTillCanceling  time to let the job run before cancellation is requested
 * @param maxTimeTillCanceled maximum time allowed for the job to reach CANCELED after cancel
 * @throws Exception on submission, status-polling, or interruption failures
 */
protected void runAndCancelJob(Plan plan, final int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
    // submit job
    final JobGraph jobGraph = getJobGraph(plan);
    ClusterClient<?> client = CLUSTER.getClusterClient();
    client.setDetached(true);
    JobSubmissionResult jobSubmissionResult = client.submitJob(jobGraph, CancelingTestBase.class.getClassLoader());

    // Poll until the job is RUNNING, or give up after 2 minutes.
    Deadline submissionDeadLine = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();

    JobStatus jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
    while (jobStatus != JobStatus.RUNNING && submissionDeadLine.hasTimeLeft()) {
        Thread.sleep(50);
        jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
    }
    if (jobStatus != JobStatus.RUNNING) {
        Assert.fail("Job not in state RUNNING.");
    }

    // Let the job run for the requested time before requesting cancellation.
    Thread.sleep(msecsTillCanceling);

    client.cancel(jobSubmissionResult.getJobID());

    // Poll until the job is CANCELED, bounded by the caller-supplied budget.
    Deadline cancelDeadline = new FiniteDuration(maxTimeTillCanceled, TimeUnit.MILLISECONDS).fromNow();

    JobStatus jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
    while (jobStatusAfterCancel != JobStatus.CANCELED && cancelDeadline.hasTimeLeft()) {
        Thread.sleep(50);
        jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS);
    }
    if (jobStatusAfterCancel != JobStatus.CANCELED) {
        Assert.fail("Failed to cancel job with ID " + jobSubmissionResult.getJobID() + '.');
    }
}
// NOTE(review): truncated HA test fragment — several statements between config setup and
// the leader lookup are missing (dangling "fileStateBackendPath);" and unbalanced braces),
// so this cannot compile as-is. Visible intent: start two JobManagers against ZooKeeper,
// wait for leader election within the test deadline, resolve the leader's ActorGateway,
// submit a blocking job, and finally shut both actor systems down. Left byte-identical.
@RetryOnFailure(times = 1) public void testCheckpointRecoveryFailure() throws Exception { final Deadline testDeadline = TestTimeOut.fromNow(); final String zooKeeperQuorum = ZooKeeper.getConnectString(); final String fileStateBackendPath = temporaryFolder.newFolder().toString(); fileStateBackendPath); config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2); testActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<>(new Tuple2<String, Object>("localhost", 0))); leaderListener.waitForNewLeader(testDeadline.timeLeft().toMillis()); ActorRef leaderRef = AkkaUtils.getActorRef( leaderAddress, testActorSystem, testDeadline.timeLeft()); ActorGateway leader = new AkkaActorGateway(leaderRef, leaderId); JobGraph jobGraph = new JobGraph(blockingVertex); jobGraph.getJobID(), JobStatus.RUNNING, leader, taskManagerSystem.shutdown(); testActorSystem.shutdown();
// NOTE(review): truncated fragment — the enclosing method signature and the assignment
// target of the StackTraceSampleCoordinator are cut off, so this cannot compile as-is.
// Visible intent: validate config/blobServer, read the Akka ask timeout (translating a
// NumberFormatException into an IllegalConfigurationException), and build a
// BackPressureStatsTrackerImpl from the WebOptions sampling settings. Left byte-identical.
BlobServer blobServer) throws Exception { checkNotNull(config); checkNotNull(blobServer); config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER); timeout = AkkaUtils.getTimeout(config); } catch (NumberFormatException e) { throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage()); new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis()); final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL); final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl( stackTraceSampleCoordinator, cleanUpInterval, config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES), config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL), Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));
/**
 * Creates a quarantine handler that converts the given Flink {@code Time}
 * into a Scala {@code FiniteDuration} and remembers the process exit code
 * to use when quarantine is detected.
 *
 * @param timeout  quarantine timeout; must not be {@code null}
 * @param exitCode exit code used when the process is terminated
 * @param log      logger for quarantine events; must not be {@code null}
 */
public DefaultQuarantineHandler(Time timeout, int exitCode, Logger log) {
    Preconditions.checkNotNull(timeout);
    this.log = Preconditions.checkNotNull(log);
    this.exitCode = exitCode;
    // Bridge Flink's Time into the Scala duration type Akka expects.
    this.timeout = new FiniteDuration(timeout.getSize(), timeout.getUnit());
}
// NOTE(review): truncated constructor fragment — the JobManager startup call and the
// TaskManager actor creation are partially missing (dangling "0);" and orphaned argument
// lists), so this cannot compile as-is. Visible intent: start a local JobManager and
// TaskManager in one actor system and block until the TaskManager has registered at the
// JobManager. Left byte-identical.
public StandaloneMiniCluster(Configuration configuration) throws Exception { this.configuration = Preconditions.checkNotNull(configuration); timeout = AkkaUtils.getTimeout(configuration); 0); port = configuration.getInteger(JobManagerOptions.PORT); highAvailabilityServices, LOCAL_HOSTNAME, Option.<String>empty(), true, TaskManager.class); taskManager, TaskManagerMessages.getNotifyWhenRegisteredAtJobManagerMessage(), timeout.toMillis()); Await.ready(registrationFuture, timeout);
// NOTE(review): fragment starting mid-signature (the method name and first parameters are
// outside this chunk), so it cannot compile as-is. Visible intent: build a TaskManager
// RpcService from the configured hostname and RPC port range. The computed lookupTimeout
// is never used in the visible code — presumably consumed by the missing part; confirm.
final HighAvailabilityServices haServices) throws Exception { checkNotNull(configuration); checkNotNull(haServices); String taskManagerHostname = configuration.getString(TaskManagerOptions.HOST); Time lookupTimeout = Time.milliseconds(AkkaUtils.getLookupTimeout(configuration).toMillis()); final String portRangeDefinition = configuration.getString(TaskManagerOptions.RPC_PORT); return AkkaRpcServiceUtils.createRpcService(taskManagerHostname, portRangeDefinition, configuration);
/**
 * Sends three fire-and-forget requests to the Spring-managed worker actor,
 * then performs one ask with a one-second timeout, logs the reply, and
 * finally terminates the actor system, blocking until termination completes.
 *
 * @param args command-line arguments (unused by the visible code)
 * @throws Exception if the ask or the termination wait fails
 */
@Override
public void run(String[] args) throws Exception {
    try {
        // "workerActor" is the Spring bean name resolved by the extension.
        ActorRef workerActor = actorSystem.actorOf(springExtension.props("workerActor"), "worker-actor");

        // Use ActorRef.noSender() rather than a raw null for tell's sender argument.
        workerActor.tell(new WorkerActor.Request(), ActorRef.noSender());
        workerActor.tell(new WorkerActor.Request(), ActorRef.noSender());
        workerActor.tell(new WorkerActor.Request(), ActorRef.noSender());

        FiniteDuration duration = FiniteDuration.create(1, TimeUnit.SECONDS);
        // NOTE(review): asking with a Response instance as the message looks odd —
        // presumably a query/Request message was intended; verify against WorkerActor.
        Future<Object> awaitable = Patterns.ask(workerActor, new WorkerActor.Response(), Timeout.durationToTimeout(duration));
        logger.info("Response: " + Await.result(awaitable, duration));
    } finally {
        // Always tear the system down and block until termination finishes.
        actorSystem.terminate();
        Await.result(actorSystem.whenTerminated(), Duration.Inf());
    }
}
// Closing brace of the enclosing class (class header is outside this chunk).
}
// NOTE(review): truncated Mesos configuration fragment — the throw's enclosing if-block is
// never closed and the MesosConfiguration result is not assigned, so this cannot compile
// as-is. Visible intent: require MASTER_URL, read the failover timeout in seconds, and
// assemble a MesosConfiguration with an optional credential. Left byte-identical.
Protos.Credential.Builder credential = null; if (!flinkConfig.contains(MesosOptions.MASTER_URL)) { throw new IllegalConfigurationException(MesosOptions.MASTER_URL.key() + " must be configured."); String masterUrl = flinkConfig.getString(MesosOptions.MASTER_URL); Duration failoverTimeout = FiniteDuration.apply( flinkConfig.getInteger( MesosOptions.FAILOVER_TIMEOUT_SECONDS), TimeUnit.SECONDS); new MesosConfiguration(masterUrl, frameworkInfo, scala.Option.apply(credential));
// NOTE(review): fragment starting mid-signature and ending mid-argument-list of the
// checkState message, so it cannot compile as-is. Visible intent: read the TaskManager
// hostname and IPC port (default 0 = auto-select) and validate the port is in [0, 65535].
// The lookupTimeout local is unused in the visible code — presumably used later; confirm.
final HighAvailabilityServices haServices) throws Exception { checkNotNull(configuration); checkNotNull(haServices); String taskManagerHostname = configuration.getString(ConfigConstants.TASK_MANAGER_HOSTNAME_KEY, null); Time lookupTimeout = Time.milliseconds(AkkaUtils.getLookupTimeout(configuration).toMillis()); final int rpcPort = configuration.getInteger(ConfigConstants.TASK_MANAGER_IPC_PORT_KEY, 0); Preconditions.checkState(rpcPort >= 0 && rpcPort <= 65535, "Invalid value for " + "'%s' (port for the TaskManager actor system) : %d - Leave config parameter empty or " + "use 0 to let the system choose port automatically.",
/**
 * Blocks on the wrapped Scala future for at most the given timeout and
 * returns its result, adapting Scala future failures to the
 * {@code java.util.concurrent.Future} exception contract.
 *
 * @param timeout maximum time to wait; must be non-negative
 * @param unit    time unit of the timeout argument
 * @return the future's result
 * @throws InterruptedException if the waiting thread is interrupted
 * @throws TimeoutException     if the wait times out
 * @throws ExecutionException   wrapping any other failure of the Scala future
 */
@Override
public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
    Preconditions.checkNotNull(scalaFuture);
    Preconditions.checkArgument(timeout >= 0L, "The timeout value has to be larger or " + "equal than 0.");

    try {
        return Await.result(scalaFuture, new FiniteDuration(timeout, unit));
    } catch (InterruptedException | TimeoutException e) {
        // These two are part of java.util.concurrent.Future's contract — rethrow as-is.
        throw e;
    } catch (Exception e) {
        // Everything else (the Scala future's failure cause) is wrapped, preserving the cause.
        throw new ExecutionException(e);
    }
}
// NOTE(review): truncated HA test fragment — it opens mid-argument-list
// ("zkServer.getConnectString(), rootFolder.getPath());") and several locals (sender,
// cluster, jm, thread, jobSubmission) are defined outside this chunk, so it cannot compile
// as-is. Visible intent: configure a ZooKeeper-backed local cluster, submit a blocking job,
// wait for all vertices to run, and await the submission result within 3x the timeout.
zkServer.getConnectString(), rootFolder.getPath()); configuration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, UUID.randomUUID().toString()); configuration.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, numJMs); configuration.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTMs); configuration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, numSlotsPerTM); configuration.setString(AkkaOptions.ASK_TIMEOUT, AkkaUtils.INF_TIMEOUT().toString()); receiver.setSlotSharingGroup(slotSharingGroup); final JobGraph graph = new JobGraph("Blocking test job", sender, receiver); cluster.start(); clientActorSystem = cluster.startJobClientActorSystem(graph.getJobID()); Deadline deadline = timeout.$times(3).fromNow(); Future<Object> future = jm.ask(new WaitForAllVerticesToBeRunningOrFinished(graph.getJobID()), deadline.timeLeft()); Await.ready(future, deadline.timeLeft()); thread.join(deadline.timeLeft().toMillis()); Await.result(jobSubmission.resultPromise.future(), deadline.timeLeft());
// NOTE(review): truncated fragment — the IllegalConfigurationException throw is cut off
// mid-expression and the lookup-retry values are not consumed by any visible constructor,
// so this cannot compile as-is. Visible intent: parse the Akka ask timeout, reject
// non-finite values, read lookup retry settings, and create the client's actor system.
Configuration config, HighAvailabilityServices highAvailabilityServices) throws Exception { Preconditions.checkNotNull(config, "Configuration"); String askTimeoutString = config.getString( ConfigConstants.AKKA_ASK_TIMEOUT, ConfigConstants.DEFAULT_AKKA_ASK_TIMEOUT); Duration timeout = FiniteDuration.apply(askTimeoutString); if (!timeout.isFinite()) { throw new IllegalConfigurationException(ConfigConstants.AKKA_ASK_TIMEOUT int lookupRetries = config.getInteger(QueryableStateOptions.CLIENT_LOOKUP_RETRIES); int lookupRetryDelayMillis = config.getInteger(QueryableStateOptions.CLIENT_LOOKUP_RETRY_DELAY); FiniteDuration.apply(lookupRetryDelayMillis, "ms")); this.actorSystem = AkkaUtils.createActorSystem(config, remoting); this.executionContext = actorSystem.dispatcher();
/**
 * Creates an instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
        Configuration flinkConfig,
        HighAvailabilityServices highAvailabilityServices,
        boolean sharedHaServices) {
    this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
    // The optimizer is configured from the same config used for address resolution.
    this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

    this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
    this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

    // Actor system creation is deferred until first use (lazy loader).
    this.actorSystemLoader = new LazyActorSystemLoader(
        highAvailabilityServices,
        Time.milliseconds(lookupTimeout.toMillis()),
        flinkConfig,
        log);

    this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
    this.sharedHaServices = sharedHaServices;
}
// NOTE(review): truncated fragment — the enclosing try block and the use of the `stopped`
// flag are outside this chunk, so it cannot compile as-is. Visible intent: attempt a
// 1-second graceful stop of the query service actor and, on failure, log a warning and
// hard-kill the actor with a Kill message. Left byte-identical.
stopTimeout = new FiniteDuration(1L, TimeUnit.SECONDS); stopFuture = Patterns.gracefulStop(queryService, stopTimeout); stopped = Await.result(stopFuture, stopTimeout); } catch (Exception e) { LOG.warn("Query actor did not properly stop.", e); queryService.tell(Kill.getInstance(), ActorRef.noSender());
/**
 * Verifies that when the actor's bounded mailbox (capacity 10) is full, an
 * additional message is routed to the dead-letter stream instead of being
 * queued. The test holds a lock so the actor blocks on its first message,
 * keeping the queue full while the remaining messages are sent.
 */
@Test
public void shouldSendMsgToDeadLetterWhenQueueIsFull() {
    final TestKit mockReceiver = new TestKit(actorSystem);
    // Subscribe the probe to DeadLetter events so we can observe the overflow.
    actorSystem.eventStream().subscribe(mockReceiver.testActor(), DeadLetter.class);
    final FiniteDuration twentySeconds = new FiniteDuration(20, TimeUnit.SECONDS);
    ActorRef pingPongActor = actorSystem.actorOf(PingPongActor.props(lock).withMailbox(config.getMailBoxName()), "pingpongactor");
    // NOTE(review): the return value of settings() is discarded — this call appears to be
    // dead code; confirm it has no required side effect before removing.
    actorSystem.mailboxes().settings();
    lock.lock();
    try {
        //queue capacity = 10
        //need to send 12 messages; 1 message is dequeued and actor waits on lock,
        //2nd to 11th messages are put on the queue
        //12th message is sent to dead letter.
        for (int i = 0; i < 12; i++) {
            pingPongActor.tell("ping", mockReceiver.testActor());
        }
        mockReceiver.expectMsgClass(twentySeconds, DeadLetter.class);
    } finally {
        lock.unlock();
    }
    // After the lock is released the actor drains the queue; expect 11 "pong" replies.
    mockReceiver.receiveN(11, twentySeconds);
}
// NOTE(review): fragment from inside an actor's message handler — delayNanos, runAsync and
// timeToRun are defined outside this chunk, so it cannot compile standalone. Visible
// intent: re-schedule a RunAsync message to self after the computed nanosecond delay,
// using the actor context's dispatcher and no sender. Left byte-identical.
FiniteDuration delay = new FiniteDuration(delayNanos, TimeUnit.NANOSECONDS); RunAsync message = new RunAsync(runAsync.getRunnable(), timeToRun); getContext().system().scheduler().scheduleOnce(delay, getSelf(), message, getContext().dispatcher(), ActorRef.noSender());