/**
 * Constructs a new {@link JvmPauseMonitor}.
 */
public JvmPauseMonitor() {
  mGcSleepIntervalMs = Configuration.getMs(PropertyKey.JVM_MONITOR_SLEEP_INTERVAL_MS);
  mWarnThresholdMs = Configuration.getMs(PropertyKey.JVM_MONITOR_WARN_THRESHOLD_MS);
  mInfoThresholdMs = Configuration.getMs(PropertyKey.JVM_MONITOR_INFO_THRESHOLD_MS);
}
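These three values drive the classic pause-detection loop: sleep for the configured interval, measure how much longer the sleep actually took, and log when the overshoot crosses a threshold. Below is a minimal sketch of such a loop, assuming the fields from the constructor above; it illustrates the usual pattern, not Alluxio's actual implementation.

// Illustrative monitor loop; the field names mirror the constructor above,
// but the body is an assumed sketch of the standard JVM pause monitor pattern.
private void runMonitorLoop() throws InterruptedException {
  while (!Thread.currentThread().isInterrupted()) {
    long startMs = System.currentTimeMillis();
    Thread.sleep(mGcSleepIntervalMs);
    // Any time beyond the requested sleep is attributed to JVM pauses (e.g. GC).
    long extraSleepMs = System.currentTimeMillis() - startMs - mGcSleepIntervalMs;
    if (extraSleepMs > mWarnThresholdMs) {
      LOG.warn("JVM paused for approximately {}ms", extraSleepMs);
    } else if (extraSleepMs > mInfoThresholdMs) {
      LOG.info("JVM paused for approximately {}ms", extraSleepMs);
    }
  }
}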
protected CommonOptions() {
  mSyncIntervalMs = Configuration.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL);
}
/**
 * Returns a new client for the zookeeper connection. The client is already started before
 * returning.
 *
 * @return a new {@link CuratorFramework} client to use for leader selection
 */
private CuratorFramework getNewCuratorClient() {
  CuratorFramework client = CuratorFrameworkFactory.newClient(mZookeeperAddress,
      (int) Configuration.getMs(PropertyKey.ZOOKEEPER_SESSION_TIMEOUT),
      (int) Configuration.getMs(PropertyKey.ZOOKEEPER_CONNECTION_TIMEOUT),
      new ExponentialBackoffRetry(Constants.SECOND_MS, 3));
  client.start();

  // Sometimes, if the master crashes and restarts too quickly (faster than the zookeeper
  // timeout), zookeeper thinks the new client is still an old one. In order to ensure a clean
  // state, explicitly close the "old" client and recreate a new one.
  client.close();

  client = CuratorFrameworkFactory.newClient(mZookeeperAddress,
      (int) Configuration.getMs(PropertyKey.ZOOKEEPER_SESSION_TIMEOUT),
      (int) Configuration.getMs(PropertyKey.ZOOKEEPER_CONNECTION_TIMEOUT),
      new ExponentialBackoffRetry(Constants.SECOND_MS, 3));
  client.start();
  return client;
}
/**
 * Creates an {@link AsyncJournalWriter}.
 *
 * @param journalWriter the {@link JournalWriter} to use for writing
 */
public AsyncJournalWriter(JournalWriter journalWriter) {
  mJournalWriter = Preconditions.checkNotNull(journalWriter, "journalWriter");
  mQueue = new ConcurrentLinkedQueue<>();
  mCounter = new AtomicLong(0);
  mFlushCounter = new AtomicLong(0);
  mWriteCounter = new AtomicLong(0);
  // Convert milliseconds to nanoseconds.
  mFlushBatchTimeNs =
      1000000L * Configuration.getMs(PropertyKey.MASTER_JOURNAL_FLUSH_BATCH_TIME_MS);
}
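A readability note on the conversion above: java.util.concurrent.TimeUnit expresses the same computation without the magic constant. An equivalent one-liner, offered only as a suggestion, not as the code Alluxio actually uses:

// Equivalent to 1000000L * ms, with the unit conversion made explicit.
mFlushBatchTimeNs = TimeUnit.MILLISECONDS.toNanos(
    Configuration.getMs(PropertyKey.MASTER_JOURNAL_FLUSH_BATCH_TIME_MS));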
/**
 * Gets a local hostname for the host this JVM is running on.
 *
 * @return the local hostname, which is not based on a loopback IP address
 */
public static synchronized String getLocalHostName() {
  if (sLocalHost != null) {
    return sLocalHost;
  }
  int hostResolutionTimeout =
      (int) Configuration.getMs(PropertyKey.NETWORK_HOST_RESOLUTION_TIMEOUT_MS);
  return getLocalHostName(hostResolutionTimeout);
}
/**
 * Gets a local IP address for the host this JVM is running on.
 *
 * @return the local IP address, which is not a loopback address and is reachable
 */
public static synchronized String getLocalIpAddress() {
  if (sLocalIP != null) {
    return sLocalIP;
  }
  int hostResolutionTimeout =
      (int) Configuration.getMs(PropertyKey.NETWORK_HOST_RESOLUTION_TIMEOUT_MS);
  return getLocalIpAddress(hostResolutionTimeout);
}
/**
 * Creates the {@link UfsJournalGarbageCollector} instance.
 *
 * @param journal the UFS journal handle
 */
UfsJournalGarbageCollector(UfsJournal journal) {
  mJournal = Preconditions.checkNotNull(journal, "journal");
  mUfs = mJournal.getUfs();
  mGc = mExecutor.scheduleAtFixedRate(new Runnable() {
    @Override
    public void run() {
      gc();
    }
  }, Constants.SECOND_MS, Configuration.getMs(PropertyKey.MASTER_JOURNAL_GC_PERIOD_MS),
      TimeUnit.MILLISECONDS);
}
@Override
public void notifyRpcServerStarted() {
  // Updates the start time when the Alluxio master begins waiting for workers to register.
  long waitTime = Configuration.getMs(PropertyKey.MASTER_WORKER_CONNECT_WAIT_TIME);
  LOG.info("Rpc server started, waiting {}ms for workers to register", waitTime);
  mWorkerConnectWaitStartTimeMs.set(mClock.millis(), true);
}
/**
 * Constructor with a specified {@link GroupMappingService}. Initializes the cache if enabled.
 *
 * @param service the group mapping service
 */
public CachedGroupMapping(GroupMappingService service) {
  mService = service;
  long timeoutMs = Configuration.getMs(PropertyKey.SECURITY_GROUP_MAPPING_CACHE_TIMEOUT_MS);
  mCacheEnabled = timeoutMs > 0;
  if (mCacheEnabled) {
    mCache = CacheBuilder.newBuilder()
        // The maximum number of entries the cache may contain.
        .maximumSize(MAXSIZE)
        // An entry becomes eligible for automatic refresh once the specified duration has
        // elapsed since it was last modified.
        .refreshAfterWrite(timeoutMs, TimeUnit.MILLISECONDS)
        // An entry is automatically removed from the cache once the specified duration (ten
        // times the refresh timeout) has elapsed since it was last modified.
        .expireAfterWrite(10 * timeoutMs, TimeUnit.MILLISECONDS)
        .build(new GroupMappingCacheLoader());
  }
}
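The builder above pairs a short refreshAfterWrite with a tenfold longer expireAfterWrite: a lookup on a refresh-eligible entry triggers a reload while other readers keep getting the old value, and only entries untouched for ten refresh periods are evicted outright. A standalone Guava sketch of the same pattern, assuming the usual com.google.common.cache imports; the key/value types and the lookupGroups loader are hypothetical, not Alluxio's.

// Hypothetical standalone example of the refresh-then-expire pattern.
LoadingCache<String, List<String>> cache = CacheBuilder.newBuilder()
    .maximumSize(1000)
    // After 1 minute, a lookup triggers a reload; until it completes,
    // reads keep returning the previously cached value.
    .refreshAfterWrite(1, TimeUnit.MINUTES)
    // After 10 minutes without a write, the entry is dropped entirely and
    // the next lookup blocks on a fresh load.
    .expireAfterWrite(10, TimeUnit.MINUTES)
    .build(new CacheLoader<String, List<String>>() {
      @Override
      public List<String> load(String user) {
        return lookupGroups(user); // hypothetical expensive group lookup
      }
    });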
/**
 * Creates a new instance of {@link UfsJournalCheckpointThread}.
 *
 * @param master the master to apply the journal entries to
 * @param journal the journal
 */
public UfsJournalCheckpointThread(JournalEntryStateMachine master, UfsJournal journal) {
  mMaster = Preconditions.checkNotNull(master, "master");
  mJournal = Preconditions.checkNotNull(journal, "journal");
  mShutdownQuietWaitTimeMs = journal.getQuietPeriodMs();
  mJournalCheckpointSleepTimeMs =
      (int) Configuration.getMs(PropertyKey.MASTER_JOURNAL_TAILER_SLEEP_TIME_MS);
  mJournalReader = new UfsJournalReader(mJournal, 0, false);
  mCheckpointPeriodEntries =
      Configuration.getLong(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES);
}
@Override
public void init() throws ServletException {
  super.init();
  getServletContext().setAttribute(ALLUXIO_PROXY_SERVLET_RESOURCE_KEY, proxyProcess);
  getServletContext().setAttribute(FILE_SYSTEM_SERVLET_RESOURCE_KEY, FileSystem.Factory.get());
  getServletContext().setAttribute(STREAM_CACHE_SERVLET_RESOURCE_KEY,
      new StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS)));
}
@Override
public InputStream open(String path, OpenOptions options) throws IOException {
  IOException thrownException = null;
  RetryPolicy retryPolicy = new ExponentialBackoffRetry(
      (int) Configuration.getMs(PropertyKey.UNDERFS_OBJECT_STORE_READ_RETRY_BASE_SLEEP_MS),
      (int) Configuration.getMs(PropertyKey.UNDERFS_OBJECT_STORE_READ_RETRY_MAX_SLEEP_MS),
      Configuration.getInt(PropertyKey.UNDERFS_OBJECT_STORE_READ_RETRY_MAX_NUM));
  while (retryPolicy.attempt()) {
    try {
      return openObject(stripPrefixIfPresent(path), options);
    } catch (IOException e) {
      LOG.warn("Attempt {} to open {} failed with exception: {}", retryPolicy.getAttemptCount(),
          path, e.getMessage());
      thrownException = e;
    }
  }
  throw thrownException;
}
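The attempt()/getAttemptCount() loop above is a reusable shape: the policy owns the sleeping and counting while the caller keeps the last failure to rethrow. A minimal generic variant, assuming RetryPolicy.attempt() sleeps between tries and returns false once retries are exhausted; the callWithRetry helper is hypothetical and not part of Alluxio.

// Hypothetical helper showing the same retry shape with a Callable.
<T> T callWithRetry(java.util.concurrent.Callable<T> action, RetryPolicy policy)
    throws IOException {
  IOException last = null;
  while (policy.attempt()) {
    try {
      return action.call();
    } catch (IOException e) {
      last = e; // remember the most recent failure so it can be rethrown
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  // Guard against a policy that never grants an attempt.
  throw last != null ? last : new IOException("retry budget exhausted before first attempt");
}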
@Override
public void start(Boolean isLeader) throws IOException {
  super.start(isLeader);
  if (isLeader) {
    mLostWorkerDetectionService = getExecutorService().submit(new HeartbeatThread(
        HeartbeatContext.MASTER_LOST_WORKER_DETECTION,
        new LostWorkerDetectionHeartbeatExecutor(),
        (int) Configuration.getMs(PropertyKey.MASTER_WORKER_HEARTBEAT_INTERVAL)));
  }
}
/**
 * Garbage collects a file if necessary.
 *
 * @param file the file
 * @param checkpointSequenceNumber the first sequence number that has not been checkpointed
 */
private void gcFileIfStale(UfsJournalFile file, long checkpointSequenceNumber) {
  if (file.getEnd() > checkpointSequenceNumber && !file.isTmpCheckpoint()) {
    return;
  }

  long lastModifiedTimeMs;
  try {
    lastModifiedTimeMs = mUfs.getFileStatus(file.getLocation().toString()).getLastModifiedTime();
  } catch (IOException e) {
    LOG.warn("Failed to get the last modified time for {}.", file.getLocation());
    return;
  }

  long thresholdMs = file.isTmpCheckpoint()
      ? Configuration.getMs(PropertyKey.MASTER_JOURNAL_TEMPORARY_FILE_GC_THRESHOLD_MS)
      : Configuration.getMs(PropertyKey.MASTER_JOURNAL_GC_THRESHOLD_MS);
  if (System.currentTimeMillis() - lastModifiedTimeMs > thresholdMs) {
    deleteNoException(file.getLocation());
  }
}
@Override
public void start(Boolean isLeader) throws IOException {
  super.start(isLeader);
  if (isLeader) {
    getExecutorService().submit(new HeartbeatThread(HeartbeatContext.MASTER_CHECKPOINT_SCHEDULING,
        new CheckpointSchedulingExecutor(this, mFileSystemMaster),
        (int) Configuration.getMs(PropertyKey.MASTER_LINEAGE_CHECKPOINT_INTERVAL_MS)));
    getExecutorService().submit(new HeartbeatThread(HeartbeatContext.MASTER_FILE_RECOMPUTATION,
        new RecomputeExecutor(new RecomputePlanner(mLineageStore, mFileSystemMaster),
            mFileSystemMaster),
        (int) Configuration.getMs(PropertyKey.MASTER_LINEAGE_RECOMPUTE_INTERVAL_MS)));
  }
}
@Override
public boolean isInSafeMode() {
  // Bails out early before expensive clock checks.
  if (!mWorkerConnectWaitStartTimeMs.isMarked()) {
    return false;
  }
  Long startTime = mWorkerConnectWaitStartTimeMs.getReference();
  if (startTime == null) {
    // The master has not started waiting for workers yet.
    return true;
  }
  // Lazily updates the safe mode state upon inquiry.
  long waitTime = Configuration.getMs(PropertyKey.MASTER_WORKER_CONNECT_WAIT_TIME);
  if (mClock.millis() - startTime < waitTime) {
    return true;
  }
  if (mWorkerConnectWaitStartTimeMs.compareAndSet(startTime, null, true, false)) {
    LOG.debug("Exiting safe mode.");
  }
  return mWorkerConnectWaitStartTimeMs.isMarked();
}
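isInSafeMode() packs two pieces of state into one AtomicMarkableReference: the wait-start timestamp (the reference) and the safe-mode flag (the mark), so both can be read and cleared atomically. A standalone sketch of the transitions it relies on, using only the standard java.util.concurrent.atomic API; the class and variable names are illustrative.

import java.util.concurrent.atomic.AtomicMarkableReference;

public class SafeModeFlagDemo {
  public static void main(String[] args) {
    // Pair of (start timestamp, inSafeMode flag), always updated together.
    AtomicMarkableReference<Long> state = new AtomicMarkableReference<>(null, true);

    // Entering the wait period: record the start time, keep the mark set.
    state.set(System.currentTimeMillis(), true);

    // Leaving safe mode: only one thread can win this CAS, which clears the
    // reference and the mark in a single atomic step.
    Long observedStart = state.getReference();
    boolean exited = state.compareAndSet(observedStart, null, true, false);
    System.out.println("exited=" + exited + ", inSafeMode=" + state.isMarked());
  }
}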
/**
 * Creates a new instance of {@link MetricsMaster}.
 *
 * @param masterContext the context for the metrics master
 * @param clock the clock to use for determining the time
 * @param executorServiceFactory a factory for creating the executor service to use for running
 *        maintenance threads
 */
DefaultMetricsMaster(MasterContext masterContext, Clock clock,
    ExecutorServiceFactory executorServiceFactory) {
  super(masterContext, clock, executorServiceFactory);
  mMetricsStore = new MetricsStore();
  registerAggregators();
  mClusterMetricsUpdater = new HeartbeatThread(HeartbeatContext.MASTER_CLUSTER_METRICS_UPDATER,
      new ClusterMetricsUpdater(),
      Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL));
}
@Override
public void heartbeat() {
  long masterTimeoutMs = Configuration.getMs(PropertyKey.MASTER_HEARTBEAT_TIMEOUT);
  for (MasterInfo master : mMasters) {
    synchronized (master) {
      final long lastUpdate = mClock.millis() - master.getLastUpdatedTimeMs();
      if (lastUpdate > masterTimeoutMs) {
        LOG.error("The master {}({}) timed out after {}ms without a heartbeat!", master.getId(),
            master.getAddress(), lastUpdate);
        mLostMasters.add(master);
        mMasters.remove(master);
        mMasterConfigStore.handleNodeLost(master.getAddress());
      }
    }
  }
}
@Override
public void heartbeat() {
  long masterWorkerTimeoutMs = Configuration.getMs(PropertyKey.MASTER_WORKER_TIMEOUT_MS);
  for (MasterWorkerInfo worker : mWorkers) {
    synchronized (worker) {
      final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs();
      if (lastUpdate > masterWorkerTimeoutMs) {
        LOG.error("The worker {}({}) timed out after {}ms without a heartbeat!", worker.getId(),
            worker.getWorkerAddress(), lastUpdate);
        mLostWorkers.add(worker);
        mWorkers.remove(worker);
        WorkerNetAddress workerAddress = worker.getWorkerAddress();
        for (Consumer<Address> function : mWorkerLostListeners) {
          function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort()));
        }
        processWorkerRemovedBlocks(worker, worker.getBlocks());
      }
    }
  }
}