/**
 * Builds a {@link Config} from {@code props} using the metrics configuration prefix, with a second
 * {@link Config} built from the shared Kafka configuration prefix as its fallback.
 *
 * @param props properties to convert
 * @return the metrics-prefixed config backed by the shared-Kafka-prefixed config
 */
public static Config getKafkaAndMetricsConfigFromProperties(Properties props) {
  // Primary view: whatever propertiesToConfig yields for the metrics prefix.
  Config metricsConfig =
      ConfigUtils.propertiesToConfig(props, Optional.of(ConfigurationKeys.METRICS_CONFIGURATIONS_PREFIX));
  // Secondary view: consulted only for paths the metrics view does not define.
  Config sharedKafkaConfig =
      ConfigUtils.propertiesToConfig(props, Optional.of(ConfigurationKeys.SHARED_KAFKA_CONFIG_PREFIX));
  return metricsConfig.withFallback(sharedKafkaConfig);
}
static public Consumer getKafkaConsumer(Config config) { List<String> brokers = ConfigUtils.getStringList(config, ConfigurationKeys.KAFKA_BROKERS); Properties props = new Properties(); props.put("bootstrap.servers", Joiner.on(",").join(brokers)); props.put("group.id", ConfigUtils.getString(config, ConfigurationKeys.JOB_NAME_KEY, StringUtils.EMPTY)); props.put("enable.auto.commit", "false"); Preconditions.checkArgument(config.hasPath(TOPIC_KEY_DESERIALIZER)); props.put("key.deserializer", config.getString(TOPIC_KEY_DESERIALIZER)); Preconditions.checkArgument(config.hasPath(TOPIC_VALUE_DESERIALIZER)); props.put("value.deserializer", config.getString(TOPIC_VALUE_DESERIALIZER)); // pass along any config scoped under source.kafka.config // one use case of this is to pass SSL configuration Config scopedConfig = ConfigUtils.getConfigOrEmpty(config, KAFKA_CONSUMER_CONFIG_PREFIX); props.putAll(ConfigUtils.configToProperties(scopedConfig)); Consumer consumer = null; try { consumer = new KafkaConsumer<>(props); } catch (Exception e) { LOG.error("Exception when creating Kafka consumer - {}", e); throw Throwables.propagate(e); } return consumer; }
/**
 * Loads the source settings from {@code config} unless the {@code configured} flag is already set,
 * then sets the flag.
 *
 * @param config configuration holding the {@code source.*} and {@code task.executionMode} settings
 */
private void configureIfNeeded(Config config) {
  if (configured.get()) {
    // Already initialised by an earlier call; nothing to do.
    return;
  }

  num_parallelism = ConfigUtils.getInt(config, "source.numParallelism", DEFAULT_NUM_PARALLELISM);
  namespace = ConfigUtils.getString(config, "source.namespace", DEFAULT_NAMESPACE);
  table = ConfigUtils.getString(config, "source.table", DEFAULT_TABLE);
  numRecordsPerExtract =
      ConfigUtils.getInt(config, "source.numRecordsPerExtract", DEFAULT_NUM_RECORDS_PER_EXTRACT);
  sleepTimePerRecord =
      ConfigUtils.getLong(config, "source.sleepTimePerRecordMillis", DEFAULT_SLEEP_TIME_PER_RECORD_MILLIS);

  String executionMode = ConfigUtils.getString(config, "task.executionMode", "BATCH");
  streaming = executionMode.equalsIgnoreCase("STREAMING");
  if (streaming) {
    // In streaming mode the configured per-extract record count is overridden with the largest int.
    numRecordsPerExtract = Integer.MAX_VALUE;
  }

  configured.set(true);
}
/**
 * Initialises the client: delegates to the superclass constructor, then reads the buffer size, client
 * name, fetch correlation id and the two retry counts from {@code config}, each with its default.
 *
 * @param config client configuration
 */
private Kafka08ConsumerClient(Config config) {
  super(config);

  // Buffer size and client identity.
  this.bufferSize =
      ConfigUtils.getInt(config, CONFIG_KAFKA_BUFFER_SIZE_BYTES, CONFIG_KAFKA_BUFFER_SIZE_BYTES_DEFAULT);
  this.clientName =
      ConfigUtils.getString(config, CONFIG_KAFKA_CLIENT_NAME, CONFIG_KAFKA_CLIENT_NAME_DEFAULT);

  // Fetch request settings.
  this.fetchCorrelationId = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID,
      CONFIG_KAFKA_FETCH_REQUEST_CORRELATION_ID_DEFAULT);
  this.fetchTopicRetries = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES,
      CONFIG_KAFKA_FETCH_TOPIC_NUM_TRIES_DEFAULT);
  this.fetchOffsetRetries = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES,
      CONFIG_KAFKA_FETCH_OFFSET_NUM_TRIES_DEFAULT);
}
/**
 * Reads the broker list and the socket/fetch settings from {@code config} and validates them.
 *
 * @param config client configuration
 * @throws IllegalArgumentException if no broker is configured, or if the fetch timeout is not strictly
 *         smaller than the socket timeout
 */
public AbstractBaseKafkaConsumerClient(Config config) {
  this.brokers = ConfigUtils.getStringList(config, ConfigurationKeys.KAFKA_BROKERS);
  if (this.brokers.isEmpty()) {
    throw new IllegalArgumentException("Need to specify at least one Kafka broker.");
  }

  this.socketTimeoutMillis =
      ConfigUtils.getInt(config, CONFIG_KAFKA_SOCKET_TIMEOUT_VALUE, CONFIG_KAFKA_SOCKET_TIMEOUT_VALUE_DEFAULT);
  this.fetchTimeoutMillis =
      ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_TIMEOUT_VALUE, CONFIG_KAFKA_FETCH_TIMEOUT_VALUE_DEFAULT);
  this.fetchMinBytes = ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_REQUEST_MIN_BYTES,
      CONFIG_KAFKA_FETCH_REQUEST_MIN_BYTES_DEFAULT);

  // The fetch timeout has to be strictly below the socket timeout.
  if (!(this.fetchTimeoutMillis < this.socketTimeoutMillis)) {
    throw new IllegalArgumentException("Kafka Source configuration error: FetchTimeout " + this.fetchTimeoutMillis
        + " must be smaller than SocketTimeout " + this.socketTimeoutMillis);
  }
}
/** * Create a work unit for each configuration defined or a single work unit if no configurations are defined * @param state see {@link org.apache.gobblin.configuration.SourceState} * @return list of workunits */ @Override public List<WorkUnit> getWorkunits(SourceState state) { List<WorkUnit> workUnits = Lists.newArrayList(); Config config = ConfigUtils.propertiesToConfig(state.getProperties()); Config sourceConfig = ConfigUtils.getConfigOrEmpty(config, DATASET_CLEANER_SOURCE_PREFIX); List<String> configurationNames = ConfigUtils.getStringList(config, DATASET_CLEANER_CONFIGURATIONS); // use a dummy configuration name if none set if (configurationNames.isEmpty()) { configurationNames = ImmutableList.of("DummyConfig"); } for (String configurationName: configurationNames) { WorkUnit workUnit = WorkUnit.createEmpty(); // specific configuration prefixed by the configuration name has precedence over the source specific configuration // and the source specific configuration has precedence over the general configuration Config wuConfig = ConfigUtils.getConfigOrEmpty(sourceConfig, configurationName).withFallback(sourceConfig) .withFallback(config); workUnit.setProps(ConfigUtils.configToProperties(wuConfig), new Properties()); TaskUtils.setTaskFactoryClass(workUnit, DatasetCleanerTaskFactory.class); workUnits.add(workUnit); } return workUnits; }
/**
 * Creates the dataset with a timestamp-based version finder and a version selection policy whose class
 * is read from the configuration (falling back to {@code DEFAULT_SELECTION_POLICY_CLASS}).
 *
 * @param key dataset key, passed to the superclass
 * @param entries state store entries, passed to the superclass
 * @param props properties from which the selection policy configuration is derived
 */
public TimeBasedDatasetStoreDataset(Key key, List<DatasetStateStoreEntryManager> entries, Properties props) {
  super(key, entries);
  this.versionFinder = new TimestampedDatasetStateStoreVersionFinder();

  Config fullConfig = ConfigUtils.propertiesToConfig(props);
  // The selection policy looks its settings up without the retention namespace, so expose the retention
  // sub-tree at the root and keep the full configuration behind it as a fallback.
  Config policyConfig = ConfigUtils
      .getConfigOrEmpty(fullConfig, ConfigurableCleanableDataset.RETENTION_CONFIGURATION_KEY)
      .withFallback(fullConfig);

  String policyClassName =
      ConfigUtils.getString(policyConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS);
  this.versionSelectionPolicy = createSelectionPolicy(policyClassName, policyConfig, props);
}
static Properties getProducerProperties(Properties props) { Config config = ConfigUtils.propertiesToConfig(props); // get the "writer.kafka.producerConfig" config for producer config to pass along to Kafka with a fallback to the // shared config that start with "gobblin.kafka.sharedConfig" Config producerConfig = ConfigUtils.getConfigOrEmpty(config, KAFKA_PRODUCER_CONFIG_PREFIX_NO_DOT).withFallback( ConfigUtils.getConfigOrEmpty(config, ConfigurationKeys.SHARED_KAFKA_CONFIG_PREFIX)); Properties producerProperties = ConfigUtils.configToProperties(producerConfig); // Provide default properties if not set from above setDefaultIfUnset(producerProperties, KEY_SERIALIZER_CONFIG, DEFAULT_KEY_SERIALIZER); setDefaultIfUnset(producerProperties, VALUE_SERIALIZER_CONFIG, DEFAULT_VALUE_SERIALIZER); setDefaultIfUnset(producerProperties, CLIENT_ID_CONFIG, CLIENT_ID_DEFAULT); setDefaultIfUnset(producerProperties, KAFKA_SCHEMA_REGISTRY_SWITCH_NAME, KAFKA_SCHEMA_REGISTRY_SWITCH_NAME_DEFAULT); return producerProperties; }
private ConversionConfig(Config config, Table table, String destinationFormat) { super(config, table); // Required this.destinationFormat = destinationFormat; // Optional this.destinationViewName = Optional.fromNullable(resolveTemplate(ConfigUtils.getString(config, DESTINATION_VIEW_KEY, null), table)); this.updateViewAlwaysEnabled = ConfigUtils.getBoolean(config, UPDATE_VIEW_ALWAYS_ENABLED, true); }
@Override public DataWriter build() throws IOException { State state = this.destination.getProperties(); Properties taskProps = state.getProperties(); Config config = ConfigUtils.propertiesToConfig(taskProps); CouchbaseEnvironment couchbaseEnvironment = CouchbaseEnvironmentFactory.getInstance(config); //TODO: Read config to decide whether to build a blocking writer or an async writer double failureAllowance = ConfigUtils.getDouble(config, CouchbaseWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_CONFIG, CouchbaseWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_DEFAULT) / 100.0; boolean retriesEnabled = ConfigUtils.getBoolean(config, CouchbaseWriterConfigurationKeys.RETRIES_ENABLED, CouchbaseWriterConfigurationKeys.RETRIES_ENABLED_DEFAULT); int maxRetries = ConfigUtils.getInt(config, CouchbaseWriterConfigurationKeys.MAX_RETRIES, CouchbaseWriterConfigurationKeys.MAX_RETRIES_DEFAULT); // build an async couchbase writer AsyncDataWriter couchbaseWriter = new CouchbaseWriter(couchbaseEnvironment, config); return AsyncWriterManager.builder() .asyncDataWriter(couchbaseWriter) .failureAllowanceRatio(failureAllowance) .retriesEnabled(retriesEnabled) .numRetries(maxRetries) .config(config) .build(); } }
/**
 * Looks up a string in {@code config}, substituting the empty string as the default.
 *
 * @param config configuration that may contain {@code path}
 * @param path key to look up
 * @return the result of {@code getString(config, path, "")}
 */
public static String emptyIfNotPresent(Config config, String path) {
  final String fallback = StringUtils.EMPTY;
  return getString(config, path, fallback);
}
/**
 * Converts a {@link Config} into {@link Properties}, keeping only entries that match a prefix.
 *
 * <p>Convenience overload that wraps {@code prefix} in an {@code Optional} and delegates.
 *
 * @param config the {@link Config} to convert
 * @param prefix only properties whose name starts with this prefix are returned
 * @return the resulting {@link Properties}
 */
public static Properties configToProperties(Config config, String prefix) {
  Optional<String> requiredPrefix = Optional.of(prefix);
  return configToProperties(config, requiredPrefix);
}
public AzkabanProjectConfig(JobSpec jobSpec) { // Extract config objects this.jobSpec = jobSpec; Config defaultConfig = ConfigFactory.load(ServiceAzkabanConfigKeys.DEFAULT_AZKABAN_PROJECT_CONFIG_FILE); Config config = jobSpec.getConfig().withFallback(defaultConfig); // Azkaban Infrastructure this.azkabanServerUrl = config.getString(ServiceAzkabanConfigKeys.AZKABAN_SERVER_URL_KEY); // Azkaban Project Metadata this.azkabanProjectName = constructProjectName(jobSpec, config); this.azkabanProjectDescription = config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_DESCRIPTION_KEY); this.azkabanProjectFlowName = config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_FLOW_NAME_KEY); this.azkabanGroupAdminUsers = ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_GROUP_ADMINS_KEY, ""); this.azkabanUserToProxy = Optional.ofNullable(ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_USER_TO_PROXY_KEY, null)); // Azkaban Project Zip this.azkabanZipJarNames = Optional.ofNullable(ConfigUtils.getStringList(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_NAMES_KEY)); this.azkabanZipJarUrlTemplate = Optional.ofNullable(ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_URL_TEMPLATE_KEY, null)); this.azkabanZipJarVersion = Optional.ofNullable(ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_VERSION_KEY, null)); if (config.hasPath(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY) && StringUtils.isNotBlank(config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY))) { this.azkabanZipAdditionalFiles = Optional.ofNullable( ConfigUtils.getStringList(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY)); } else { this.azkabanZipAdditionalFiles = Optional.empty(); } this.failIfJarNotFound = ConfigUtils.getBoolean(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_FAIL_IF_JARNOTFOUND_KEY, false); }
/**
 * Builds an {@link AsyncWriterManager} around the writer returned by {@code getAsyncDataWriter},
 * configured from the destination's properties. Retries are disabled and at most 10000 writes may be
 * outstanding.
 *
 * @return the configured writer manager
 * @throws IOException declared by the builder contract
 */
@Override
public DataWriter build() throws IOException {
  Properties taskProps = this.destination.getProperties().getProperties();
  Config config = ConfigUtils.propertiesToConfig(taskProps);

  long commitTimeoutMillis = ConfigUtils.getLong(config,
      BatchedEventhubDataWriter.COMMIT_TIMEOUT_MILLIS_CONFIG,
      BatchedEventhubDataWriter.COMMIT_TIMEOUT_MILLIS_DEFAULT);
  long commitStepWaitTimeMillis = ConfigUtils.getLong(config,
      BatchedEventhubDataWriter.COMMIT_STEP_WAIT_TIME_CONFIG,
      BatchedEventhubDataWriter.COMMIT_STEP_WAIT_TIME_DEFAULT);
  // The allowance is configured as a percentage; the manager takes a ratio.
  double failureAllowancePct = ConfigUtils.getDouble(config,
      BatchedEventhubDataWriter.FAILURE_ALLOWANCE_PCT_CONFIG,
      BatchedEventhubDataWriter.FAILURE_ALLOWANCE_PCT_DEFAULT);
  double failureAllowance = failureAllowancePct / 100.0;

  return AsyncWriterManager.builder()
      .config(config)
      .commitTimeoutMillis(commitTimeoutMillis)
      .commitStepWaitTimeInMillis(commitStepWaitTimeMillis)
      .failureAllowanceRatio(failureAllowance)
      .retriesEnabled(false)
      .asyncDataWriter(getAsyncDataWriter(taskProps))
      .maxOutstandingWrites(10000)
      .build();
}
}
/** * creates a new {@link BasicDataSource} * @param config the properties used for datasource instantiation * @return */ public static BasicDataSource newDataSource(Config config) { BasicDataSource basicDataSource = new BasicDataSource(); PasswordManager passwordManager = PasswordManager.getInstance(ConfigUtils.configToProperties(config)); basicDataSource.setDriverClassName(ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_DB_JDBC_DRIVER_KEY, ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER)); // MySQL server can timeout a connection so need to validate connections before use basicDataSource.setValidationQuery("select 1"); basicDataSource.setTestOnBorrow(true); basicDataSource.setDefaultAutoCommit(false); basicDataSource.setTimeBetweenEvictionRunsMillis(60000); basicDataSource.setUrl(config.getString(ConfigurationKeys.STATE_STORE_DB_URL_KEY)); basicDataSource.setUsername(passwordManager.readPassword( config.getString(ConfigurationKeys.STATE_STORE_DB_USER_KEY))); basicDataSource.setPassword(passwordManager.readPassword( config.getString(ConfigurationKeys.STATE_STORE_DB_PASSWORD_KEY))); basicDataSource.setMinEvictableIdleTimeMillis( ConfigUtils.getLong(config, ConfigurationKeys.STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME_KEY, ConfigurationKeys.DEFAULT_STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME)); return basicDataSource; }
/**
 * Connects to the Couchbase cluster and opens the configured bucket, using the password-taking overload
 * only when a non-empty password is configured.
 *
 * @param couchbaseEnvironment environment used to create the cluster connection
 * @param config writer configuration (bootstrap servers, bucket, password, operation timeout)
 */
public CouchbaseWriter(CouchbaseEnvironment couchbaseEnvironment, Config config) {
  List<String> hosts = ConfigUtils.getStringList(config, CouchbaseWriterConfigurationKeys.BOOTSTRAP_SERVERS);
  String password = ConfigUtils.getString(config, CouchbaseWriterConfigurationKeys.PASSWORD, "");
  String bucketName = ConfigUtils.getString(config, CouchbaseWriterConfigurationKeys.BUCKET,
      CouchbaseWriterConfigurationKeys.BUCKET_DEFAULT);

  _cluster = CouchbaseCluster.create(couchbaseEnvironment, hosts);

  // The bucket is opened with the tuple document transcoder in both cases.
  List<Transcoder<? extends Document, ?>> transcoders =
      Collections.<Transcoder<? extends Document, ?>>singletonList(_tupleDocumentTranscoder);
  if (password.isEmpty()) {
    _bucket = _cluster.openBucket(bucketName, transcoders);
  } else {
    _bucket = _cluster.openBucket(bucketName, password, transcoders);
  }

  _operationTimeout = ConfigUtils.getLong(config, CouchbaseWriterConfigurationKeys.OPERATION_TIMEOUT_MILLIS,
      CouchbaseWriterConfigurationKeys.OPERATION_TIMEOUT_DEFAULT);
  _operationTimeunit = TimeUnit.MILLISECONDS;
  _defaultWriteResponseMapper = new GenericWriteResponseWrapper<>();

  log.info("Couchbase writer configured with: hosts: {}, bucketName: {}, operationTimeoutInMillis: {}",
      hosts, bucketName, _operationTimeout);
}
/**
 * Derives an identifier for a data source from its configuration: the JDBC driver, the database URL and
 * the user name (as returned by the {@link PasswordManager}), joined by {@code "::"}.
 *
 * @param config configuration
 * @return a {@link String} to identify the data source
 */
public static String getDataSourceId(Config config) {
  PasswordManager passwordManager = PasswordManager.getInstance(ConfigUtils.configToProperties(config));

  String driver = ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_DB_JDBC_DRIVER_KEY,
      ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER);
  String url = config.getString(ConfigurationKeys.STATE_STORE_DB_URL_KEY);
  String user = passwordManager.readPassword(config.getString(ConfigurationKeys.STATE_STORE_DB_USER_KEY));

  return driver + "::" + url + "::" + user;
}
public GobblinHelixDistributeJobExecutionLauncher(Builder builder) throws Exception { if (builder.taskDriverHelixManager.isPresent()) { this.planningJobHelixManager = builder.taskDriverHelixManager.get(); } else { this.planningJobHelixManager = builder.jobHelixManager; } this.helixTaskDriver = new TaskDriver(this.planningJobHelixManager); this.sysProps = builder.sysProps; this.jobPlanningProps = builder.jobPlanningProps; this.jobSubmitted = false; Config combined = ConfigUtils.propertiesToConfig(jobPlanningProps) .withFallback(ConfigUtils.propertiesToConfig(sysProps)); this.workFlowExpiryTimeSeconds = ConfigUtils.getLong(combined, GobblinClusterConfigurationKeys.HELIX_WORKFLOW_EXPIRY_TIME_SECONDS, GobblinClusterConfigurationKeys.DEFAULT_HELIX_WORKFLOW_EXPIRY_TIME_SECONDS); this.planningJobLauncherMetrics = builder.planningJobLauncherMetrics; this.helixMetrics = builder.helixMetrics; this.jobsMapping = builder.jobsMapping; this.helixJobStopTimeoutSeconds = ConfigUtils.getLong(combined, GobblinClusterConfigurationKeys.HELIX_JOB_STOP_TIMEOUT_SECONDS, GobblinClusterConfigurationKeys.DEFAULT_HELIX_JOB_STOP_TIMEOUT_SECONDS); }
public StageableTableMetadata(Config config, @Nullable Table referenceTable) { Preconditions.checkArgument(config.hasPath(DESTINATION_TABLE_KEY), String.format("Key %s is not specified", DESTINATION_TABLE_KEY)); Preconditions.checkArgument(config.hasPath(DESTINATION_DB_KEY), String.format("Key %s is not specified", DESTINATION_DB_KEY)); Preconditions.checkArgument(config.hasPath(DESTINATION_DATA_PATH_KEY), String.format("Key %s is not specified", DESTINATION_DATA_PATH_KEY)); // Required this.destinationTableName = referenceTable == null ? config.getString(DESTINATION_TABLE_KEY) : HiveDataset.resolveTemplate(config.getString(DESTINATION_TABLE_KEY), referenceTable); this.destinationStagingTableName = String.format("%s_%s", this.destinationTableName, "staging"); // Fixed and non-configurable this.destinationDbName = referenceTable == null ? config.getString(DESTINATION_DB_KEY) : HiveDataset.resolveTemplate(config.getString(DESTINATION_DB_KEY), referenceTable); this.destinationDataPath = referenceTable == null ? config.getString(DESTINATION_DATA_PATH_KEY) : HiveDataset.resolveTemplate(config.getString(DESTINATION_DATA_PATH_KEY), referenceTable); // Optional this.destinationTableProperties = convertKeyValueListToProperties(ConfigUtils.getStringList(config, DESTINATION_TABLE_PROPERTIES_LIST_KEY)); this.clusterBy = ConfigUtils.getStringList(config, CLUSTER_BY_KEY); this.numBuckets = Optional.fromNullable(ConfigUtils.getInt(config, NUM_BUCKETS_KEY, null)); this.hiveRuntimeProperties = convertKeyValueListToProperties(ConfigUtils.getStringList(config, HIVE_RUNTIME_PROPERTIES_LIST_KEY)); this.evolutionEnabled = ConfigUtils.getBoolean(config, EVOLUTION_ENABLED, false); this.rowLimit = Optional.fromNullable(ConfigUtils.getInt(config, ROW_LIMIT_KEY, null)); this.sourceDataPathIdentifier = ConfigUtils.getStringList(config, SOURCE_DATA_PATH_IDENTIFIER_KEY); }
/**
 * Round-trips a set of properties through {@code propertiesToConfig} and {@code configToState} and checks
 * that every key comes back with its original value, including keys that are prefixes of other keys.
 */
@Test
public void testPropertiesToConfigToState() {
  // {key, value} pairs; "k3" is both a leaf and a prefix of "k3.kk1", which is itself a prefix of
  // "k3.kk1.kkk1".
  String[][] entries = {
      {"k1.kk1", "v1"},
      {"k1.kk2", "v2"},
      {"k2.kk", "v3"},
      {"k3", "v4"},
      {"k3.kk1", "v5"},
      {"k3.kk1.kkk1", "v6"},
  };

  Properties properties = new Properties();
  for (String[] entry : entries) {
    properties.setProperty(entry[0], entry[1]);
  }

  State state = ConfigUtils.configToState(ConfigUtils.propertiesToConfig(properties));

  for (String[] entry : entries) {
    Assert.assertEquals(state.getProp(entry[0]), entry[1]);
  }
}