public HiddenFilterSelectionPolicy(Config config) {
  if (config.hasPath(HIDDEN_FILTER_HIDDEN_FILE_PREFIX_KEY)) {
    this.hiddenFilePrefixes = ConfigUtils.getStringList(config, HIDDEN_FILTER_HIDDEN_FILE_PREFIX_KEY);
  } else {
    this.hiddenFilePrefixes = Arrays.asList(DEFAULT_HIDDEN_FILE_PREFIXES);
  }
}
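// A minimal usage sketch of the fallback above, assuming (hypothetically) that
// HIDDEN_FILTER_HIDDEN_FILE_PREFIX_KEY resolves to "selection.hiddenFilter.hiddenFilePrefixes";
// the real key string may differ.
Config withPrefixes = ConfigFactory.parseMap(
    ImmutableMap.of("selection.hiddenFilter.hiddenFilePrefixes", "_,."));
new HiddenFilterSelectionPolicy(withPrefixes); // hiddenFilePrefixes == ["_", "."]
new HiddenFilterSelectionPolicy(ConfigFactory.empty()); // falls back to DEFAULT_HIDDEN_FILE_PREFIXES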
/**
 * Retrieves the logical names of all the {@link org.apache.gobblin.runtime.api.SpecExecutor}s
 * for this edge and returns the corresponding SpecExecutors from the {@link TopologySpec} map.
 * @param edgeConfig config containing the logical names of SpecExecutors for this edge.
 * @return a list of {@link SpecExecutor}s for this edge.
 */
private List<SpecExecutor> getSpecExecutors(Config edgeConfig) throws URISyntaxException {
  //Get the logical names of SpecExecutors where the FlowEdge can be executed.
  List<String> specExecutorNames =
      ConfigUtils.getStringList(edgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY);
  //Load all the SpecExecutor configurations for this FlowEdge from the SpecExecutor Catalog.
  List<SpecExecutor> specExecutors = new ArrayList<>();
  for (String specExecutorName : specExecutorNames) {
    URI specExecutorUri = new URI(specExecutorName);
    specExecutors.add(this.topologySpecMap.get(specExecutorUri).getSpecExecutor());
  }
  return specExecutors;
}
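// Hedged sketch of the edge config this method consumes. The literal path and the executor
// URIs are illustrative assumptions; the real path is whatever
// FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY names.
Config edgeConfig = ConfigFactory.parseMap(ImmutableMap.of(
    "specExecutors", "https://executor-1.example.com:9000,https://executor-2.example.com:9000"));
// getSpecExecutors(edgeConfig) parses each entry as a URI, looks it up in this.topologySpecMap,
// and returns the SpecExecutor of each matching TopologySpec.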
@SuppressWarnings("unchecked") public CombineSelectionPolicy(Config config, Properties jobProps) throws IOException { Preconditions.checkArgument(config.hasPath(VERSION_SELECTION_POLICIES_PREFIX), "Combine operation not specified."); ImmutableList.Builder<VersionSelectionPolicy<DatasetVersion>> builder = ImmutableList.builder(); for (String combineClassName : ConfigUtils.getStringList(config, VERSION_SELECTION_POLICIES_PREFIX)) { try { builder.add((VersionSelectionPolicy<DatasetVersion>) GobblinConstructorUtils .invokeFirstConstructor(Class.forName(combineClassName), ImmutableList.<Object>of(config), ImmutableList.<Object>of(jobProps))); } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException | ClassNotFoundException e) { throw new IllegalArgumentException(e); } } this.selectionPolicies = builder.build(); if (this.selectionPolicies.size() == 0) { throw new IOException("No selection policies specified for " + CombineSelectionPolicy.class.getCanonicalName()); } this.combineOperation = CombineOperation.valueOf(config.getString(VERSION_SELECTION_COMBINE_OPERATION).toUpperCase()); }
public static Consumer getKafkaConsumer(Config config) {
  List<String> brokers = ConfigUtils.getStringList(config, ConfigurationKeys.KAFKA_BROKERS);
  Properties props = new Properties();
  props.put("bootstrap.servers", Joiner.on(",").join(brokers));
  props.put("group.id", ConfigUtils.getString(config, ConfigurationKeys.JOB_NAME_KEY, StringUtils.EMPTY));
  props.put("enable.auto.commit", "false");
  Preconditions.checkArgument(config.hasPath(TOPIC_KEY_DESERIALIZER));
  props.put("key.deserializer", config.getString(TOPIC_KEY_DESERIALIZER));
  Preconditions.checkArgument(config.hasPath(TOPIC_VALUE_DESERIALIZER));
  props.put("value.deserializer", config.getString(TOPIC_VALUE_DESERIALIZER));

  // Pass along any config scoped under source.kafka.config;
  // one use case of this is to pass SSL configuration.
  Config scopedConfig = ConfigUtils.getConfigOrEmpty(config, KAFKA_CONSUMER_CONFIG_PREFIX);
  props.putAll(ConfigUtils.configToProperties(scopedConfig));
  Consumer consumer = null;
  try {
    consumer = new KafkaConsumer<>(props);
  } catch (Exception e) {
    // Pass the throwable as the dedicated last argument (no "{}" placeholder),
    // so SLF4J logs the full stack trace instead of only e.toString().
    LOG.error("Exception when creating Kafka consumer", e);
    throw Throwables.propagate(e);
  }
  return consumer;
}
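// Hedged usage sketch. The two deserializer paths are hypothetical stand-ins for whatever
// TOPIC_KEY_DESERIALIZER and TOPIC_VALUE_DESERIALIZER resolve to; the deserializer classes
// themselves are standard Kafka ones.
Config consumerConfig = ConfigFactory.parseMap(ImmutableMap.of(
    ConfigurationKeys.KAFKA_BROKERS, "broker-1:9092,broker-2:9092",
    "source.kafka.keyDeserializer", "org.apache.kafka.common.serialization.StringDeserializer",
    "source.kafka.valueDeserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"));
Consumer consumer = getKafkaConsumer(consumerConfig);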
public StageableTableMetadata(Config config, @Nullable Table referenceTable) {
  Preconditions.checkArgument(config.hasPath(DESTINATION_TABLE_KEY),
      String.format("Key %s is not specified", DESTINATION_TABLE_KEY));
  Preconditions.checkArgument(config.hasPath(DESTINATION_DB_KEY),
      String.format("Key %s is not specified", DESTINATION_DB_KEY));
  Preconditions.checkArgument(config.hasPath(DESTINATION_DATA_PATH_KEY),
      String.format("Key %s is not specified", DESTINATION_DATA_PATH_KEY));

  // Required
  this.destinationTableName = referenceTable == null ? config.getString(DESTINATION_TABLE_KEY)
      : HiveDataset.resolveTemplate(config.getString(DESTINATION_TABLE_KEY), referenceTable);
  this.destinationStagingTableName = String.format("%s_%s", this.destinationTableName, "staging"); // Fixed and non-configurable
  this.destinationDbName = referenceTable == null ? config.getString(DESTINATION_DB_KEY)
      : HiveDataset.resolveTemplate(config.getString(DESTINATION_DB_KEY), referenceTable);
  this.destinationDataPath = referenceTable == null ? config.getString(DESTINATION_DATA_PATH_KEY)
      : HiveDataset.resolveTemplate(config.getString(DESTINATION_DATA_PATH_KEY), referenceTable);

  // Optional
  this.destinationTableProperties =
      convertKeyValueListToProperties(ConfigUtils.getStringList(config, DESTINATION_TABLE_PROPERTIES_LIST_KEY));
  this.clusterBy = ConfigUtils.getStringList(config, CLUSTER_BY_KEY);
  this.numBuckets = Optional.fromNullable(ConfigUtils.getInt(config, NUM_BUCKETS_KEY, null));
  this.hiveRuntimeProperties =
      convertKeyValueListToProperties(ConfigUtils.getStringList(config, HIVE_RUNTIME_PROPERTIES_LIST_KEY));
  this.evolutionEnabled = ConfigUtils.getBoolean(config, EVOLUTION_ENABLED, false);
  this.rowLimit = Optional.fromNullable(ConfigUtils.getInt(config, ROW_LIMIT_KEY, null));
  this.sourceDataPathIdentifier = ConfigUtils.getStringList(config, SOURCE_DATA_PATH_IDENTIFIER_KEY);
}
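// convertKeyValueListToProperties is referenced above but not shown; a plausible sketch,
// assuming the configured list alternates keys and values (e.g. "k1,v1,k2,v2").
// The real helper may differ.
private static Properties convertKeyValueListToProperties(List<String> keyValueList) {
  Preconditions.checkArgument(keyValueList.size() % 2 == 0,
      "Key-value list must have an even number of entries: " + keyValueList);
  Properties properties = new Properties();
  for (int i = 0; i < keyValueList.size(); i += 2) {
    properties.setProperty(keyValueList.get(i), keyValueList.get(i + 1));
  }
  return properties;
}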
public AbstractBaseKafkaConsumerClient(Config config) {
  this.brokers = ConfigUtils.getStringList(config, ConfigurationKeys.KAFKA_BROKERS);
  if (this.brokers.isEmpty()) {
    throw new IllegalArgumentException("Need to specify at least one Kafka broker.");
  }

  this.socketTimeoutMillis =
      ConfigUtils.getInt(config, CONFIG_KAFKA_SOCKET_TIMEOUT_VALUE, CONFIG_KAFKA_SOCKET_TIMEOUT_VALUE_DEFAULT);
  this.fetchTimeoutMillis =
      ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_TIMEOUT_VALUE, CONFIG_KAFKA_FETCH_TIMEOUT_VALUE_DEFAULT);
  this.fetchMinBytes =
      ConfigUtils.getInt(config, CONFIG_KAFKA_FETCH_REQUEST_MIN_BYTES, CONFIG_KAFKA_FETCH_REQUEST_MIN_BYTES_DEFAULT);

  Preconditions.checkArgument((this.fetchTimeoutMillis < this.socketTimeoutMillis),
      "Kafka Source configuration error: FetchTimeout " + this.fetchTimeoutMillis
          + " must be smaller than SocketTimeout " + this.socketTimeoutMillis);
}
public AzkabanProjectConfig(JobSpec jobSpec) {
  // Extract config objects
  this.jobSpec = jobSpec;
  Config defaultConfig = ConfigFactory.load(ServiceAzkabanConfigKeys.DEFAULT_AZKABAN_PROJECT_CONFIG_FILE);
  Config config = jobSpec.getConfig().withFallback(defaultConfig);

  // Azkaban Infrastructure
  this.azkabanServerUrl = config.getString(ServiceAzkabanConfigKeys.AZKABAN_SERVER_URL_KEY);

  // Azkaban Project Metadata
  this.azkabanProjectName = constructProjectName(jobSpec, config);
  this.azkabanProjectDescription = config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_DESCRIPTION_KEY);
  this.azkabanProjectFlowName = config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_FLOW_NAME_KEY);
  this.azkabanGroupAdminUsers =
      ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_GROUP_ADMINS_KEY, "");
  this.azkabanUserToProxy = Optional.ofNullable(
      ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_USER_TO_PROXY_KEY, null));

  // Azkaban Project Zip
  this.azkabanZipJarNames = Optional.ofNullable(
      ConfigUtils.getStringList(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_NAMES_KEY));
  this.azkabanZipJarUrlTemplate = Optional.ofNullable(
      ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_URL_TEMPLATE_KEY, null));
  this.azkabanZipJarVersion = Optional.ofNullable(
      ConfigUtils.getString(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_JAR_VERSION_KEY, null));
  if (config.hasPath(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY)
      && StringUtils.isNotBlank(config.getString(ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY))) {
    this.azkabanZipAdditionalFiles = Optional.ofNullable(
        ConfigUtils.getStringList(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_ADDITIONAL_FILE_URLS_KEY));
  } else {
    this.azkabanZipAdditionalFiles = Optional.empty();
  }
  this.failIfJarNotFound =
      ConfigUtils.getBoolean(config, ServiceAzkabanConfigKeys.AZKABAN_PROJECT_ZIP_FAIL_IF_JARNOTFOUND_KEY, false);
}
this.destFormats = Sets.newHashSet(ConfigUtils.getStringList(this.datasetConfig, DESTINATION_CONVERSION_FORMATS_KEY));
List<String> entryList = ConfigUtils.getStringList(config, FIELDS_RENAME_MAP);
this.fieldsRenameMap = HashBiMap.create();
for (String entry : entryList) {

this.mapFieldName = mapFieldNames.get(0);

for (String fieldToFlatten : ConfigUtils.getStringList(config, FIELDS_TO_FLATTEN)) {
  String newFieldName = this.fieldsRenameMap.containsKey(fieldToFlatten)
      ? this.fieldsRenameMap.get(fieldToFlatten) : fieldToFlatten;
super(actionConfig, fs, jobConfig);
this.embeddedAccessControlActions = Lists.newArrayList();
for (String policy : ConfigUtils.getStringList(actionConfig, POLICIES_KEY)) {
  Preconditions.checkArgument(
      actionConfig.hasPath(policy),
/**
 * A Helix participant cannot pre-configure tags before it connects to ZK, so this method can only be invoked
 * after {@link HelixManager#connect()}. This still works because tagged jobs won't be sent to a non-tagged
 * instance: a job with EXAMPLE_INSTANCE_TAG remains in ZK until an instance with EXAMPLE_INSTANCE_TAG is found.
 */
private void addInstanceTags() {
  List<String> tags = ConfigUtils.getStringList(this.config, GobblinClusterConfigurationKeys.HELIX_INSTANCE_TAGS_KEY);
  HelixManager receiverManager = getReceiverManager();
  if (receiverManager.isConnected()) {
    if (!tags.isEmpty()) {
      logger.info("Adding tags binding " + tags);
      // Note: getClusterManagmentTool (sic) is the Helix API's actual method name.
      tags.forEach(tag -> receiverManager.getClusterManagmentTool()
          .addInstanceTag(this.clusterName, this.helixInstanceName, tag));
      logger.info("Actual tags binding " + receiverManager.getClusterManagmentTool()
          .getInstanceConfig(this.clusterName, this.helixInstanceName).getTags());
    }
  }
}
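// Hedged sketch of the tag configuration read above; the literal path stands in for
// GobblinClusterConfigurationKeys.HELIX_INSTANCE_TAGS_KEY and may not match the real constant.
Config tagConfig = ConfigFactory.parseMap(
    ImmutableMap.of("gobblin.cluster.helixInstanceTags", "tagA,tagB"));
// ConfigUtils.getStringList yields ["tagA", "tagB"] and each tag is bound via addInstanceTag;
// a missing or empty key means the instance is left untagged.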
} else {
  List<String> hosts = ConfigUtils.getStringList(config, ElasticsearchWriterConfigurationKeys.ELASTICSEARCH_WRITER_HOSTS);
List<String> updatedDependenciesList = new ArrayList<>();
if (jobSpec.getConfig().hasPath(ConfigurationKeys.JOB_DEPENDENCIES)) {
  for (String dependency : ConfigUtils.getStringList(jobSpec.getConfig(), ConfigurationKeys.JOB_DEPENDENCIES)) {
    if (!templateToJobNameMap.containsKey(dependency)) {
/**
 * Create a work unit for each configuration defined, or a single work unit if no configurations are defined.
 * @param state see {@link org.apache.gobblin.configuration.SourceState}
 * @return list of workunits
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = Lists.newArrayList();
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  Config sourceConfig = ConfigUtils.getConfigOrEmpty(config, DATASET_CLEANER_SOURCE_PREFIX);
  List<String> configurationNames = ConfigUtils.getStringList(config, DATASET_CLEANER_CONFIGURATIONS);

  // Use a dummy configuration name if none is set.
  if (configurationNames.isEmpty()) {
    configurationNames = ImmutableList.of("DummyConfig");
  }

  for (String configurationName : configurationNames) {
    WorkUnit workUnit = WorkUnit.createEmpty();
    // Configuration prefixed by the configuration name takes precedence over the source-specific
    // configuration, which in turn takes precedence over the general configuration.
    Config wuConfig = ConfigUtils.getConfigOrEmpty(sourceConfig, configurationName).withFallback(sourceConfig)
        .withFallback(config);
    workUnit.setProps(ConfigUtils.configToProperties(wuConfig), new Properties());
    TaskUtils.setTaskFactoryClass(workUnit, DatasetCleanerTaskFactory.class);
    workUnits.add(workUnit);
  }
  return workUnits;
}
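// A sketch of the precedence chain above, with a hypothetical "retention.policy" key set at
// three scopes, and assuming DATASET_CLEANER_SOURCE_PREFIX carries no trailing dot:
Config config = ConfigFactory.parseMap(ImmutableMap.of(
    "retention.policy", "global",
    DATASET_CLEANER_SOURCE_PREFIX + ".retention.policy", "sourceLevel",
    DATASET_CLEANER_SOURCE_PREFIX + ".configA.retention.policy", "configLevel"));
// For configurationName == "configA", wuConfig.getString("retention.policy") returns
// "configLevel"; drop the configA entry and it falls back to "sourceLevel", then "global".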
public CouchbaseWriter(CouchbaseEnvironment couchbaseEnvironment, Config config) {
  List<String> hosts = ConfigUtils.getStringList(config, CouchbaseWriterConfigurationKeys.BOOTSTRAP_SERVERS);
  String password = ConfigUtils.getString(config, CouchbaseWriterConfigurationKeys.PASSWORD, "");
  String bucketName = ConfigUtils.getString(config, CouchbaseWriterConfigurationKeys.BUCKET,
      CouchbaseWriterConfigurationKeys.BUCKET_DEFAULT);

  _cluster = CouchbaseCluster.create(couchbaseEnvironment, hosts);
  if (!password.isEmpty()) {
    _bucket = _cluster.openBucket(bucketName, password,
        Collections.<Transcoder<? extends Document, ?>>singletonList(_tupleDocumentTranscoder));
  } else {
    _bucket = _cluster.openBucket(bucketName,
        Collections.<Transcoder<? extends Document, ?>>singletonList(_tupleDocumentTranscoder));
  }

  _operationTimeout = ConfigUtils.getLong(config, CouchbaseWriterConfigurationKeys.OPERATION_TIMEOUT_MILLIS,
      CouchbaseWriterConfigurationKeys.OPERATION_TIMEOUT_DEFAULT);
  _operationTimeunit = TimeUnit.MILLISECONDS;

  _defaultWriteResponseMapper = new GenericWriteResponseWrapper<>();
  log.info("Couchbase writer configured with: hosts: {}, bucketName: {}, operationTimeoutInMillis: {}", hosts,
      bucketName, _operationTimeout);
}
/**
 * Returns the list of plugins as defined in the system configuration, i.e. those listed under the
 * PLUGINS_FULL_KEY config option. The list also includes plugins that are automatically added by Gobblin.
 */
public List<GobblinInstancePluginFactory> getDefaultPlugins() {
  List<String> pluginNames = ConfigUtils.getStringList(getSysConfig().getConfig(), PLUGINS_FULL_KEY);

  List<GobblinInstancePluginFactory> pluginFactories = Lists.newArrayList();
  // By default the email notification plugin is added.
  if (!ConfigUtils.getBoolean(getSysConfig().getConfig(), EmailNotificationPlugin.EMAIL_NOTIFICATIONS_DISABLED_KEY,
      EmailNotificationPlugin.EMAIL_NOTIFICATIONS_DISABLED_DEFAULT)) {
    pluginFactories.add(new EmailNotificationPlugin.Factory());
  }

  pluginFactories.addAll(Lists.transform(pluginNames, new Function<String, GobblinInstancePluginFactory>() {
    @Override
    public GobblinInstancePluginFactory apply(String input) {
      Class<? extends GobblinInstancePluginFactory> factoryClass;
      try {
        factoryClass = _aliasResolver.resolveClass(input);
        return factoryClass.newInstance();
      } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
        throw new RuntimeException("Unable to instantiate plugin factory " + input + ": " + e, e);
      }
    }
  }));

  return pluginFactories;
}
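// Usage sketch; the path below is a hypothetical stand-in for PLUGINS_FULL_KEY and the factory
// class name is an invented example.
Config sysConfig = ConfigFactory.parseMap(ImmutableMap.of(
    "gobblin.instance.plugins", "com.example.MyPluginFactory,myPluginAlias"));
// Each entry is resolved through _aliasResolver (full class name or registered alias) and
// instantiated via its no-arg constructor; the email notification factory is prepended
// unless notifications are explicitly disabled in the sys config.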
List<String> nestedKeys = ConfigUtils.getStringList(config, FIELDS_TO_FLATTEN);
Assert.assertEquals(ConfigUtils.getStringList(ConfigFactory.parseMap(ImmutableMap.of("a.b", "1,2,3")), "a.b"),
    ImmutableList.of("1", "2", "3"));
Assert.assertEquals(
    ConfigUtils.getStringList(ConfigFactory.parseMap(ImmutableMap.of("a.b", "\"1\",\"2\",\"3\"")), "a.b"),
    ImmutableList.of("1", "2", "3"));
Assert.assertEquals(ConfigUtils.getStringList(ConfigFactory.parseMap(ImmutableMap.of("a.b", "\"1\",\"2,3\"")), "a.b"),
    ImmutableList.of("1", "2,3"));
Assert.assertEquals(ConfigUtils.getStringList(
    ConfigFactory.empty().withValue("a.b", ConfigValueFactory.fromIterable(ImmutableList.of("1", "2", "3"))), "a.b"),
    ImmutableList.of("1", "2", "3"));
Assert.assertEquals(ConfigUtils.getStringList(
    ConfigFactory.empty().withValue("a.b", ConfigValueFactory.fromIterable(ImmutableList.of("\"1\"", "\"2\"", "\"3\""))), "a.b"),
    ImmutableList.of("1", "2", "3"));
Assert.assertEquals(ConfigUtils.getStringList(
    ConfigFactory.empty().withValue("a.b", ConfigValueFactory.fromIterable(ImmutableList.of("\"1\"", "\"2,3\""))), "a.b"),
    ImmutableList.of("1", "2,3"));
Assert.assertEquals(ConfigUtils.getStringList(ConfigFactory.parseMap(ImmutableMap.of("key1", "value1,value2")), "key2"),
    ImmutableList.of());
Assert.assertEquals(ConfigUtils.getStringList(ConfigFactory.parseMap(configMap), "key1"), ImmutableList.of());
Assert.assertEquals(ConfigUtils.getStringList(ConfigFactory.parseMap(configMap), "key2"), ImmutableList.of());
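// Taken together, the assertions above pin down getStringList's observable contract:
// comma-separated string values are split into elements, double-quoted elements keep embedded
// commas and lose their quotes, native list values pass through element by element, and a
// missing key yields an empty list instead of throwing. Callers therefore test for absence
// on the returned list, as in the Kafka client constructor earlier in this section:
List<String> brokers = ConfigUtils.getStringList(config, ConfigurationKeys.KAFKA_BROKERS);
if (brokers.isEmpty()) {
  throw new IllegalArgumentException("Need to specify at least one Kafka broker.");
}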
List<String> destNodeIds = ConfigUtils.getStringList(flowConfig, ServiceConfigKeys.FLOW_DESTINATION_IDENTIFIER_KEY);
this.srcNode = this.flowGraph.getNode(srcNodeId);
Preconditions.checkArgument(srcNode != null, "Flowgraph does not have a node with id " + srcNodeId);
    FlowGraphConfigurationKeys.DEFAULT_FLOW_EDGE_FACTORY_CLASS));
FlowEdgeFactory flowEdgeFactory =
    (FlowEdgeFactory) GobblinConstructorUtils.invokeLongestConstructor(flowEdgeFactoryClass, config);
List<String> specExecutorNames =
    ConfigUtils.getStringList(flowEdgeConfig, FlowGraphConfigurationKeys.FLOW_EDGE_SPEC_EXECUTORS_KEY);
List<SpecExecutor> specExecutors = new ArrayList<>();
for (String specExecutorName : specExecutorNames) {