private static Injector createInjector(CConfiguration cConf, Configuration hConf) { MapReduceContextConfig mapReduceContextConfig = new MapReduceContextConfig(hConf); // principal will be null if running on a kerberos distributed cluster ProgramOptions programOptions = mapReduceContextConfig.getProgramOptions(); Arguments systemArgs = programOptions.getArguments(); String runId = systemArgs.getOption(ProgramOptionConstants.RUN_ID); return Guice.createInjector( new DistributedProgramContainerModule(cConf, hConf, mapReduceContextConfig.getProgramId().run(runId), systemArgs) ); } }
/** * Creates a {@link Program} instance based on the information from the {@link MapReduceContextConfig}, using * the given program ClassLoader. */ private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) { Location programLocation; LocationFactory locationFactory = new LocalLocationFactory(); if (isLocal(contextConfig.getHConf())) { // Just create a local location factory. It's for temp usage only as the program location is always absolute. programLocation = locationFactory.create(contextConfig.getProgramJarURI()); } else { // In distributed mode, the program jar is localized to the container programLocation = locationFactory.create(new File(contextConfig.getProgramJarName()).getAbsoluteFile().toURI()); } return new DefaultProgram(new ProgramDescriptor(contextConfig.getProgramId(), contextConfig.getApplicationSpecification()), programLocation, programClassLoader); }
/**
 * Convenience constructor that derives all parameters from the given {@link MapReduceContextConfig}.
 * Delegates to the full constructor, extracting the CDAP and Hadoop configurations and the plugin
 * information from the context config, and building the {@link PluginInstantiator} (which may be
 * {@code null} when no plugin archive is configured).
 */
Parameters(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) {
  this(contextConfig.getCConf(), contextConfig.getHConf(), programClassLoader, contextConfig.getPlugins(),
       createPluginInstantiator(contextConfig, programClassLoader));
}
/**
 * Returns a new {@link PluginInstantiator} or {@code null} if no plugin is supported.
 */
@Nullable
private static PluginInstantiator createPluginInstantiator(MapReduceContextConfig contextConfig,
                                                           ClassLoader programClassLoader) {
  String archivePath = contextConfig.getHConf().get(Constants.Plugin.ARCHIVE);
  // A missing plugin archive means this program has no plugins to instantiate.
  return archivePath == null
    ? null
    : new PluginInstantiator(contextConfig.getCConf(), programClassLoader, new File(archivePath));
}
}
/**
 * Creates the task context provider for a MapReduce task running in a distributed container.
 * Builds the injector from the given configurations, assembles the core services that must be
 * started for the task (ZooKeeper, metrics, and — for on-premise clusters only — Kafka), and
 * captures the log appender initializer and the parsed context config.
 *
 * @param cConf the CDAP configuration
 * @param hConf the Hadoop configuration carrying the {@link MapReduceContextConfig}
 * @param mapReduceClassLoader the program class loader for this MR job
 */
public DistributedMapReduceTaskContextProvider(CConfiguration cConf, Configuration hConf,
                                               MapReduceClassLoader mapReduceClassLoader) {
  super(createInjector(cConf, hConf), mapReduceClassLoader);
  MapReduceContextConfig mapReduceContextConfig = new MapReduceContextConfig(hConf);
  Injector injector = getInjector();
  Deque<Service> coreServices = new LinkedList<>();
  coreServices.add(injector.getInstance(ZKClientService.class));
  coreServices.add(injector.getInstance(MetricsCollectionService.class));
  // Kafka is only needed when running against an on-premise cluster.
  if (ProgramRunners.getClusterMode(mapReduceContextConfig.getProgramOptions()) == ClusterMode.ON_PREMISE) {
    coreServices.add(injector.getInstance(KafkaClientService.class));
  }
  this.coreServices = coreServices;
  this.logAppenderInitializer = injector.getInstance(LogAppenderInitializer.class);
  // Reuse the config parsed above instead of constructing a second instance from the same hConf.
  this.mapReduceContextConfig = mapReduceContextConfig;
}
/**
 * Updates the {@link Configuration} of this class with the given parameters.
 *
 * @param context the context for the MapReduce program
 * @param conf the CDAP configuration
 * @param programJarURI The URI of the program JAR
 * @param localizedUserResources the localized resources for the MapReduce program
 */
public void set(BasicMapReduceContext context, CConfiguration conf, URI programJarURI,
                Map<String, String> localizedUserResources) {
  // The application specification is consulted twice (spec itself and its plugins).
  ApplicationSpecification appSpec = context.getApplicationSpecification();
  setProgramOptions(context.getProgramOptions());
  setProgramId(context.getProgram().getId());
  setApplicationSpecification(appSpec);
  setWorkflowProgramInfo(context.getWorkflowInfo());
  setPlugins(appSpec.getPlugins());
  setProgramJarURI(programJarURI);
  setConf(conf);
  setLocalizedResources(localizedUserResources);
  setOutputs(context.getOutputs());
}
/**
 * Creates logging context for MapReduce program. If the program is started
 * by Workflow an instance of {@link WorkflowProgramLoggingContext} is returned,
 * otherwise an instance of {@link MapReduceLoggingContext} is returned.
 */
private LoggingContext createMapReduceLoggingContext() {
  MapReduceContextConfig config = new MapReduceContextConfig(parameters.getHConf());
  ProgramId program = config.getProgramId();
  RunId run = ProgramRunners.getRunId(config.getProgramOptions());
  WorkflowProgramInfo workflowInfo = config.getWorkflowProgramInfo();
  if (workflowInfo != null) {
    // Launched from a workflow: log under the workflow's identity, tagging the MR program.
    return new WorkflowProgramLoggingContext(program.getNamespace(), program.getApplication(),
                                             workflowInfo.getName(), workflowInfo.getRunId().getId(),
                                             ProgramType.MAPREDUCE, program.getProgram(), run.getId());
  }
  return new MapReduceLoggingContext(program.getNamespace(), program.getApplication(),
                                     program.getProgram(), run.getId());
}
MapReduceContextConfig contextConfig = new MapReduceContextConfig(jobContext.getConfiguration()); ProgramId programId = contextConfig.getProgramId(); LOG.info("Setting up for MapReduce job: namespaceId={}, applicationId={}, program={}, runid={}", programId.getNamespace(), programId.getApplication(), programId.getProgram(), ProgramRunners.getRunId(contextConfig.getProgramOptions())); SystemArguments.getRetryStrategy(contextConfig.getProgramOptions().getUserArguments().asMap(), contextConfig.getProgramId().getType(), cConf); this.txClient = new RetryingLongTransactionSystemClient(injector.getInstance(TransactionSystemClient.class), retryStrategy); if (ProgramRunners.getClusterMode(contextConfig.getProgramOptions()) == ClusterMode.ON_PREMISE) { this.transaction = txClient.startLong(); this.outputs = Outputs.transform(contextConfig.getOutputs(), taskContext);
MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration()); MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration()); WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo(); DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions())); ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId)); MapReduceMetrics.TaskType taskType = null; String taskId = null; ProgramOptions options = contextConfig.getProgramOptions(); RuntimeArguments.extractScope( "task", taskType.toString().toLowerCase(), contextConfig.getProgramOptions().getUserArguments().asMap())), options.isDebug()); spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txClient, transaction, programDatasetFramework, classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(), secureStore, secureStoreManager, authorizationEnforcer, authenticationContext, messagingService, mapReduceClassLoader, metadataReader, metadataPublisher, namespaceQueryAdmin
@Test
public void testManyMacrosInAppSpec() {
  Configuration hConf = new Configuration();
  MapReduceContextConfig contextConfig = new MapReduceContextConfig(hConf);
  // Build a configuration string containing 100 macro references ${0}..${99}, each of
  // which is resolvable through the Hadoop configuration.
  StringBuilder macros = new StringBuilder();
  for (int i = 0; i < 100; i++) {
    macros.append("${").append(i).append("}");
    hConf.setInt(Integer.toString(i), i);
  }
  ApplicationSpecification appSpec = new DefaultApplicationSpecification(
    "name", "desc", macros.toString(),
    new ArtifactId("artifact", new ArtifactVersion("1.0.0"), ArtifactScope.USER),
    Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
    Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
    Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap()
  );
  contextConfig.setApplicationSpecification(appSpec);
  // Round-tripping through the hConf must not expand the macros embedded in the app spec.
  Assert.assertEquals(appSpec.getConfiguration(),
                      contextConfig.getApplicationSpecification().getConfiguration());
}
/** * Creates a {@link Program} instance based on the information from the {@link MapReduceContextConfig}, using * the given program ClassLoader. */ private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) { Location programLocation; LocationFactory locationFactory = new LocalLocationFactory(); // Use the program jar location regardless if local or distributed, since it is valid for both programLocation = locationFactory.create(new File(contextConfig.getProgramJarName()).getAbsoluteFile().toURI()); return new DefaultProgram(new ProgramDescriptor(contextConfig.getProgramId(), contextConfig.getApplicationSpecification()), programLocation, programClassLoader); }
@Test
public void testGetPluginsWithMacrosMoreThan20() {
  Configuration hConf = new Configuration();
  MapReduceContextConfig contextConfig = new MapReduceContextConfig(hConf);
  // A plugin property containing more than 20 macros: the input directory plus one per letter.
  Map<String, String> properties = new HashMap<>();
  properties.put("path", "${input.directory}/${a}${b}${c}${d}${e}${f}${g}${h}${i}${j}"
    + "${k}${l}${m}${n}${o}${p}${q}${r}${s}${t}${u}${v}${w}${x}${y}${z}.txt");
  hConf.set("input.directory", "/dummy/path");
  // Every single-letter macro resolves to itself.
  String[] alphabetsArr = {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
                           "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"};
  for (String alphabet : alphabetsArr) {
    hConf.set(alphabet, alphabet);
  }
  ArtifactId artifactId = new ArtifactId("plugins", new ArtifactVersion("1.0.0"), ArtifactScope.SYSTEM);
  Set<ArtifactId> parents = new LinkedHashSet<>();
  Plugin filePlugin1 = new Plugin(parents, artifactId,
    new PluginClass("type", "name", "desc", "clsname", "cfgfield", Collections.emptyMap()),
    PluginProperties.builder().addAll(properties).build());
  Map<String, Plugin> mockPlugins = new HashMap<>();
  mockPlugins.put("File1", filePlugin1);
  hConf.set(MapReduceContextConfig.HCONF_ATTR_PLUGINS, GSON.toJson(mockPlugins));
  // Deserializing from the hConf must yield the plugins exactly as serialized.
  Map<String, Plugin> plugins = contextConfig.getPlugins();
  Assert.assertEquals(plugins, mockPlugins);
}
}
/** * Creates a program {@link ClassLoader} based on the MR job config. */ private static ClassLoader createProgramClassLoader(MapReduceContextConfig contextConfig) { // In distributed mode, the program is created by expanding the program jar. // The program jar is localized to container with the program jar name. // It's ok to expand to a temp dir in local directory, as the YARN container will be gone. Location programLocation = Locations.toLocation(new File(contextConfig.getProgramJarName())); try { File unpackDir = DirUtils.createTempDir(new File(System.getProperty("user.dir"))); LOG.info("Create ProgramClassLoader from {}, expand to {}", programLocation, unpackDir); BundleJarUtil.unJar(programLocation, unpackDir); return new ProgramClassLoader(contextConfig.getCConf(), unpackDir, FilterClassLoader.create(contextConfig.getHConf().getClassLoader())); } catch (IOException e) { LOG.error("Failed to create ProgramClassLoader", e); throw Throwables.propagate(e); } }
MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf); CConfiguration cConfCopy = CConfiguration.copy(cConf); if (hbaseDDLExecutorDirectory != null) { cConfCopy.set(Constants.HBaseDDLExecutor.EXTENSIONS_DIR, hbaseDDLExecutorDirectory); contextConfig.set(context, cConfCopy, programJar.toURI(), localizedUserResources);
/**
 * Creates a {@link MapReduceContextConfig} from the MR job configuration file that
 * has been localized to the task container.
 */
private static MapReduceContextConfig createContextConfig() {
  Configuration hConf = new Configuration(new YarnConfiguration());
  // The job.xml localized by the MR framework carries the serialized context config.
  hConf.addResource(new Path(MRJobConfig.JOB_CONF_FILE));
  return new MapReduceContextConfig(hConf);
}
/**
 * Updates the {@link Configuration} of this class with the given parameters.
 *
 * @param context the context for the MapReduce program
 * @param conf the CDAP configuration
 * @param programJarURI The URI of the program JAR
 * @param localizedUserResources the localized resources for the MapReduce program
 */
public void set(BasicMapReduceContext context, CConfiguration conf, URI programJarURI,
                Map<String, String> localizedUserResources) {
  // The application specification is consulted twice (spec itself and its plugins).
  ApplicationSpecification appSpec = context.getApplicationSpecification();
  setProgramOptions(context.getProgramOptions());
  setProgramId(context.getProgram().getId());
  setApplicationSpecification(appSpec);
  setWorkflowProgramInfo(context.getWorkflowInfo());
  setPlugins(appSpec.getPlugins());
  setProgramJarURI(programJarURI);
  setConf(conf);
  setLocalizedResources(localizedUserResources);
  setOutputs(context.getOutputs());
}
/**
 * Creates logging context for MapReduce program. If the program is started
 * by Workflow an instance of {@link WorkflowProgramLoggingContext} is returned,
 * otherwise an instance of {@link MapReduceLoggingContext} is returned.
 */
private LoggingContext createMapReduceLoggingContext() {
  MapReduceContextConfig config = new MapReduceContextConfig(parameters.getHConf());
  ProgramId program = config.getProgramId();
  RunId run = ProgramRunners.getRunId(config.getProgramOptions());
  WorkflowProgramInfo workflowInfo = config.getWorkflowProgramInfo();
  if (workflowInfo != null) {
    // Launched from a workflow: log under the workflow's identity, tagging the MR program.
    return new WorkflowProgramLoggingContext(program.getNamespace(), program.getApplication(),
                                             workflowInfo.getName(), workflowInfo.getRunId().getId(),
                                             ProgramType.MAPREDUCE, program.getProgram(), run.getId());
  }
  return new MapReduceLoggingContext(program.getNamespace(), program.getApplication(),
                                     program.getProgram(), run.getId());
}
MapReduceContextConfig contextConfig = new MapReduceContextConfig(jobContext.getConfiguration()); ProgramId programId = contextConfig.getProgramId(); LOG.info("Setting up for MapReduce job: namespaceId={}, applicationId={}, program={}, runid={}", programId.getNamespace(), programId.getApplication(), programId.getProgram(), ProgramRunners.getRunId(contextConfig.getProgramOptions())); SystemArguments.getRetryStrategy(contextConfig.getProgramOptions().getUserArguments().asMap(), contextConfig.getProgramId().getType(), cConf); this.txClient = new RetryingLongTransactionSystemClient(injector.getInstance(TransactionSystemClient.class), retryStrategy); if (ProgramRunners.getClusterMode(contextConfig.getProgramOptions()) == ClusterMode.ON_PREMISE) { this.transaction = txClient.startLong(); this.outputs = Outputs.transform(contextConfig.getOutputs(), taskContext);
MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration()); MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration()); WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo(); DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions())); ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId)); MapReduceMetrics.TaskType taskType = null; String taskId = null; ProgramOptions options = contextConfig.getProgramOptions(); RuntimeArguments.extractScope( "task", taskType.toString().toLowerCase(), contextConfig.getProgramOptions().getUserArguments().asMap())), options.isDebug()); spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txClient, transaction, programDatasetFramework, classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(), secureStore, secureStoreManager, authorizationEnforcer, authenticationContext, messagingService, mapReduceClassLoader, metadataReader, metadataPublisher
/**
 * Creates the task context provider for a MapReduce task running in a distributed container.
 * Builds the injector from the given configurations, assembles the core services that must be
 * started for the task (ZooKeeper, metrics, and — for on-premise clusters only — Kafka), and
 * captures the log appender initializer and the parsed context config.
 *
 * @param cConf the CDAP configuration
 * @param hConf the Hadoop configuration carrying the {@link MapReduceContextConfig}
 * @param mapReduceClassLoader the program class loader for this MR job
 */
public DistributedMapReduceTaskContextProvider(CConfiguration cConf, Configuration hConf,
                                               MapReduceClassLoader mapReduceClassLoader) {
  super(createInjector(cConf, hConf), mapReduceClassLoader);
  MapReduceContextConfig mapReduceContextConfig = new MapReduceContextConfig(hConf);
  Injector injector = getInjector();
  Deque<Service> coreServices = new LinkedList<>();
  coreServices.add(injector.getInstance(ZKClientService.class));
  coreServices.add(injector.getInstance(MetricsCollectionService.class));
  // Kafka is only needed when running against an on-premise cluster.
  if (ProgramRunners.getClusterMode(mapReduceContextConfig.getProgramOptions()) == ClusterMode.ON_PREMISE) {
    coreServices.add(injector.getInstance(KafkaClientService.class));
  }
  this.coreServices = coreServices;
  this.logAppenderInitializer = injector.getInstance(LogAppenderInitializer.class);
  // Reuse the config parsed above instead of constructing a second instance from the same hConf.
  this.mapReduceContextConfig = mapReduceContextConfig;
}