// Excerpt: `conf`, `tezScratchDir` and `n` are declared outside this fragment.
// Builds a TezConfiguration, adds `conf` to it as a resource, points the Tez AM staging
// directory at the URI of `tezScratchDir`, and lets `conf` strip its hidden configurations
// from the result. Finally raises TEZ_AM_SESSION_MIN_HELD_CONTAINERS to the larger of the
// configured value (or its default) and `n`, and stores that value back into the config.
// NOTE(review): the `true` constructor argument presumably requests loading of the default
// resources - confirm against the TezConfiguration API.
final TezConfiguration tezConfig = new TezConfiguration(true); tezConfig.addResource(conf); tezConfig.set(TezConfiguration.TEZ_AM_STAGING_DIR, tezScratchDir.toUri().toString()); conf.stripHiddenConfigurations(tezConfig); n = Math.max(tezConfig.getInt( TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS_DEFAULT), n); tezConfig.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, n);
// Excerpt with unbalanced braces: the `if (... MR_AM_ENV ...) {` opened here is never closed
// in the visible text and the trailing `if (conf.get(dep.getKey()) != null) {` has no body,
// so the real nesting of the statements below cannot be determined from this fragment.
// Visible behaviour: the AM environment string is MR_AM_ADMIN_USER_ENV, with MR_AM_ENV
// appended after a comma when both are set (or MR_AM_ENV alone when the admin value is null).
// The remaining statements copy MapReduce AM settings (launch env, java opts, queue name,
// memory MB, vcores, max attempts, view/modify ACLs) onto the corresponding Tez keys using
// setIfUnset, i.e. an explicitly configured Tez value is never overwritten.
String env = conf.get(MRJobConfig.MR_AM_ADMIN_USER_ENV); if (conf.get(MRJobConfig.MR_AM_ENV) != null) { env = (env == null) ? conf.get(MRJobConfig.MR_AM_ENV) : env + "," + conf.get(MRJobConfig.MR_AM_ENV); conf.setIfUnset(TezConfiguration.TEZ_AM_LAUNCH_ENV, env); conf.setIfUnset(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, org.apache.tez.mapreduce.hadoop.MRHelpers.getJavaOptsForMRAM(conf)); String queueName = conf.get(JobContext.QUEUE_NAME, YarnConfiguration.DEFAULT_QUEUE_NAME); conf.setIfUnset(TezConfiguration.TEZ_QUEUE_NAME, queueName); int amMemMB = conf.getInt(MRJobConfig.MR_AM_VMEM_MB, MRJobConfig.DEFAULT_MR_AM_VMEM_MB); conf.setIfUnset(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, "" + amMemMB); int amCores = conf.getInt(MRJobConfig.MR_AM_CPU_VCORES, MRJobConfig.DEFAULT_MR_AM_CPU_VCORES); conf.setIfUnset(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, "" + amCores); conf.setIfUnset(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, "" + conf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS)); conf.setIfUnset(TezConfiguration.TEZ_AM_VIEW_ACLS, conf.get(MRJobConfig.JOB_ACL_VIEW_JOB, MRJobConfig.DEFAULT_JOB_ACL_VIEW_JOB)); conf.setIfUnset(TezConfiguration.TEZ_AM_MODIFY_ACLS, conf.get(MRJobConfig.JOB_ACL_MODIFY_JOB, MRJobConfig.DEFAULT_JOB_ACL_MODIFY_JOB)); if (conf.get(dep.getKey()) != null) {
protected void initConfig( Map<Object, Object> properties, TezConfiguration parentConfig ) { if( properties != null ) parentConfig = createConfig( properties, parentConfig ); if( parentConfig == null ) // this is ok, getJobConf will pass a default parent in return; flowConf = new TezConfiguration( parentConfig ); // prevent local values from being shared flowConf.set( "fs.http.impl", HttpFileSystem.class.getName() ); flowConf.set( "fs.https.impl", HttpFileSystem.class.getName() ); UserGroupInformation.setConfiguration( flowConf ); flowStagingPath = createStagingRoot(); }
/** * Creates and initializes a JobConf object that can be used to execute * the DAG. The configuration object will contain configurations from mapred-site * overlaid with key/value pairs from the conf object. Finally it will also * contain some hive specific configurations that do not change from DAG to DAG. * * @param hiveConf Current conf for the execution * @return JobConf base configuration for job execution * @throws IOException */ public JobConf createConfiguration(HiveConf hiveConf) throws IOException { hiveConf.setBoolean("mapred.mapper.new-api", false); JobConf conf = new JobConf(new TezConfiguration(hiveConf)); conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName()); conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); conf.setBoolean("mapred.committer.job.task.cleanup.needed", false); conf.setClass("mapred.output.format.class", HiveOutputFormatImpl.class, OutputFormat.class); conf.set(MRJobConfig.OUTPUT_KEY_CLASS, HiveKey.class.getName()); conf.set(MRJobConfig.OUTPUT_VALUE_CLASS, BytesWritable.class.getName()); conf.set("mapred.partitioner.class", HiveConf.getVar(conf, HiveConf.ConfVars.HIVEPARTITIONER)); conf.set("tez.runtime.partitioner.class", MRPartitioner.class.getName()); // Removing job credential entry/ cannot be set on the tasks conf.unset("mapreduce.job.credentials.binary"); hiveConf.stripHiddenConfigurations(conf); return conf; }
// Excerpt of createInitializedConfig: the method header is not followed by an opening brace,
// the body is truncated, and `fileName` is not declared in the visible text.
// Visible behaviour: the step configuration is a copy of `parentConfig` (or a fresh
// TezConfiguration when the parent is null), stamped with the cascading version, the flow
// step id and the step ordinal. The app jar path is read into `appJarPath`, and the Tez
// additional cluster classpath prefix is set to three $PWD-relative entries derived from
// `fileName` (the directory itself, its classes/ directory and lib/*).
// NOTE(review): how `fileName` relates to `appJarPath` is not shown here - confirm in the full source.
@Override public TezConfiguration createInitializedConfig( FlowProcess<TezConfiguration> flowProcess, TezConfiguration parentConfig ) TezConfiguration stepConf = parentConfig == null ? new TezConfiguration() : new TezConfiguration( parentConfig ); stepConf.set( "cascading.version", versionString ); stepConf.set( CASCADING_FLOW_STEP_ID, getID() ); stepConf.set( "cascading.flow.step.num", Integer.toString( getOrdinal() ) ); String appJarPath = stepConf.get( AppProps.APP_JAR_PATH ); stepConf.set( TezConfiguration.TEZ_CLUSTER_ADDITIONAL_CLASSPATH_PREFIX, "$PWD/" + fileName + "/:$PWD/" + fileName + "/classes/:$PWD/" + fileName + "/lib/*:" );
@Override public TupleEntryCollector openTrapForWrite( Tap trap ) throws IOException { TezConfiguration jobConf = new TezConfiguration( getConfiguration() ); int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 ); int nodeNum = jobConf.getInt( "cascading.flow.node.num", 0 ); String partname = String.format( "-%05d-%05d-", stepNum, nodeNum ); jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname + "%05d" ); return trap.openForWrite( new Hadoop2TezFlowProcess( this, jobConf ), null ); // do not honor sinkmode as this may be opened across tasks }
// Excerpt: `tezConf` is declared outside this fragment.
// Copies the configuration returned by getConf() and switches the copy to Tez local mode:
// TEZ_LOCAL_MODE is enabled, the default file system is set to the local one ("file:///"),
// and TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH is turned on.
tezConf = new TezConfiguration(getConf()); tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); tezConf.set("fs.defaultFS", "file:///"); tezConf.setBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
// Non-contiguous excerpts: several statements are cut mid-expression and braces/parentheses
// are unbalanced, so this line is not compilable on its own.
// Visible behaviour, all read from amConfig.getTezConfiguration() with the matching *_DEFAULT:
//  - AM memory (MB) and AM vcores; the vcores value is applied via capability.setVirtualCores
//  - AM log level, read when debug logging is enabled
//  - a copy of the AM configuration is taken into `tezConf` and the DAG's access controls are
//    merged into it through mergeIntoAmAcls
//  - the shuffle auxiliary service id (read from `conf`) is used as a key into `serviceData`
//  - application tags, the cancel-delegation-tokens-on-completion flag (passed to
//    appContext.setCancelTokensWhenComplete) and the maximum number of AM attempts
amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB_DEFAULT)); capability.setVirtualCores( amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES_DEFAULT)); if (LOG.isDebugEnabled()) { String amLogLevelString = amConfig.getTezConfiguration().get( TezConfiguration.TEZ_AM_LOG_LEVEL, TezConfiguration.TEZ_AM_LOG_LEVEL_DEFAULT); tezConf = new TezConfiguration(amConfig.getTezConfiguration()); dag.getDagAccessControls().mergeIntoAmAcls(tezConf); String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); serviceData.put(auxiliaryService, amConfig.getTezConfiguration().getTrimmedStringCollection( TezConfiguration.TEZ_APPLICATION_TAGS); appContext.setCancelTokensWhenComplete(amConfig.getTezConfiguration().getBoolean( TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION, TezConfiguration.TEZ_CANCEL_DELEGATION_TOKENS_ON_COMPLETION_DEFAULT)); amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS_DEFAULT));
// Excerpt: starts inside an if/else whose condition is not visible, and the `else {` opened
// here is not closed in the visible text, so the exact nesting below is uncertain.
// Visible behaviour: `tezConf` is either a copy of `conf` or a fresh TezConfiguration.
// Container reuse is enabled and the configuration is handed to UserGroupInformation.
// The staging directory string is the configured TEZ_AM_STAGING_DIR (or its default) followed
// by "BroadcastAndOneToOneExample" and the current time in milliseconds, joined with
// Path.SEPARATOR; that string is written back to TEZ_AM_STAGING_DIR.
// NOTE(review): the unqualified string is stored in the configuration before `stagingDir`
// is qualified via fs.makeQualified - confirm this ordering is intended.
tezConf = new TezConfiguration(conf); } else { tezConf = new TezConfiguration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); UserGroupInformation.setConfiguration(tezConf); String stagingDirStr = tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + "BroadcastAndOneToOneExample" + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = fs.makeQualified(stagingDir);
// Excerpt of adjustAMConfig: the body is elided. `requiredAMResourceMB` and
// `requiredAMMaxHeap` are not declared in the visible text, two log statements are cut off
// mid-concatenation, and the method's closing brace is missing.
// Visible behaviour: reads the AM launch command options and extracts the configured max
// heap (MB) from them via Utils.extractHeapSizeInMB, and reads the configured AM resource
// memory (MB). It then writes `requiredAMResourceMB` to TEZ_AM_RESOURCE_MEMORY_MB, appends
// " -Xmx<requiredAMMaxHeap>M" to the launch options, and logs the resulting option string.
// NOTE(review): the conditions under which these increases are applied are not visible here.
private static void adjustAMConfig(TezConfiguration amConf, TezJobConfig tezJobConf) { String amLaunchOpts = amConf.get( TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT); int configuredAMMaxHeap = Utils.extractHeapSizeInMB(amLaunchOpts); int configuredAMResourceMB = amConf.getInt( TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB_DEFAULT); amConf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, requiredAMResourceMB); log.info("Increasing " + TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB + " from " amConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, amLaunchOpts + " -Xmx" + requiredAMMaxHeap + "M"); log.info("Increasing Tez AM Heap Size from " + ", max outputs = " + tezJobConf.getMaxOutputsinSingleVertex()); log.info("Value of " + TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS + " is now " + amConf.get(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS));
/**
 * Produces a copy of {@code this.tezConf} in which MapReduce keys are translated to Tez keys.
 *
 * <p>For every (MR key, Tez key) pair returned by {@code DeprecatedKeys.getMRToDAGParamMap()}
 * whose MR key has a value in the copy, that value is stored under the Tez key and the MR key
 * is removed. {@code this.tezConf} itself is not modified.
 *
 * @return the translated configuration copy
 */
private TezConfiguration getDAGAMConfFromMRConf() {
  TezConfiguration translated = new TezConfiguration(this.tezConf);

  for (Entry<String, String> mapping : DeprecatedKeys.getMRToDAGParamMap().entrySet()) {
    String mrKey = mapping.getKey();
    if (translated.get(mrKey) == null) {
      // Nothing configured under the MR key; leave the Tez key untouched.
      continue;
    }

    String tezKey = mapping.getValue();
    translated.set(tezKey, translated.get(mrKey));
    translated.unset(mrKey);

    if (LOG.isDebugEnabled()) {
      LOG.debug("MR->DAG Translating MR key: " + mrKey
          + " to Tez key: " + tezKey
          + " with value " + translated.get(tezKey));
    }
  }

  return translated;
}
/**
 * Writes the edge's key, value, key-comparator and partitioner class names into the given
 * configuration, applies the working directory, and converts the configuration to a payload.
 *
 * <p>The supplied {@code config} is modified in place.
 *
 * @param config     configuration to populate and serialize
 * @param edgeValues source of the class names for this edge
 * @return the payload produced by {@code getPayload} from the populated configuration
 */
private UserPayload createIntermediatePayloadOutput( TezConfiguration config, EdgeValues edgeValues ) {
  // Key/value types carried over the edge.
  config.set( TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, edgeValues.keyClassName );
  config.set( TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, edgeValues.valueClassName );

  // Ordering and partitioning of keys.
  config.set( TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, edgeValues.keyComparatorClassName );
  config.set( TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, edgeValues.keyPartitionerClassName );

  setWorkingDirectory( config );

  final UserPayload payload = getPayload( config );
  return payload;
}
/**
 * Restores the processor state from the user payload.
 *
 * <p>The configuration is rebuilt from the context's user payload and passed through
 * {@code TezUtil.setMRProperties}. Logging is then initialized, the current flow process is
 * created and the flow node is deserialized from its Base64 form in the configuration.
 *
 * @throws Exception a {@code CascadingException} is rethrown unchanged; any other failure
 *                   during the guarded setup is wrapped in a {@code FlowException}
 */
@Override
public void initialize() throws Exception {
  configuration = new TezConfiguration( TezUtils.createConfFromUserPayload( getContext().getUserPayload() ) );

  TezUtil.setMRProperties( getContext(), configuration, true );

  try {
    HadoopUtil.initLog4j( configuration );

    LOG.info( "cascading version: {}", configuration.get( "cascading.version", "" ) );

    currentProcess = new Hadoop2TezFlowProcess( new FlowSession(), getContext(), configuration );

    String encodedFlowNode = configuration.getRaw( FlowNode.CASCADING_FLOW_NODE );
    flowNode = deserializeBase64( encodedFlowNode, configuration, BaseFlowNode.class );

    LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() );

    logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" );
  } catch( CascadingException cascadingException ) {
    // Already a framework exception: propagate as is.
    throw cascadingException;
  } catch( Throwable throwable ) {
    throw new FlowException( "internal error during processor configuration", throwable );
  }
}
// Excerpt with unbalanced braces: the `if (envStr.length() > 0) {` and
// `if (LOG.isDebugEnabled()) {` blocks are not closed in the visible text, and `dagAMConf`,
// `javaOpts`, `envStr`, `jobSubmitDir`, `queueName`, `amMemMB` and `amCores` are declared elsewhere.
// Visible behaviour: `jobConf` wraps a TezConfiguration copy of `conf`. `dagAMConf` receives
// the AM launch command options, the AM launch environment (guarded by a non-empty `envStr`),
// the staging directory, the queue name, AM memory (MB), AM vcores, and the maximum number
// of AM attempts taken from the MapReduce setting MR_AM_MAX_ATTEMPTS (or its default).
JobConf jobConf = new JobConf(new TezConfiguration(conf)); dagAMConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, javaOpts.toString()); if (envStr.length() > 0) { dagAMConf.set(TezConfiguration.TEZ_AM_LAUNCH_ENV, envStr); if (LOG.isDebugEnabled()) { LOG.debug("Setting MR AM env to : " + envStr); dagAMConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, jobSubmitDir); dagAMConf.set(TezConfiguration.TEZ_QUEUE_NAME, queueName); dagAMConf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, amMemMB); dagAMConf.setInt(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, amCores); dagAMConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, jobConf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS));
// Excerpt: `builder` and `conf` are declared elsewhere, and the two trailing `if (...) {`
// blocks are not closed in the visible text.
// Visible behaviour: copies cartesian-product vertex manager settings from `conf` into
// `builder`, each with its *_DEFAULT fallback: slow-start min/max fraction, max parallelism,
// min ops per worker, and the enable-grouping flag.
// Grouping fraction and number of partitions have no default here: each is parsed and applied
// only when its key is present, then validated with Preconditions.checkArgument
// (grouping fraction must satisfy 0 < f <= 1; number of partitions must be > 0).
builder.setMinFraction(conf.getFloat( CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_SLOW_START_MIN_FRACTION, CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_SLOW_START_MIN_FRACTION_DEFAULT)); builder.setMaxFraction(conf.getFloat( CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_SLOW_START_MAX_FRACTION, CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_SLOW_START_MAX_FRACTION_DEFAULT)); builder.setMaxParallelism(conf.getInt( CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MAX_PARALLELISM, CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MAX_PARALLELISM_DEFAULT)); builder.setMinOpsPerWorker(conf.getLong( CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MIN_OPS_PER_WORKER, CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MIN_OPS_PER_WORKER_DEFAULT)); builder.setEnableGrouping(conf.getBoolean( CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING, CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING_DEFAULT)); if (conf.get(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_GROUPING_FRACTION) != null) { builder.setGroupingFraction(Float.parseFloat( conf.get(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_GROUPING_FRACTION))); Preconditions.checkArgument(0 < builder.getGroupingFraction() && builder.getGroupingFraction() <= 1, "grouping fraction should be larger than 0 and less" + " or equal to 1, current value: " + builder.getGroupingFraction()); if (conf.get(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS) != null) { builder.setNumPartitionsForFairCase(Integer.parseInt( conf.get(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS))); Preconditions.checkArgument(builder.getNumPartitionsForFairCase() > 0, "Number of partitions for fair cartesian product should be positive integer");
public ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, JobID jobId) { this.conf = new TezConfiguration(conf); // Cloning for modifying. // For faster redirects from AM to HS. this.conf.setInt( CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES)); }
protected void internalNonBlockingStart() throws IOException { try { if( !isTimelineServiceEnabled( jobConfiguration ) ) flowStep.logWarn( "'" + YarnConfiguration.TIMELINE_SERVICE_ENABLED + "' is disabled, please enable to capture detailed metrics of completed flows, this may require starting the YARN timeline server daemon" ); TezConfiguration workingConf = new TezConfiguration( jobConfiguration ); // this could be problematic flowStep.logInfo( "tez session mode enabled: " + workingConf.getBoolean( TezConfiguration.TEZ_AM_SESSION_MODE, TezConfiguration.TEZ_AM_SESSION_MODE_DEFAULT ) ); prepareEnsureStagingDir( workingConf ); tezClient = TezClient.create( flowStep.getName(), workingConf, ( (Hadoop2TezFlowStep) flowStep ).getAllLocalResources(), null ); tezClient.start(); dagClient = tezClient.submitDAG( dag ); dagId = Util.returnInstanceFieldIfExistsSafe( dagClient, "dagId" ); flowStep.logInfo( "submitted tez dag to app master: {}, with dag id: {}", tezClient.getAppMasterApplicationId(), dagId ); } catch( TezException exception ) { this.throwable = exception; throw new CascadingException( exception ); } }
@Private protected TezClient(String name, TezConfiguration tezConf, boolean isSession, @Nullable Map<String, LocalResource> localResources, @Nullable Credentials credentials, ServicePluginsDescriptor servicePluginsDescriptor) { this.clientName = name; this.isSession = isSession; // Set in conf for local mode AM to figure out whether in session mode or not tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, isSession); try { InetAddress ip = InetAddress.getLocalHost(); if (ip != null) { tezConf.set(TezConfigurationConstants.TEZ_SUBMIT_HOST, ip.getCanonicalHostName()); tezConf.set(TezConfigurationConstants.TEZ_SUBMIT_HOST_ADDRESS, ip.getHostAddress()); } } catch (UnknownHostException e) { LOG.warn("The host name of the client the tez application was submitted from was unable to be retrieved", e); } this.amConfig = new AMConfiguration(tezConf, localResources, credentials); this.apiVersionInfo = new TezApiVersionInfo(); this.servicePluginsDescriptor = servicePluginsDescriptor; this.maxSubmitDAGRequestSizeThroughIPC = tezConf.getInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT) - tezConf.getInt(TezConfiguration.TEZ_IPC_PAYLOAD_RESERVED_BYTES, TezConfiguration.TEZ_IPC_PAYLOAD_RESERVED_BYTES_DEFAULT); Limits.setConfiguration(tezConf); LOG.info("Tez Client Version: " + apiVersionInfo.toString()); }
// Non-contiguous excerpts: braces are unbalanced and one statement survives only as the
// tail `amClientKeepAliveTimeoutIntervalMillis, 10);`, so this line is not compilable alone.
// Visible behaviour: starts `frameworkClient`, then reads the following from
// amConfig.getTezConfiguration():
//  - whether the client java-opts checker is enabled and, if so, its class name
//    (an empty name is treated as "not configured")
//  - the session client timeout in seconds, stored in `clientTimeout`
//  - whether DAG recovery is enabled (the branch shown is taken when it is disabled)
//  - whether Tez runs in local mode; the final condition is entered only when not in local
//    mode and `amClientKeepAliveTimeoutIntervalMillis` is positive
frameworkClient.start(); if (this.amConfig.getTezConfiguration().getBoolean( TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED, TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED_DEFAULT)) { String javaOptsCheckerClassName = this.amConfig.getTezConfiguration().get( TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_CLASS, ""); if (!javaOptsCheckerClassName.isEmpty()) { clientTimeout = amConfig.getTezConfiguration().getInt( TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS, TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS_DEFAULT); if (!amConfig.getTezConfiguration().getBoolean( TezConfiguration.DAG_RECOVERY_ENABLED, TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT)) { amClientKeepAliveTimeoutIntervalMillis, 10); boolean isLocal = amConfig.getTezConfiguration().getBoolean( TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT); if (!isLocal && amClientKeepAliveTimeoutIntervalMillis > 0) {
/**
 * Reads the {@code MAX_CONCURRENT_STEPS} setting from the given configuration.
 *
 * @param jobConf configuration to read from
 * @return the configured integer value, or 0 when the property is not set
 */
static int getMaxConcurrentSteps( TezConfiguration jobConf ) {
  final int maxConcurrentSteps = jobConf.getInt( MAX_CONCURRENT_STEPS, 0 );
  return maxConcurrentSteps;
}