StepConfig enableDebugging = new StepConfig()
        .withName("Enable Debugging")
        .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
steps.add(enableDebugging);

StepConfig sparkStep = new StepConfig()
        .withName("Spark Step")
        .withActionOnFailure(action)
        .withHadoopJarStep(sparkStepConf);
steps.add(sparkStep);
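A minimal sketch of how the steps list assembled above might be submitted to an already-running cluster. The client construction, region, and cluster id are assumptions, not part of the snippet above.

import com.amazonaws.regions.Regions;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsResult;

// Hypothetical submission of the assembled steps to an existing cluster.
AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withRegion(Regions.US_EAST_1) // assumed region
        .build();
AddJobFlowStepsResult result = emr.addJobFlowSteps(new AddJobFlowStepsRequest()
        .withJobFlowId("j-XXXXXXXXXXXXX") // assumed cluster id
        .withSteps(steps));
result.getStepIds().forEach(System.out::println);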
public StepConfig unmarshall(JsonUnmarshallerContext context) throws Exception {
    StepConfig stepConfig = new StepConfig();
private StepConfig configureHiveStep(String stagingS3qUrl, String cmdLineArgs) {
    String[] cmdLineArgsArr;
    if (cmdLineArgs == null) {
        cmdLineArgsArr = new String[] { "" };
    } else {
        List<String> cmdArgs = Arrays.asList(cmdLineArgs.split("\\s+"));
        List<String> updatedCmdArgs =
                cmdArgs.stream().map(e -> replaceDoubleS3(e)).collect(Collectors.toList());
        cmdLineArgsArr = updatedCmdArgs.toArray(new String[updatedCmdArgs.size()]);
    }
    StepConfig hiveStepConfig =
            new StepConfig("Hive", new StepFactory().newRunHiveScriptStep(stagingS3qUrl, cmdLineArgsArr));
    if (alive) {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT);
    } else {
        hiveStepConfig.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
    }
    return hiveStepConfig;
}
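A possible call site for the helper above; the script location, arguments, and the steps list are placeholders, not taken from the original source.

// Hypothetical usage of configureHiveStep; all values are made up for illustration.
StepConfig hiveStep = configureHiveStep(
        "s3://my-bucket/scripts/report.q",                                 // assumed script location
        "-d INPUT=s3://my-bucket/input -d OUTPUT=s3://my-bucket/output");  // assumed args
steps.add(hiveStep);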
StepConfig enabledebugging = new StepConfig()
        .withName("Enable debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

StepConfig installHive = new StepConfig()
        .withName("Install Hive")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newInstallHiveStep());

StepConfig runScript = new StepConfig()
        .withName("Run Script")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
private StepConfig stepConfig(String defaultName, String tag, Config step) {
    String name = step.get("name", String.class, defaultName);
    return new StepConfig()
            .withName(name + " (" + tag + ")")
            // TERMINATE_JOB_FLOW | TERMINATE_CLUSTER | CANCEL_AND_WAIT | CONTINUE
            .withActionOnFailure(step.get("action_on_failure", String.class, defaultActionOnFailure));
}
}
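A hedged usage sketch for the helper above; the Config contents and surrounding variables are assumptions.

// Hypothetical call, assuming stepSettings carries entries like
// {name: "load-data", action_on_failure: "CONTINUE"}.
StepConfig loadStep = stepConfig("load-data", "attempt-1", stepSettings);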
@Override
public void runJob(MavenManagedJar defaultJar, List<String> jarArgs) throws Exception {
    String jarLocation = defaultJar.s3JarLocation(awsSoftwareBucket);
    count++;
    String stepName = UUID.randomUUID().toString();
    StepConfig step = new StepConfig(stepName, new HadoopJarStepConfig(jarLocation).withArgs(jarArgs));
    emrClient.addJobFlowSteps(new AddJobFlowStepsRequest()
            .withJobFlowId(runningCluster)
            .withSteps(step));
    Thread.sleep(5000); // Enough time to transition out of WAITING?
    pollClusterForCompletion(runningCluster, stepName);
}
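The method above delegates to pollClusterForCompletion without showing it. Below is a minimal sketch of one way such a poller could look, using the SDK's ListSteps API; the poll interval, state handling, and exception choice are assumptions, not the original implementation.

import com.amazonaws.services.elasticmapreduce.model.ListStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.StepSummary;

// Hypothetical poller: waits for the named step to reach a terminal state.
private void pollClusterForCompletion(String clusterId, String stepName) throws InterruptedException {
    while (true) {
        for (StepSummary summary :
                emrClient.listSteps(new ListStepsRequest().withClusterId(clusterId)).getSteps()) {
            if (stepName.equals(summary.getName())) {
                String state = summary.getStatus().getState();
                if ("COMPLETED".equals(state)) {
                    return;
                }
                if ("FAILED".equals(state) || "CANCELLED".equals(state)) {
                    throw new IllegalStateException("Step " + stepName + " ended in state " + state);
                }
            }
        }
        Thread.sleep(30_000); // poll interval is an assumption
    }
}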
List<StepConfig> createEmrSteps(List<FlowStep> innerSteps, List<String> flowArgs,
        String jarLocation, Map<String, Object> upperScopeVariables) {
    List<StepConfig> steps = newArrayList();
    Map<String, Object> local = newHashMap(upperScopeVariables);
    for (FlowStep flowStep : innerSteps) {
        if (flowStep instanceof JobStep) {
            JobStep jobStep = (JobStep) flowStep;
            steps.add(new StepConfig("main",
                    new HadoopJarStepConfig(jarLocation).withArgs(jobStep.getStepArgs(local, flowArgs))));
        } else if (flowStep instanceof AssignmentStep) {
            AssignmentStep assignment = (AssignmentStep) flowStep;
            local = assignment.process(local, flowArgs);
        } else if (flowStep instanceof ForeachStep) {
            ForeachStep foreachStep = (ForeachStep) flowStep;
            for (Object value : foreachStep.getValues()) {
                local.put(foreachStep.getLoopVar(), value);
                steps.addAll(createEmrSteps(foreachStep.getFlowSteps(), flowArgs, jarLocation, local));
            }
        } else {
            throw new RuntimeException("Could not process step of type " + flowStep.getClass());
        }
    }
    return steps;
}
private StepConfig initHadoopStep(String jarUrl, String mainClass, List<String> jarStepArgs) {
    StepConfig stepConfig = new StepConfig();
    stepConfig.setName("custom jar: " + jarUrl);
    stepConfig.setHadoopJarStep(configureHadoopStep(jarUrl, mainClass, jarStepArgs));
    if (this.alive) {
        stepConfig.withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT);
    } else {
        stepConfig.withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW);
    }
    return stepConfig;
}
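A possible invocation of the helper above; the jar URL, main class, and arguments are placeholders, not values from the original source.

import java.util.Arrays;

// Hypothetical call site; all values below are made up for illustration.
StepConfig customJarStep = initHadoopStep(
        "s3://my-bucket/jars/wordcount.jar", // assumed jar location
        "com.example.WordCount",             // assumed main class
        Arrays.asList("s3://my-bucket/in", "s3://my-bucket/out"));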
// Without script arguments, build the step from the jar and main class alone;
// otherwise pass the arguments through. (Guard condition is reconstructed: the
// original snippet showed both returns back to back, making the second unreachable.)
if (CollectionUtils.isEmpty(scriptArguments)) {
    return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass));
}
return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure)
        .withHadoopJarStep(new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass)
                .withArgs(scriptArguments.toArray(new String[scriptArguments.size()])));
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrShellStep emrShellStep = (EmrShellStep) step;

    // Hadoop Jar provided by Amazon for running Shell Scripts
    String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR);

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (emrShellStep.isContinueOnError() != null && emrShellStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Add the script location
    List<String> argsList = new ArrayList<>();
    argsList.add(emrShellStep.getScriptLocation().trim());

    // Add the script arguments
    if (!CollectionUtils.isEmpty(emrShellStep.getScriptArguments())) {
        for (String argument : emrShellStep.getScriptArguments()) {
            argsList.add(argument.trim());
        }
    }

    // Return the StepConfig object
    HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList);
    return new StepConfig().withName(emrShellStep.getStepName().trim())
            .withActionOnFailure(actionOnFailure).withHadoopJarStep(jarConfig);
}
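For comparison, roughly what the returned step looks like when the configured jar resolves to Amazon's regional script-runner jar; the jar path, script location, and arguments below are assumptions.

// Rough hand-built equivalent, assuming the standard script-runner jar for us-east-1.
StepConfig shellStep = new StepConfig()
        .withName("run-shell-script")
        .withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT)
        .withHadoopJarStep(new HadoopJarStepConfig(
                "s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar") // assumed jar path
                .withArgs("s3://my-bucket/scripts/setup.sh", "first-arg"));             // assumed script + args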
// Without script arguments, run the hive script as-is; otherwise pass hiveArgs through.
// (Guard condition is reconstructed: the original snippet showed both returns back to
// back, making the second unreachable.)
if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
    return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
}
return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
        .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim(),
                hiveArgs.toArray(new String[hiveArgs.size()])));
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrPigStep pigStep = (EmrPigStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the pig script, just build the StepConfig object and return
    if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) {
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim()));
    }

    // If there are arguments specified
    return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim(),
                    pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()])));
}
new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion()).withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion())); appSteps.add(installHive); new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion()).withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion())); appSteps.add(installPig);
StepConfig emrStepConfig = new StepConfig();
@Override
public void runJob(MavenManagedJar defaultJar, List<String> jarArgs) throws Exception {
    String jarLocation = defaultJar.s3JarLocation(awsSoftwareBucket);
    List<String> appArgs = newArrayList(skip(jarArgs, 2));
    if (!validateJarArgs(appArgs)) {
        throw new Exception("Arguments to JAR were not valid");
    }
    StepConfig[] steps = {
            debugStep,
            new StepConfig("main", new HadoopJarStepConfig(jarLocation).withArgs(jarArgs))
    };
    String jobName = computeJobName(jarArgs);
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(jobName)
            .withBootstrapActions(bootstrapActions())
            .withSteps(steps)
            .withLogUri(awsLogUri)
            .withInstances(instances);
    RunJobFlowResult result = runJob(request);
    pollClusterForCompletion(result);
    fetchLogs.run(new String[] { result.getJobFlowId() });
}