// Step 1: install the EMR debugging agent so step logs are browsable in the
// console; a failure here terminates the whole job flow.
StepConfig debugStep = new StepConfig()
        .withName("Enable Debugging")
        .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());
steps.add(debugStep);

// Step 2: the Spark step itself; its on-failure action is caller-supplied.
StepConfig sparkJob = new StepConfig()
        .withName("Spark Step")
        .withActionOnFailure(action)
        .withHadoopJarStep(sparkStepConf);
steps.add(sparkJob);
/**
 * Hash code over the three model properties (name, action on failure,
 * Hadoop JAR step) using the standard 31-based accumulation; a null
 * property contributes 0, keeping this consistent with equals().
 */
@Override
public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((getName() == null) ? 0 : getName().hashCode());
    result = prime * result + ((getActionOnFailure() == null) ? 0 : getActionOnFailure().hashCode());
    result = prime * result + ((getHadoopJarStep() == null) ? 0 : getHadoopJarStep().hashCode());
    return result;
}
// Unmarshals a StepConfig from a JSON token stream.
// NOTE(review): this fragment looks truncated — neither the method body nor
// the "Name" branch is closed here. More importantly, ActionOnFailure and
// HadoopJarStep are unmarshalled INSIDE the "Name" testExpression branch
// without their own testExpression()/nextToken() guards; generated AWS SDK
// unmarshallers normally guard each field separately. Confirm against the
// complete file before relying on this.
public StepConfig unmarshall(JsonUnmarshallerContext context) throws Exception { StepConfig stepConfig = new StepConfig(); if (context.testExpression("Name", targetDepth)) { context.nextToken(); stepConfig.setName(context.getUnmarshaller(String.class).unmarshall(context)); stepConfig.setActionOnFailure(context.getUnmarshaller(String.class).unmarshall(context)); stepConfig.setHadoopJarStep(HadoopJarStepConfigJsonUnmarshaller.getInstance().unmarshall(context));
/**
 * Creates a {@code StepConfig} with the given step name and Hadoop JAR
 * configuration. Any remaining members (e.g. the action on failure) can be
 * initialized afterwards via the setters or the fluent {@code with...} methods.
 *
 * @param name
 *        The name of the step.
 * @param hadoopJarStep
 *        The JAR file used for the step.
 */
public StepConfig(String name, HadoopJarStepConfig hadoopJarStep) {
    this.setName(name);
    this.setHadoopJarStep(hadoopJarStep);
}
/**
 * Builds the step configuration for a custom Hadoop JAR step. If the
 * cluster is kept alive, a failing step only cancels and waits; otherwise
 * the whole job flow is terminated.
 */
private StepConfig initHadoopStep(String jarUrl, String mainClass, List<String> jarStepArgs) {
    ActionOnFailure failureAction = this.alive
            ? ActionOnFailure.CANCEL_AND_WAIT
            : ActionOnFailure.TERMINATE_JOB_FLOW;
    return new StepConfig()
            .withName("custom jar: " + jarUrl)
            .withHadoopJarStep(configureHadoopStep(jarUrl, mainClass, jarStepArgs))
            .withActionOnFailure(failureAction);
}
private StepConfig stepConfig(String defaultName, String tag, Config step) { String name = step.get("name", String.class, defaultName); return new StepConfig() .withName(name + " (" + tag + ")") // TERMINATE_JOB_FLOW | TERMINATE_CLUSTER | CANCEL_AND_WAIT | CONTINUE .withActionOnFailure(step.get("action_on_failure", String.class, defaultActionOnFailure)); } }
// Builds the job-flow steps; every step terminates the job flow on failure.
// NOTE(review): this fragment appears truncated — the "Run Script" builder
// chain ends without its HadoopJarStep and without a terminating semicolon;
// confirm the remainder in the complete file.
StepConfig enabledebugging = new StepConfig() .withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); StepConfig installHive = new StepConfig() .withName("Install Hive") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newInstallHiveStep()); StepConfig runScript = new StepConfig() .withName("Run Script") .withActionOnFailure("TERMINATE_JOB_FLOW")
/**
 * Builds the Hive script step. The whitespace-separated command line is
 * split into individual arguments (each passed through replaceDoubleS3 —
 * presumably an S3-URL fixup; confirm its contract in the full file). A
 * null command line is preserved as a single empty argument, matching the
 * original behavior. On failure the step cancels-and-waits for long-lived
 * clusters, or terminates the job flow otherwise.
 */
private StepConfig configureHiveStep(String stagingS3qUrl, String cmdLineArgs) {
    String[] args;
    if (cmdLineArgs == null) {
        // Historical behavior: one empty argument, not an empty array.
        args = new String[] { "" };
    } else {
        args = Arrays.stream(cmdLineArgs.split("\\s+"))
                .map(e -> replaceDoubleS3(e))
                .toArray(String[]::new);
    }
    StepConfig hiveStep = new StepConfig("Hive",
            new StepFactory().newRunHiveScriptStep(stagingS3qUrl, args));
    hiveStep.withActionOnFailure(alive
            ? ActionOnFailure.CANCEL_AND_WAIT
            : ActionOnFailure.TERMINATE_JOB_FLOW);
    return hiveStep;
}
/**
 * Verifies that getEmrHadoopJarStepConfig propagates the step name, JAR
 * location, and argument list into the resulting StepConfig.
 */
@Test
public void testEmrHadoopJarStepConfigWithArguments() throws Exception {
    List<String> arguments = new ArrayList<>();
    arguments.add("arg1");
    StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, arguments, false);
    // Fixed typo in assertion message: "retuned" -> "returned".
    assertNotNull("step not returned", stepConfig);
    assertEquals("name not found", "step_name", stepConfig.getName());
    assertEquals("jar not found", "jar_location", stepConfig.getHadoopJarStep().getJar());
    assertNotNull("arguments not found", stepConfig.getHadoopJarStep().getArgs());
}
/**
 * <p>
 * Sets the action to take when the cluster step fails. Possible values are
 * TERMINATE_CLUSTER, CANCEL_AND_WAIT, and CONTINUE. TERMINATE_JOB_FLOW is
 * provided for backward compatibility; TERMINATE_CLUSTER is recommended
 * instead.
 * </p>
 *
 * @param actionOnFailure
 *        The action to take when the cluster step fails. Possible values are TERMINATE_CLUSTER, CANCEL_AND_WAIT,
 *        and CONTINUE. TERMINATE_JOB_FLOW is provided for backward compatibility. We recommend using
 *        TERMINATE_CLUSTER instead.
 * @see ActionOnFailure
 */
public void setActionOnFailure(ActionOnFailure actionOnFailure) {
    // Delegate to the fluent setter so the enum-to-string conversion lives
    // in one place.
    this.withActionOnFailure(actionOnFailure);
}
/**
 * Matches step details whose configured step name equals {@code stepName}.
 * The parameter is declared {@code @Nullable}, but the original body
 * dereferenced it unconditionally and would NPE on a null input — treat
 * null as "no match" instead.
 */
@Override
public boolean apply(@Nullable StepDetail input) {
    return input != null
            && input.getStepConfig().getName().equals(stepName);
}
}),null);
/**
 * Registers a script-runner step that executes the command runner with the
 * given configuration. The configuration is serialized to JSON, staged
 * remotely as "config.json", and its S3 URI is passed to the runner.
 *
 * @throws IOException if serializing or staging the configuration fails
 */
private void addStep(String name, CommandRunnerConfiguration configuration) throws IOException {
    byte[] configJson = objectMapper.writeValueAsBytes(configuration);
    FileReference configFile = ImmutableFileReference.builder()
            .type(FileReference.Type.DIRECT)
            .contents(configJson)
            .filename("config.json")
            .build();
    RemoteFile stagedConfig = prepareRemoteFile(configFile, false);
    StepConfig runStep = stepConfig(name, tag, step)
            .withHadoopJarStep(stepFactory().newScriptRunnerStep(
                    runner.s3Uri().toString(),
                    stagedConfig.s3Uri().toString()));
    configs.add(runStep);
}
/**
 * <p>
 * Fluent setter for the JAR file used for the step.
 * </p>
 *
 * @param hadoopJarStep
 *        The JAR file used for the step.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public StepConfig withHadoopJarStep(HadoopJarStepConfig hadoopJarStep) {
    this.setHadoopJarStep(hadoopJarStep);
    return this;
}
@Override public void runJob(MavenManagedJar defaultJar, List<String> jarArgs) throws Exception { String jarLocation=defaultJar.s3JarLocation(awsSoftwareBucket); count++; String stepName = UUID.randomUUID().toString(); StepConfig step = new StepConfig(stepName,new HadoopJarStepConfig(jarLocation).withArgs(jarArgs)); emrClient.addJobFlowSteps(new AddJobFlowStepsRequest() .withJobFlowId(runningCluster) .withSteps(step)); Thread.sleep(5000); // Enough time to transition out of WAITING? pollClusterForCompletion(runningCluster, stepName); }
/**
 * Verifies that getEmrHadoopJarStepConfig propagates the step name and JAR
 * location into the resulting StepConfig when no arguments are supplied.
 */
@Test
public void testEmrHadoopJarStepConfig() throws Exception {
    StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, null, false);
    // Fixed typo in assertion message: "retuned" -> "returned".
    assertNotNull("step not returned", stepConfig);
    assertEquals("name not found", "step_name", stepConfig.getName());
    assertEquals("jar not found", "jar_location", stepConfig.getHadoopJarStep().getJar());
}
/**
 * <p>
 * The action to take when the cluster step fails. Accepted values:
 * TERMINATE_CLUSTER, CANCEL_AND_WAIT, CONTINUE, plus the legacy
 * TERMINATE_JOB_FLOW (kept for backward compatibility — prefer
 * TERMINATE_CLUSTER).
 * </p>
 *
 * @param actionOnFailure
 *        The action to take when the cluster step fails. Possible values are TERMINATE_CLUSTER, CANCEL_AND_WAIT,
 *        and CONTINUE. TERMINATE_JOB_FLOW is provided for backward compatibility. We recommend using
 *        TERMINATE_CLUSTER instead.
 * @see ActionOnFailure
 */
public void setActionOnFailure(ActionOnFailure actionOnFailure) {
    withActionOnFailure(actionOnFailure);
}
// Returns a Submitter that adds this task's steps to an existing EMR cluster.
// Submission is wrapped in runOnce so a retried workflow does not resubmit the
// same steps; AWS errors are retried (30s..5m backoff) unless deterministic.
// Left byte-identical: the compile -> stage -> submit ordering and the retry
// bookkeeping are too order-sensitive to restyle safely from this view.
private Submitter existingClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, String clusterId, Filer filer) { return () -> { List<String> stepIds = pollingRetryExecutor(state, "submission") .retryUnless(AmazonServiceException.class, Aws::isDeterministicException) .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5))) .runOnce(new TypeReference<List<String>>() {}, s -> { RemoteFile runner = prepareRunner(filer, tag); // Compile steps stepCompiler.compile(runner); // Stage files to S3 filer.stageFiles(); AddJobFlowStepsRequest request = new AddJobFlowStepsRequest() .withJobFlowId(clusterId) .withSteps(stepCompiler.stepConfigs()); int steps = request.getSteps().size(); logger.info("Submitting {} EMR step(s) to {}", steps, clusterId); AddJobFlowStepsResult result = emr.addJobFlowSteps(request); logSubmittedSteps(clusterId, steps, i -> request.getSteps().get(i).getName(), i -> result.getStepIds().get(i)); return ImmutableList.copyOf(result.getStepIds()); }); return SubmissionResult.ofExistingCluster(clusterId, stepIds); }; }
/**
 * Constructs a new StepConfig object. Callers should use the setter or
 * fluent setter (with...) methods to initialize any additional object
 * members.
 *
 * @param name
 *        The name of the step.
 * @param hadoopJarStep
 *        The JAR file used for the step.
 */
public StepConfig(String name, HadoopJarStepConfig hadoopJarStep) {
    // Route through the setters so any validation they perform applies here too.
    setName(name);
    setHadoopJarStep(hadoopJarStep);
}
/**
 * <p>
 * Sets the JAR file used for the step and returns this instance so calls
 * can be chained.
 * </p>
 *
 * @param hadoopJarStep
 *        The JAR file used for the step.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public StepConfig withHadoopJarStep(HadoopJarStepConfig hadoopJarStep) {
    setHadoopJarStep(hadoopJarStep);
    return this;
}
// NOTE(review): two consecutive return statements — as shown here the second
// is unreachable. These are presumably the two arms of a conditional (with /
// without script arguments) collapsed by extraction; confirm the surrounding
// if/else in the complete file.
return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure) .withHadoopJarStep(new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass));
// Variant that also forwards the collected script arguments to the JAR.
return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep( new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass) .withArgs(scriptArguments.toArray(new String[scriptArguments.size()])));