byte[] serializedProtoPipeline = jobSpecification.getPipelineProto().toByteArray();
DataflowPackage stagedPipeline =
    options.getStager().stageToFile(serializedProtoPipeline, PIPELINE_FILE_NAME);

Job newJob = jobSpecification.getJob();
// Build the job handle from the submission result; "jobResult" comes from the
// elided job-submission call, and the DataflowPipelineJob wiring below is an
// assumed reconstruction of the truncated constructor call.
DataflowPipelineJob dataflowPipelineJob =
    new DataflowPipelineJob(
        DataflowClient.create(options),
        jobResult.getId(),
        options,
        jobSpecification.getStepNames());
/** Translates a {@link Pipeline} into a {@code JobSpecification}. */
public JobSpecification translate(
    Pipeline pipeline, DataflowRunner runner, List<DataflowPackage> packages) {
  // Capture the sdkComponents for lookup during step translation.
  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);

  LOG.debug("Portable pipeline proto:\n{}", TextFormat.printToString(pipelineProto));

  Translator translator = new Translator(pipeline, runner, sdkComponents);
  Job result = translator.translate(packages);
  return new JobSpecification(
      result, pipelineProto, Collections.unmodifiableMap(translator.stepNames));
}
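// A minimal usage sketch of the entry point above, assembled only from calls the
// tests below exercise (nothing beyond those APIs is assumed):
//
//   DataflowPipelineOptions options = buildPipelineOptions();
//   Pipeline pipeline = Pipeline.create(options);
//   DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
//   DataflowRunner runner = DataflowRunner.fromOptions(options);
//   runner.replaceTransforms(pipeline);
//   JobSpecification spec = translator.translate(pipeline, runner, Collections.emptyList());
//   Job job = spec.getJob();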
Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
assertAllStepOutputsHaveUniqueIds(job);
Job job = result.getJob();
Components componentsProto = result.getPipelineProto().getComponents();
RehydratedComponents components = RehydratedComponents.forComponents(componentsProto);
// "fn" is the transform id of the step under test, defined in elided surrounding code.
RunnerApi.PTransform spkTransform = componentsProto.getTransformsOrThrow(fn);
/** This tests a few corner cases that should not crash. */
@Test
public void testGoodWildcards() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);

  applyRead(pipeline, "gs://bucket/foo");
  applyRead(pipeline, "gs://bucket/foo/");
  applyRead(pipeline, "gs://bucket/foo/*");
  applyRead(pipeline, "gs://bucket/foo/?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]");
  applyRead(pipeline, "gs://bucket/foo/*baz*");
  applyRead(pipeline, "gs://bucket/foo/*baz?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
  applyRead(pipeline, "gs://bucket/foo/baz/*");
  applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
  applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
  applyRead(pipeline, "gs://bucket/foo*/baz");
  applyRead(pipeline, "gs://bucket/foo?/baz");
  applyRead(pipeline, "gs://bucket/foo[0-9]/baz");

  // Check that translation doesn't fail.
  JobSpecification jobSpecification =
      t.translate(pipeline, DataflowRunner.fromOptions(options), Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
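// The applyRead helper used above is not part of this excerpt; a minimal sketch
// of its assumed shape (a named TextIO read of the given path):
private void applyRead(Pipeline pipeline, String path) {
  pipeline.apply("Read(" + path + ")", TextIO.read().from(path));
}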
@Test
public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException {
  final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling = null;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setMaxNumWorkers(42);
  options.setAutoscalingAlgorithm(noScaling);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      42,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
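// For contrast with the null-algorithm case above, a hedged sketch of opting in
// to autoscaling; the THROUGHPUT_BASED constant is assumed from
// DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType:
//
//   options.setAutoscalingAlgorithm(
//       DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.THROUGHPUT_BASED);
//   options.setMaxNumWorkers(42);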
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");
  runner.replaceTransforms(pipeline);

  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  // The Create step
  Step step = job.getSteps().get(0);

  // This is the name that is "set by the user" that the Dataflow translator must override
  String userSpecifiedName =
      getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // This is the calculated name that must actually be used
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
@Test
public void testToSingletonTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(7).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(8);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
@Test
public void testScalingAlgorithmNone() throws IOException {
  final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling =
      DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.NONE;

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setAutoscalingAlgorithm(noScaling);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(
      "AUTOSCALING_ALGORITHM_NONE",
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
@Test
public void testScalingAlgorithmMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  // Autoscaling settings are always set.
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor

  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(2);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
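// A hedged sketch: other View transforms (e.g. a map-valued side input) would be
// applied the same way as the singleton and iterable cases above; View.asMap and
// KV are standard Beam APIs, but the exact step layout asserted above is not
// assumed to carry over:
//
//   pipeline.apply(Create.of(KV.of("k", 1))).apply(View.asMap());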
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  PCollection<Integer> input = p.begin().apply(Create.of(1, 2, 3));

  input.apply(new UnrelatedOutputCreator());
  input.apply(new UnboundOutputCreator());

  DataflowPipelineTranslator t =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));

  // Check that translation doesn't fail.
  JobSpecification jobSpecification =
      t.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
@Test
public void testWorkerMachineTypeConfig() throws IOException {
  final String testMachineType = "test-machine-type";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerMachineType(testMachineType);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());

  WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
  assertEquals(testMachineType, workerPool.getMachineType());
}
@Test
public void testNetworkConfig() throws IOException {
  final String testNetwork = "test-network";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNetwork(testNetwork);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testNetwork, job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
@Test
public void testDiskSizeGbConfig() throws IOException {
  final Integer diskSizeGb = 1234;

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(diskSizeGb);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
@Test
public void testZoneConfig() throws IOException {
  final String testZone = "test-zone-1";

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setZone(testZone);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(testZone, job.getEnvironment().getWorkerPools().get(0).getZone());
}
@Test
public void testNetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
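// Taken together, the worker-pool knobs exercised by the tests above can be
// combined on a single options instance before translation; the setters are the
// ones used above, and the values are the tests' own placeholder examples:
//
//   DataflowPipelineOptions options = buildPipelineOptions();
//   options.setWorkerMachineType("test-machine-type");
//   options.setNetwork("test-network");
//   options.setDiskSizeGb(1234);
//   options.setZone("test-zone-1");
//   options.setMaxNumWorkers(42);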