/**
 * Outputs one string of the form {@code beam_load_<jobName>_<uuid>}, where the job name has
 * every {@code -} removed and the suffix is produced by
 * {@code BigQueryHelpers.randomUUIDString()}.
 *
 * @param c the process context supplying the pipeline options and receiving the output
 */
@ProcessElement
public void process(ProcessContext c) {
  // Strip dashes from the job name before embedding it in the generated string.
  String dashlessJobName = c.getPipelineOptions().getJobName().replaceAll("-", "");
  String uniqueSuffix = BigQueryHelpers.randomUUIDString();
  String generated = String.format("beam_load_%s_%s", dashlessJobName, uniqueSuffix);
  c.output(generated);
}
}))
private TableManifest buildGcsManifest(ProcessContext c, Iterable<GcsPath> files) { org.apache.beam.sdk.util.GcsUtil gcsUtil = c.getPipelineOptions().as(GcsOptions.class).getGcsUtil(); TableManifest.Builder result = TableManifest.newBuilder(); List<GcsPath> gcsPaths = new ArrayList<>(); files.forEach(gcsPaths::add); // Fetch object metadata from GCS List<String> checksums = FileChecksum.getGcsFileChecksums(gcsUtil, gcsPaths); for (int i = 0; i < gcsPaths.size(); i++) { String fileName = gcsPaths.get(i).getFileName().getObject(); String hash = checksums.get(i); result.addFilesBuilder().setName(fileName).setMd5(hash); } return result.build(); } }
/**
 * Creates a source for the job id carried by the element, extracts its files, and emits each
 * extracted file (as a string) on the main output followed by the JSON form of the result's
 * schema on {@code tableSchemaTag}.
 *
 * @param c the process context supplying the element and options and receiving the outputs
 * @throws Exception if source creation, extraction, or cleanup fails
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  String jobId = c.element();
  BigQuerySourceBase<T> extractSource = createSource(jobId, coder);
  BigQueryOptions bqOptions = c.getPipelineOptions().as(BigQueryOptions.class);

  ExtractResult extracted = extractSource.extractFiles(bqOptions);
  LOG.info("Extract job produced {} files", extracted.extractedFiles.size());

  // Cleanup runs before any result is emitted, exactly as in the original ordering.
  extractSource.cleanupTempResource(bqOptions);

  extracted.extractedFiles.forEach(file -> c.output(file.toString()));
  String schemaJson = BigQueryHelpers.toJsonString(extracted.schema);
  c.output(tableSchemaTag, schemaJson);
}
})
/**
 * Turns the read group set id carried by the element into {@link StreamReadsRequest}s via
 * {@code ShardUtils.getReadRequests} and emits each request keyed by its {@code hashCode()}.
 *
 * <p>When {@code ShardOptions#isAllReferences()} is true, the overload taking
 * {@code sexChromosomeFilter} and {@code auth} is used; otherwise the overload taking the
 * configured references is used.
 *
 * @param c the process context supplying the element and options and receiving the output
 * @throws Exception if building the requests fails
 */
@ProcessElement
public void processElement(DoFn<String, KV<Integer, StreamReadsRequest>>.ProcessContext c)
    throws Exception {
  ShardOptions shardOptions = c.getPipelineOptions().as(ShardOptions.class);
  String readGroupSetId = c.element();

  // Assigned exactly once on each branch, so no placeholder initial value is needed.
  final List<StreamReadsRequest> shardRequests;
  if (!shardOptions.isAllReferences()) {
    shardRequests =
        ShardUtils.getReadRequests(
            Collections.singletonList(readGroupSetId),
            shardOptions.getReferences(),
            shardOptions.getBasesPerShard());
  } else {
    shardRequests =
        ShardUtils.getReadRequests(
            readGroupSetId, sexChromosomeFilter, shardOptions.getBasesPerShard(), auth);
  }

  for (StreamReadsRequest shardRequest : shardRequests) {
    c.output(KV.of(shardRequest.hashCode(), shardRequest));
  }
}
}
/**
 * Resolves and emits the temporary location string. The root is
 * {@code customGcsTempLocation} when that field is non-null, otherwise the pipeline's temp
 * location; it is combined with {@code "BigQueryWriteTemp"} and the {@code jobIdView} side
 * input by {@code resolveTempLocation}.
 *
 * @param c the process context supplying options and side input and receiving the output
 */
@ProcessElement
public void getTempFilePrefix(ProcessContext c) {
  String root =
      customGcsTempLocation != null
          ? customGcsTempLocation.get()
          : c.getPipelineOptions().getTempLocation();
  String resolved = resolveTempLocation(root, "BigQueryWriteTemp", c.sideInput(jobIdView));
  LOG.info("Writing BigQuery temporary files to {} before loading them.", resolved);
  c.output(resolved);
}
})
/**
 * Reads the offset range named by the element's value from the file named by the element's
 * key and emits every record the reader yields.
 *
 * <p>The source is built with {@code createSource} from the file's resource id, wrapped in a
 * {@link CompressedSource} using the file's compression; the reader is closed when reading
 * finishes or fails.
 *
 * @param c the process context supplying the element and options and receiving the output
 * @throws IOException if the reader fails
 */
@ProcessElement
public void process(ProcessContext c) throws IOException {
  ReadableFile readable = c.element().getKey();
  OffsetRange offsets = c.element().getValue();

  String resourceSpec = readable.getMetadata().resourceId().toString();
  FileBasedSource<T> wholeFileSource =
      CompressedSource.from(createSource.apply(resourceSpec))
          .withCompression(readable.getCompression());

  try (BoundedSource.BoundedReader<T> records =
      wholeFileSource
          .createForSubrangeOfFile(readable.getMetadata(), offsets.getFrom(), offsets.getTo())
          .createReader(c.getPipelineOptions())) {
    // start() positions the reader on the first record; advance() moves to each later one.
    if (records.start()) {
      do {
        c.output(records.getCurrent());
      } while (records.advance());
    }
  }
}
}
/**
 * Returns the pipeline options reported by the wrapped {@code context}.
 *
 * @return the result of {@code context.getPipelineOptions()}
 */
public PipelineOptions getPipelineOptions() { return context.getPipelineOptions(); }
/**
 * Returns the pipeline options reported by {@code outerContext}.
 *
 * @return the result of {@code outerContext.getPipelineOptions()}
 */
@Override public PipelineOptions pipelineOptions() { return outerContext.getPipelineOptions(); }
/**
 * Emits the element's key paired with the element's value divided by the bucket width from
 * {@code Options#getBucketWidth()}. Both operands are cast to {@code double}, so the division
 * is floating-point.
 *
 * @param c the process context supplying the element and options and receiving the output
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Object key = c.element().getKey();
  double value = (double) c.element().getValue();
  double bucketWidth = (double) c.getPipelineOptions().as(Options.class).getBucketWidth();

  // NOTE(review): the raw KV type is kept from the original because the output type
  // parameters are not visible here; consider parameterizing it once they are confirmed.
  KV scaled = KV.of(key, value / bucketWidth);
  c.output(scaled);
}
}
/**
 * Emits the element's {@link Variant} once for every bin from {@code getStartBin} through
 * {@code getEndBin} (inclusive). Each output is keyed by the pair of the variant's reference
 * name and {@code bin * binSize}.
 *
 * @param context the process context supplying the element and options and receiving output
 */
@ProcessElement
public void processElement(ProcessContext context) {
  int binSize = context.getPipelineOptions().as(Options.class).getBinSize();
  Variant variant = context.element();

  long firstBin = getStartBin(binSize, variant);
  long lastBin = getEndBin(binSize, variant);

  long bin = firstBin;
  while (bin <= lastBin) {
    long binStart = bin * binSize;
    context.output(KV.of(KV.of(variant.getReferenceName(), binStart), variant));
    bin++;
  }
}
}
/**
 * Asserts that the context exposes non-null pipeline options and that their
 * {@code MyOptions#getTestOption()} value equals the one held by the enclosing
 * {@code options}.
 *
 * @param c the process context whose pipeline options are checked
 * @throws Exception declared by the original signature
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  Assert.assertNotNull(c.getPipelineOptions());

  MyOptions runtimeOptions = c.getPipelineOptions().as(MyOptions.class);
  Assert.assertEquals(options.getTestOption(), runtimeOptions.getTestOption());
}
}
/**
 * Reads every record from the {@code BoundedSource} carried by the element and emits each
 * one with the timestamp reported by the reader.
 *
 * <p>The reader is opened in a try-with-resources statement so it is closed once reading
 * finishes or fails; previously it was never closed.
 *
 * <p>NOTE(review): the method name is misspelled ("Soruce"); it is kept unchanged here to
 * preserve the existing interface.
 *
 * @param ctxt the process context supplying the element and options and receiving the output
 * @throws IOException if creating, advancing, or closing the reader fails
 */
@ProcessElement
public void readSoruce(ProcessContext ctxt) throws IOException {
  try (BoundedSource.BoundedReader<T> reader =
      ctxt.element().createReader(ctxt.getPipelineOptions())) {
    // start() positions the reader on the first record; advance() moves to each later one.
    for (boolean more = reader.start(); more; more = reader.advance()) {
      ctxt.outputWithTimestamp(reader.getCurrent(), reader.getCurrentTimestamp());
    }
  }
}
}
/**
 * Divides the element's value by the bucket width from {@code Options#getBucketWidth()}
 * (both cast to {@code double}) and emits the quotient paired with the element's key.
 *
 * @param c the process context supplying the element and options and receiving the output
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Object elementKey = c.element().getKey();
  double numerator = (double) c.element().getValue();
  double denominator = (double) c.getPipelineOptions().as(Options.class).getBucketWidth();
  double quotient = numerator / denominator;

  // NOTE(review): the raw KV type is retained from the original since the output type
  // parameters cannot be seen from this block; parameterize once they are confirmed.
  KV normalized = KV.of(elementKey, quotient);
  c.output(normalized);
}
}
/**
 * Passes the pipeline {@code Options}, the element, and the {@code shards} and
 * {@code eofContents} side inputs to {@code combineShards} and emits the string it returns.
 *
 * @param c the process context supplying the element, options, and side inputs
 * @throws Exception if {@code combineShards} fails
 */
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
  Options pipelineOptions = c.getPipelineOptions().as(Options.class);
  String input = c.element();
  c.output(combineShards(pipelineOptions, input, c.sideInput(shards), c.sideInput(eofContents)));
}
/**
 * Checks that the context's pipeline options are non-null and that, viewed as
 * {@code MyOptions}, their test option equals the test option of the enclosing
 * {@code options}.
 *
 * @param c the process context whose pipeline options are checked
 * @throws Exception declared by the original signature
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  Assert.assertNotNull(c.getPipelineOptions());

  MyOptions seenByWorker = c.getPipelineOptions().as(MyOptions.class);
  Assert.assertEquals(options.getTestOption(), seenByWorker.getTestOption());
}
}
/**
 * Calls {@code combineShards} with the pipeline {@code Options}, the element, and the
 * {@code shards} and {@code eofContents} side inputs, then emits the resulting string.
 *
 * @param c the process context supplying the element, options, and side inputs
 * @throws Exception if {@code combineShards} fails
 */
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
  Options combineOptions = c.getPipelineOptions().as(Options.class);
  String element = c.element();
  String combined =
      combineShards(combineOptions, element, c.sideInput(shards), c.sideInput(eofContents));
  c.output(combined);
}
/**
 * For each bin index between {@code getStartBin} and {@code getEndBin} (inclusive), emits the
 * element's {@link Variant} keyed by the pair of its reference name and
 * {@code bin * binSize}.
 *
 * @param context the process context supplying the element and options and receiving output
 */
@ProcessElement
public void processElement(ProcessContext context) {
  Options pipelineOptions = context.getPipelineOptions().as(Options.class);
  int width = pipelineOptions.getBinSize();
  Variant current = context.element();

  long lowestBin = getStartBin(width, current);
  long highestBin = getEndBin(width, current);

  for (long index = lowestBin; index <= highestBin; index++) {
    long scaledIndex = index * width;
    context.output(KV.of(KV.of(current.getReferenceName(), scaledIndex), current));
  }
}
}
/**
 * Returns the pipeline options reported by {@code outerContext}.
 *
 * @return the result of {@code outerContext.getPipelineOptions()}
 */
@Override public PipelineOptions getPipelineOptions() { return outerContext.getPipelineOptions(); }
/**
 * Splits {@code source} using {@code bundleSize} and the pipeline options, and emits each
 * resulting {@code BoundedSource} as a separate output element.
 *
 * @param ctxt the process context supplying the options and receiving the output
 * @throws Exception if splitting the source fails
 */
@ProcessElement
public void splitSource(ProcessContext ctxt) throws Exception {
  Iterable<? extends BoundedSource<T>> pieces =
      source.split(bundleSize, ctxt.getPipelineOptions());
  for (BoundedSource<T> piece : pieces) {
    ctxt.output(piece);
  }
}
}
/**
 * Verifies that pipeline options are available from the context and that their
 * {@code MyOptions#getTestOption()} value matches the one on the enclosing {@code options}.
 *
 * @param c the process context whose pipeline options are checked
 * @throws Exception declared by the original signature
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  Assert.assertNotNull(c.getPipelineOptions());

  MyOptions fromContext = c.getPipelineOptions().as(MyOptions.class);
  Assert.assertEquals(options.getTestOption(), fromContext.getTestOption());
}
}