@Override
public Iterator<E> iterator() {
  if (materialized == null) {
    this.result = pipeline.run();
    if (result.succeeded() || !pipeline.getConfiguration().getBoolean("crunch.empty.materialize.on.failure", false)) {
      materialize();
    } else {
      LOG.error("Pipeline run failed, returning empty iterator");
      return Iterators.emptyIterator();
    }
  }
  return materialized.iterator();
}
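A caller can opt in to the empty-iterator behavior above through the pipeline configuration. A minimal sketch, assuming an MRPipeline and a hypothetical MyDriver class; only the "crunch.empty.materialize.on.failure" property name comes from the snippet itself:

// Minimal sketch (not from the original source): opting in to the behavior above.
// "MyDriver" is hypothetical; only the property name is taken from the snippet.
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.conf.Configuration;

public class MyDriver {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(MyDriver.class, new Configuration());
    // With this flag set, iterating a materialized collection after a failed
    // implicit run yields an empty iterator instead of attempting to materialize.
    pipeline.getConfiguration().setBoolean("crunch.empty.materialize.on.failure", true);
    // ... build and run the pipeline as in the examples below ...
  }
}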
private boolean done(Pipeline job, boolean isVerbose) {
  if (isVerbose) {
    job.enableDebug();
    job.getConfiguration().setBoolean("crunch.log.job.progress", true); // see class RuntimeParameters
  }
  String name = job.getName();
  LOG.debug("Running pipeline: " + name);
  pipelineResult = job.done();
  boolean success = pipelineResult.succeeded();
  if (success) {
    LOG.info("Succeeded with pipeline: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  } else {
    LOG.error("Pipeline failed: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  }
  return success;
}
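The getJobInfo helper called above is not shown in this snippet. One possible shape for it, sketched purely for illustration, uses Crunch's PipelineResult.StageResult to list the completed stages; the formatting, and the decision to ignore isVerbose, are assumptions rather than the original implementation:

// Illustrative only -- the original getJobInfo is not shown in this snippet.
// Summarizes a PipelineResult by listing the names of its stages.
private String getJobInfo(PipelineResult pipelineResult, boolean isVerbose) {
  StringBuilder info = new StringBuilder();
  for (PipelineResult.StageResult stage : pipelineResult.getStageResults()) {
    info.append("[stage: ").append(stage.getStageName()).append("] ");
  }
  // Per-stage counters could be appended here when isVerbose is true.
  return info.toString().trim();
}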
return result.succeeded() ? 0 : 1;
if (result.succeeded()) {
  console.info("Added {} records to \"{}\"", task.getCount(), datasets.get(1));
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }

  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);

  // Aggregator used for summing up response size
  Aggregator<Long> agg = Aggregators.SUM_LONGS();

  // Table of (ip, sum(response size))
  PTable<String, Long> ipAddrResponseSize = lines
      .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))
      .groupByKey()
      .combineValues(agg);

  pipeline.writeTextFile(ipAddrResponseSize, args[1]);

  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println();
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }

  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalWordCount.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);

  // Define a function that emits the number of words in each line,
  // producing a PCollection of per-line word counts.
  PCollection<Long> numberOfWords = lines.parallelDo(new DoFn<String, Long>() {
    public void process(String line, Emitter<Long> emitter) {
      emitter.emit((long) line.split("\\s+").length);
    }
  }, Writables.longs()); // Indicates the serialization format

  // aggregate() rolls the collection up into a single value; first() exposes it as a PObject.
  PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();

  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.run();

  System.out.println("Total number of words: " + totalCount.getValue());

  pipeline.done();

  return result.succeeded() ? 0 : 1;
}
@Override
public int run() throws IOException {
  Preconditions.checkArgument(datasets.size() == 1,
      "Cannot compact multiple datasets");

  String uriOrName = datasets.get(0);
  View<Record> view = load(uriOrName, Record.class);

  if (isDatasetOrViewUri(uriOrName)) {
    Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
        "Resolved view does not match requested view: " + view.getUri());
  }

  CompactionTask task = new CompactionTask<Record>(view);

  task.setConf(getConf());

  if (numWriters >= 0) {
    task.setNumWriters(numWriters);
  }

  if (filesPerPartition > 0) {
    task.setFilesPerPartition(filesPerPartition);
  }

  PipelineResult result = task.run();

  if (result.succeeded()) {
    console.info("Compacted {} records in \"{}\"", task.getCount(), uriOrName);
    return 0;
  } else {
    return 1;
  }
}
    Target.WriteMode.APPEND);

return run().succeeded() ? 0 : 1;
return pipeline.done().succeeded() ? 0 : 1;
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }

  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);

  // Aggregator used for summing up response size and count
  Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());

  // Table of (ip, sum(response size), count)
  PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
      .parallelDo(extractResponseSize,
          Writables.tableOf(Writables.strings(), Writables.pairs(Writables.longs(), Writables.longs())))
      .groupByKey()
      .combineValues(agg);

  // Calculate average response size by ip address
  PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
      Writables.tableOf(Writables.strings(), Writables.doubles()));

  // write the result to a text file
  pipeline.writeTextFile(avgs, args[1]);

  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
if (result.succeeded()) {
  long count = task.getCount();
  if (count > 0) {
@Override
public int run(String[] args) throws Exception {
  final long startOfToday = startOfDay();

  // the destination dataset
  Dataset<Record> persistent = Datasets.load(
      "dataset:file:/tmp/data/logs", Record.class);

  // the source: anything before today in the staging area
  Dataset<Record> staging = Datasets.load(
      "dataset:file:/tmp/data/logs_staging", Record.class);
  View<Record> ready = staging.toBefore("timestamp", startOfToday);

  ReadableSource<Record> source = CrunchDatasets.asSource(ready);
  PCollection<Record> stagedLogs = read(source);

  getPipeline().write(stagedLogs, CrunchDatasets.asTarget(persistent), Target.WriteMode.APPEND);

  PipelineResult result = run();

  if (result.succeeded()) {
    // remove the source data partition from staging
    ready.deleteAll();
    return 0;
  } else {
    return 1;
  }
}
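The startOfDay() helper used above is not shown; a minimal sketch of one way to compute it with java.time (the method body is an assumption, not the original code):

// Illustrative only -- the original startOfDay() is not shown in this snippet.
// Returns midnight of the current day, in the system time zone, as epoch millis.
private static long startOfDay() {
  return java.time.LocalDate.now()
      .atStartOfDay(java.time.ZoneId.systemDefault())
      .toInstant()
      .toEpochMilli();
}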