/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 */
public static Input ofDataset(String datasetName, Map<String, String> arguments) {
  return ofDataset(datasetName, arguments, null);
}

/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 */
public static Input ofDataset(String datasetName) {
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
}

/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param splits the data selection splits. If null, will use the splits defined by the dataset. If the dataset
 *               type is not {@link BatchReadable}, splits will be ignored.
 */
public static Input ofDataset(String datasetName, @Nullable Iterable<? extends Split> splits) {
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS, splits);
}

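The three overloads above are typically called from a program's initialize() method. A minimal sketch of how they differ follows; the mapper class MyMapper and the dataset names "purchases", "profiles", and "events" are illustrative assumptions, not taken from the source:

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  context.getHadoopJob().setMapperClass(MyMapper.class);

  // Name only: reads the dataset with RuntimeArguments.NO_ARGUMENTS and its default splits.
  context.addInput(Input.ofDataset("purchases"));

  // Name plus arguments: the map is passed to the dataset when it is instantiated.
  context.addInput(Input.ofDataset("profiles", ImmutableMap.of("ttl", "3600")));

  // Name plus splits: a null Iterable falls back to the splits defined by the dataset itself
  // (the cast disambiguates this call from the Map overload).
  context.addInput(Input.ofDataset("events", (Iterable<? extends Split>) null));
}
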
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  // if macros were provided at runtime, the dataset needs to be created now
  if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) {
    context.createDataset(properties.get(Properties.BatchReadableWritable.NAME),
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME)));
}
}

@Override
public void initialize() {
  getContext().addInput(Input.ofDataset(PURCHASES, ImmutableMap.of("key", "value")));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(DummyMapper.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, new Path(context.getRuntimeArguments().get("outputPath")));
  PedanticTxAware input = context.getDataset("pedanticTxAware", ImmutableMap.of("value", "1"));
  context.addInput(Input.ofDataset("pedanticTxAware", ImmutableMap.of("value", "1")));
  input.rememberTx();
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(TIME_PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}

@Override
public void initialize() {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(context.getRuntimeArguments().get(INPUT_KEY)));
  context.addOutput(Output.ofDataset(context.getRuntimeArguments().get(OUTPUT_KEY)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(NoOpMapper.class);
  job.setReducerClass(NoOpReducer.class);
  context.addInput(Input.ofDataset(DATASET_NAME));
  context.addOutput(Output.ofDataset(DATASET_NAME2));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ObjectStoreMapper.class);
  job.setReducerClass(KeyValueStoreReducer.class);
  context.addInput(Input.ofDataset("keys"));
  context.addOutput(Output.ofDataset("count"));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(PARTITIONED));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(TIME_PARTITIONED));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(DummyMapper.class);
  job.setReducerClass(DummyReducer.class);
  context.addInput(Input.ofDataset("foo"));
  context.addOutput(Output.ofDataset("bar"));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset("input"));
  context.addOutput(Output.ofDataset("output"));
}
}

@Override
public void initialize() {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(context.getRuntimeArguments().get(INPUT_DATASET_NAME))
                     .fromNamespace(context.getRuntimeArguments().get(INPUT_DATASET_NS)));
  context.addOutput(Output.ofDataset(context.getRuntimeArguments().get(OUTPUT_DATASET_NAME))
                      .fromNamespace(context.getRuntimeArguments().get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(Tokenizer.class);
  job.setReducerClass(Counter.class);
  job.setNumReduceTasks(1);
  context.addInput(Input.ofDataset("lines"));
  context.addOutput(Output.ofDataset("counts"));
  // truncate the output dataset
  context.getAdmin().truncateDataset("counts");
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(DATASET_NAME2));
  context.addOutput(Output.ofDataset(DATASET_NAME));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setReducerClass(FileReducer.class);
  // the user can opt to define the mapper class through our APIs, instead of directly on the job
  context.addInput(Input.ofDataset(context.getSpecification().getProperty("input")), FileMapper.class);
  context.addOutput(Output.ofDataset(context.getSpecification().getProperty("output")));
}
}

@Override
protected void initialize() throws Exception {
  Job job = getContext().getHadoopJob();
  job.setMapperClass(ClusterNameMapper.class);
  job.setReducerClass(ClusterNameReducer.class);
  getContext().addInput(Input.ofDataset(INPUT_FILE_SET));
  getContext().addOutput(Output.ofDataset(OUTPUT_FILE_SET));
  WorkflowInfo workflowInfo = getContext().getWorkflowInfo();
  String prefix = workflowInfo == null ? "" : workflowInfo.getName() + ".";
  clusterNameTable.write(prefix + "mr.client.cluster.name", getContext().getClusterName());
}