/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 */
public static Input ofDataset(String datasetName, Map<String, String> arguments) {
  return ofDataset(datasetName, arguments, null);
}

/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 */
public static Input ofDataset(String datasetName) {
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
}

/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param splits the data selection splits. If null, will use the splits defined by the dataset. If the dataset
 *               type is not {@link BatchReadable}, splits will be ignored.
 */
public static Input ofDataset(String datasetName, @Nullable Iterable<? extends Split> splits) {
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS, splits);
}

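The three overloads above are typically called from a program's initialize() method. A minimal sketch of how they differ follows; the mapper class MyMapper and the dataset names "purchases", "profiles", and "events" are illustrative assumptions, not taken from the source:

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  context.getHadoopJob().setMapperClass(MyMapper.class);

  // Name only: reads the dataset with RuntimeArguments.NO_ARGUMENTS and its default splits.
  context.addInput(Input.ofDataset("purchases"));

  // Name plus arguments: the map is passed to the dataset when it is instantiated.
  context.addInput(Input.ofDataset("profiles", ImmutableMap.of("ttl", "3600")));

  // Name plus splits: a null Iterable falls back to the splits defined by the dataset itself
  // (the cast disambiguates this call from the Map overload).
  context.addInput(Input.ofDataset("events", (Iterable<? extends Split>) null));
}
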
@Override
public void prepareRun(BatchSourceContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  // if macros were provided at runtime, the dataset needs to be created now
  if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) {
    context.createDataset(properties.get(Properties.BatchReadableWritable.NAME),
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME)));
}
}

@Override
public void initialize() {
  getContext().addInput(Input.ofDataset(PURCHASES, ImmutableMap.of("key", "value")));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(DummyMapper.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, new Path(context.getRuntimeArguments().get("outputPath")));
  PedanticTxAware input = context.getDataset("pedanticTxAware", ImmutableMap.of("value", "1"));
  context.addInput(Input.ofDataset("pedanticTxAware", ImmutableMap.of("value", "1")));
  input.rememberTx();
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(TIME_PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}

@Override
public void initialize() {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(context.getRuntimeArguments().get(INPUT_KEY)));
  context.addOutput(Output.ofDataset(context.getRuntimeArguments().get(OUTPUT_KEY)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(NoOpMapper.class);
  job.setReducerClass(NoOpReducer.class);
  context.addInput(Input.ofDataset(DATASET_NAME));
  context.addOutput(Output.ofDataset(DATASET_NAME2));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ObjectStoreMapper.class);
  job.setReducerClass(KeyValueStoreReducer.class);
  context.addInput(Input.ofDataset("keys"));
  context.addOutput(Output.ofDataset("count"));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(PARTITIONED));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(TIME_PARTITIONED));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(DummyMapper.class);
  job.setReducerClass(DummyReducer.class);
  context.addInput(Input.ofDataset("foo"));
  context.addOutput(Output.ofDataset("bar"));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
  context.addInput(Input.ofDataset("input"));
  context.addOutput(Output.ofDataset("output"));
}
}

@Override
public void initialize() {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(context.getRuntimeArguments().get(INPUT_DATASET_NAME))
                     .fromNamespace(context.getRuntimeArguments().get(INPUT_DATASET_NS)));
  context.addOutput(Output.ofDataset(context.getRuntimeArguments().get(OUTPUT_DATASET_NAME))
                      .fromNamespace(context.getRuntimeArguments().get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(Tokenizer.class);
  job.setReducerClass(Counter.class);
  job.setNumReduceTasks(1);
  context.addInput(Input.ofDataset("lines"));
  context.addOutput(Output.ofDataset("counts"));
  // truncate the output dataset
  context.getAdmin().truncateDataset("counts");
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(DATASET_NAME2));
  context.addOutput(Output.ofDataset(DATASET_NAME));
}
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setReducerClass(FileReducer.class);
  // the user can opt to define the mapper class through our APIs, instead of directly on the job
  context.addInput(Input.ofDataset(context.getSpecification().getProperty("input")), FileMapper.class);
  context.addOutput(Output.ofDataset(context.getSpecification().getProperty("output")));
}
}

@Override
protected void initialize() throws Exception {
  Job job = getContext().getHadoopJob();
  job.setMapperClass(ClusterNameMapper.class);
  job.setReducerClass(ClusterNameReducer.class);
  getContext().addInput(Input.ofDataset(INPUT_FILE_SET));
  getContext().addOutput(Output.ofDataset(OUTPUT_FILE_SET));
  WorkflowInfo workflowInfo = getContext().getWorkflowInfo();
  String prefix = workflowInfo == null ? "" : workflowInfo.getName() + ".";
  clusterNameTable.write(prefix + "mr.client.cluster.name", getContext().getClusterName());
}