/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 */
public static Input ofDataset(String datasetName, Map<String, String> arguments) {
  // Delegate to the splits-aware overload; a null splits argument means
  // "use the splits defined by the dataset itself".
  return ofDataset(datasetName, arguments, null);
}
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  // Expose a plain TextInputFormat over the configured directory as this source's input.
  InputFormatProvider textFileProvider = new InputFormatProvider() {
    @Override
    public String getInputFormatClassName() {
      return TextInputFormat.class.getCanonicalName();
    }

    @Override
    public Map<String, String> getInputFormatConfiguration() {
      // The only configuration the format needs is where to read from.
      return ImmutableMap.of(TextInputFormat.INPUT_DIR, config.dirName);
    }
  };
  context.setInput(Input.of(config.name, textFileProvider));
}
@Override public void addInput(Input input, @Nullable Class<?> mapperCls) { if (input.getNamespace() != null && input.getNamespace().equals(NamespaceId.SYSTEM.getNamespace()) && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) { // trying to access system namespace from a program outside system namespace is not allowed throw new IllegalArgumentException(String.format("Accessing Input %s in system namespace " + "is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId())); } if (input instanceof Input.DatasetInput) { Input.DatasetInput datasetInput = (Input.DatasetInput) input; Input.InputFormatProviderInput createdInput = createInput(datasetInput); addInput(createdInput.getAlias(), createdInput.getInputFormatProvider(), mapperCls); } else if (input instanceof Input.InputFormatProviderInput) { addInput(input.getAlias(), ((Input.InputFormatProviderInput) input).getInputFormatProvider(), mapperCls); } else { // shouldn't happen unless user defines their own Input class throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName())); } }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "inputFile"); context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class); // A second input, aliasing so mapper gets the alias'd name context.addInput(Input.ofDataset(PURCHASES2, inputArgs).alias("secondPurchases"), FileMapper2.class); // since we set a Mapper class on the job itself, omitting the mapper in the addInput call will default to that context.addInput(Input.ofDataset(CUSTOMERS, inputArgs)); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(outputArgs, "output"); context.addOutput(Output.ofDataset(OUTPUT_DATASET, outputArgs)); Job job = context.getHadoopJob(); job.setMapperClass(FileMapper.class); job.setReducerClass(FileReducer.class); } }
@Override
public void initialize() {
  MapReduceContext context = getContext();
  Map<String, String> runtimeArgs = context.getRuntimeArguments();
  // Input/output dataset names and their namespaces are all supplied via runtime arguments.
  context.addInput(Input.ofDataset(runtimeArgs.get(INPUT_DATASET_NAME))
                     .fromNamespace(runtimeArgs.get(INPUT_DATASET_NS)));
  context.addOutput(Output.ofDataset(runtimeArgs.get(OUTPUT_DATASET_NAME))
                      .fromNamespace(runtimeArgs.get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  // Map-only job: the identity mapper output is written directly.
  hadoopJob.setNumReduceTasks(0);
}
/**
 * Creates a DatasetInput scoped to an explicit namespace.
 *
 * @param name the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 * @param splits the data selection splits, or null to use the dataset's own splits
 * @param namespace the namespace to resolve the dataset in
 */
private DatasetInput(String name, Map<String, String> arguments,
                     @Nullable Iterable<? extends Split> splits, String namespace) {
  this(name, arguments, splits);
  // NOTE(review): the return value of fromNamespace is intentionally ignored here,
  // which only works if it mutates and returns 'this' rather than a copy — confirm
  // against the superclass implementation.
  super.fromNamespace(namespace);
}
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
@Override public void addInput(Input input, @Nullable Class<?> mapperCls) { if (input.getNamespace() != null && input.getNamespace().equals(NamespaceId.SYSTEM.getNamespace()) && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) { // trying to access system namespace from a program outside system namespace is not allowed throw new IllegalArgumentException(String.format("Accessing Input %s in system namespace " + "is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId())); } if (input instanceof Input.DatasetInput) { Input.DatasetInput datasetInput = (Input.DatasetInput) input; Input.InputFormatProviderInput createdInput = createInput(datasetInput); addInput(createdInput.getAlias(), createdInput.getInputFormatProvider(), mapperCls); } else if (input instanceof Input.StreamInput) { Input.StreamInput streamInput = (Input.StreamInput) input; String namespace = streamInput.getNamespace(); if (namespace == null) { namespace = getProgram().getNamespaceId(); } addInput(input.getAlias(), new StreamInputFormatProvider(new NamespaceId(namespace), streamInput, streamAdmin), mapperCls); } else if (input instanceof Input.InputFormatProviderInput) { addInput(input.getAlias(), ((Input.InputFormatProviderInput) input).getInputFormatProvider(), mapperCls); } else { // shouldn't happen unless user defines their own Input class throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName())); } }
/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 */
public static Input ofDataset(String datasetName) {
  // No runtime arguments supplied: fall through to the arguments-aware overload.
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
}
// Register the XML input format (driven by the prepared Hadoop 'conf') under this
// source's reference name.
context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(XMLInputFormat.class, conf)));
/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param splits the data selection splits. If null, will use the splits defined by the dataset. If the dataset
 *               type is not {@link BatchReadable}, splits will be ignored
 */
public static Input ofDataset(String datasetName, @Nullable Iterable<? extends Split> splits) {
  // Delegate with an empty argument map; only the splits are caller-specified here.
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS, splits);
}
// Register the configured input format class (with the prepared Hadoop 'conf') under
// this source's reference name.
context.setInput(Input.of(config.getReferenceName(), new SourceInputFormatProvider(inputFormatClass, conf)));
@Override public void prepareRun(BatchSourceContext context) throws Exception { context.setInput(Input.ofDataset(config.tableName)); if (config.metadataOperations != null) { // if there are metadata operations to be performed then apply them processsMetadata(context); } }
@Override public void prepareRun(BatchSourceContext batchSourceContext) throws Exception { excelInputreaderConfig.validate(); createDatasets(null, batchSourceContext); Job job = JobUtils.createInstance(); String processFiles = ""; if (!Strings.isNullOrEmpty(excelInputreaderConfig.memoryTableName)) { processFiles = GSON.toJson(getAllProcessedFiles(batchSourceContext), ARRAYLIST_PREPROCESSED_FILES); } ExcelInputFormat.setConfigurations(job, excelInputreaderConfig.filePattern, excelInputreaderConfig.sheet, excelInputreaderConfig.reprocess, excelInputreaderConfig.sheetValue, excelInputreaderConfig.columnList, excelInputreaderConfig.skipFirstRow, excelInputreaderConfig.terminateIfEmptyRow, excelInputreaderConfig.rowsLimit, excelInputreaderConfig.ifErrorRecord, processFiles); // Sets the input path(s). ExcelInputFormat.addInputPaths(job, excelInputreaderConfig.filePath); // Sets the filter based on extended class implementation. ExcelInputFormat.setInputPathFilter(job, ExcelReaderRegexFilter.class); SourceInputFormatProvider inputFormatProvider = new SourceInputFormatProvider(ExcelInputFormat.class, job.getConfiguration()); batchSourceContext.setInput(Input.of(excelInputreaderConfig.referenceName, inputFormatProvider)); }
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  // Lazily create the runtime KeyValueTable dataset if it does not exist yet.
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(),
                          DatasetProperties.EMPTY);
  }
}
sourceConf.put(ADDITIONAL_CONFIG, SOURCE_CONFIG); context.addInput(Input.of("input", new InputFormatProvider() { @Override public String getInputFormatClassName() {
@Override public void prepareRun(BatchSourceContext context) throws DatasetManagementException { Map<String, String> properties = getProperties(); // if macros were provided at runtime, dataset needs to be created now if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) { context.createDataset(properties.get(Properties.BatchReadableWritable.NAME), properties.get(Properties.BatchReadableWritable.TYPE), DatasetProperties.builder().addAll(properties).build()); } context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME))); } }
@Override public void initialize() throws Exception { // this write should be invalidated if any of the following fails KeyValueTable kvTable = getContext().getDataset("recorder"); kvTable.write("initialized", "true"); if (getContext().getRuntimeArguments().containsKey("failInput")) { getContext().addInput(Input.of("x", new FailingInputFormatProvider())); } if (getContext().getRuntimeArguments().containsKey("failOutput")) { getContext().addOutput(Output.of("x", new FailingOutputFormatProvider())); } } }