/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 */
public static Input ofDataset(String datasetName, Map<String, String> arguments) {
  // Delegate to the splits-aware overload; a null splits argument means
  // "use the splits defined by the dataset itself".
  return ofDataset(datasetName, arguments, null);
}
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  // Expose a plain TextInputFormat over the configured directory as this source's input.
  InputFormatProvider textFileProvider = new InputFormatProvider() {
    @Override
    public String getInputFormatClassName() {
      return TextInputFormat.class.getCanonicalName();
    }

    @Override
    public Map<String, String> getInputFormatConfiguration() {
      // The only configuration the format needs is where to read from.
      return ImmutableMap.of(TextInputFormat.INPUT_DIR, config.dirName);
    }
  };
  context.setInput(Input.of(config.name, textFileProvider));
}
@Override public void addInput(Input input, @Nullable Class<?> mapperCls) { if (input.getNamespace() != null && input.getNamespace().equals(NamespaceId.SYSTEM.getNamespace()) && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) { // trying to access system namespace from a program outside system namespace is not allowed throw new IllegalArgumentException(String.format("Accessing Input %s in system namespace " + "is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId())); } if (input instanceof Input.DatasetInput) { Input.DatasetInput datasetInput = (Input.DatasetInput) input; Input.InputFormatProviderInput createdInput = createInput(datasetInput); addInput(createdInput.getAlias(), createdInput.getInputFormatProvider(), mapperCls); } else if (input instanceof Input.InputFormatProviderInput) { addInput(input.getAlias(), ((Input.InputFormatProviderInput) input).getInputFormatProvider(), mapperCls); } else { // shouldn't happen unless user defines their own Input class throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName())); } }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "inputFile"); context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class); // A second input, aliasing so mapper gets the alias'd name context.addInput(Input.ofDataset(PURCHASES2, inputArgs).alias("secondPurchases"), FileMapper2.class); // since we set a Mapper class on the job itself, omitting the mapper in the addInput call will default to that context.addInput(Input.ofDataset(CUSTOMERS, inputArgs)); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(outputArgs, "output"); context.addOutput(Output.ofDataset(OUTPUT_DATASET, outputArgs)); Job job = context.getHadoopJob(); job.setMapperClass(FileMapper.class); job.setReducerClass(FileReducer.class); } }
@Override
public void initialize() {
  MapReduceContext context = getContext();
  Map<String, String> runtimeArgs = context.getRuntimeArguments();
  // Input/output dataset names and their namespaces are all supplied via runtime arguments.
  context.addInput(Input.ofDataset(runtimeArgs.get(INPUT_DATASET_NAME))
                     .fromNamespace(runtimeArgs.get(INPUT_DATASET_NS)));
  context.addOutput(Output.ofDataset(runtimeArgs.get(OUTPUT_DATASET_NAME))
                      .fromNamespace(runtimeArgs.get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  // Map-only job: the identity mapper output is written directly.
  hadoopJob.setNumReduceTasks(0);
}
/**
 * Creates a DatasetInput scoped to an explicit namespace.
 *
 * @param name the name of the input dataset
 * @param arguments the arguments to use when instantiating the dataset
 * @param splits the data selection splits, or null to use the dataset's own splits
 * @param namespace the namespace to resolve the dataset in
 */
private DatasetInput(String name, Map<String, String> arguments,
                     @Nullable Iterable<? extends Split> splits, String namespace) {
  this(name, arguments, splits);
  // NOTE(review): the return value of fromNamespace is intentionally ignored here,
  // which only works if it mutates and returns 'this' rather than a copy — confirm
  // against the superclass implementation.
  super.fromNamespace(namespace);
}
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
@Override public void addInput(Input input, @Nullable Class<?> mapperCls) { if (input.getNamespace() != null && input.getNamespace().equals(NamespaceId.SYSTEM.getNamespace()) && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) { // trying to access system namespace from a program outside system namespace is not allowed throw new IllegalArgumentException(String.format("Accessing Input %s in system namespace " + "is not allowed from the namespace %s", input.getName(), getProgram().getNamespaceId())); } if (input instanceof Input.DatasetInput) { Input.DatasetInput datasetInput = (Input.DatasetInput) input; Input.InputFormatProviderInput createdInput = createInput(datasetInput); addInput(createdInput.getAlias(), createdInput.getInputFormatProvider(), mapperCls); } else if (input instanceof Input.StreamInput) { Input.StreamInput streamInput = (Input.StreamInput) input; String namespace = streamInput.getNamespace(); if (namespace == null) { namespace = getProgram().getNamespaceId(); } addInput(input.getAlias(), new StreamInputFormatProvider(new NamespaceId(namespace), streamInput, streamAdmin), mapperCls); } else if (input instanceof Input.InputFormatProviderInput) { addInput(input.getAlias(), ((Input.InputFormatProviderInput) input).getInputFormatProvider(), mapperCls); } else { // shouldn't happen unless user defines their own Input class throw new IllegalArgumentException(String.format("Input %s has unknown input class %s", input.getName(), input.getClass().getCanonicalName())); } }
/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 */
public static Input ofDataset(String datasetName) {
  // No runtime arguments supplied: fall through to the arguments-aware overload.
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
}
// Register the XML input format (driven by the prepared Hadoop 'conf') under this
// source's reference name.
context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(XMLInputFormat.class, conf)));
/**
 * Returns an Input defined by a dataset.
 *
 * @param datasetName the name of the input dataset
 * @param splits the data selection splits. If null, will use the splits defined by the dataset. If the dataset
 *               type is not {@link BatchReadable}, splits will be ignored
 */
public static Input ofDataset(String datasetName, @Nullable Iterable<? extends Split> splits) {
  // Delegate with an empty argument map; only the splits are caller-specified here.
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS, splits);
}
// Register the configured input format class (with the prepared Hadoop 'conf') under
// this source's reference name.
context.setInput(Input.of(config.getReferenceName(), new SourceInputFormatProvider(inputFormatClass, conf)));
@Override public void prepareRun(BatchSourceContext context) throws Exception { context.setInput(Input.ofDataset(config.tableName)); if (config.metadataOperations != null) { // if there are metadata operations to be performed then apply them processsMetadata(context); } }
@Override public void prepareRun(BatchSourceContext batchSourceContext) throws Exception { excelInputreaderConfig.validate(); createDatasets(null, batchSourceContext); Job job = JobUtils.createInstance(); String processFiles = ""; if (!Strings.isNullOrEmpty(excelInputreaderConfig.memoryTableName)) { processFiles = GSON.toJson(getAllProcessedFiles(batchSourceContext), ARRAYLIST_PREPROCESSED_FILES); } ExcelInputFormat.setConfigurations(job, excelInputreaderConfig.filePattern, excelInputreaderConfig.sheet, excelInputreaderConfig.reprocess, excelInputreaderConfig.sheetValue, excelInputreaderConfig.columnList, excelInputreaderConfig.skipFirstRow, excelInputreaderConfig.terminateIfEmptyRow, excelInputreaderConfig.rowsLimit, excelInputreaderConfig.ifErrorRecord, processFiles); // Sets the input path(s). ExcelInputFormat.addInputPaths(job, excelInputreaderConfig.filePath); // Sets the filter based on extended class implementation. ExcelInputFormat.setInputPathFilter(job, ExcelReaderRegexFilter.class); SourceInputFormatProvider inputFormatProvider = new SourceInputFormatProvider(ExcelInputFormat.class, job.getConfiguration()); batchSourceContext.setInput(Input.of(excelInputreaderConfig.referenceName, inputFormatProvider)); }
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  context.setInput(Input.ofDataset(config.tableName));
  // Lazily create the runtime KeyValueTable dataset if it does not exist yet.
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(),
                          DatasetProperties.EMPTY);
  }
}
sourceConf.put(ADDITIONAL_CONFIG, SOURCE_CONFIG); context.addInput(Input.of("input", new InputFormatProvider() { @Override public String getInputFormatClassName() {
@Override public void prepareRun(BatchSourceContext context) throws DatasetManagementException { Map<String, String> properties = getProperties(); // if macros were provided at runtime, dataset needs to be created now if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) { context.createDataset(properties.get(Properties.BatchReadableWritable.NAME), properties.get(Properties.BatchReadableWritable.TYPE), DatasetProperties.builder().addAll(properties).build()); } context.setInput(Input.ofDataset(properties.get(Properties.BatchReadableWritable.NAME))); } }
@Override public void initialize() throws Exception { // this write should be invalidated if any of the following fails KeyValueTable kvTable = getContext().getDataset("recorder"); kvTable.write("initialized", "true"); if (getContext().getRuntimeArguments().containsKey("failInput")) { getContext().addInput(Input.of("x", new FailingInputFormatProvider())); } if (getContext().getRuntimeArguments().containsKey("failOutput")) { getContext().addOutput(Output.of("x", new FailingOutputFormatProvider())); } } }