/**
 * Source which has a requirement.
 */
@Plugin(type = BatchSource.PLUGIN_TYPE)
@Name(IncapableSource.NAME)
@Requirements(datasetTypes = {Table.TYPE})
public class IncapableSource extends BatchSource<byte[], Row, StructuredRecord> {
  public static final String NAME = "IncapableSource";

  @Override
  public void prepareRun(BatchSourceContext context) throws Exception {
  }

  /**
   * @return {@link IncapableSource} as the ETLPlugin
   */
  public static ETLPlugin getPlugin() {
    return new ETLPlugin(IncapableSource.NAME, BatchSource.PLUGIN_TYPE, Collections.emptyMap(), null);
  }
}
/**
 * Gets all the {@link co.cask.cdap.api.annotation.Requirements} specified by a plugin as {@link Requirements}.
 * The requirements are case insensitive and are always represented in lowercase.
 *
 * @param cls the plugin class whose requirements need to be found
 * @return {@link Requirements} containing the requirements specified by the plugin (in lowercase). If the plugin
 *         does not specify any {@link co.cask.cdap.api.annotation.Requirements}, the returned {@link Requirements}
 *         will be empty.
 */
@VisibleForTesting
Requirements getPluginRequirements(Class<?> cls) {
  co.cask.cdap.api.annotation.Requirements annotation =
    cls.getAnnotation(co.cask.cdap.api.annotation.Requirements.class);
  if (annotation == null) {
    return Requirements.EMPTY;
  }
  return new Requirements(Arrays.stream(annotation.datasetTypes())
                            .map(s -> s.trim().toLowerCase())
                            .filter(s -> !Strings.isNullOrEmpty(s))
                            .collect(Collectors.toSet()));
}
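For illustration, a minimal sketch of the normalization this method performs. The MixedCaseRequirementsPlugin class, the inspector instance that hosts getPluginRequirements, and the getDatasetTypes() accessor on Requirements are assumptions made for this example, not part of the source:

// Hypothetical test-only plugin; the mixed-case, padded, and empty
// dataset type strings are assumptions used to exercise the normalization.
@co.cask.cdap.api.annotation.Requirements(datasetTypes = {" Table ", "KeyValueTable", ""})
private static class MixedCaseRequirementsPlugin {
}

@Test
public void testRequirementsAreTrimmedAndLowercased() {
  // 'inspector' stands in for whatever class hosts getPluginRequirements.
  Requirements requirements = inspector.getPluginRequirements(MixedCaseRequirementsPlugin.class);
  // Entries are trimmed, empty strings are dropped, and everything is lowercased.
  Assert.assertEquals(ImmutableSet.of("table", "keyvaluetable"), requirements.getDatasetTypes());
}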
/**
 * Sink which has a requirement that can be met by another special requirement.
 */
@Plugin(type = BatchSink.PLUGIN_TYPE)
@Name(IncapableSink.NAME)
@Requirements(datasetTypes = {Table.TYPE, KeyValueTable.TYPE})
public class IncapableSink extends BatchSink<StructuredRecord, byte[], Put> {
  public static final String NAME = "IncapableSink";

  @Override
  public void prepareRun(BatchSinkContext context) throws Exception {
  }

  /**
   * @return {@link IncapableSink} as the ETLPlugin
   */
  public static ETLPlugin getPlugin() {
    return new ETLPlugin(IncapableSink.NAME, BatchSink.PLUGIN_TYPE, Collections.emptyMap(), null);
  }
}
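For context, test plugins like these are typically wired into a pipeline through their getPlugin() helpers. A minimal sketch, assuming the ETLStage and ETLBatchConfig classes from the CDAP ETL proto API; the cron schedule and stage names are placeholder assumptions:

// Sketch of wiring the test source and sink into a pipeline config.
ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", IncapableSource.getPlugin()))
  .addStage(new ETLStage("sink", IncapableSink.getPlugin()))
  .addConnection("source", "sink")
  .build();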
/**
 * A {@link BatchSource} to read Parquet records from a {@link TimePartitionedFileSet}.
 */
@Plugin(type = "batchsource")
@Name("TPFSParquet")
@Description("Reads from a TimePartitionedFileSet whose data is in Parquet format.")
@Requirements(datasetTypes = TimePartitionedFileSet.TYPE)
public class TimePartitionedFileSetDatasetParquetSource extends TimePartitionedFileSetSource<TPFSConfig> {

  public TimePartitionedFileSetDatasetParquetSource(TPFSConfig tpfsParquetConfig) {
    super(tpfsParquetConfig);
  }

  @Override
  protected String getInputFormatName() {
    return FileFormat.PARQUET.name().toLowerCase();
  }

  @Override
  protected void addFileSetProperties(FileSetProperties.Builder properties) {
    FileSetUtil.configureParquetFileSet(config.getSchema().toString(), properties);
  }

  @Override
  public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input.getValue());
  }
}
/**
 * Reads data written by a {@link SnapshotFileBatchAvroSink}. Reads only the most recent partition.
 */
@Plugin(type = "batchsource")
@Name("SnapshotAvro")
@Description("Reads the most recent snapshot that was written to a SnapshotAvro sink.")
@Requirements(datasetTypes = PartitionedFileSet.TYPE)
public class SnapshotFileBatchAvroSource extends SnapshotFileBatchSource<SnapshotFileSetSourceConfig> {

  public SnapshotFileBatchAvroSource(SnapshotFileSetSourceConfig config) {
    super(config);
  }

  @Override
  protected String getInputFormatName() {
    return FileFormat.AVRO.name().toLowerCase();
  }

  @Override
  public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input.getValue());
  }

  @Override
  protected void addFileProperties(FileSetProperties.Builder propertiesBuilder) {
    FileSetUtil.configureAvroFileSet(config.getSchema().toString(), propertiesBuilder);
  }
}
/**
 * A {@link BatchSource} to read Avro records from a {@link TimePartitionedFileSet}.
 */
@Plugin(type = "batchsource")
@Name("TPFSAvro")
@Description("Reads from a TimePartitionedFileSet whose data is in Avro format.")
@Requirements(datasetTypes = TimePartitionedFileSet.TYPE)
public class TimePartitionedFileSetDatasetAvroSource extends TimePartitionedFileSetSource<TPFSConfig> {

  public TimePartitionedFileSetDatasetAvroSource(TPFSConfig tpfsAvroConfig) {
    super(tpfsAvroConfig);
  }

  @Override
  protected String getInputFormatName() {
    return FileFormat.AVRO.name().toLowerCase();
  }

  @Override
  protected void addFileSetProperties(FileSetProperties.Builder properties) {
    FileSetUtil.configureAvroFileSet(config.getSchema().toString(), properties);
  }

  @Override
  public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input.getValue());
  }
}
/**
 * Reads data written by a {@link SnapshotFileBatchParquetSink}. Reads only the most recent partition.
 */
@Plugin(type = "batchsource")
@Name("SnapshotParquet")
@Description("Reads the most recent snapshot that was written to a SnapshotParquet sink.")
@Requirements(datasetTypes = PartitionedFileSet.TYPE)
public class SnapshotFileBatchParquetSource extends SnapshotFileBatchSource<SnapshotFileSetSourceConfig> {

  public SnapshotFileBatchParquetSource(SnapshotFileSetSourceConfig config) {
    super(config);
  }

  @Override
  protected String getInputFormatName() {
    return FileFormat.PARQUET.name().toLowerCase();
  }

  @Override
  public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input.getValue());
  }

  @Override
  protected void addFileProperties(FileSetProperties.Builder propertiesBuilder) {
    FileSetUtil.configureParquetFileSet(config.getSchema().toString(), propertiesBuilder);
  }
}
@Name("TPFSAvro") @Description("Sink for a TimePartitionedFileSet that writes data in Avro format.") @Requirements(datasetTypes = TimePartitionedFileSet.TYPE) public class TimePartitionedFileSetDatasetAvroSink extends TimePartitionedFileSetSink<TimePartitionedFileSetDatasetAvroSink.TPFSAvroSinkConfig> {
@Name("TPFSParquet") @Description("Sink for a TimePartitionedFileSet that writes data in Parquet format.") @Requirements(datasetTypes = TimePartitionedFileSet.TYPE) public class TimePartitionedFileSetDatasetParquetSink extends TimePartitionedFileSetSink<TimePartitionedFileSetDatasetParquetSink.TPFSParquetSinkConfig> {
@Name("SnapshotText") @Description("Sink for a SnapshotFileSet that writes data in Text format.") @Requirements(datasetTypes = PartitionedFileSet.TYPE) public class SnapshotFileBatchTextSink extends SnapshotFileBatchSink<SnapshotFileBatchTextSink.Conf> { private Conf config;
@Name("SnapshotAvro") @Description("Sink for a SnapshotFileSet that writes data in Avro format.") @Requirements(datasetTypes = PartitionedFileSet.TYPE) public class SnapshotFileBatchAvroSink extends SnapshotFileBatchSink<SnapshotFileBatchAvroSink.Conf> { private final Conf config;
@Name("SnapshotParquet") @Description("Sink for a SnapshotFileSet that writes data in Parquet format.") @Requirements(datasetTypes = PartitionedFileSet.TYPE) public class SnapshotFileBatchParquetSink extends SnapshotFileBatchSink<SnapshotFileBatchParquetSink.Conf> { private final Conf config;
@Description("Reads the entire contents of a KeyValueTable. Outputs records with a 'key' field and a 'value' field. " + "Both fields are of type bytes.") @Requirements(datasetTypes = KeyValueTable.TYPE) public class KVTableSource extends BatchReadableSource<byte[], byte[], StructuredRecord> { private static final Schema SCHEMA = Schema.recordOf(
@Name("Cube") @Description("CDAP Cube Dataset Batch Sink") @Requirements(datasetTypes = Cube.TYPE) public class BatchCubeSink extends BatchWritableSink<StructuredRecord, byte[], CubeFact> { private final CubeSinkConfig config;
@Name("TPFSOrc") @Description("Sink for a TimePartitionedFileSet that writes data in ORC format.") @Requirements(datasetTypes = TimePartitionedFileSet.TYPE) public class TimePartitionedFileSetDataSetORCSink extends TimePartitionedFileSetSink<TimePartitionedFileSetDataSetORCSink.TPFSOrcSinkConfig> {
@Requirements(datasetTypes = Table.TYPE)
@Description("Reads the entire contents of a CDAP Table. Outputs one record for each row in the Table.")
public class TableSource extends BatchReadableSource<byte[], Row, StructuredRecord> {
@Plugin(type = PLUGIN_TYPE)
@Name("NonTransactionalPlugin")
@Description(PLUGIN_DESCRIPTION)
@Requirements(datasetTypes = {"req1", "req2"})
public static class NonTransactionalPlugin {
  private PConfig pluginConf;

  public double doSomething() {
    return pluginConf.y;
  }
}
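As a quick illustration of how free-form requirement strings flow through the extraction helper shown earlier (the inspector instance and the Assert/ImmutableSet harness are assumptions for this sketch):

// Requirements need not name real dataset types; "req1" and "req2" are
// already lowercase, so they come back from the helper unchanged.
Requirements requirements = inspector.getPluginRequirements(NonTransactionalPlugin.class);
Assert.assertEquals(ImmutableSet.of("req1", "req2"), requirements.getDatasetTypes());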
@Name("KVTable") @Description("Writes records to a KeyValueTable, using configurable fields from input records as the key and value.") @Requirements(datasetTypes = KeyValueTable.TYPE) public class KVTableSink extends BatchWritableSink<StructuredRecord, byte[], byte[]> {
@Description("Writes records to a Table with one record field mapping to the Table rowkey," + " and all other record fields mapping to Table columns.") @Requirements(datasetTypes = Table.TYPE) public class TableSink extends BatchWritableSink<StructuredRecord, byte[], Put> {