// Reads lines from files on HDFS and writes them as tuples to S3.
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FSRecordReaderModule recordReader = dag.addModule("lineInput", FSRecordReaderModule.class);
  S3BytesOutputModule s3BytesOutputModule = dag.addModule("s3TupleOutput", S3BytesOutputModule.class);
  dag.addStream("data", recordReader.records, s3BytesOutputModule.input);
}
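Each of these populateDAG methods is the body of an Apex StreamingApplication. A minimal sketch of the enclosing class for the snippet above, assuming the standard application boilerplate; the application name is illustrative:

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.DAG;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.annotation.ApplicationAnnotation;

// Module properties (input directory, S3 credentials, bucket) are normally
// supplied through properties.xml rather than hard-coded in the application.
@ApplicationAnnotation(name = "HDFSToS3TupleOutput")
public class Application implements StreamingApplication
{
  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    // DAG wiring as shown above.
  }
}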
// Copies files from HDFS to S3 block by block. CONTAINER_LOCAL keeps the block
// metadata and block data streams inside one container, avoiding extra serialization.
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FSInputModule inputModule = dag.addModule("HDFSInputModule", new FSInputModule());
  S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule());
  dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput);
  dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput)
      .setLocality(DAG.Locality.CONTAINER_LOCAL);
  dag.addStream("BlocksData", inputModule.messages, outputModule.blockData)
      .setLocality(DAG.Locality.CONTAINER_LOCAL);
}
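Before launch, the S3 output module needs credentials and a destination. A minimal sketch of setting these in code, assuming setter names that mirror the module's accessKey, secretAccessKey, bucketName, and outputDirectoryPath properties; the values are placeholders, and in practice they usually come from properties.xml:

S3OutputModule outputModule = dag.addModule("S3OutputModule", new S3OutputModule());
outputModule.setAccessKey("ACCESS_KEY");            // placeholder
outputModule.setSecretAccessKey("SECRET_KEY");      // placeholder
outputModule.setBucketName("my-bucket");            // placeholder
outputModule.setOutputDirectoryPath("output/dir");  // placeholder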
// Copies files from S3 to HDFS. THREAD_LOCAL goes one step further than
// CONTAINER_LOCAL: the connected operators share a single thread.
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule());
  HDFSFileCopyModule outputModule = dag.addModule("HDFSFileCopyModule", new HDFSFileCopyModule());
  dag.addStream("FileMetaData", inputModule.filesMetadataOutput, outputModule.filesMetadataInput);
  dag.addStream("BlocksMetaData", inputModule.blocksMetadataOutput, outputModule.blocksMetadataInput)
      .setLocality(Locality.THREAD_LOCAL);
  dag.addStream("BlocksData", inputModule.messages, outputModule.blockData)
      .setLocality(Locality.THREAD_LOCAL);
}
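S3InputModule is pointed at its source through the files property it inherits from FSInputModule, with the credentials embedded in the URI. A sketch under that assumption; every value below is a placeholder:

S3InputModule inputModule = dag.addModule("S3InputModule", new S3InputModule());
// Scheme and credential placement follow the s3n convention:
// s3n://<accessKey>:<secretKey>@<bucket>/<directory>
inputModule.setFiles("s3n://ACCESS_KEY:SECRET_KEY@my-bucket/input/dir");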
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FSRecordReaderModule recordReader = dag.addModule("recordReader", FSRecordReaderModule.class);
  CsvParser csvParser = dag.addOperator("csvParser", CsvParser.class);
  CsvFormatter formatter = dag.addOperator("formatter", new CsvFormatter());
  StringFileOutputOperator fileOutput = dag.addOperator("fileOutput", new StringFileOutputOperator());
  dag.addStream("record", recordReader.records, csvParser.in);
  dag.addStream("pojo", csvParser.out, formatter.in);
  dag.addStream("string", formatter.out, fileOutput.input);
}
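CsvParser needs a delimited schema and a target POJO class before this application will run. A sketch of that setup, assuming the parser's schema property and the TUPLE_CLASS output-port attribute; the PojoEvent class and its fields are hypothetical:

// Hypothetical schema matching a hypothetical PojoEvent POJO.
String schema = "{\"separator\": \",\", \"fields\": ["
    + "{\"name\": \"accountNumber\", \"type\": \"Integer\"},"
    + "{\"name\": \"name\", \"type\": \"String\"},"
    + "{\"name\": \"amount\", \"type\": \"Integer\"}]}";
csvParser.setSchema(schema);
// Tell the engine which POJO class the parser's out port emits.
dag.setOutputPortAttribute(csvParser.out, Context.PortContext.TUPLE_CLASS, PojoEvent.class);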
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  AvroFileToPojoModule avroFileToPojoModule = dag.addModule("avroFileToPojoModule", getAvroFileToPojoModule());
  ConsoleOutputOperator consoleOutput = dag.addOperator("console", new ConsoleOutputOperator());
  dag.addStream("POJO", avroFileToPojoModule.output, consoleOutput.input);
}
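The getAvroFileToPojoModule() helper is not shown in the snippet. A plausible sketch, assuming the module exposes setters for the Avro schema and the emitted POJO class; the setter names, the SCHEMA_JSON constant, and the AccountEvent class are assumptions, not confirmed API:

private AvroFileToPojoModule getAvroFileToPojoModule()
{
  AvroFileToPojoModule module = new AvroFileToPojoModule();
  // Assumed setters: adjust to the actual module API.
  module.setAvroFileSchema(SCHEMA_JSON);   // Avro reader schema as a JSON string
  module.setPojoClass(AccountEvent.class); // POJO type emitted on module.output
  return module;
}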
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  FSRecordReaderModule recordReader = dag.addModule("recordReader", FSRecordReaderModule.class);
  CsvParser csvParser = dag.addOperator("csvParser", CsvParser.class);
  FilterOperator filterOperator = dag.addOperator("filterOperator", new FilterOperator());
  CsvFormatter selectedFormatter = dag.addOperator("selectedFormatter", new CsvFormatter());
  CsvFormatter rejectedFormatter = dag.addOperator("rejectedFormatter", new CsvFormatter());
  StringFileOutputOperator selectedOutput = dag.addOperator("selectedOutput", new StringFileOutputOperator());
  StringFileOutputOperator rejectedOutput = dag.addOperator("rejectedOutput", new StringFileOutputOperator());
  dag.addStream("record", recordReader.records, csvParser.in);
  dag.addStream("pojo", csvParser.out, filterOperator.input);
  dag.addStream("pojoSelected", filterOperator.truePort, selectedFormatter.in);
  dag.addStream("pojoRejected", filterOperator.falsePort, rejectedFormatter.in);
  dag.addStream("csvSelected", selectedFormatter.out, selectedOutput.input);
  dag.addStream("csvRejected", rejectedFormatter.out, rejectedOutput.input);
}
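FilterOperator evaluates a boolean expression against each incoming POJO and routes it to truePort or falsePort accordingly. A sketch of setting that condition, assuming the operator's condition property, where {$} stands for the incoming tuple; the getAccountNumber getter is illustrative:

// Tuples with an even account number go to truePort, the rest to falsePort.
filterOperator.setCondition("({$}.getAccountNumber() % 2 == 0)");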
// Stages incoming byte[] tuples as rolling files on either S3 or HDFS, then
// loads the staged files into Redshift through the JDBC output operator.
@Override
public void populateDAG(DAG dag, Configuration conf)
{
  if (readerMode == READ_FROM_S3) {
    // Compact tuples into rolling files on S3.
    S3TupleOutputModule.S3BytesOutputModule tupleBasedS3 =
        dag.addModule("S3Compaction", new S3TupleOutputModule.S3BytesOutputModule());
    tupleBasedS3.setAccessKey(accessKey);
    tupleBasedS3.setSecretAccessKey(secretKey);
    tupleBasedS3.setBucketName(bucketName);
    tupleBasedS3.setOutputDirectoryPath(directoryName);
    if (maxLengthOfRollingFile != null) {
      tupleBasedS3.setMaxLength(maxLengthOfRollingFile);
    }
    input.set(tupleBasedS3.input);
    RedshiftJdbcTransactionableOutputOperator redshiftOutput =
        dag.addOperator("LoadToRedshift", createRedshiftOperator());
    dag.addStream("load-to-redshift", tupleBasedS3.output, redshiftOutput.input);
  } else {
    // Compact tuples into rolling files on HDFS instead.
    FSRecordCompactionOperator<byte[]> hdfsWriteOperator =
        dag.addOperator("WriteToHDFS", new FSRecordCompactionOperator<byte[]>());
    hdfsWriteOperator.setConverter(new GenericFileOutputOperator.NoOpConverter());
    if (maxLengthOfRollingFile != null) {
      hdfsWriteOperator.setMaxLength(maxLengthOfRollingFile);
    }
    input.set(hdfsWriteOperator.input);
    RedshiftJdbcTransactionableOutputOperator redshiftOutput =
        dag.addOperator("LoadToRedshift", createRedshiftOperator());
    dag.addStream("load-to-redshift", hdfsWriteOperator.output, redshiftOutput.input);
  }
}
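Unlike the earlier snippets, this populateDAG reads fields (readerMode, input, createRedshiftOperator()) from its enclosing class, which suggests an Apex Module with a proxy input port rather than a top-level application. A minimal skeleton under that assumption; the class name, the String constants, and the stubbed createRedshiftOperator() are illustrative, not the actual Malhar code:

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.DAG;
import com.datatorrent.api.Module;

// Malhar operator imports omitted for brevity.
public class S3ToRedshiftLoadModule implements Module
{
  // populateDAG binds this proxy port to the S3 or HDFS compaction input.
  public final transient ProxyInputPort<byte[]> input = new ProxyInputPort<>();

  // Assumed constants; comparing with == only works because readerMode is
  // always assigned one of these interned constants.
  static final String READ_FROM_S3 = "READ_FROM_S3";
  static final String READ_FROM_HDFS = "READ_FROM_HDFS";

  private String readerMode = READ_FROM_S3;
  private String accessKey;
  private String secretKey;
  private String bucketName;
  private String directoryName;
  private Long maxLengthOfRollingFile;

  private RedshiftJdbcTransactionableOutputOperator createRedshiftOperator()
  {
    // Stub: configure the table name, JDBC store, and COPY credentials here,
    // per the operator's documentation.
    return new RedshiftJdbcTransactionableOutputOperator();
  }

  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    // Branching DAG wiring as shown above.
  }
}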