/**
 * Get the output format of the writer of type {@link WriterOutputFormat}.
 *
 * <p>The configured value is read from the branch-specific form of
 * {@link ConfigurationKeys#WRITER_OUTPUT_FORMAT_KEY}, with the name of
 * {@link WriterOutputFormat#OTHER} passed as the default. The value is upper-cased and matched
 * against the {@link WriterOutputFormat} constant names; a value that matches no constant
 * resolves to {@link WriterOutputFormat#OTHER}.
 *
 * @param branches number of forked branches
 * @param index branch index
 * @return output format of the writer
 */
public WriterOutputFormat getWriterOutputFormat(int branches, int index) {
  String writerOutputFormatValue = this.taskState.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, branches, index),
      WriterOutputFormat.OTHER.name());
  log.debug("Found writer output format value = {}", writerOutputFormatValue);

  // Upper-case with a fixed locale: the default-locale variant maps 'i' to a dotted capital
  // under Turkish-style locales, which would make a valid value miss its enum constant and
  // silently fall back to OTHER.
  WriterOutputFormat wof = Enums
      .getIfPresent(WriterOutputFormat.class, writerOutputFormatValue.toUpperCase(java.util.Locale.ROOT))
      .or(WriterOutputFormat.OTHER);
  log.debug("Returning writer output format = {}", wof);
  return wof;
}
// Event data and metadata are published into sibling directories under the work dir.
this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, workDir + "/event_data");
this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR, workDir + "/metadata");
// Store the exact enum constant identifier: name() is final and is what a lookup by constant
// name requires, whereas toString() may be overridden to return a display value.
this.setConfiguration(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, WriterOutputFormat.PARQUET.name());
/**
 * Build the name of the file this writer will produce.
 *
 * <p>The extension comes from {@code getExtension(properties)} when the format is
 * {@link WriterOutputFormat#OTHER}, and from the format itself otherwise. When a partition is
 * present the name is passed through {@code getPartitionedFileName}. Finally, the tag of every
 * encoder returned by {@code getEncoders()} is appended as a dot-separated suffix, in order.
 *
 * @param properties state handed to the file-name helpers
 * @return the resulting file name
 */
public String getFileName(State properties) {
  final String extension;
  if (this.format.equals(WriterOutputFormat.OTHER)) {
    extension = getExtension(properties);
  } else {
    extension = this.format.getExtension();
  }

  String baseName =
      WriterUtils.getWriterFileName(properties, this.branches, this.branch, this.writerId, extension);
  if (this.partition.isPresent()) {
    baseName = getPartitionedFileName(properties, baseName);
  }

  List<StreamCodec> codecs = getEncoders();
  if (codecs.isEmpty()) {
    // No encoders: the name needs no codec suffixes.
    return baseName;
  }

  StringBuilder withCodecTags = new StringBuilder(baseName);
  for (StreamCodec codec : codecs) {
    withCodecTags.append('.').append(codec.getTag());
  }
  return withCodecTags.toString();
}
/**
 * Configure this job to write its output as Avro files under the given output location.
 *
 * @param outputPath value stored under {@link ConfigurationKeys#DATA_PUBLISHER_FINAL_DIR}
 * @return this instance, so calls can be chained
 */
@CliObjectOption(description = "Write output to Avro files. Specify the output directory as argument.")
public EmbeddedWikipediaExample avroOutput(String outputPath) {
  // Writer settings.
  final String writerBuilder = AvroDataWriterBuilder.class.getName();
  final String destinationType = Destination.DestinationType.HDFS.name();
  final String outputFormat = WriterOutputFormat.AVRO.name();
  final String partitioner = WikipediaPartitioner.class.getName();
  // Publisher and converter settings.
  final String publisher = BaseDataPublisher.class.getName();
  final String converter = WikipediaConverter.class.getName();

  this.setConfiguration(ConfigurationKeys.WRITER_BUILDER_CLASS, writerBuilder);
  this.setConfiguration(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY, destinationType);
  this.setConfiguration(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, outputFormat);
  this.setConfiguration(ConfigurationKeys.WRITER_PARTITIONER_CLASS, partitioner);
  this.setConfiguration(ConfigurationKeys.JOB_DATA_PUBLISHER_TYPE, publisher);
  this.setConfiguration(ConfigurationKeys.CONVERTER_CLASSES_KEY, converter);
  this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, outputPath);
  return this;
}
/**
 * Build the name of the file this writer will produce.
 *
 * <p>The extension comes from {@code getExtension(properties)} when the format is
 * {@link WriterOutputFormat#OTHER}, and from the format itself otherwise. When a partition is
 * present the name is passed through {@code getPartitionedFileName}. Finally, the tag of every
 * encoder returned by {@code getEncoders()} is appended as a dot-separated suffix, in order.
 *
 * @param properties state handed to the file-name helpers
 * @return the resulting file name
 */
public String getFileName(State properties) {
  final String extension;
  if (this.format.equals(WriterOutputFormat.OTHER)) {
    extension = getExtension(properties);
  } else {
    extension = this.format.getExtension();
  }

  String baseName =
      WriterUtils.getWriterFileName(properties, this.branches, this.branch, this.writerId, extension);
  if (this.partition.isPresent()) {
    baseName = getPartitionedFileName(properties, baseName);
  }

  List<StreamCodec> codecs = getEncoders();
  if (codecs.isEmpty()) {
    // No encoders: the name needs no codec suffixes.
    return baseName;
  }

  StringBuilder withCodecTags = new StringBuilder(baseName);
  for (StreamCodec codec : codecs) {
    withCodecTags.append('.').append(codec.getTag());
  }
  return withCodecTags.toString();
}
/**
 * Populate the given work unit with the task identity, source, converter, writer and schema
 * properties taken from {@code TestHelper} and from this instance's working directories.
 *
 * @param workUnit work unit to configure in place
 */
private void prepareWorkUnit(WorkUnit workUnit) {
  // Task identity: the task key is the sequence component parsed out of the task id.
  workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, TestHelper.TEST_TASK_ID);
  workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY,
      Long.toString(Id.parse(TestHelper.TEST_TASK_ID).getSequence()));

  // Source and converter.
  workUnit.setProp(ConfigurationKeys.SOURCE_CLASS_KEY, SimpleJsonSource.class.getName());
  workUnit.setProp(ConfigurationKeys.CONVERTER_CLASSES_KEY, SimpleJsonConverter.class.getName());

  // Writer: enum-valued settings are stored by constant name (name() is final; toString() may
  // be overridden), which is the form a lookup by enum name expects.
  workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, WriterOutputFormat.AVRO.name());
  workUnit.setProp(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY, Destination.DestinationType.HDFS.name());
  workUnit.setProp(ConfigurationKeys.WRITER_STAGING_DIR,
      this.appWorkDir.toString() + Path.SEPARATOR + "staging");
  workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, this.taskOutputDir.toString());
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestHelper.WRITER_FILE_NAME);
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH, TestHelper.REL_WRITER_FILE_PATH);
  workUnit.setProp(ConfigurationKeys.WRITER_BUILDER_CLASS, AvroDataWriterBuilder.class.getName());

  // Schema of the source records.
  workUnit.setProp(ConfigurationKeys.SOURCE_SCHEMA, TestHelper.SOURCE_SCHEMA);
}
}
// Settings are written twice, once under a ".0" suffix and once under a ".1" suffix, with
// identical values for both.
// NOTE(review): the numeric suffixes look like fork-branch indices — confirm against the
// fork configuration used by this job.

// Task-level policy types: two entries per suffix, both OPTIONAL.
jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".0", "OPTIONAL,OPTIONAL");
jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".1", "OPTIONAL,OPTIONAL");
// Writer output format: AVRO for both suffixes, stored by enum constant name.
jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".0", WriterOutputFormat.AVRO.name());
jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".1", WriterOutputFormat.AVRO.name());
// Writer destination type: HDFS for both suffixes, stored by enum constant name.
jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".0", Destination.DestinationType.HDFS.name());
jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".1", Destination.DestinationType.HDFS.name());
// Event data and metadata are published into sibling directories under the work dir.
this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, workDir + "/event_data");
this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR, workDir + "/metadata");
// Store the exact enum constant identifier: name() is final and is what a lookup by constant
// name requires, whereas toString() may be overridden to return a display value.
this.setConfiguration(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, WriterOutputFormat.PARQUET.name());
// Settings are written twice, once under a ".0" suffix and once under a ".1" suffix, with
// identical values for both.
// NOTE(review): the numeric suffixes look like fork-branch indices — confirm against the
// fork configuration used by this job.

// Task-level policy types: two entries per suffix, both OPTIONAL.
jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".0", "OPTIONAL,OPTIONAL");
jobProps.setProperty(ConfigurationKeys.TASK_LEVEL_POLICY_LIST_TYPE + ".1", "OPTIONAL,OPTIONAL");
// Writer output format: AVRO for both suffixes, stored by enum constant name.
jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".0", WriterOutputFormat.AVRO.name());
jobProps.setProperty(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY + ".1", WriterOutputFormat.AVRO.name());
// Writer destination type: HDFS for both suffixes, stored by enum constant name.
jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".0", Destination.DestinationType.HDFS.name());
jobProps.setProperty(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY + ".1", Destination.DestinationType.HDFS.name());
/**
 * Get the output format of the writer of type {@link WriterOutputFormat}.
 *
 * <p>The configured value is read from the branch-specific form of
 * {@link ConfigurationKeys#WRITER_OUTPUT_FORMAT_KEY}, with the name of
 * {@link WriterOutputFormat#OTHER} passed as the default. The value is upper-cased and matched
 * against the {@link WriterOutputFormat} constant names; a value that matches no constant
 * resolves to {@link WriterOutputFormat#OTHER}.
 *
 * @param branches number of forked branches
 * @param index branch index
 * @return output format of the writer
 */
public WriterOutputFormat getWriterOutputFormat(int branches, int index) {
  String writerOutputFormatValue = this.taskState.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, branches, index),
      WriterOutputFormat.OTHER.name());
  log.debug("Found writer output format value = {}", writerOutputFormatValue);

  // Upper-case with a fixed locale: the default-locale variant maps 'i' to a dotted capital
  // under Turkish-style locales, which would make a valid value miss its enum constant and
  // silently fall back to OTHER.
  WriterOutputFormat wof = Enums
      .getIfPresent(WriterOutputFormat.class, writerOutputFormatValue.toUpperCase(java.util.Locale.ROOT))
      .or(WriterOutputFormat.OTHER);
  log.debug("Returning writer output format = {}", wof);
  return wof;
}
/**
 * Configure this job to write its output as Avro files under the given output location.
 *
 * @param outputPath value stored under {@link ConfigurationKeys#DATA_PUBLISHER_FINAL_DIR}
 * @return this instance, so calls can be chained
 */
@CliObjectOption(description = "Write output to Avro files. Specify the output directory as argument.")
public EmbeddedWikipediaExample avroOutput(String outputPath) {
  // Writer settings.
  final String writerBuilder = AvroDataWriterBuilder.class.getName();
  final String destinationType = Destination.DestinationType.HDFS.name();
  final String outputFormat = WriterOutputFormat.AVRO.name();
  final String partitioner = WikipediaPartitioner.class.getName();
  // Publisher and converter settings.
  final String publisher = BaseDataPublisher.class.getName();
  final String converter = WikipediaConverter.class.getName();

  this.setConfiguration(ConfigurationKeys.WRITER_BUILDER_CLASS, writerBuilder);
  this.setConfiguration(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY, destinationType);
  this.setConfiguration(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, outputFormat);
  this.setConfiguration(ConfigurationKeys.WRITER_PARTITIONER_CLASS, partitioner);
  this.setConfiguration(ConfigurationKeys.JOB_DATA_PUBLISHER_TYPE, publisher);
  this.setConfiguration(ConfigurationKeys.CONVERTER_CLASSES_KEY, converter);
  this.setConfiguration(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, outputPath);
  return this;
}