public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc) throws IOException { this.cfg = cfg; this.jssc = jssc; this.fs = FSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()); if (fs.exists(new Path(cfg.targetBasePath))) { HoodieTableMetaClient meta = new HoodieTableMetaClient(fs.getConf(), cfg.targetBasePath); this.commitTimelineOpt = Optional.of(meta.getActiveTimeline().getCommitsTimeline() .filterCompletedInstants()); } else { this.commitTimelineOpt = Optional.empty(); } this.props = UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath)).getConfig(); log.info("Creating delta streamer with configs : " + props.toString()); this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jssc); this.keyGenerator = DataSourceUtils.createKeyGenerator(cfg.keyGeneratorClass, props); this.source = UtilHelpers.createSource(cfg.sourceClassName, props, jssc, schemaProvider); // register the schemas, so that shuffle does not serialize the full schemas List<Schema> schemas = Arrays.asList(schemaProvider.getSourceSchema(), schemaProvider.getTargetSchema()); jssc.sc().getConf().registerAvroSchemas(JavaConversions.asScalaBuffer(schemas).toList()); }
public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc) throws IOException { this.cfg = cfg; this.jssc = jssc; this.fs = FSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()); if (fs.exists(new Path(cfg.targetBasePath))) { HoodieTableMetaClient meta = new HoodieTableMetaClient(fs.getConf(), cfg.targetBasePath); this.commitTimelineOpt = Optional.of(meta.getActiveTimeline().getCommitsTimeline() .filterCompletedInstants()); } else { this.commitTimelineOpt = Optional.empty(); } this.props = UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath)).getConfig(); log.info("Creating delta streamer with configs : " + props.toString()); this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jssc); this.keyGenerator = DataSourceUtils.createKeyGenerator(cfg.keyGeneratorClass, props); this.source = UtilHelpers.createSource(cfg.sourceClassName, props, jssc, schemaProvider); // register the schemas, so that shuffle does not serialize the full schemas List<Schema> schemas = Arrays.asList(schemaProvider.getSourceSchema(), schemaProvider.getTargetSchema()); jssc.sc().getConf().registerAvroSchemas(JavaConversions.asScalaBuffer(schemas).toList()); }