/**
 * Instantiates a read client for the Hoodie dataset at {@code basePath},
 * delegating to the config-based constructor with a write config whose
 * index type defaults to BLOOM.
 *
 * @param jsc      Java Spark context used by the client
 * @param basePath path to Hoodie dataset
 */
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
  this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
      // by default we use HoodieBloomIndex
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build());
}
/**
 * Merges all properties from the supplied index config into this builder and
 * records that an index config was explicitly provided.
 *
 * @param indexConfig source of index-related properties to copy in
 * @return this builder, for chaining
 */
public Builder withIndexConfig(HoodieIndexConfig indexConfig) {
  this.props.putAll(indexConfig.getProps());
  this.isIndexConfigSet = true;
  return this;
}
public HoodieIndexConfig build() { HoodieIndexConfig config = new HoodieIndexConfig(props); setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP), BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL), BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL); // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); return config; } }
/**
 * Instantiates a read client for the Hoodie dataset at {@code basePath},
 * delegating to the config-based constructor with a write config whose
 * index type defaults to BLOOM.
 *
 * @param jsc      Java Spark context used by the client
 * @param basePath path to Hoodie dataset
 */
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
  this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
      // by default we use HoodieBloomIndex
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build());
}
/**
 * Merges all properties from the supplied index config into this builder and
 * records that an index config was explicitly provided.
 *
 * @param indexConfig source of index-related properties to copy in
 * @return this builder, for chaining
 */
public Builder withIndexConfig(HoodieIndexConfig indexConfig) {
  this.props.putAll(indexConfig.getProps());
  this.isIndexConfigSet = true;
  return this;
}
public HoodieIndexConfig build() { HoodieIndexConfig config = new HoodieIndexConfig(props); setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES); setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP), BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING); setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL), BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL); // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); return config; } }
/**
 * Creates a {@link HoodieWriteClient} for the dataset at {@code basePath},
 * configured to use the BLOOM index.
 *
 * @param jsc      Java Spark context
 * @param basePath base path of the Hoodie dataset
 * @return a write client bound to the given path
 * @throws Exception if client construction fails
 */
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
    throws Exception {
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  HoodieWriteConfig writeConfig =
      HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(indexConfig).build();
  return new HoodieWriteClient(jsc, writeConfig);
}
}
/**
 * Reads a single Hoodie index property for {@code TABLE_KEY} from the given
 * configuration.
 *
 * <p>NOTE(review): the {@code version} argument is accepted but never used in
 * this method — confirm whether a version-scoped lookup was intended.
 *
 * @param conf     configuration backing the index settings
 * @param property name of the Hoodie property to read
 * @param version  optional version qualifier (currently unused)
 * @return the property value, or {@code null} if the property is absent
 */
private String getHoodiePropertyFromConfig(@NonNull final Configuration conf,
    @NotEmpty final String property, @NonNull final Optional<String> version) {
  final HoodieIndexConfiguration indexConf = new HoodieIndexConfiguration(conf, TABLE_KEY);
  return indexConf.configureHoodieIndex().getProps().getProperty(property);
}
/**
 * Creates a {@link HoodieWriteClient} for the dataset at {@code basePath},
 * configured to use the BLOOM index. The trailing {@code false} flag is
 * passed straight through to the client constructor — presumably disabling
 * rollback of inflight commits; confirm against HoodieWriteClient's
 * constructor.
 *
 * @param jsc      Java Spark context
 * @param basePath base path of the Hoodie dataset
 * @return the constructed write client
 * @throws Exception if client construction fails
 */
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath)
    throws Exception {
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  HoodieWriteConfig writeConfig =
      HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(indexConfig).build();
  return new HoodieWriteClient(jsc, writeConfig, false);
}
/**
 * Configure the Hoodie index for this table.
 *
 * <p>For the HBASE index type this also ensures the backing HBase index
 * table exists (creating it if necessary) before the config is built; other
 * index types need no extra setup here.
 *
 * @return the built {@link HoodieIndexConfig}
 */
public HoodieIndexConfig configureHoodieIndex() {
  String version = DEFAULT_VERSION;
  if (getVersion().isPresent()) {
    version = getVersion().get();
  }
  final String tableName = getTableName();
  final HoodieIndexConfig.Builder indexBuilder =
      HoodieIndexConfig.newBuilder().withIndexType(getHoodieIndexType());
  if (HoodieIndex.IndexType.HBASE.equals(getHoodieIndexType())) {
    // HBase-backed index: make sure the index table exists before use.
    createHbaseIndexTableIfNotExists(tableName, getHoodieIndexZookeeperQuorum(),
        getHoodieIndexZookeeperPort().toString(), getZkZnodeParent(), version);
  }
  return indexBuilder.build();
}
HoodieIndexConfig.newBuilder().fromProperties(props).build()); setDefaultOnCondition(props, !isStorageConfigSet, HoodieStorageConfig.newBuilder().fromProperties(props).build());
HoodieIndexConfig.newBuilder().fromProperties(props).build()); setDefaultOnCondition(props, !isStorageConfigSet, HoodieStorageConfig.newBuilder().fromProperties(props).build());
/**
 * Assembles the write config for this run: combined input on insert and
 * upsert, manual (non-auto) commits, the target schema and table from
 * {@code cfg}, a BLOOM index, and finally {@code props} layered on top so
 * user-supplied Hoodie configs override the programmatic defaults.
 *
 * @return the assembled {@link HoodieWriteConfig}
 * @throws Exception if the target schema cannot be resolved
 */
private HoodieWriteConfig getHoodieClientConfig() throws Exception {
  HoodieCompactionConfig compactionConfig =
      HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build();
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  return HoodieWriteConfig.newBuilder()
      .combineInput(true, true)
      .withPath(cfg.targetBasePath)
      .withAutoCommit(false)
      .withSchema(schemaProvider.getTargetSchema().toString())
      .withCompactionConfig(compactionConfig)
      .forTable(cfg.targetTableName)
      .withIndexConfig(indexConfig)
      .withProps(props)
      .build();
}
/**
 * Assembles the write config for this run: combined input on insert and
 * upsert, manual (non-auto) commits, the target schema and table from
 * {@code cfg}, a BLOOM index, and finally {@code props} layered on top so
 * user-supplied Hoodie configs override the programmatic defaults.
 *
 * @return the assembled {@link HoodieWriteConfig}
 * @throws Exception if the target schema cannot be resolved
 */
private HoodieWriteConfig getHoodieClientConfig() throws Exception {
  HoodieCompactionConfig compactionConfig =
      HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build();
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  return HoodieWriteConfig.newBuilder()
      .combineInput(true, true)
      .withPath(cfg.targetBasePath)
      .withAutoCommit(false)
      .withSchema(schemaProvider.getTargetSchema().toString())
      .withCompactionConfig(compactionConfig)
      .forTable(cfg.targetTableName)
      .withIndexConfig(indexConfig)
      .withProps(props)
      .build();
}
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath, String tblName, Map<String, String> parameters) throws Exception { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true) .withPath(basePath).withAutoCommit(false) .withSchema(schemaStr).forTable(tblName).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withPayloadClass(parameters.get( DataSourceWriteOptions .PAYLOAD_CLASS_OPT_KEY())) .build()) // override above with Hoodie configs specified as options. .withProps(parameters).build(); return new HoodieWriteClient<>(jssc, writeConfig); }
/**
 * Build Hoodie write client.
 *
 * @param jsc                     Java Spark Context
 * @param basePath                Base Path
 * @param schemaStr               Schema
 * @param parallelism             Parallelism (applied to both insert and upsert)
 * @param compactionStrategyClass optional fully-qualified compaction strategy
 *                                class, loaded reflectively when present;
 *                                inline compaction is disabled either way
 * @return the constructed write client
 * @throws Exception if the client cannot be created
 */
public static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath,
    String schemaStr, int parallelism, Optional<String> compactionStrategyClass)
    throws Exception {
  HoodieCompactionConfig.Builder compactionBuilder =
      HoodieCompactionConfig.newBuilder().withInlineCompaction(false);
  if (compactionStrategyClass.isPresent()) {
    compactionBuilder = compactionBuilder
        .withCompactionStrategy(ReflectionUtils.loadClass(compactionStrategyClass.get()));
  }
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withParallelism(parallelism, parallelism)
      .withSchema(schemaStr)
      .combineInput(true, true)
      .withCompactionConfig(compactionBuilder.build())
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  return new HoodieWriteClient(jsc, config);
}
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath, String tblName, Map<String, String> parameters) throws Exception { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true) .withPath(basePath).withAutoCommit(false) .withSchema(schemaStr).forTable(tblName).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withPayloadClass(parameters.get( DataSourceWriteOptions .PAYLOAD_CLASS_OPT_KEY())) .build()) // override above with Hoodie configs specified as options. .withProps(parameters).build(); return new HoodieWriteClient<>(jssc, writeConfig); }
/**
 * Build Hoodie write client.
 *
 * @param jsc                     Java Spark Context
 * @param basePath                Base Path
 * @param schemaStr               Schema
 * @param parallelism             Parallelism (applied to both insert and upsert)
 * @param compactionStrategyClass optional fully-qualified compaction strategy
 *                                class, loaded reflectively when present;
 *                                inline compaction is disabled either way
 * @return the constructed write client
 * @throws Exception if the client cannot be created
 */
public static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath,
    String schemaStr, int parallelism, Optional<String> compactionStrategyClass)
    throws Exception {
  HoodieCompactionConfig.Builder compactionBuilder =
      HoodieCompactionConfig.newBuilder().withInlineCompaction(false);
  if (compactionStrategyClass.isPresent()) {
    compactionBuilder = compactionBuilder
        .withCompactionStrategy(ReflectionUtils.loadClass(compactionStrategyClass.get()));
  }
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withParallelism(parallelism, parallelism)
      .withSchema(schemaStr)
      .combineInput(true, true)
      .withCompactionConfig(compactionBuilder.build())
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  return new HoodieWriteClient(jsc, config);
}
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) .forTable(tableName) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build()) .withCompactionConfig( HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
/**
 * Base write config builder for the test trip table: small file-size limits
 * to exercise sizing behavior, manual commits, and an HBASE index pointed at
 * the ZooKeeper quorum/port taken from {@code hbaseConfig}.
 *
 * @return a pre-populated {@link HoodieWriteConfig.Builder}
 */
private HoodieWriteConfig.Builder getConfigBuilder() {
  // HBase index wiring, sourced from the test cluster's configuration.
  HoodieHBaseIndexConfig hbaseIndexConfig = new HoodieHBaseIndexConfig.Builder()
      .hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
      .hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum"))
      .hbaseTableName(tableName)
      .hbaseIndexGetBatchSize(100)
      .build();
  HoodieIndexConfig indexConfig = HoodieIndexConfig.newBuilder()
      .withIndexType(HoodieIndex.IndexType.HBASE)
      .withHBaseIndexConfig(hbaseIndexConfig)
      .build();
  return HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(1, 1)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .compactionSmallFileSize(1024 * 1024)
          .withInlineCompaction(false)
          .build())
      .withAutoCommit(false)
      .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build())
      .forTable("test-trip-table")
      .withIndexConfig(indexConfig);
}
}