public void testRollingStatsWithSmallFileHandling() throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder(false, IndexType.INMEMORY).withAutoCommit(false).build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
final HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder();
try {
    builder.forTable(getTableName());
    builder.withPath(getBasePath());
    final boolean combineBeforeInsert =
        getProperty(HOODIE_COMBINE_BEFORE_INSERT, DEFAULT_HOODIE_COMBINE_BEFORE_INSERT);
    final boolean combineBeforeUpsert =
        getProperty(HOODIE_COMBINE_BEFORE_UPSERT, DEFAULT_HOODIE_COMBINE_BEFORE_UPSERT);
    builder.combineInput(combineBeforeInsert, combineBeforeUpsert);
    final String schemaPropertyKey = getTablePropertyKey(HOODIE_AVRO_SCHEMA, this.tableKey);
    final Optional<String> schema = this.conf.getProperty(schemaPropertyKey);
    builder.withSchema(schema.get());
    builder.withParallelism(this.getInsertParallelism(), this.getUpsertParallelism())
        .withBulkInsertParallelism(this.getBulkInsertParallelism());
    builder.withAutoCommit(false);
    builder.withAssumeDatePartitioning(true);
    // The compaction-config setup was partially lost in the source; the builder
    // declaration and the call that receives DEFAULT_HOODIE_COMPACTION_SMALL_FILE_SIZE_LIMIT
    // are reconstructed here as a plausible small-file-size setting (the
    // HOODIE_COMPACTION_SMALL_FILE_SIZE_LIMIT property key is an assumption).
    final HoodieCompactionConfig.Builder compactionConfigBuilder = HoodieCompactionConfig.newBuilder();
    compactionConfigBuilder.compactionSmallFileSize(
        getProperty(HOODIE_COMPACTION_SMALL_FILE_SIZE_LIMIT, DEFAULT_HOODIE_COMPACTION_SMALL_FILE_SIZE_LIMIT));
    compactionConfigBuilder.withAutoClean(shouldAutoClean());
    builder.withCompactionConfig(compactionConfigBuilder.build());
    builder.withIndexConfig(new HoodieIndexConfiguration(getConf(), getTableKey()).configureHoodieIndex());
    builder.withMetricsConfig(hoodieMetricsConfigBuilder.build());
    builder.withWriteStatusStorageLevel("DISK_ONLY");
    final String writeStatusClassName =
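        // -- the original fragment is truncated at the assignment above; the
        // -- continuation below is an assumed sketch, not the original source.
        // -- HOODIE_WRITE_STATUS_CLASS and its default are hypothetical names.
        getProperty(HOODIE_WRITE_STATUS_CLASS, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
    builder.withWriteStatusClass((Class<? extends WriteStatus>) Class.forName(writeStatusClassName));
    return builder.build(); // assumes the enclosing method returns HoodieWriteConfig
} catch (Exception e) {
    throw new RuntimeException("failed to build HoodieWriteConfig", e);
}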
HoodieWriteConfig cfg = getConfigBuilder().build();
HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration());
client = new HoodieWriteClient(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
    .withSchema(TRIP_EXAMPLE_SCHEMA)
    .withParallelism(2, 2)
    .withAutoCommit(false).withAssumeDatePartitioning(true)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .compactionSmallFileSize(1 * 1024) // 1 KB limit to force small-file handling
        .withInlineCompaction(false)
        .withMaxNumDeltaCommitsBeforeCompaction(1).build())
    .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1 * 1024).build())
    .forTable("test-trip-table").build());
client.startCommitWithTime(commitTime1);
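// With autoCommit disabled above, nothing becomes visible to readers until the
// commit is published explicitly. A minimal sketch of the rest of the flow
// (the dataGen test-data generator and the record count are assumptions about
// the surrounding test, not taken from the original fragment):
List<HoodieRecord> records = dataGen.generateInserts(commitTime1, 200);
JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
JavaRDD<WriteStatus> statuses = client.upsert(writeRecords, commitTime1);
client.commit(commitTime1, statuses); // publishes the instant on the timeline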
) throws Exception {
    int maxVersions = 2; // keep up to 2 versions of each file
    HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
        HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
            .retainFileVersions(maxVersions).build())
        .withParallelism(1, 1).withBulkInsertParallelism(1)
        .withFinalizeWriteParallelism(1).withConsistencyCheckEnabled(true)
        .build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
@Test
public void testArchiveDatasetWithArchival() throws IOException {
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
        .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 4).build())
        .forTable("test-trip-table").build();
    HoodieTestUtils.init(hadoopConf, basePath);
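    // archiveCommitsWith(2, 4) reads as (minInstantsToKeep, maxInstantsToKeep):
    // once the active timeline holds more than 4 completed instants, archival
    // trims it back to 2, moving the older instants into the archive log. The
    // test therefore needs to issue at least 5 commits before archival can fire.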
HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
    HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1)
        .build()).build();
HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath);
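// Cleaner sketch: with KEEP_LATEST_COMMITS and retainCommits(1), cleaning
// keeps the file slices referenced by the most recent commit and removes
// older ones. Assuming the write client exposes the usual clean() entry
// point (an assumption about the Hudi version this code targets):
client.clean();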
public void testRollingStatsInMetadata() throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder(false, IndexType.INMEMORY).withAutoCommit(false).build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
    .build();
String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33");
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
    .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
    .build();
@Test
public void testHoodieMergeHandleWriteStatMetrics() throws Exception {
    HoodieWriteConfig config = getConfigBuilder().build();
    HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config);
    String newCommitTime = "100";
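    // The metrics under test surface as HoodieWriteStat on each WriteStatus. A
    // minimal sketch of inspecting them after a write (writeRecords setup is
    // elided and assumed to exist in the surrounding test):
    writeClient.startCommitWithTime(newCommitTime);
    JavaRDD<WriteStatus> statuses = writeClient.upsert(writeRecords, newCommitTime);
    for (WriteStatus status : statuses.collect()) {
        HoodieWriteStat stat = status.getStat();
        // total records written should cover both inserts and updates
        assertTrue(stat.getNumWrites() >= stat.getNumUpdateWrites());
    }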
) throws Exception {
    int maxCommits = 3; // keep up to 3 commits from the past
    // retainCommits(...) pairs with the KEEP_LATEST_COMMITS policy;
    // KEEP_LATEST_FILE_VERSIONS would ignore the commit-retention setting.
    HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
        HoodieCompactionConfig.newBuilder()
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(maxCommits).build())
        .withParallelism(1, 1).withBulkInsertParallelism(1)
        .withFinalizeWriteParallelism(1).withConsistencyCheckEnabled(true).build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
    .build();
public void testRollingStatsInMetadata() throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build();
    HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
    .build();
Map<Long, Long> stageOneShuffleReadTaskRecordsCountMap = new HashMap<>();