/**
 * Builds a test {@link HoodieWriteClient} wired with a mocked {@link HoodieIndex}
 * whose {@code isGlobal()} answer is fixed to the given flag.
 *
 * @param isGlobal value the mocked {@code HoodieIndex.isGlobal()} should report
 * @return Hoodie write client using the mocked index
 * @throws Exception in case of error
 */
private HoodieWriteClient getWriteClientWithDummyIndex(final boolean isGlobal) throws Exception {
  HoodieIndex mockedIndex = mock(HoodieIndex.class);
  when(mockedIndex.isGlobal()).thenReturn(isGlobal);
  return new HoodieWriteClient(jsc, getConfigBuilder().build(), false, mockedIndex);
}
private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception { // Prepare the AvroParquetIO String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8"); return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build(); }
/**
 * Convenience overload that assembles a {@link HoodieWriteConfig} from the
 * given parameter map (the "path" entry becomes the base path) and delegates
 * to the config-based {@code dropDuplicates}.
 *
 * @param jssc active spark context
 * @param incomingHoodieRecords records to check for duplicates
 * @param parameters write parameters; must contain a "path" entry
 * @return the de-duplicated record RDD
 * @throws Exception in case of error
 */
@SuppressWarnings("unchecked")
public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc,
    JavaRDD<HoodieRecord> incomingHoodieRecords, Map<String, String> parameters) throws Exception {
  HoodieWriteConfig writeConfig =
      HoodieWriteConfig.newBuilder().withPath(parameters.get("path")).withProps(parameters).build();
  return dropDuplicates(jssc, incomingHoodieRecords, writeConfig);
}
}
/**
 * Builds a write config by forwarding the auto-commit flag to
 * {@code getConfigBuilder(autoCommit)}.
 *
 * @param autoCommit auto-commit flag passed through to the builder
 * @return the built write config
 */
private HoodieWriteConfig getConfig(Boolean autoCommit) {
  HoodieWriteConfig.Builder builder = getConfigBuilder(autoCommit);
  return builder.build();
}
/** * Build Hoodie Write Config for small data file sizes */ private HoodieWriteConfig getSmallInsertWriteConfig(int insertSplitSize) { HoodieWriteConfig.Builder builder = getConfigBuilder(); return builder.withCompactionConfig( HoodieCompactionConfig.newBuilder().compactionSmallFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 15) .insertSplitSize(insertSplitSize).build()) // tolerate upto 15 records .withStorageConfig( HoodieStorageConfig.newBuilder().limitFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 20).build()) .build(); } }
/**
 * Assembles the write config for the sync job: combined input, manual commit,
 * target path/schema/table from {@code cfg}, the configured payload class,
 * a BLOOM index, and the user-supplied props.
 *
 * @return the assembled write config
 * @throws Exception in case of error
 */
private HoodieWriteConfig getHoodieClientConfig() throws Exception {
  HoodieCompactionConfig compactionConfig =
      HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build();
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  return HoodieWriteConfig.newBuilder()
      .combineInput(true, true)
      .withPath(cfg.targetBasePath)
      .withAutoCommit(false)
      .withSchema(schemaProvider.getTargetSchema().toString())
      .withCompactionConfig(compactionConfig)
      .forTable(cfg.targetTableName)
      .withIndexConfig(indexConfig)
      .withProps(props)
      .build();
}
/**
 * Verifies that archiveIfRequired reports success when run against a dataset
 * with no commits.
 */
@Test
public void testArchiveEmptyDataset() throws IOException {
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .build();
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(dfs.getConf(), writeConfig.getBasePath(), true);
  HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(writeConfig, metaClient);
  assertTrue(archiveLog.archiveIfRequired(jsc));
}
/**
 * Creates a {@link HoodieWriteClient} over {@code basePath} configured with a
 * BLOOM index; the trailing boolean constructor flag is passed as {@code false}.
 *
 * @param jsc active spark context
 * @param basePath base path of the Hoodie dataset
 * @return the created write client
 * @throws Exception in case of error
 */
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  HoodieWriteConfig config =
      HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(indexConfig).build();
  return new HoodieWriteClient(jsc, config, false);
}
/**
 * Creates a {@link HoodieWriteClient} over {@code basePath} configured with a
 * BLOOM index, using the two-argument client constructor.
 *
 * @param jsc active spark context
 * @param basePath base path of the Hoodie dataset
 * @return the created write client
 * @throws Exception in case of error
 */
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  HoodieWriteConfig config =
      HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(indexConfig).build();
  return new HoodieWriteClient(jsc, config);
}
}
/**
 * Test ReadFilter API after writing new records using
 * {@code HoodieWriteClient.bulkInsert}.
 */
@Test
public void testReadFilterExistAfterBulkInsert() throws Exception {
  HoodieWriteConfig config = getConfigBuilder().withBulkInsertParallelism(1).build();
  testReadFilterExist(config, HoodieWriteClient::bulkInsert);
}
/**
 * Creates a read client for the dataset at {@code basePath}, delegating to the
 * config-based constructor with a default write config.
 *
 * @param jsc active spark context
 * @param basePath path to Hoodie dataset
 */
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
  // this(...) must be the first statement, so the config is built inline;
  // by default we use HoodieBloomIndex
  this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build());
}
/**
 * Builds a write config whose compaction settings allow at most one delta
 * commit before compaction.
 *
 * @return the built write config
 */
private HoodieWriteConfig getConfig() {
  HoodieCompactionConfig compactionConfig =
      HoodieCompactionConfig.newBuilder().withMaxNumDeltaCommitsBeforeCompaction(1).build();
  return getConfigBuilder().withCompactionConfig(compactionConfig).build();
}
/**
 * Get the default HoodieWriteConfig for tests.
 *
 * @return default Hoodie write config for tests
 */
protected HoodieWriteConfig getConfig() {
  HoodieWriteConfig.Builder defaultBuilder = getConfigBuilder();
  return defaultBuilder.build();
}
/**
 * Test tagLocation API after bulkInsertPrepped().
 */
@Test
public void testTagLocationAfterBulkInsertPrepped() throws Exception {
  HoodieWriteConfig config = getConfigBuilder().withBulkInsertParallelism(1).build();
  testTagLocation(config,
      (writeClient, recordRDD, commitTime) ->
          writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()),
      HoodieWriteClient::upsertPreppedRecords, true);
}
/**
 * Test tagLocation API after bulk-insert().
 */
@Test
public void testTagLocationAfterBulkInsert() throws Exception {
  HoodieWriteConfig config = getConfigBuilder().withBulkInsertParallelism(1).build();
  testTagLocation(config, HoodieWriteClient::bulkInsert, HoodieWriteClient::upsert, false);
}
/**
 * Test UpsertPrepped API using temporary folders for both create and merge
 * paths.
 */
@Test
public void testUpsertsPreppedWithFinalizeWrite() throws Exception {
  HoodieWriteConfig config = getConfigBuilder()
      .withUseTempFolderCopyOnWriteForCreate(true)
      .withUseTempFolderCopyOnWriteForMerge(true)
      .build();
  testUpsertsInternal(config, HoodieWriteClient::upsertPreppedRecords, true);
}
/**
 * Test Upsert API using temporary folders for both create and merge paths.
 */
@Test
public void testUpsertsWithFinalizeWrite() throws Exception {
  HoodieWriteConfig config = getConfigBuilder()
      .withUseTempFolderCopyOnWriteForCreate(true)
      .withUseTempFolderCopyOnWriteForMerge(true)
      .build();
  testUpsertsInternal(config, HoodieWriteClient::upsert, false);
}
/**
 * Test ReadFilter API after writing new records using
 * {@code HoodieWriteClient.bulkInsertPrepped}.
 */
@Test
public void testReadFilterExistAfterBulkInsertPrepped() throws Exception {
  HoodieWriteConfig config = getConfigBuilder().withBulkInsertParallelism(1).build();
  // Expression lambda: delegate straight to bulkInsertPreppedRecords
  testReadFilterExist(config, (writeClient, recordRDD, commitTime) ->
      writeClient.bulkInsertPreppedRecords(recordRDD, commitTime, Option.empty()));
}
/**
 * Builds the write config used by this job: combined input, manual commit,
 * target path/schema/table from {@code cfg}, the configured payload class,
 * a BLOOM index, and the user-supplied props.
 *
 * @return the assembled write config
 * @throws Exception in case of error
 */
private HoodieWriteConfig getHoodieClientConfig() throws Exception {
  HoodieCompactionConfig compactionConfig =
      HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build();
  HoodieIndexConfig indexConfig =
      HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build();
  return HoodieWriteConfig.newBuilder()
      .combineInput(true, true)
      .withPath(cfg.targetBasePath)
      .withAutoCommit(false)
      .withSchema(schemaProvider.getTargetSchema().toString())
      .withCompactionConfig(compactionConfig)
      .forTable(cfg.targetTableName)
      .withIndexConfig(indexConfig)
      .withProps(props)
      .build();
}
/**
 * Creates a read client for the dataset at {@code basePath}, delegating to the
 * config-based constructor with a default write config.
 *
 * @param jsc active spark context
 * @param basePath path to Hoodie dataset
 */
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
  // this(...) must be the first statement, so the config is built inline;
  // by default we use HoodieBloomIndex
  this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
      .withIndexConfig(
          HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build());
}