/**
 * Resets the per-file state so that it describes the given file.
 *
 * <p>The file location is built as {@code basePath/partitionPath/fileName}; the bloom filter is
 * read from that file's parquet metadata, the candidate key list is replaced with an empty one,
 * and the current file name and partition path are recorded. No field is assigned until the
 * bloom filter has been read, so a failure leaves the previously stored state in place.
 *
 * @param fileName name of the file inside the partition directory
 * @param partitionPath partition path, appended to {@code basePath}
 * @throws HoodieIndexException wrapping any exception raised while building the path or reading
 *     the bloom filter
 */
private void initState(String fileName, String partitionPath) throws HoodieIndexException {
  try {
    final String location = basePath + "/" + partitionPath + "/" + fileName;
    final Path parquetFile = new Path(location);

    // Read first: if this throws, none of the fields below have been touched yet.
    bloomFilter =
        ParquetUtils.readBloomFilterFromParquetMetadata(metaClient.getHadoopConf(), parquetFile);

    currentPartitionPath = partitionPath;
    currentFile = fileName;
    candidateRecordKeys = new ArrayList<>();
  } catch (Exception cause) {
    throw new HoodieIndexException("Error checking candidate keys against file.", cause);
  }
}
/**
 * Points the per-file state at a new file.
 *
 * <p>Reads the bloom filter from the parquet metadata of
 * {@code basePath/partitionPath/fileName}, starts a fresh (empty) candidate key list, and
 * remembers the file name and partition path as the current ones. Fields are only written once
 * the bloom filter read has succeeded.
 *
 * @param fileName name of the file inside the partition directory
 * @param partitionPath partition path, appended to {@code basePath}
 * @throws HoodieIndexException wrapping any exception raised while building the path or reading
 *     the bloom filter
 */
private void initState(String fileName, String partitionPath) throws HoodieIndexException {
  try {
    final String fullLocation = basePath + "/" + partitionPath + "/" + fileName;
    final Path targetFile = new Path(fullLocation);

    bloomFilter =
        ParquetUtils.readBloomFilterFromParquetMetadata(metaClient.getHadoopConf(), targetFile);

    // NOTE(review): "currentParitionPath" looks like a misspelling of "Partition"; the field is
    // declared outside this block, so the identifier is kept as-is here -- TODO confirm/rename.
    currentParitionPath = partitionPath;
    currentFile = fileName;
    candidateRecordKeys = new ArrayList<>();
  } catch (Exception failure) {
    throw new HoodieIndexException("Error checking candidate keys against file.", failure);
  }
}
@Test public void testHoodieWriteSupport() throws Exception { List<String> rowKeys = new ArrayList<>(); for (int i = 0; i < 1000; i++) { rowKeys.add(UUID.randomUUID().toString()); } String filePath = basePath + "/test.parquet"; writeParquetFile(filePath, rowKeys); // Read and verify List<String> rowKeysInFile = new ArrayList<>( ParquetUtils.readRowKeysFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath))); Collections.sort(rowKeysInFile); Collections.sort(rowKeys); assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile); BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)); for (String rowKey : rowKeys) { assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey)); } }
BloomFilter filter = ParquetUtils.readBloomFilterFromParquetMetadata(jsc.hadoopConfiguration(), parquetFilePath); for (HoodieRecord record : records) { assertTrue(filter.mightContain(record.getRecordKey())); BloomFilter updatedFilter = ParquetUtils.readBloomFilterFromParquetMetadata(jsc.hadoopConfiguration(), updatedParquetFilePath); for (HoodieRecord record : records) {