public static HoodieTableMetaClient init(String basePath) throws IOException {
  return initTableType(getDefaultHadoopConf(), basePath, HoodieTableType.COPY_ON_WRITE);
}

public static void createInflightCleanFiles(String basePath, String... commitTimes) throws IOException {
  createInflightCleanFiles(basePath, HoodieTestUtils.getDefaultHadoopConf(), commitTimes);
}

public static void createCommitFile(String basePath, String commitTime) throws IOException {
  createCommitFile(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
}

public static void createCleanFiles(String basePath, String commitTime) throws IOException {
  createCleanFiles(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
}
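// A minimal usage sketch (not from the original source): these delegating overloads
// let a test bootstrap a table and seed timeline files with the default Hadoop conf.
// The base path and commit times below are illustrative values only.
HoodieTestUtils.init("/tmp/hoodie-test");
HoodieTestUtils.createCommitFile("/tmp/hoodie-test", "001");
HoodieTestUtils.createCleanFiles("/tmp/hoodie-test", "001");
HoodieTestUtils.createInflightCleanFiles("/tmp/hoodie-test", "002", "003");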
private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf,
    Schema schema, String basePath) {
  List<Schema.Field> fields = schema.getFields();
  String names = fields.stream().map(Schema.Field::name).collect(Collectors.joining(","));
  String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
  Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
  // Project all columns, on both the job conf and the input format's own conf.
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  jobConf.set("partition_columns", "datestr");
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  conf.set("partition_columns", "datestr");
  inputFormat.setConf(conf);
  jobConf.addResource(conf);
}
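// Hypothetical caller for the helper above, assuming a test that already holds a
// writer 'schema' and a 'basePath' (both assumptions, not shown in the original source).
HoodieRealtimeInputFormat inputFormat = new HoodieRealtimeInputFormat();
JobConf jobConf = new JobConf();
setPropsForInputFormat(inputFormat, jobConf, schema, basePath);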
@Test
public void testFilterParquetRowKeys() throws Exception {
  List<String> rowKeys = new ArrayList<>();
  Set<String> filter = new HashSet<>();
  for (int i = 0; i < 1000; i++) {
    String rowKey = UUID.randomUUID().toString();
    rowKeys.add(rowKey);
    if (i % 100 == 0) {
      filter.add(rowKey);
    }
  }
  String filePath = basePath + "/test.parquet";
  writeParquetFile(filePath, rowKeys);

  // Read and verify
  Set<String> filtered =
      ParquetUtils.filterParquetRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), filter);
  assertEquals("Filtered count does not match", filter.size(), filtered.size());
  for (String rowKey : filtered) {
    assertTrue("filtered key must be in the given filter", filter.contains(rowKey));
  }
}
@Test
public void testHoodieWriteSupport() throws Exception {
  List<String> rowKeys = new ArrayList<>();
  for (int i = 0; i < 1000; i++) {
    rowKeys.add(UUID.randomUUID().toString());
  }
  String filePath = basePath + "/test.parquet";
  writeParquetFile(filePath, rowKeys);

  // Read and verify
  List<String> rowKeysInFile = new ArrayList<>(
      ParquetUtils.readRowKeysFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
  Collections.sort(rowKeysInFile);
  Collections.sort(rowKeys);
  assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);
  BloomFilter filterInFile =
      ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
  for (String rowKey : rowKeys) {
    assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
  }
}
@Test
public void testEnvVarVariablesPickedup() {
  environmentVariables.set("HOODIE_ENV_fs_DOT_key1", "value1");
  Configuration conf = FSUtils.prepareHadoopConf(HoodieTestUtils.getDefaultHadoopConf());
  assertEquals("value1", conf.get("fs.key1"));
  conf.set("fs.key1", "value11");
  conf.set("fs.key2", "value2");
  assertEquals("value11", conf.get("fs.key1"));
  assertEquals("value2", conf.get("fs.key2"));
}
public HiveServer2 start() throws IOException {
  Preconditions.checkState(workDir != null, "The work dir must be set before starting cluster.");
  if (hadoopConf == null) {
    hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
  }
  String localHiveLocation = getHiveLocation(workDir);
  if (clean) {
    LOG.info("Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh.");
    File file = new File(localHiveLocation);
    FileUtils.deleteDirectory(file);
  }
  HiveConf serverConf = configureHive(hadoopConf, localHiveLocation);
  executorService = Executors.newSingleThreadExecutor();
  tServer = startMetaStore(bindIP, metastorePort, serverConf);
  hiveServer = startHiveServer(serverConf);
  // When bound to all interfaces, connect to the server via localhost.
  String serverHostname = bindIP.equals("0.0.0.0") ? "localhost" : bindIP;
  if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) {
    throw new IOException("Timed out waiting for startup of standalone server");
  }
  LOG.info("Hive Minicluster service started.");
  return hiveServer;
}
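// Hypothetical usage sketch for the service above: start the embedded Hive instance
// in a test fixture and stop it during teardown. 'hiveTestService' is an assumed
// instance of the class defining start(); stop() is not shown in the original source.
HiveServer2 hiveServer = hiveTestService.start();
// ... run metastore/DDL assertions against the mini cluster ...
hiveServer.stop();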
@Before
public void init() throws IOException {
  metaClient = HoodieTestUtils.initTableType(getDefaultHadoopConf(), tmpFolder.getRoot().getAbsolutePath(),
      HoodieTableType.MERGE_ON_READ);
  basePath = metaClient.getBasePath();
}
public MiniDFSCluster start(boolean format) throws IOException {
  Preconditions.checkState(workDir != null, "The work dir must be set before starting cluster.");
  hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
  // If formatting, remove the work dir so we can start fresh.
  String localDFSLocation = getDFSLocation(workDir);
  if (format) {
    logger.info("Cleaning HDFS cluster data at: " + localDFSLocation + " and starting fresh.");
    File file = new File(localDFSLocation);
    FileUtils.deleteDirectory(file);
  }
  // Configure and start the HDFS cluster.
  hadoopConf = configureDFSCluster(hadoopConf, localDFSLocation, bindIP, namenodeRpcPort, namenodeHttpPort,
      datanodePort, datanodeIpcPort, datanodeHttpPort);
  miniDfsCluster = new MiniDFSCluster.Builder(hadoopConf).numDataNodes(1).format(format)
      .checkDataNodeAddrConfig(true).checkDataNodeHostConfig(true).build();
  logger.info("HDFS Minicluster service started.");
  return miniDfsCluster;
}
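// Hypothetical usage sketch: format and start a fresh mini HDFS cluster, then obtain
// its FileSystem handle. 'hdfsTestService' is an assumed instance of the class
// defining start(boolean) above; getFileSystem() comes from Hadoop's MiniDFSCluster.
MiniDFSCluster dfsCluster = hdfsTestService.start(true);
FileSystem dfs = dfsCluster.getFileSystem();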
@Before
public void init() throws IOException {
  super.init();
  metaClient = HoodieTestUtils.initTableType(getDefaultHadoopConf(), basePath, HoodieTableType.MERGE_ON_READ);
  client = new CompactionAdminClient(jsc, basePath);
}
@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
  // Use a fs like LocalFileSystem, which does not support appends.
  Path localPartitionPath = new Path("file://" + partitionPath);
  FileSystem localFs = FSUtils.getFs(localPartitionPath.toString(), HoodieTestUtils.getDefaultHadoopConf());
  Path testPath = new Path(localPartitionPath, "append_test");
  localFs.mkdirs(testPath);

  // Generate some data and append it twice.
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
  for (int i = 0; i < 2; i++) {
    HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
        .overBaseCommit("").withFs(localFs).build().appendBlock(dataBlock).close();
  }

  // Ensure there are two log file versions, with the same data.
  FileStatus[] statuses = localFs.listStatus(testPath);
  assertEquals(2, statuses.length);
}
@Before
public void init() throws Exception {
  // Create a temp folder as the base path.
  TemporaryFolder folder = new TemporaryFolder();
  folder.create();
  this.basePath = folder.getRoot().getAbsolutePath();
  HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath);
  // Initialize a local Spark env.
  jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestUpdateMapFunction"));
}
@Before
public void init() throws IOException {
  // Initialize a local Spark env.
  jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieCompactor"));

  // Create a temp folder as the base path.
  TemporaryFolder folder = new TemporaryFolder();
  folder.create();
  basePath = folder.getRoot().getAbsolutePath();
  hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
  fs = FSUtils.getFs(basePath, hadoopConf);
  HoodieTestUtils.initTableType(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
  dataGen = new HoodieTestDataGenerator();
  compactor = new HoodieRealtimeTableCompactor();
}
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(HoodieTestUtils.getDefaultHadoopConf(), basePath);
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
Iterator<List<WriteStatus>> insertResult = table.handleInsert("100", records.iterator());
// Simulate a completed commit by touching the commit file, then reload the meta client.
Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile).close();
metaClient = new HoodieTableMetaClient(HoodieTestUtils.getDefaultHadoopConf(), basePath);
String fileId = insertResult.next().get(0).getFileId();
System.out.println(fileId);
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
    ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
    HoodieTestUtils.getDefaultHadoopConf(),
    Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
HoodieParquetWriter writer = new HoodieParquetWriter(
FileSystem fs = FSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();

fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
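// A small follow-up sketch (an assumption, not in the original source): once the
// commit timeline is reloaded for the table path, its instants can be inspected.
timeline.getInstants().forEach(instant -> System.out.println("commit instant: " + instant));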