/**
 * Verifies partition discovery when the raw data root holds only a file and no partition directory.
 *
 * <p>A single file is created directly under {@code RAW_DATA_PATH}. The partition manager is then
 * expected to report exactly one existing partition and to return {@code RAW_DATA_PATH} itself as
 * the next partition to process.
 *
 * @throws IOException if a file system or metadata operation fails
 */
@Test
public void testGetExistingPartitionsOnlyFilesExist() throws IOException {
    final Path partition0File = new Path(RAW_DATA_PATH, PARTITION0);
    // create() hands back an open output stream; close it right away so the empty file is
    // finalized and the handle is not leaked for the rest of the test run.
    this.fileSystem.create(partition0File).close();

    // The data file is created before the manager is constructed, as in the original test.
    final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME,
        HDFSTestConstants.BASE_METADATA_PATH,
        RAW_DATA_PATH,
        this.fileSystem);

    final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem,
        new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(),
        new AtomicBoolean(true));

    Assert.assertEquals(1, pm.getExistingPartitions().size());

    final Optional<String> nextPartition = pm.getNextPartition(getLatestCheckpoint(metadataManager));
    // Check presence first so a missing partition fails as an assertion instead of inside get().
    Assert.assertTrue(nextPartition.isPresent());
    Assert.assertEquals(RAW_DATA_PATH, nextPartition.get());
}
/**
 * Verifies that the only data partition is returned when no checkpoint has been saved.
 *
 * <p>One file is created under the {@code PARTITION1} directory and nothing is written through the
 * metadata manager, so the next partition is expected to be {@code PARTITION1}.
 *
 * @throws InterruptedException declared by the test signature
 * @throws IOException if a file system or metadata operation fails
 */
@Test
public void testGetNextPartitionWithNonExistentCheckpoint() throws InterruptedException, IOException {
    final Path partitionPath = new Path(RAW_DATA_PATH, PARTITION1);
    final Path filePath = new Path(partitionPath, FILE1);
    // create() hands back an open output stream; close it right away so the empty file is
    // finalized and the handle is not leaked for the rest of the test run.
    this.fileSystem.create(filePath).close();

    final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME,
        HDFSTestConstants.BASE_METADATA_PATH,
        RAW_DATA_PATH,
        this.fileSystem);

    final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem,
        new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(),
        new AtomicBoolean(true));

    final Optional<String> partition = pm.getNextPartition(getLatestCheckpoint(metadataManager));
    Assert.assertTrue(partition.isPresent());
    Assert.assertEquals(PARTITION1, partition.get());
}
@Test public void testGetNextPartitionCheckpointIsLargerThanPartition() throws InterruptedException, IOException { final Path partition2Path = new Path(RAW_DATA_PATH, PARTITION2); this.fileSystem.mkdirs(new Path(partition2Path, FILE1)); final StringValue val1 = new StringValue(PARTITION2); final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME, HDFSTestConstants.BASE_METADATA_PATH, RAW_DATA_PATH, this.fileSystem); final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem, new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true)); metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1); metadataManager.saveChanges(); final Path partition1Path = new Path(RAW_DATA_PATH, PARTITION1); this.fileSystem.mkdirs(new Path(partition1Path, FILE1)); // Checkpoint value is greater than the partitions in the data folder so nothing new to process Assert.assertFalse(pm.getNextPartition(getLatestCheckpoint(metadataManager)).isPresent()); }
@Test public void testGetNextPartitionWithOnlyTempFileCheckpoints() throws InterruptedException, IOException { final Path partitionPath = new Path(RAW_DATA_PATH, PARTITION1); final Path filePath = new Path(partitionPath, FILE1); this.fileSystem.create(filePath); final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME, HDFSTestConstants.BASE_METADATA_PATH, RAW_DATA_PATH, this.fileSystem); final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem, new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true)); // if this metadata was saved successfully we would say there's no partition to process // but this will be in a temp file so it will be ignored metadataManager.set(MetadataConstants.CHECKPOINT_KEY, new StringValue(PARTITION2)); metadataManager.saveChanges(); final Optional<FileStatus> fs = metadataManager.getLatestMetadataFile(); Assert.assertTrue(fs.isPresent()); // move the metadata file back to a temp location this.fileSystem.rename(fs.get().getPath(), new Path(fs.get().getPath().toString() + MetadataConstants.TEMP_FILE_EXTENSION)); final Optional<String> partition = pm.getNextPartition(getLatestCheckpoint(metadataManager)); Assert.assertTrue(partition.isPresent()); Assert.assertEquals(PARTITION1, partition.get()); }
/**
 * Verifies which partition is chosen when several data partitions follow the checkpoint.
 *
 * <p>Files are created under {@code PARTITION2} and {@code PARTITION3} and the checkpoint is saved
 * as {@code PARTITION1}; the next partition is expected to be {@code PARTITION2}.
 *
 * @throws IOException if a file system or metadata operation fails
 * @throws InterruptedException declared by the test signature
 */
@Test
public void testGetNextPartitionMultipleDataPartitions() throws IOException, InterruptedException {
    final StringValue val1 = new StringValue(PARTITION1);

    final Path partition2Path = new Path(RAW_DATA_PATH, PARTITION2);
    final Path partition3Path = new Path(RAW_DATA_PATH, PARTITION3);

    // create() hands back an open output stream; close each one right away so the empty files
    // are finalized and the handles are not leaked for the rest of the test run.
    this.fileSystem.create(new Path(partition2Path, FILE1)).close();
    this.fileSystem.create(new Path(partition3Path, FILE1)).close();

    final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME,
        HDFSTestConstants.BASE_METADATA_PATH,
        RAW_DATA_PATH,
        this.fileSystem);

    final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem,
        new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(),
        new AtomicBoolean(true));

    metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1);
    metadataManager.saveChanges();

    final Optional<StringValue> latestCheckpoint = getLatestCheckpoint(metadataManager);
    // Resolve the next partition once and use assertEquals so a mismatch reports both values.
    final Optional<String> nextPartition = pm.getNextPartition(latestCheckpoint);
    Assert.assertTrue(nextPartition.isPresent());
    Assert.assertEquals(PARTITION2, nextPartition.get());
}
/**
 * Verifies that the single data partition after the checkpoint is returned.
 *
 * <p>A file is created under {@code PARTITION2} and the checkpoint is saved as {@code PARTITION1};
 * the next partition is expected to be {@code PARTITION2}.
 *
 * @throws IOException if a file system or metadata operation fails
 * @throws InterruptedException declared by the test signature
 */
@Test
public void testGetNextPartitionSinglePartition() throws IOException, InterruptedException {
    final Path partitionPath = new Path(RAW_DATA_PATH, PARTITION2);
    final Path filePath = new Path(partitionPath, FILE1);
    // create() hands back an open output stream; close it right away so the empty file is
    // finalized and the handle is not leaked for the rest of the test run.
    this.fileSystem.create(filePath).close();

    final StringValue val1 = new StringValue(PARTITION1);

    final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME,
        HDFSTestConstants.BASE_METADATA_PATH,
        RAW_DATA_PATH,
        this.fileSystem);

    final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem,
        new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(),
        new AtomicBoolean(true));

    metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1);
    metadataManager.saveChanges();

    final Optional<StringValue> latestCheckpoint = getLatestCheckpoint(metadataManager);
    // Resolve the next partition once and use assertEquals so a mismatch reports both values.
    final Optional<String> nextPartition = pm.getNextPartition(latestCheckpoint);
    Assert.assertTrue(nextPartition.isPresent());
    Assert.assertEquals(PARTITION2, nextPartition.get());
}
new AtomicBoolean(true)); final Optional<StringValue> latestCheckpoint = getLatestCheckpoint(metadataManager); Assert.assertTrue(pm.getNextPartition(latestCheckpoint).isPresent()); Assert.assertEquals(pm.getNextPartition(latestCheckpoint).get(), PARTITION1);