fsWriter.getResumeRecoverableSerializer(), fsWriter.getCommitRecoverableSerializer(), bucketAssigner.getSerializer() );
/**
 * Rolls over to a new part file: calls {@code closePartFile()}, opens a stream on a freshly
 * assembled part path and installs the resulting part as the in-progress one.
 *
 * @param currentTime timestamp passed to the part-file factory when opening the new part
 * @throws IOException propagated from closing the old part or opening the new one
 */
private void rollPartFile(final long currentTime) throws IOException {
    closePartFile();

    final Path newPartPath = assembleNewPartPath();
    final RecoverableFsDataOutputStream partStream = fsWriter.open(newPartPath);
    inProgressPart = partFileFactory.openNew(bucketId, partStream, newPartPath, currentTime);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Subtask {} opening new part file \"{}\" for bucket id={}.",
                subtaskIndex, newPartPath.getName(), bucketId);
    }

    // the counter only advances once the new part has actually been opened
    partCounter++;
}
private void restoreInProgressFile(final BucketState<BucketID> state) throws IOException { if (!state.hasInProgressResumableFile()) { return; } // we try to resume the previous in-progress file final ResumeRecoverable resumable = state.getInProgressResumableFile(); if (fsWriter.supportsResume()) { final RecoverableFsDataOutputStream stream = fsWriter.recover(resumable); inProgressPart = partFileFactory.resumeFrom( bucketId, stream, resumable, state.getInProgressFileCreationTime()); } else { // if the writer does not support resume, then we close the // in-progress part and commit it, as done in the case of pending files. fsWriter.recoverForCommit(resumable).commitAfterRecovery(); } if (fsWriter.requiresCleanupOfRecoverableState()) { fsWriter.cleanupRecoverableState(resumable); } }
final Path part = new Path(bucketPath, "part-" + i + '-' + j); final RecoverableFsDataOutputStream stream = writer.open(part); stream.write((PENDING_CONTENT + '-' + j).getBytes(Charset.forName("UTF-8"))); recoverables.add(stream.closeForCommit().getRecoverable()); final RecoverableFsDataOutputStream stream = writer.open(testBucket); stream.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8"))); final SimpleVersionedSerializer<BucketState<String>> serializer = new BucketStateSerializer<>( writer.getResumeRecoverableSerializer(), writer.getCommitRecoverableSerializer(), SimpleVersionedStringSerializer.INSTANCE ); writer.recoverForCommit(recoverable).commit();
@Test public void testCommitAfterRecovery() throws Exception { final Path testDir = getBasePathForTest(); final Path path = new Path(testDir, "part-0"); final RecoverableWriter initWriter = getNewFileSystemWriter(); final RecoverableWriter.CommitRecoverable recoverable; try (final RecoverableFsDataOutputStream stream = initWriter.open(path)) { stream.write(testData1.getBytes(StandardCharsets.UTF_8)); stream.persist(); stream.persist(); // and write some more data stream.write(testData2.getBytes(StandardCharsets.UTF_8)); recoverable = stream.closeForCommit().getRecoverable(); } final byte[] serializedRecoverable = initWriter.getCommitRecoverableSerializer().serialize(recoverable); // get a new serializer from a new writer to make sure that no pre-initialized state leaks in. final RecoverableWriter newWriter = getNewFileSystemWriter(); final SimpleVersionedSerializer<RecoverableWriter.CommitRecoverable> deserializer = newWriter.getCommitRecoverableSerializer(); final RecoverableWriter.CommitRecoverable recoveredRecoverable = deserializer.deserialize(deserializer.getVersion(), serializedRecoverable); final RecoverableFsDataOutputStream.Committer committer = newWriter.recoverForCommit(recoveredRecoverable); committer.commitAfterRecovery(); Map<Path, String> files = getFileContentByPath(testDir); Assert.assertEquals(1L, files.size()); for (Map.Entry<Path, String> fileContents : files.entrySet()) { Assert.assertEquals("part-0", fileContents.getKey().getName()); Assert.assertEquals(testData1 + testData2, fileContents.getValue()); } }
@Test public void testSerializationOnlyInProgress() throws IOException { final File testFolder = tempFolder.newFolder(); final FileSystem fs = FileSystem.get(testFolder.toURI()); final Path testBucket = new Path(testFolder.getPath(), "test"); final RecoverableWriter writer = fs.createRecoverableWriter(); final RecoverableFsDataOutputStream stream = writer.open(testBucket); stream.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8"))); final RecoverableWriter.ResumeRecoverable current = stream.persist(); final BucketState<String> bucketState = new BucketState<>( "test", testBucket, Long.MAX_VALUE, current, new HashMap<>()); final SimpleVersionedSerializer<BucketState<String>> serializer = new BucketStateSerializer<>( writer.getResumeRecoverableSerializer(), writer.getCommitRecoverableSerializer(), SimpleVersionedStringSerializer.INSTANCE ); final byte[] bytes = SimpleVersionedSerialization.writeVersionAndSerialize(serializer, bucketState); // to simulate that everything is over for file. stream.close(); final BucketState<String> recoveredState = SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes); Assert.assertEquals(testBucket, recoveredState.getBucketPath()); FileStatus[] statuses = fs.listStatus(testBucket.getParent()); Assert.assertEquals(1L, statuses.length); Assert.assertTrue( statuses[0].getPath().getPath().startsWith( (new Path(testBucket.getParent(), ".test.inprogress")).toString()) ); }
try (final RecoverableFsDataOutputStream stream = initWriter.open(path)) { recoverables.put(INIT_EMPTY_PERSIST, stream.persist()); final SimpleVersionedSerializer<RecoverableWriter.ResumeRecoverable> serializer = initWriter.getResumeRecoverableSerializer(); final byte[] serializedRecoverable = serializer.serialize(recoverables.get(persistName)); final SimpleVersionedSerializer<RecoverableWriter.ResumeRecoverable> deserializer = newWriter.getResumeRecoverableSerializer(); final RecoverableWriter.ResumeRecoverable recoveredRecoverable = deserializer.deserialize(serializer.getVersion(), serializedRecoverable); try (final RecoverableFsDataOutputStream recoveredStream = newWriter.recover(recoveredRecoverable)) {
@Test(expected = IOException.class) public void testResumeAfterCommit() throws Exception { final Path testDir = getBasePathForTest(); final RecoverableWriter writer = getNewFileSystemWriter(); final Path path = new Path(testDir, "part-0"); RecoverableWriter.ResumeRecoverable recoverable; try (final RecoverableFsDataOutputStream stream = writer.open(path)) { stream.write(testData1.getBytes(StandardCharsets.UTF_8)); recoverable = stream.persist(); stream.write(testData2.getBytes(StandardCharsets.UTF_8)); stream.closeForCommit().commit(); } // this should throw an exception as the file is already committed writer.recover(recoverable); fail(); }
private void commitRecoveredPendingFiles(final BucketState<BucketID> state) throws IOException { // we commit pending files for checkpoints that precess the last successful one, from which we are recovering for (List<CommitRecoverable> committables: state.getCommittableFilesPerCheckpoint().values()) { for (CommitRecoverable committable: committables) { fsWriter.recoverForCommit(committable).commitAfterRecovery(); } } }
/**
 * Cleans up and forgets the resumables registered for checkpoints strictly before the
 * given one.
 *
 * @param checkpointId exclusive upper bound of the checkpoints whose resumables are dropped
 * @throws IOException propagated from the writer's cleanup call
 */
private void cleanupOutdatedResumables(long checkpointId) throws IOException {
    for (Iterator<Map.Entry<Long, ResumeRecoverable>> outdated =
                resumablesPerCheckpoint.headMap(checkpointId, false).entrySet().iterator();
            outdated.hasNext(); ) {

        final ResumeRecoverable stale = outdated.next().getValue();
        final boolean deleted = fsWriter.cleanupRecoverableState(stale);

        // the entry is dropped whether or not the writer reported a deletion
        outdated.remove();

        if (LOG.isDebugEnabled() && deleted) {
            LOG.debug("Subtask {} successfully deleted incomplete part for bucket id={}.", subtaskIndex, bucketId);
        }
    }
}
BucketState<BucketID> onReceptionOfCheckpoint(long checkpointId) throws IOException { prepareBucketForCheckpointing(checkpointId); ResumeRecoverable inProgressResumable = null; long inProgressFileCreationTime = Long.MAX_VALUE; if (inProgressPart != null) { inProgressResumable = inProgressPart.persist(); inProgressFileCreationTime = inProgressPart.getCreationTime(); // the following is an optimization so that writers that do not // require cleanup, they do not have to keep track of resumables // and later iterate over the active buckets. // (see onSuccessfulCompletionOfCheckpoint()) if (fsWriter.requiresCleanupOfRecoverableState()) { this.resumablesPerCheckpoint.put(checkpointId, inProgressResumable); } } return new BucketState<>(bucketId, bucketPath, inProgressFileCreationTime, inProgressResumable, pendingPartsPerCheckpoint); }
final Path part = new Path(bucketPath, "test-" + i + '-' + j); final RecoverableFsDataOutputStream stream = writer.open(part); stream.write((PENDING_CONTENT + '-' + j).getBytes(Charset.forName("UTF-8"))); recoverables.add(stream.closeForCommit().getRecoverable()); writer.getResumeRecoverableSerializer(), writer.getCommitRecoverableSerializer(), SimpleVersionedStringSerializer.INSTANCE ); writer.recoverForCommit(recoverable).commit();
@Test public void testResumeWithWrongOffset() throws Exception { // this is a rather unrealistic scenario, but it is to trigger // truncation of the file and try to resume with missing data. final Path testDir = getBasePathForTest(); final RecoverableWriter writer = getNewFileSystemWriter(); final Path path = new Path(testDir, "part-0"); final RecoverableWriter.ResumeRecoverable recoverable1; final RecoverableWriter.ResumeRecoverable recoverable2; try (final RecoverableFsDataOutputStream stream = writer.open(path)) { stream.write(testData1.getBytes(StandardCharsets.UTF_8)); recoverable1 = stream.persist(); stream.write(testData2.getBytes(StandardCharsets.UTF_8)); recoverable2 = stream.persist(); stream.write(testData3.getBytes(StandardCharsets.UTF_8)); } try (RecoverableFsDataOutputStream ignored = writer.recover(recoverable1)) { // this should work fine } catch (Exception e) { fail(); } // this should throw an exception try (RecoverableFsDataOutputStream ignored = writer.recover(recoverable2)) { fail(); } catch (IOException e) { // we expect this return; } fail(); }
/**
 * Commits all pending parts registered for checkpoints up to and including the given one,
 * removes them from the pending map, and then cleans up outdated resumables.
 *
 * @param checkpointId id of the checkpoint that completed successfully
 * @throws IOException propagated from committing a part or cleaning up resumables
 */
void onSuccessfulCompletionOfCheckpoint(long checkpointId) throws IOException {
    checkNotNull(fsWriter);

    for (Iterator<Map.Entry<Long, List<CommitRecoverable>>> pending =
                pendingPartsPerCheckpoint.headMap(checkpointId, true).entrySet().iterator();
            pending.hasNext(); ) {

        final List<CommitRecoverable> committables = pending.next().getValue();
        for (CommitRecoverable committable : committables) {
            fsWriter.recoverForCommit(committable).commit();
        }

        // the checkpoint's entry is only dropped after all of its parts were committed
        pending.remove();
    }

    cleanupOutdatedResumables(checkpointId);
}
/**
 * Cleans up and forgets the resumables registered for checkpoints strictly before the
 * given one.
 *
 * @param checkpointId exclusive upper bound of the checkpoints whose resumables are dropped
 * @throws IOException propagated from the writer's cleanup call
 */
private void cleanupOutdatedResumables(long checkpointId) throws IOException {
    for (Iterator<Map.Entry<Long, ResumeRecoverable>> outdated =
                resumablesPerCheckpoint.headMap(checkpointId, false).entrySet().iterator();
            outdated.hasNext(); ) {

        final ResumeRecoverable stale = outdated.next().getValue();
        final boolean deleted = fsWriter.cleanupRecoverableState(stale);

        // the entry is dropped whether or not the writer reported a deletion
        outdated.remove();

        if (LOG.isDebugEnabled() && deleted) {
            LOG.debug("Subtask {} successfully deleted incomplete part for bucket id={}.", subtaskIndex, bucketId);
        }
    }
}
BucketState<BucketID> onReceptionOfCheckpoint(long checkpointId) throws IOException { prepareBucketForCheckpointing(checkpointId); ResumeRecoverable inProgressResumable = null; long inProgressFileCreationTime = Long.MAX_VALUE; if (inProgressPart != null) { inProgressResumable = inProgressPart.persist(); inProgressFileCreationTime = inProgressPart.getCreationTime(); // the following is an optimization so that writers that do not // require cleanup, they do not have to keep track of resumables // and later iterate over the active buckets. // (see onSuccessfulCompletionOfCheckpoint()) if (fsWriter.requiresCleanupOfRecoverableState()) { this.resumablesPerCheckpoint.put(checkpointId, inProgressResumable); } } return new BucketState<>(bucketId, bucketPath, inProgressFileCreationTime, inProgressResumable, pendingPartsPerCheckpoint); }
private void restoreInProgressFile(final BucketState<BucketID> state) throws IOException { if (!state.hasInProgressResumableFile()) { return; } // we try to resume the previous in-progress file final ResumeRecoverable resumable = state.getInProgressResumableFile(); if (fsWriter.supportsResume()) { final RecoverableFsDataOutputStream stream = fsWriter.recover(resumable); inProgressPart = partFileFactory.resumeFrom( bucketId, stream, resumable, state.getInProgressFileCreationTime()); } else { // if the writer does not support resume, then we close the // in-progress part and commit it, as done in the case of pending files. fsWriter.recoverForCommit(resumable).commitAfterRecovery(); } if (fsWriter.requiresCleanupOfRecoverableState()) { fsWriter.cleanupRecoverableState(resumable); } }
/**
 * Round-trips a bucket state with no in-progress file and no pending files through the
 * serializer and checks that the restored state is equally empty.
 */
@Test
public void testSerializationEmpty() throws IOException {
    final File folder = tempFolder.newFolder();
    final FileSystem fileSystem = FileSystem.get(folder.toURI());
    final RecoverableWriter recoverableWriter = fileSystem.createRecoverableWriter();

    final Path bucketPath = new Path(folder.getPath(), "test");

    final BucketState<String> original = new BucketState<>(
            "test", bucketPath, Long.MAX_VALUE, null, new HashMap<>());

    final SimpleVersionedSerializer<BucketState<String>> stateSerializer =
            new BucketStateSerializer<>(
                    recoverableWriter.getResumeRecoverableSerializer(),
                    recoverableWriter.getCommitRecoverableSerializer(),
                    SimpleVersionedStringSerializer.INSTANCE
            );

    final byte[] serialized =
            SimpleVersionedSerialization.writeVersionAndSerialize(stateSerializer, original);
    final BucketState<String> restored =
            SimpleVersionedSerialization.readVersionAndDeSerialize(stateSerializer, serialized);

    // the path must survive, and nothing in-progress or pending may appear
    Assert.assertEquals(bucketPath, restored.getBucketPath());
    Assert.assertNull(restored.getInProgressResumableFile());
    Assert.assertTrue(restored.getCommittableFilesPerCheckpoint().isEmpty());
}
/**
 * Opens a stream, writes nothing, and commits it. Before the commit, every file found must
 * be an empty in-progress file; after the commit, every file found must be an empty file
 * named {@code part-0}.
 */
@Test
public void testCloseWithNoData() throws Exception {
    final RecoverableWriter recoverableWriter = getNewFileSystemWriter();

    final Path baseDir = getBasePathForTest();
    final Path target = new Path(baseDir, "part-0");

    final RecoverableFsDataOutputStream out = recoverableWriter.open(target);

    // before committing: only empty in-progress files
    for (Map.Entry<Path, String> entry : getFileContentByPath(baseDir).entrySet()) {
        final String fileName = entry.getKey().getName();
        Assert.assertTrue(fileName.startsWith(".part-0.inprogress."));
        Assert.assertTrue(entry.getValue().isEmpty());
    }

    out.closeForCommit().commit();

    // after committing: only the empty final file
    for (Map.Entry<Path, String> entry : getFileContentByPath(baseDir).entrySet()) {
        final String fileName = entry.getKey().getName();
        Assert.assertEquals("part-0", fileName);
        Assert.assertTrue(entry.getValue().isEmpty());
    }
}
private void commitRecoveredPendingFiles(final BucketState<BucketID> state) throws IOException { // we commit pending files for checkpoints that precess the last successful one, from which we are recovering for (List<CommitRecoverable> committables: state.getCommittableFilesPerCheckpoint().values()) { for (CommitRecoverable committable: committables) { fsWriter.recoverForCommit(committable).commitAfterRecovery(); } } }