/** * Verifies that all operator states can be mapped to an execution job vertex. * * @param allowNonRestoredState if false an exception will be thrown if a state could not be mapped * @param operatorStates operator states to map * @param tasks task to map to */ private static void checkStateMappingCompleteness( boolean allowNonRestoredState, Map<OperatorID, OperatorState> operatorStates, Map<JobVertexID, ExecutionJobVertex> tasks) { Set<OperatorID> allOperatorIDs = new HashSet<>(); for (ExecutionJobVertex executionJobVertex : tasks.values()) { allOperatorIDs.addAll(executionJobVertex.getOperatorIDs()); } for (Map.Entry<OperatorID, OperatorState> operatorGroupStateEntry : operatorStates.entrySet()) { OperatorState operatorState = operatorGroupStateEntry.getValue(); //----------------------------------------find operator for state--------------------------------------------- if (!allOperatorIDs.contains(operatorGroupStateEntry.getKey())) { if (allowNonRestoredState) { LOG.info("Skipped checkpoint state for operator {}.", operatorState.getOperatorID()); } else { throw new IllegalStateException("There is no operator for the state " + operatorState.getOperatorID()); } } } }
/** * Verifies that all operator states can be mapped to an execution job vertex. * * @param allowNonRestoredState if false an exception will be thrown if a state could not be mapped * @param operatorStates operator states to map * @param tasks task to map to */ private static void checkStateMappingCompleteness( boolean allowNonRestoredState, Map<OperatorID, OperatorState> operatorStates, Map<JobVertexID, ExecutionJobVertex> tasks) { Set<OperatorID> allOperatorIDs = new HashSet<>(); for (ExecutionJobVertex executionJobVertex : tasks.values()) { allOperatorIDs.addAll(executionJobVertex.getOperatorIDs()); } for (Map.Entry<OperatorID, OperatorState> operatorGroupStateEntry : operatorStates.entrySet()) { OperatorState operatorState = operatorGroupStateEntry.getValue(); //----------------------------------------find operator for state--------------------------------------------- if (!allOperatorIDs.contains(operatorGroupStateEntry.getKey())) { if (allowNonRestoredState) { LOG.info("Skipped checkpoint state for operator {}.", operatorState.getOperatorID()); } else { throw new IllegalStateException("There is no operator for the state " + operatorState.getOperatorID()); } } } }
/** * Verifies that all operator states can be mapped to an execution job vertex. * * @param allowNonRestoredState if false an exception will be thrown if a state could not be mapped * @param operatorStates operator states to map * @param tasks task to map to */ private static void checkStateMappingCompleteness( boolean allowNonRestoredState, Map<OperatorID, OperatorState> operatorStates, Map<JobVertexID, ExecutionJobVertex> tasks) { Set<OperatorID> allOperatorIDs = new HashSet<>(); for (ExecutionJobVertex executionJobVertex : tasks.values()) { allOperatorIDs.addAll(executionJobVertex.getOperatorIDs()); } for (Map.Entry<OperatorID, OperatorState> operatorGroupStateEntry : operatorStates.entrySet()) { OperatorState operatorState = operatorGroupStateEntry.getValue(); //----------------------------------------find operator for state--------------------------------------------- if (!allOperatorIDs.contains(operatorGroupStateEntry.getKey())) { if (allowNonRestoredState) { LOG.info("Skipped checkpoint state for operator {}.", operatorState.getOperatorID()); } else { throw new IllegalStateException("There is no operator for the state " + operatorState.getOperatorID()); } } } }
/**
 * Looks up the state of a single operator among the operator states of a savepoint.
 *
 * @param savepoint savepoint whose operator states are searched
 * @param opId ID of the operator whose state is requested
 * @return an operator state of the savepoint whose operator ID equals {@code opId}
 * @throws RuntimeException if the savepoint contains no state with the given operator ID
 */
public static OperatorState getOperatorState(Savepoint savepoint, OperatorID opId) {
    for (OperatorState candidate : savepoint.getOperatorStates()) {
        if (candidate.getOperatorID().equals(opId)) {
            return candidate;
        }
    }
    throw new RuntimeException("No operator state with id " + opId.toString());
}
/** * Verifies that all operator states can be mapped to an execution job vertex. * * @param allowNonRestoredState if false an exception will be thrown if a state could not be mapped * @param operatorStates operator states to map * @param tasks task to map to */ private static void checkStateMappingCompleteness( boolean allowNonRestoredState, Map<OperatorID, OperatorState> operatorStates, Map<JobVertexID, ExecutionJobVertex> tasks) { Set<OperatorID> allOperatorIDs = new HashSet<>(); for (ExecutionJobVertex executionJobVertex : tasks.values()) { allOperatorIDs.addAll(executionJobVertex.getOperatorIDs()); } for (Map.Entry<OperatorID, OperatorState> operatorGroupStateEntry : operatorStates.entrySet()) { OperatorState operatorState = operatorGroupStateEntry.getValue(); //----------------------------------------find operator for state--------------------------------------------- if (!allOperatorIDs.contains(operatorGroupStateEntry.getKey())) { if (allowNonRestoredState) { LOG.info("Skipped checkpoint state for operator {}.", operatorState.getOperatorID()); } else { throw new IllegalStateException("There is no operator for the state " + operatorState.getOperatorID()); } } } }
/**
 * Create a new {@link Savepoint} by replacing certain {@link OperatorState}s of an old
 * {@link Savepoint}.
 *
 * <p>States are matched by operator ID. A state in {@code statesToReplace} whose operator ID
 * does not occur in the old savepoint is added to the result. The checkpoint ID and the
 * master states are taken over from the old savepoint.
 *
 * @param oldSavepoint
 *            {@link Savepoint} to base the new state on
 * @param statesToReplace
 *            States that will be replaced, all else will be kept
 * @return A new valid {@link Savepoint} metadata object.
 * @throws IllegalStateException
 *             if the old savepoint contains more than one state for the same operator ID
 */
public static Savepoint createNewSavepoint(Savepoint oldSavepoint, Collection<OperatorState> statesToReplace) {
    // The map is modified below, so it is built explicitly: the two-argument
    // Collectors.toMap gives no guarantee that the returned map is mutable.
    Map<OperatorID, OperatorState> newStates = new java.util.HashMap<>();
    for (OperatorState existing : oldSavepoint.getOperatorStates()) {
        if (newStates.putIfAbsent(existing.getOperatorID(), existing) != null) {
            throw new IllegalStateException(
                    "Duplicate operator state for operator " + existing.getOperatorID());
        }
    }

    // Replacements override the old state that has the same operator ID.
    for (OperatorState replacement : statesToReplace) {
        newStates.put(replacement.getOperatorID(), replacement);
    }

    return new SavepointV2(oldSavepoint.getCheckpointId(), newStates.values(), oldSavepoint.getMasterStates());
}
// Write the operator ID to dos as two longs: the lower part first, then the upper part.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart());
// Write the operator ID to dos as two longs: the lower part first, then the upper part.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart());
// Write the operator ID to dos as two longs: the lower part first, then the upper part.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart());
// Write the operator ID to dos as two longs: the lower part first, then the upper part.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart());
// Write the operator ID to dos as two longs: the lower part first, then the upper part.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart());
for (OperatorState operatorState : savepoint.getOperatorStates()) { ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to savepoint %s. " + "in a non-compatible way after the savepoint.", savepoint, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { String msg = String.format("Failed to rollback to savepoint %s. " + "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", savepointPath, operatorState.getOperatorID());
for (OperatorState operatorState : checkpointMetadata.getOperatorStates()) { ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "in a non-compatible way after the checkpoint/savepoint.", checkpointMetadata, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) { "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", checkpointPointer, operatorState.getOperatorID()); LOG.info("Skipping empty savepoint state for operator {}.", operatorState.getOperatorID());
for (OperatorState operatorState : checkpointMetadata.getOperatorStates()) { ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "in a non-compatible way after the checkpoint/savepoint.", checkpointMetadata, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) { "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", checkpointPointer, operatorState.getOperatorID()); LOG.info("Skipping empty savepoint state for operator {}.", operatorState.getOperatorID());
/**
 * Builds the output directory path for the base operator state and tries to create it.
 *
 * <p>The path has the form {@code <newCheckpointBasePath>/mchk-<checkpointId>/op-<operatorID>}.
 * Directory creation is best effort: an {@link IOException} raised while creating the
 * directory is ignored, and the path is returned regardless.
 *
 * @return the output directory path
 */
private Path makeOutputDir() {
    final Path checkpointDir = new Path(newCheckpointBasePath, "mchk-" + checkpointId);
    final Path operatorDir = new Path(checkpointDir, "op-" + baseOpState.getOperatorID());

    try {
        operatorDir.getFileSystem().mkdirs(operatorDir);
    } catch (IOException ignored) {
        // Intentionally ignored: a failure to create the directory is not reported here.
    }

    return operatorDir;
}
for (OperatorState operatorState : checkpointMetadata.getOperatorStates()) { ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "in a non-compatible way after the checkpoint/savepoint.", checkpointMetadata, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) { "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", checkpointPointer, operatorState.getOperatorID()); LOG.info("Skipping empty savepoint state for operator {}.", operatorState.getOperatorID());
// Create a fresh OperatorState that reuses the base state's operator ID with the given parallelism and max parallelism.
OperatorState newOperatorState = new OperatorState(baseOpState.getOperatorID(), parallelism, maxParallelism);