/** * Verifies that all operator states can be mapped to an execution job vertex. * * @param allowNonRestoredState if false an exception will be thrown if a state could not be mapped * @param operatorStates operator states to map * @param tasks task to map to */ private static void checkStateMappingCompleteness( boolean allowNonRestoredState, Map<OperatorID, OperatorState> operatorStates, Map<JobVertexID, ExecutionJobVertex> tasks) { Set<OperatorID> allOperatorIDs = new HashSet<>(); for (ExecutionJobVertex executionJobVertex : tasks.values()) { allOperatorIDs.addAll(executionJobVertex.getOperatorIDs()); } for (Map.Entry<OperatorID, OperatorState> operatorGroupStateEntry : operatorStates.entrySet()) { OperatorState operatorState = operatorGroupStateEntry.getValue(); //----------------------------------------find operator for state--------------------------------------------- if (!allOperatorIDs.contains(operatorGroupStateEntry.getKey())) { if (allowNonRestoredState) { LOG.info("Skipped checkpoint state for operator {}.", operatorState.getOperatorID()); } else { throw new IllegalStateException("There is no operator for the state " + operatorState.getOperatorID()); } } } }
/**
 * Collect {@link KeyGroupsStateHandle managedKeyedStateHandles} which have intersection with given
 * {@link KeyGroupRange} from {@link TaskState operatorState}
 *
 * @param operatorState all state handles of a operator
 * @param subtaskKeyGroupRange the KeyGroupRange of a subtask
 * @return all managedKeyedStateHandles which have intersection with given KeyGroupRange
 */
public static List<KeyedStateHandle> getManagedKeyedStateHandles(
        OperatorState operatorState,
        KeyGroupRange subtaskKeyGroupRange) {

    List<KeyedStateHandle> intersectingHandles = new ArrayList<>();

    // Walk all old subtasks; subtasks without recorded state are skipped.
    for (int subtaskIndex = 0; subtaskIndex < operatorState.getParallelism(); subtaskIndex++) {
        if (operatorState.getState(subtaskIndex) == null) {
            continue;
        }
        extractIntersectingState(
            operatorState.getState(subtaskIndex).getManagedKeyedState(),
            subtaskKeyGroupRange,
            intersectingHandles);
    }

    return intersectingHandles;
}
@Override
public KeyedStateInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    // One split per recorded subtask state; the split count is fixed by the
    // checkpointed operator state, so minNumSplits is not consulted here.
    return operatorState
        .getSubtaskStates()
        .entrySet()
        .stream()
        .map(subtaskEntry -> new KeyedStateInputSplit(subtaskEntry.getKey(), subtaskEntry.getValue()))
        .toArray(KeyedStateInputSplit[]::new);
}
// Fragment of a serializer method (enclosing signature not visible in this view).
// Writes one OperatorState to the DataOutputStream: operator id (as two longs,
// lower then upper part), parallelism, max parallelism, a constant 1
// (presumably a sub-structure count or format marker -- TODO confirm against the
// matching deserializer), then the subtask-state map size before iterating entries.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart()); int parallelism = operatorState.getParallelism(); dos.writeInt(parallelism); dos.writeInt(operatorState.getMaxParallelism()); dos.writeInt(1); Map<Integer, OperatorSubtaskState> subtaskStateMap = operatorState.getSubtaskStates(); dos.writeInt(subtaskStateMap.size()); for (Map.Entry<Integer, OperatorSubtaskState> entry : subtaskStateMap.entrySet()) {
// Fragment (lambda body is truncated at the end of this view). Builds a new
// OperatorState with the same id/parallelism/maxParallelism as baseOpState, then
// for each subtask rewrites the managed operator state via transformSubtaskOpState
// (writing into outDir) and re-registers it under the same subtask id.
// NOTE(review): newKeyedHandle is looked up from handleMap here but its use is not
// visible in this fragment -- confirm it is consumed in the truncated tail.
int maxParallelism = baseOpState.getMaxParallelism(); int parallelism = baseOpState.getParallelism(); Path outDir = makeOutputDir(); OperatorState newOperatorState = new OperatorState(baseOpState.getOperatorID(), parallelism, maxParallelism); baseOpState.getSubtaskStates().forEach((subtaskId, subtaskState) -> { KeyedStateHandle newKeyedHandle = handleMap.get(subtaskId); StateObjectCollection<OperatorStateHandle> opHandle = transformSubtaskOpState(outDir, subtaskId, subtaskState.getManagedOperatorState()); newOperatorState.putState(subtaskId, new OperatorSubtaskState( opHandle,
// Fragment of a larger method: creates an OperatorState keyed by the job vertex id,
// adds it to the surrounding operatorStates collection, and stores the given
// subtask state at subtaskIndex. Variables jobVertexId, parallelism,
// maxParallelism, subtaskIndex and subtaskState are defined outside this view.
OperatorState taskState = new OperatorState(jobVertexId, parallelism, maxParallelism); operatorStates.add(taskState); taskState.putState(subtaskIndex, subtaskState);
// NOTE(review): heavily truncated fragment -- braces and several control-flow
// constructs are unbalanced, and format strings are missing interior lines, so the
// surrounding if/else structure cannot be reconstructed from this view alone.
// Apparent intent: map an OperatorState to its ExecutionJobVertex (retrying the
// lookup after expanding to legacy/user-defined OperatorIDs), accept the state when
// max parallelism matches or was not explicitly configured, and otherwise either
// fail the checkpoint/savepoint rollback or -- when non-restored state is allowed --
// log and skip the state. Do not edit without the full original method.
ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); if (executionJobVertex.getMaxParallelism() == operatorState.getMaxParallelism() || !executionJobVertex.isMaxParallelismConfigured()) { operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "in a non-compatible way after the checkpoint/savepoint.", checkpointMetadata, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) { if (operatorSubtaskState.hasState()) { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", checkpointPointer, operatorState.getOperatorID()); LOG.info("Skipping empty savepoint state for operator {}.", operatorState.getOperatorID());
// NOTE(review): truncated fragment with unbalanced braces and a string
// concatenation cut mid-expression; not independently compilable. Apparent intent:
// (1) reject restore when the stored max parallelism is below the configured task
// parallelism; (2) when max parallelisms differ, either adopt the stored value via
// setMaxParallelism or fail (the condition separating those branches is not visible
// here); (3) reject restore of non-partitioned (legacy) state when the parallelism
// changed. Confirm against the full original method before modifying.
if (operatorState.getMaxParallelism() < executionJobVertex.getParallelism()) { throw new IllegalStateException("The state for task " + executionJobVertex.getJobVertexId() + " can not be restored. The maximum parallelism (" + operatorState.getMaxParallelism() + ") of the restored state is lower than the configured parallelism (" + executionJobVertex.getParallelism() + "). Please reduce the parallelism of the task to be lower or equal to the maximum parallelism." if (operatorState.getMaxParallelism() != executionJobVertex.getMaxParallelism()) { executionJobVertex.getJobVertexId(), executionJobVertex.getMaxParallelism(), operatorState.getMaxParallelism()); executionJobVertex.setMaxParallelism(operatorState.getMaxParallelism()); } else { operatorState.getMaxParallelism() + ") with which the latest " + "checkpoint of the execution job vertex " + executionJobVertex + " has been taken and the current maximum parallelism (" + final int oldParallelism = operatorState.getParallelism(); final int newParallelism = executionJobVertex.getParallelism(); if (operatorState.hasNonPartitionedState() && (oldParallelism != newParallelism)) { throw new IllegalStateException("Cannot restore the latest checkpoint because " + "the operator " + executionJobVertex.getJobVertexId() + " has non-partitioned " +
private void reDistributePartitionableStates( List<OperatorState> operatorStates, int newParallelism, List<List<Collection<OperatorStateHandle>>> newManagedOperatorStates, List<List<Collection<OperatorStateHandle>>> newRawOperatorStates) { //collect the old partitionalbe state List<List<OperatorStateHandle>> oldManagedOperatorStates = new ArrayList<>(); List<List<OperatorStateHandle>> oldRawOperatorStates = new ArrayList<>(); collectPartionableStates(operatorStates, oldManagedOperatorStates, oldRawOperatorStates); //redistribute OperatorStateRepartitioner opStateRepartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE; for (int operatorIndex = 0; operatorIndex < operatorStates.size(); operatorIndex++) { int oldParallelism = operatorStates.get(operatorIndex).getParallelism(); newManagedOperatorStates.add(applyRepartitioner(opStateRepartitioner, oldManagedOperatorStates.get(operatorIndex), oldParallelism, newParallelism)); newRawOperatorStates.add(applyRepartitioner(opStateRepartitioner, oldRawOperatorStates.get(operatorIndex), oldParallelism, newParallelism)); } }
// NOTE(review): truncated fragment (unbalanced braces, format strings missing
// interior lines); not independently compilable. Apparent intent: for each operator
// state in the savepoint, resolve its ExecutionJobVertex (retrying after expanding
// to user-defined/legacy OperatorIDs), accept the state when the max parallelism
// matches or was not explicitly configured, otherwise fail the rollback -- and for
// unmatched operators either fail or, with --allowNonRestoredState, log and skip.
// Confirm against the full original method before modifying.
for (OperatorState operatorState : savepoint.getOperatorStates()) { ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); if (executionJobVertex.getMaxParallelism() == operatorState.getMaxParallelism() || !executionJobVertex.isMaxParallelismConfigured()) { operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to savepoint %s. " + "in a non-compatible way after the savepoint.", savepoint, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { String msg = String.format("Failed to rollback to savepoint %s. " + "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", savepointPath, operatorState.getOperatorID());
/**
 * Appends the legacy (non-partitioned) state handle for one subtask to the
 * output list, or null when no such state can be carried over.
 */
private void reAssignSubNonPartitionedStates(
        OperatorState operatorState,
        int subTaskIndex,
        int newParallelism,
        int oldParallelism,
        List<StreamStateHandle> subNonPartitionableState) {

    // Legacy state can only be reused when the parallelism is unchanged and the
    // subtask actually recorded state; in every other case null is recorded.
    StreamStateHandle reusableHandle = null;
    if (oldParallelism == newParallelism && operatorState.getState(subTaskIndex) != null) {
        reusableHandle = operatorState.getState(subTaskIndex).getLegacyOperatorState();
    }
    subNonPartitionableState.add(reusableHandle);
}
// NOTE(review): truncated fragment, apparently a duplicate of the parallelism
// validation seen elsewhere in this file; braces are unbalanced and a string
// concatenation is cut mid-expression, so it is not independently compilable.
// Apparent intent: reject restore when stored max parallelism is below the
// configured task parallelism, and otherwise reconcile differing max parallelism
// via setMaxParallelism or fail. Confirm against the full original method.
if (operatorState.getMaxParallelism() < executionJobVertex.getParallelism()) { throw new IllegalStateException("The state for task " + executionJobVertex.getJobVertexId() + " can not be restored. The maximum parallelism (" + operatorState.getMaxParallelism() + ") of the restored state is lower than the configured parallelism (" + executionJobVertex.getParallelism() + "). Please reduce the parallelism of the task to be lower or equal to the maximum parallelism." if (operatorState.getMaxParallelism() != executionJobVertex.getMaxParallelism()) { executionJobVertex.getJobVertexId(), executionJobVertex.getMaxParallelism(), operatorState.getMaxParallelism()); executionJobVertex.setMaxParallelism(operatorState.getMaxParallelism()); } else { operatorState.getMaxParallelism() + ") with which the latest " + "checkpoint of the execution job vertex " + executionJobVertex + " has been taken and the current maximum parallelism (" +
/**
 * Returns the total size of all contained operator states, summed in bytes
 * as reported by each {@code OperatorState#getStateSize()}.
 */
public long getStateSize() {
    long totalSize = 0L;
    for (OperatorState state : operatorStates.values()) {
        totalSize += state.getStateSize();
    }
    return totalSize;
}
// Fragment: constructor call is truncated (the max-parallelism argument and the
// closing parenthesis are outside this view). Creates an OperatorState for the
// given operator id with the vertex's current parallelism.
operatorState = new OperatorState( operatorID, executionJobVertex.getParallelism(),
@Override
public void dispose() throws Exception {
    // Discard every operator state first; the collections are only cleared
    // once all discards have succeeded.
    for (OperatorState state : operatorStates) {
        state.discardState();
    }
    operatorStates.clear();
    masterStates.clear();
}
/**
 * Best-effort lookup of the keyed-backend serialization proxy from the first
 * managed keyed-state handle of the given operator state. Returns
 * {@link Optional#empty()} when no handle exists or reading the proxy fails.
 */
public static Optional<KeyedBackendSerializationProxy<?>> getKeyedBackendSerializationProxy(OperatorState opState) {
    try {
        KeyedStateHandle handle = opState
            .getStates()
            .iterator()
            .next()
            .getManagedKeyedState()
            .iterator()
            .next();

        // Incremental handles carry the proxy in their meta-state handle.
        if (handle instanceof IncrementalKeyedStateHandle) {
            return Optional.of(
                getKeyedBackendSerializationProxy(((IncrementalKeyedStateHandle) handle).getMetaStateHandle()));
        }
        return Optional.of(getKeyedBackendSerializationProxy((StreamStateHandle) handle));
    } catch (Exception ignored) {
        // Deliberately best-effort: missing handles (NoSuchElementException),
        // cast failures, or read errors all collapse to "not available".
        return Optional.empty();
    }
}
// Fragment of a serializer method, apparently a duplicate of the chunk seen
// earlier in this view. Writes one OperatorState: operator id (lower/upper long),
// parallelism, max parallelism, a constant 1 (format marker or sub-structure
// count -- TODO confirm against the matching deserializer), then the subtask map
// size before iterating entries.
dos.writeLong(operatorState.getOperatorID().getLowerPart()); dos.writeLong(operatorState.getOperatorID().getUpperPart()); int parallelism = operatorState.getParallelism(); dos.writeInt(parallelism); dos.writeInt(operatorState.getMaxParallelism()); dos.writeInt(1); Map<Integer, OperatorSubtaskState> subtaskStateMap = operatorState.getSubtaskStates(); dos.writeInt(subtaskStateMap.size()); for (Map.Entry<Integer, OperatorSubtaskState> entry : subtaskStateMap.entrySet()) {
// Fragment (duplicate of an earlier chunk): creates an OperatorState keyed by the
// job vertex id, registers it in the surrounding operatorStates collection, and
// stores the given subtask state at subtaskIndex. All referenced variables are
// defined outside this view.
OperatorState taskState = new OperatorState(jobVertexId, parallelism, maxParallelism); operatorStates.add(taskState); taskState.putState(subtaskIndex, subtaskState);
// NOTE(review): heavily truncated fragment, a duplicate of the rollback chunk seen
// earlier -- unbalanced braces and format strings with missing interior lines make
// the control flow unrecoverable from this view. Apparent intent: resolve each
// OperatorState's ExecutionJobVertex (retrying with legacy/user-defined ids),
// accept on matching or unconfigured max parallelism, otherwise fail the rollback
// or, when non-restored state is allowed, log and skip. Do not edit without the
// full original method.
ExecutionJobVertex executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); executionJobVertex = operatorToJobVertexMapping.get(operatorState.getOperatorID()); expandedToLegacyIds = true; LOG.info("Could not find ExecutionJobVertex. Including user-defined OperatorIDs in search."); if (executionJobVertex.getMaxParallelism() == operatorState.getMaxParallelism() || !executionJobVertex.isMaxParallelismConfigured()) { operatorStates.put(operatorState.getOperatorID(), operatorState); } else { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "in a non-compatible way after the checkpoint/savepoint.", checkpointMetadata, operatorState.getOperatorID(), operatorState.getMaxParallelism(), executionJobVertex.getMaxParallelism()); LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID()); } else { for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) { if (operatorSubtaskState.hasState()) { String msg = String.format("Failed to rollback to checkpoint/savepoint %s. " + "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", checkpointPointer, operatorState.getOperatorID()); LOG.info("Skipping empty savepoint state for operator {}.", operatorState.getOperatorID());
// Fragment: the applyRepartitioner call is truncated (remaining arguments and the
// closing parenthesis are outside this view). Reads the old parallelism for the
// operator at operatorIndex and merges the repartitioned result into
// newManagedOperatorStates, keyed by operatorID.
int oldParallelism = oldOperatorStates.get(operatorIndex).getParallelism(); newManagedOperatorStates.putAll(applyRepartitioner( operatorID,