@VisibleForTesting static void setAbandonedJobsToFailedState(IndexedStore<JobId, JobResult> jobStore) { final Set<Entry<JobId, JobResult>> apparentlyAbandoned = FluentIterable.from(jobStore.find(new FindByCondition() .setCondition(JobsServiceUtil.getApparentlyAbandonedQuery()))) .toSet(); for (final Entry<JobId, JobResult> entry : apparentlyAbandoned) { final List<JobAttempt> attempts = entry.getValue().getAttemptsList(); final int numAttempts = attempts.size(); if (numAttempts > 0) { final JobAttempt lastAttempt = attempts.get(numAttempts - 1); // .. check again; the index may not be updated, but the store maybe if (JobsServiceUtil.isNonFinalState(lastAttempt.getState())) { final JobAttempt newLastAttempt = lastAttempt.setState(JobState.FAILED) .setInfo(lastAttempt.getInfo() .setFinishTime(System.currentTimeMillis()) .setFailureInfo("Query failed as Dremio was restarted. Details and profile information " + "for this job may be missing.")); attempts.remove(numAttempts - 1); attempts.add(newLastAttempt); jobStore.put(entry.getKey(), entry.getValue()); } } } }
/**
 * Creates a listener for the given attempt and job.
 *
 * <p>The superclass is initialised with {@code NoOpJobStatusListener.INSTANCE} as its status
 * listener. The external id is derived from the job's id via
 * {@code JobsServiceUtil.getJobIdAsExternalId}.
 *
 * @param attemptId  attempt passed through to the superclass
 * @param connection response handler stored on this listener
 * @param job        job passed to the superclass and used to derive the external id
 * @param allocator  buffer allocator passed through to the superclass
 */
ExternalJobResultListener(
    AttemptId attemptId,
    UserResponseHandler connection,
    Job job,
    BufferAllocator allocator) {
  super(attemptId, job, allocator, NoOpJobStatusListener.INSTANCE);
  this.externalId = JobsServiceUtil.getJobIdAsExternalId(job.getJobId());
  this.connection = connection;
}
private void addAttemptToJob(Job job, QueryState state, QueryProfile profile) throws IOException { jobAttempt.setState(JobsServiceUtil.queryStatusToJobStatus(state)); final JobInfo jobInfo = jobAttempt.getInfo(); final QueryProfileParser profileParser = new QueryProfileParser(job.getJobId(), profile); jobInfo.setDetailedFailureInfo(JobsServiceUtil.toFailureInfo(profile.getVerboseError()));
@Override public QueryProfile getProfile(JobId jobId, int attempt) throws JobNotFoundException { Job job = getJob(jobId); final AttemptId attemptId = new AttemptId(JobsServiceUtil.getJobIdAsExternalId(jobId), attempt); if(jobIsDone(job.getJobAttempt())){ return profileStore.get(attemptId); } // Check if the profile for given attempt already exists. Even if the job is not done, it is possible that // profile exists for previous attempts final QueryProfile queryProfile = profileStore.get(attemptId); if (queryProfile != null) { return queryProfile; } final NodeEndpoint endpoint = job.getJobAttempt().getEndpoint(); if(endpoint.equals(identity)){ final ForemenTool tool = this.foremenTool.get(); Optional<QueryProfile> profile = tool.getProfile(attemptId.getExternalId()); return profile.orNull(); } try{ CoordTunnel tunnel = coordTunnelCreator.get().getTunnel(JobsServiceUtil.toPB(endpoint)); return tunnel.requestQueryProfile(attemptId.getExternalId()).checkedGet(15, TimeUnit.SECONDS); }catch(TimeoutException | RpcException | RuntimeException e){ logger.info("Unable to retrieve remote query profile for external id: {}", ExternalIdHelper.toString(attemptId.getExternalId()), e); return null; } }
/**
 * Checks that {@code JobsServiceUtil.toFailureInfo} converts the verbose message of a parse-error
 * {@code UserException} into a {@code JobFailureInfo} of type {@code PARSE} containing exactly one
 * error, and that this error carries the original message and the start/end line and column of the
 * parser position.
 */
@Test
public void convertExceptionToFailureInfo() {
  // Fake logger to not pollute logs
  org.slf4j.Logger logger = mock(org.slf4j.Logger.class);

  // Parse exception positioned at line 7, column 42 through line 13, column 57.
  SqlParseException parseException = new SqlParseException("test message", new SqlParserPos(7, 42, 13, 57), null, null, null);

  UserException userException = SqlExceptionHelper.parseError("SELECT FOO", parseException)
      .build(logger);
  String verboseError = userException.getVerboseMessage(false);

  // Conversion under test.
  JobFailureInfo jobFailureInfo = JobsServiceUtil.toFailureInfo(verboseError);
  assertEquals(JobFailureInfo.Type.PARSE, jobFailureInfo.getType());

  assertEquals(1, jobFailureInfo.getErrorsList().size());

  // The single error must reproduce the message and position given to the parse exception.
  JobFailureInfo.Error error = jobFailureInfo.getErrorsList().get(0);
  assertEquals("test message", error.getMessage());
  assertEquals(7, (int) error.getStartLine());
  assertEquals(42, (int) error.getStartColumn());
  assertEquals(13, (int) error.getEndLine());
  assertEquals(57, (int) error.getEndColumn());
}
// Closes an enclosing scope whose opening lies outside this excerpt.
}
/**
 * Registers a new job for an externally submitted query and returns the listener observing it.
 *
 * <p>A {@code JobInfo} is created with {@code "UNKNOWN"} placeholders, the session user, a single
 * {@code "UNKNOWN"} dataset path component and the current time as start time. The first attempt is
 * created in the {@code ENQUEUED} state on this node's endpoint. The job is stored, and a
 * {@code QueryListener} for it is added to the running jobs under the job id.
 *
 * @param id      external id from which the job id is derived
 * @param session session supplying the client infos and the user name
 * @param handler response handler given to the listener
 * @return the listener created for the job
 */
@Override
public QueryObserver createNewQueryObserver(ExternalId id, UserSession session, UserResponseHandler handler) {
  final JobId jobId = JobsServiceUtil.getExternalIdAsJobId(id);
  final QueryType queryType = QueryTypeUtils.getQueryType(session.getClientInfos());

  final JobInfo initialInfo = new JobInfo(jobId, "UNKNOWN", "UNKNOWN", queryType)
      .setUser(session.getCredentials().getUserName())
      .setDatasetPathList(Arrays.asList("UNKNOWN"))
      .setStartTime(System.currentTimeMillis());

  final JobAttempt firstAttempt = new JobAttempt()
      .setInfo(initialInfo)
      .setEndpoint(identity)
      .setDetails(new JobDetails())
      .setState(ENQUEUED);

  final Job job = new Job(jobId, firstAttempt);
  storeJob(job);

  // Track the listener so the running job can be found by its id.
  final QueryListener queryListener = new QueryListener(job, handler);
  runningJobs.put(jobId, queryListener);
  return queryListener;
}
logger.info("Starting JobsService"); this.identity = JobsServiceUtil.toStuff(contextProvider.get().getEndpoint()); this.store = kvStoreProvider.get().getStore(JobsStoreCreator.class); this.profileStore = kvStoreProvider.get().getStore(JobsProfileCreator.class);
@Override public void cancel(String username, JobId jobId, String reason) throws JobException { final ForemenTool tool = this.foremenTool.get(); final ExternalId id = ExternalIdHelper.toExternal(QueryIdHelper.getQueryIdFromString(jobId.getId())); if(tool.cancel(id, reason)){ logger.debug("Job cancellation requested on current node."); return; } // now remote... final Job job = getJob(jobId); NodeEndpoint endpoint = job.getJobAttempt().getEndpoint(); if(endpoint.equals(identity)){ throw new JobWarningException(jobId, "Unable to cancel job started on current node. It may have completed before cancellation was requested."); } try{ final CoordTunnel tunnel = coordTunnelCreator.get().getTunnel(JobsServiceUtil.toPB(endpoint)); Ack ack = tunnel.requestCancelQuery(id, reason).checkedGet(15, TimeUnit.SECONDS); if(ack.getOk()){ logger.debug("Job cancellation requested on {}.", endpoint.getAddress()); return; } else { throw new JobWarningException(jobId, String.format("Unable to cancel job started on %s. It may have completed before cancellation was requested.", endpoint.getAddress())); } }catch(TimeoutException | RpcException | RuntimeException e){ logger.info("Unable to cancel remote job for external id: {}", ExternalIdHelper.toString(id), e); throw new JobWarningException(jobId, String.format("Unable to cancel job on node %s.", endpoint.getAddress())); } }
final List<String> partitions = JobsServiceUtil.getPartitions(planningSet.get()); jobInfo.setPartitionsList(partitions);
private Job startJob(JobRequest jobRequest, JobStatusListener statusListener) { final JobId jobId = JobsServiceUtil.getExternalIdAsJobId(externalId); final String inSpace = !jobRequest.getDatasetPathComponents().isEmpty() &&
/**
 * Creates a listener that is bound to a response handler.
 *
 * <p>The listener is flagged as not internal and has no status listener. The external id is
 * derived from the job's id, and {@code setupJobData()} is invoked once all fields are set.
 *
 * @param job        job this listener belongs to
 * @param connection response handler; must not be {@code null}
 */
private QueryListener(Job job, UserResponseHandler connection) {
  this.job = job;
  // Fixed values for this variant of the listener.
  isInternal = false;
  this.statusListener = null;

  externalId = JobsServiceUtil.getJobIdAsExternalId(job.getJobId());
  this.responseHandler = Preconditions.checkNotNull(connection, "handler cannot be null");

  setupJobData();
}
/**
 * Creates a listener that is bound to a job status listener.
 *
 * <p>The listener is flagged as internal and has no response handler. The external id is derived
 * from the job's id, and {@code setupJobData()} is invoked once all fields are set.
 *
 * @param job            job this listener belongs to
 * @param statusListener status listener; must not be {@code null}
 */
private QueryListener(Job job, JobStatusListener statusListener) {
  this.job = job;
  // Fixed values for this variant of the listener.
  isInternal = true;
  this.responseHandler = null;

  externalId = JobsServiceUtil.getJobIdAsExternalId(job.getJobId());
  this.statusListener = Preconditions.checkNotNull(statusListener, "statusListener cannot be null");

  setupJobData();
}