// Real scheduler instance wired to the reduce task's shuffle counters
// (the umbilical and reporter constructor slots are unused here).
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
    status, null, null, progress, context.getShuffledMapsCounter(),
    context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
// Record one failure against the host itself ...
scheduler.hostFailed(host1.getHostName());
// ... then one successful copy of `bytes` bytes with start/end timestamps
// 0 and 500000, and one copy failure with the 3rd flag true and the 4th
// (connect-exception) flag false — presumably "read error, not a connect
// failure"; confirm against copyFailed's declaration.
scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
scheduler.copyFailed(failedAttemptID, host1, true, false);
// Fragment: tail of a stubbing chain whose subject is on the preceding
// line outside this excerpt (presumably an mm.reserve(...) stub — confirm);
// it resolves to the on-disk map output mock.
.thenReturn(odmo);
// The merge manager never blocks this test waiting for memory.
doNothing().when(mm).waitForResource();
// The scheduler always hands out the canned test host.
when(ss.getHost()).thenReturn(host);
@Before @SuppressWarnings("unchecked") // mocked generics public void setup() { LOG.info(">>>> " + name.getMethodName()); job = new JobConf(); job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false); jobWithRetry = new JobConf(); jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true); id = TaskAttemptID.forName("attempt_0_1_r_1_1"); ss = mock(ShuffleSchedulerImpl.class); mm = mock(MergeManagerImpl.class); r = mock(Reporter.class); metrics = mock(ShuffleClientMetrics.class); except = mock(ExceptionReporter.class); key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0}); connection = mock(HttpURLConnection.class); allErrs = mock(Counters.Counter.class); when(r.getCounter(anyString(), anyString())).thenReturn(allErrs); ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1); maps.add(map1ID); maps.add(map2ID); when(ss.getMapsForHost(host)).thenReturn(maps); }
/**
 * A socket timeout while connecting must count one I/O error, mark both
 * pending maps as failed (non-read, non-connect-exception), and return
 * both map outputs to the scheduler for a later retry.
 */
@Test(timeout=30000)
public void testCopyFromHostConnectionTimeout() throws Exception {
  // Simulate a connect-phase timeout on the shuffle connection.
  when(connection.getInputStream()).thenThrow(
      new SocketTimeoutException("This is a fake timeout :)"));

  Fetcher<Text, Text> fetcher = new FakeFetcher<Text, Text>(
      job, id, ss, mm, r, metrics, except, key, connection);
  fetcher.copyFromHost(host);

  // The request must still carry the secure-shuffle URL hash header.
  verify(connection).addRequestProperty(
      SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash);

  // Exactly one error is counted, and both maps are reported failed.
  verify(allErrs).increment(1);
  verify(ss).copyFailed(map1ID, host, false, false);
  verify(ss).copyFailed(map2ID, host, false, false);

  // Both outputs go back to the scheduler so they can be fetched again.
  verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
  verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
/**
 * When the shuffle server rejects the connection with the "too many
 * requests" status, the host must not be treated as failed: no host or
 * fetch failures are recorded, the host is merely penalized, and both
 * map outputs are put back for a later attempt.
 */
@Test
public void testCopyFromHostConnectionRejected() throws Exception {
  when(connection.getResponseCode())
      .thenReturn(Fetcher.TOO_MANY_REQ_STATUS_CODE);

  Fetcher<Text, Text> fetcher = new FakeFetcher<>(job, id, ss, mm, r,
      metrics, except, key, connection);
  fetcher.copyFromHost(host);

  // FIX: JUnit's assertEquals(message, expected, actual) takes the
  // expected value FIRST — the original call had the arguments swapped,
  // which produces a misleading message on failure.
  Assert.assertEquals("No host failure is expected.",
      0, ss.hostFailureCount(host.getHostName()));
  Assert.assertEquals("No fetch failure is expected.",
      0, ss.fetchFailureCount(map1ID));
  Assert.assertEquals("No fetch failure is expected.",
      0, ss.fetchFailureCount(map2ID));

  // The host is penalized (backed off) rather than failed, and both
  // outputs are handed back to the scheduler.
  verify(ss).penalize(eq(host), anyLong());
  verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID));
  verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID));
}
// Raw-typed scheduler driven directly through TIP failures (the counter
// slots are unused in this scenario).
ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status,
    reduceId, null, progress, null, null, null);
// Failing one TIP must not complete the shuffle yet ...
scheduler.tipFailed(taskId1);
Assert.assertFalse(scheduler.waitUntilDone(1));
// ... failing the other TIP finishes it and drives progress to 1.0
// (presumably the fixture has exactly these two map tasks — confirm).
scheduler.tipFailed(taskId0);
Assert.assertEquals("Progress should be 1.0", 1.0f,
    progress.getProgress(), 0.0f);
Assert.assertTrue(scheduler.waitUntilDone(1));
// Snapshot the attempts currently pending for this host.
List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
// Simulate the whole host going bad, then fail each listed attempt
// (3rd flag true, connect-exception flag false) and immediately put it
// back so it remains fetchable.
scheduler.hostFailed(host.getHostName());
for(TaskAttemptID left: failedTasks) {
  scheduler.copyFailed(left, host, true, false);
  scheduler.putBackKnownMapOutput(host, left);
// NOTE(review): fragment — the loop and enclosing scope close outside
// this excerpt.
/**
 * Routes a map-task completion event to the matching scheduler action:
 * a successful map becomes a known fetchable output, a failed/killed/
 * obsolete attempt has its output dropped, and a failed TIP retires the
 * whole task. Other statuses (if any) are silently ignored.
 */
@Override
public void resolve(TaskCompletionEvent event) {
  switch (event.getTaskStatus()) {
  case SUCCEEDED:
    // Register where this map's output can be fetched from.
    URI u = getBaseURI(reduceId, event.getTaskTrackerHttp());
    addKnownMapOutput(u.getHost() + ":" + u.getPort(),
        u.toString(),
        event.getTaskAttemptId());
    // Track the slowest successful map runtime seen so far.
    maxMapRuntime = Math.max(maxMapRuntime, event.getTaskRunTime());
    break;
  case FAILED:
  case KILLED:
  case OBSOLETE:
    // The attempt's output can no longer be trusted; drop it.
    obsoleteMapOutput(event.getTaskAttemptId());
    LOG.info("Ignoring obsolete output of " + event.getTaskStatus()
        + " map-task: '" + event.getTaskAttemptId() + "'");
    break;
  case TIPFAILED:
    // The whole task failed permanently; stop expecting its output.
    tipFailed(event.getTaskAttemptId().getTaskID());
    LOG.info("Ignoring output of failed map TIP: '"
        + event.getTaskAttemptId() + "'");
    break;
  }
}
/**
 * Opens the shuffle HTTP URL for {@code host} and returns a stream over
 * the response body, or {@code null} when the fetcher was stopped
 * mid-connect or the connection could not be established (callers must
 * handle a null return).
 */
private DataInputStream openShuffleUrl(MapHost host,
    Set<TaskAttemptID> remaining, URL url) {
  DataInputStream input = null;
  try {
    setupConnectionsWithRetry(url);
    if (stopped) {
      // Shutdown raced with the connect: give the maps back and close.
      abortConnect(host, remaining);
    } else {
      input = new DataInputStream(connection.getInputStream());
    }
  } catch (IOException ie) {
    // A plain ConnectException is flagged distinctly to copyFailed.
    boolean connectExcpt = ie instanceof ConnectException;
    ioErrs.increment(1);
    LOG.warn("Failed to connect to " + host + " with " + remaining.size()
        + " map outputs", ie);
    // If connect did not succeed, just mark all the maps as failed,
    // indirectly penalizing the host
    scheduler.hostFailed(host.getHostName());
    for(TaskAttemptID left: remaining) {
      scheduler.copyFailed(left, host, false, connectExcpt);
    }
    // Add back all the remaining maps, WITHOUT marking them as failed
    for(TaskAttemptID left: remaining) {
      scheduler.putBackKnownMapOutput(host, left);
    }
  }
  return input;
}
// Local (reducer-side) I/O trouble: report it so the reducer can fail
// itself instead of blaming the remote host, and fetch nothing.
scheduler.reportLocalError(ioe);
return EMPTY_ATTEMPT_ID_ARRAY;
// (disjoint fragment) A completed copy is credited to the scheduler with
// its compressed byte count and start/end timestamps.
scheduler.copySucceeded(mapId, host, compressedLength, startTime, endTime,
    mapOutput);
/**
 * Fetcher thread main loop: repeatedly takes a host from the scheduler
 * and copies its map outputs until stopped or interrupted. Any failure
 * other than an interrupt is forwarded to the exception reporter rather
 * than silently killing the thread.
 */
public void run() {
  try {
    while (!stopped && !Thread.currentThread().isInterrupted()) {
      MapHost host = null;
      try {
        // If merge is on, block
        merger.waitForResource();
        // Get a host to shuffle from
        host = scheduler.getHost();
        metrics.threadBusy();
        // Shuffle
        copyFromHost(host);
      } finally {
        // Always release the host — even when the copy threw — so the
        // scheduler can hand it to another fetcher.
        if (host != null) {
          scheduler.freeHost(host);
          metrics.threadFree();
        }
      }
    }
  } catch (InterruptedException ie) {
    // Interrupt is the normal shutdown signal; exit quietly.
    return;
  } catch (Throwable t) {
    exceptionReporter.reportException(t);
  }
}
// Block until every map output has been fetched, waking every
// PROGRESS_FREQUENCY ms to tick progress so the task is not considered
// stalled by the framework.
while (!scheduler.waitUntilDone(PROGRESS_FREQUENCY)) {
  reporter.progress();
// NOTE(review): fragment — the loop body/close lies outside this excerpt.
scheduler.close();
// Scheduler under test wired to real shuffle counters.
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job,
    status, null, null, progress, context.getShuffledMapsCounter(),
    context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
// The first successful copy is committed ...
scheduler.copySucceeded(succeedAttempt1ID, host1, bytes, 0, 1, output1);
verify(output1).commit();
// ... the second is aborted (presumably a duplicate attempt of a task
// whose output was already accepted — confirm the fixture's attempt IDs)
scheduler.copySucceeded(succeedAttempt2ID, host1, bytes, 0, 1, output2);
verify(output2).abort();
// ... and the third commits normally.
scheduler.copySucceeded(succeedAttempt3ID, host1, bytes, 0, 1, output3);
verify(output3).commit();
/**
 * Opens the shuffle HTTP URL for {@code host} and returns a stream over
 * the response body, or {@code null} on stop or connect failure.
 *
 * NOTE(review): unlike the sibling variant of this method in this file,
 * a connect failure here marks the remaining maps failed but does NOT
 * put them back to the scheduler — presumably the caller re-adds them
 * (e.g. in a finally block); confirm, otherwise these map outputs would
 * never be re-fetched.
 */
private DataInputStream openShuffleUrl(MapHost host,
    Set<TaskAttemptID> remaining, URL url) {
  DataInputStream input = null;
  try {
    setupConnectionsWithRetry(host, remaining, url);
    if (stopped) {
      // Shutdown raced with the connect: give the maps back and close.
      abortConnect(host, remaining);
    } else {
      input = new DataInputStream(connection.getInputStream());
    }
  } catch (IOException ie) {
    // A plain ConnectException is flagged distinctly to copyFailed.
    boolean connectExcpt = ie instanceof ConnectException;
    ioErrs.increment(1);
    LOG.warn("Failed to connect to " + host + " with " + remaining.size()
        + " map outputs", ie);
    // If connect did not succeed, just mark all the maps as failed,
    // indirectly penalizing the host
    scheduler.hostFailed(host.getHostName());
    for(TaskAttemptID left: remaining) {
      scheduler.copyFailed(left, host, false, connectExcpt);
    }
  }
  return input;
}
/**
 * Cancels an in-progress connection attempt: every map output still
 * planned for {@code host} is returned to the scheduler so it can be
 * fetched again, then the HTTP connection is torn down.
 */
private void abortConnect(MapHost host, Set<TaskAttemptID> remaining) {
  // Hand each pending attempt back before dropping the connection.
  remaining.forEach(attempt -> scheduler.putBackKnownMapOutput(host, attempt));
  closeConnection();
}
/**
 * Wires this shuffle plugin to its runtime context and constructs the
 * two engines it drives: the scheduler that decides which map outputs to
 * fetch and the merge manager that absorbs the fetched data.
 */
@Override
public void init(ShuffleConsumerPlugin.Context context) {
  this.context = context;

  // Identity and configuration of the owning reduce task.
  this.reduceId = context.getReduceId();
  this.jobConf = context.getJobConf();
  this.reduceTask = context.getReduceTask();

  // Progress/status plumbing back to the framework.
  this.umbilical = context.getUmbilical();
  this.reporter = context.getReporter();
  this.copyPhase = context.getCopyPhase();
  this.taskStatus = context.getStatus();

  this.localMapFiles = context.getLocalMapFiles();
  this.metrics = new ShuffleClientMetrics(reduceId, jobConf);

  // The scheduler reports fatal exceptions through this plugin instance.
  scheduler = new ShuffleSchedulerImpl<K, V>(jobConf, taskStatus, reduceId,
      this, copyPhase, context.getShuffledMapsCounter(),
      context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
  merger = createMergeManager(context);
}
// Credit the local fetch to the scheduler: compressedLength bytes, with
// start/end timestamps passed as 0.
scheduler.copySucceeded(mapTaskId, LOCALHOST, compressedLength, 0, 0,
    mapOutput);
return true; // successful fetch.
/**
 * An unchecked exception thrown while shuffling a map output must be
 * reported as a single read-error copy failure for that map, after an
 * otherwise fully valid shuffle response.
 */
@SuppressWarnings("unchecked")
@Test(timeout=10000)
public void testCopyFromHostOnAnyException() throws Exception {
  InMemoryMapOutput<Text, Text> inMemOutput = mock(InMemoryMapOutput.class);
  Fetcher<Text, Text> fetcher = new FakeFetcher<Text, Text>(
      job, id, ss, mm, r, metrics, except, key, connection);

  // Serialize one valid shuffle header for map1 into an input stream.
  ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
  ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
  header.write(new DataOutputStream(headerBytes));
  ByteArrayInputStream stream =
      new ByteArrayInputStream(headerBytes.toByteArray());

  // Make the connection look like a completely valid shuffle response.
  String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
  when(connection.getResponseCode()).thenReturn(200);
  when(connection.getHeaderField(
      SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
  when(connection.getInputStream()).thenReturn(stream);

  // The merge manager grants the reservation, but shuffling into it
  // blows up with an unchecked exception.
  when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
      .thenReturn(inMemOutput);
  doThrow(new ArrayIndexOutOfBoundsException()).when(inMemOutput)
      .shuffle(any(MapHost.class), any(InputStream.class), anyLong(),
          anyLong(), any(ShuffleClientMetrics.class), any(Reporter.class));

  fetcher.copyFromHost(host);

  // The request carried the URL hash, and the failure is reported as a
  // read error exactly once for map1.
  verify(connection)
      .addRequestProperty(SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash);
  verify(ss, times(1)).copyFailed(map1ID, host, true, false);
}
/**
 * When reserving merge space fails with a disk error, the failure must be
 * reported as a local error — not as a fetch failure against the remote
 * host that served a perfectly valid response.
 */
@Test
public void testReduceOutOfDiskSpace() throws Throwable {
  LOG.info("testReduceOutOfDiskSpace");

  Fetcher<Text, Text> fetcher = new FakeFetcher<Text, Text>(
      job, id, ss, mm, r, metrics, except, key, connection);

  // A well-formed shuffle response for map1 ...
  ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
  ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
  header.write(new DataOutputStream(headerBytes));
  ByteArrayInputStream stream =
      new ByteArrayInputStream(headerBytes.toByteArray());

  String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
  when(connection.getResponseCode()).thenReturn(200);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
  when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))
      .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
  when(connection.getHeaderField(
      SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash);
  when(connection.getInputStream()).thenReturn(stream);

  // ... but the reducer cannot reserve merge space: disk is full.
  when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
      .thenThrow(new DiskErrorException("No disk space available"));

  fetcher.copyFromHost(host);

  // Local disk trouble surfaces as a local error report.
  verify(ss).reportLocalError(any(IOException.class));
}
// Snapshot the attempts currently pending for this host.
List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
// Fail the host, then fail each listed attempt (3rd flag true,
// connect-exception flag false) and immediately put it back so it stays
// fetchable.
scheduler.hostFailed(host.getHostName());
for(TaskAttemptID left: failedTasks) {
  scheduler.copyFailed(left, host, true, false);
  scheduler.putBackKnownMapOutput(host, left);
// NOTE(review): fragment — the loop and enclosing scope close outside
// this excerpt.