private static String[] extractHosts(FileSplit split, boolean isInMemory) throws IOException { SplitLocationInfo[] locInfo = split.getLocationInfo(); if (locInfo == null) return new String[0]; List<String> hosts = null; // TODO: most of the time, there's no in-memory. Use an array? for (int i = 0; i < locInfo.length; i++) { if (locInfo[i].isInMemory() != isInMemory) continue; if (hosts == null) { hosts = new ArrayList<>(); } hosts.add(locInfo[i].getLocation()); } if (hosts == null) return new String[0]; return hosts.toArray(new String[hosts.size()]); }
/**
 * Checks the location info reported by the first split computed for {@code test:///a1/a2}.
 *
 * <p>The split must expose exactly two locations and two location-info entries. The entry
 * picked for "localhost" must be both on disk and in memory; the entry picked for
 * "otherhost" must be on disk but not in memory.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration configuration = getConfiguration();
  configuration.set(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(configuration);

  TextInputFormat inputFormat = new TextInputFormat();
  InputSplit firstSplit = inputFormat.getSplits(job).get(0);

  String[] hosts = firstSplit.getLocations();
  Assert.assertEquals(2, hosts.length);
  SplitLocationInfo[] infos = firstSplit.getLocationInfo();
  Assert.assertEquals(2, infos.length);

  // Each entry is chosen from the name of the first location only: index 0 when the
  // first location carries the expected host name, index 1 otherwise.
  int localhostIndex = hosts[0].equals("localhost") ? 0 : 1;
  int otherhostIndex = hosts[0].equals("otherhost") ? 0 : 1;
  SplitLocationInfo localhost = infos[localhostIndex];
  SplitLocationInfo otherhost = infos[otherhostIndex];

  Assert.assertTrue(localhost.isOnDisk());
  Assert.assertTrue(localhost.isInMemory());
  Assert.assertTrue(otherhost.isOnDisk());
  Assert.assertFalse(otherhost.isInMemory());
}
/**
 * Checks the location info reported by the first split computed for {@code test:///a1/a2}
 * through a {@code JobConf}-configured input format.
 *
 * <p>The split must expose exactly two locations and two location-info entries. The entry
 * picked for "localhost" must be both on disk and in memory; the entry picked for
 * "otherhost" must be on disk but not in memory.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration configuration = getConfiguration();
  configuration.set(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf job = new JobConf(configuration);

  TextInputFormat inputFormat = new TextInputFormat();
  inputFormat.configure(job);
  FileSplit firstSplit = ((FileSplit[]) inputFormat.getSplits(job, 1))[0];

  String[] hosts = firstSplit.getLocations();
  Assert.assertEquals(2, hosts.length);
  SplitLocationInfo[] infos = firstSplit.getLocationInfo();
  Assert.assertEquals(2, infos.length);

  // Each entry is chosen from the name of the first location only: index 0 when the
  // first location carries the expected host name, index 1 otherwise.
  int localhostIndex = hosts[0].equals("localhost") ? 0 : 1;
  int otherhostIndex = hosts[0].equals("otherhost") ? 0 : 1;
  SplitLocationInfo localhost = infos[localhostIndex];
  SplitLocationInfo otherhost = infos[otherhostIndex];

  Assert.assertTrue(localhost.isOnDisk());
  Assert.assertTrue(localhost.isInMemory());
  Assert.assertTrue(otherhost.isOnDisk());
  Assert.assertFalse(otherhost.isInMemory());
}
private static String[] extractHosts(FileSplit split, boolean isInMemory) throws IOException { SplitLocationInfo[] locInfo = split.getLocationInfo(); if (locInfo == null) return new String[0]; List<String> hosts = null; // TODO: most of the time, there's no in-memory. Use an array? for (int i = 0; i < locInfo.length; i++) { if (locInfo[i].isInMemory() != isInMemory) continue; if (hosts == null) { hosts = new ArrayList<>(); } hosts.add(locInfo[i].getLocation()); } if (hosts == null) return new String[0]; return hosts.toArray(new String[hosts.size()]); }