/**
 * Splits that are not file splits must get back exactly the locations they
 * report themselves, regardless of the executor locations the provider was
 * built with.
 */
@Test (timeout = 5000)
public void testNonFileSplits() throws IOException {
  HostAffinitySplitLocationProvider provider =
      new HostAffinitySplitLocationProvider(executorLocations);

  // Hosts each mock split advertises; the provider is expected to echo them back.
  String[] firstHosts = new String[] {locations.get(0), locations.get(1)};
  String[] secondHosts = new String[] {locations.get(2), locations.get(3)};

  // The mocks receive their own copies so the expected arrays stay independent.
  InputSplit firstSplit = createMockInputSplit(firstHosts.clone());
  InputSplit secondSplit = createMockInputSplit(secondHosts.clone());

  assertArrayEquals(firstHosts, provider.getLocations(firstSplit));
  assertArrayEquals(secondHosts, provider.getLocations(secondSplit));
}
/**
 * Chooses the location(s) for a split.
 *
 * <p>A split that is not a {@code FileSplit} keeps the locations it reports
 * itself. For a {@code FileSplit}, a single entry of {@code locations} is
 * selected via {@link #determineLocation} from the split's path and start
 * offset.
 *
 * @param split the split to place
 * @return the split's own locations for non-file splits; otherwise a
 *         one-element array holding the selected location, or {@code null}
 *         when the selected entry of {@code locations} is {@code null}
 * @throws IOException if the split fails to report its own locations
 */
@Override
public String[] getLocations(InputSplit split) throws IOException {
  if (split instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) split;
    // Human-readable identification of the split, used only for debug logging downstream.
    String description = "Split at " + fileSplit.getPath()
        + " with offset= " + fileSplit.getStart()
        + ", length=" + fileSplit.getLength();
    int chosenIndex = determineLocation(
        locations, fileSplit.getPath().toString(), fileSplit.getStart(), description);
    String host = locations.get(chosenIndex);
    if (host == null) {
      return null;
    }
    return new String[] { host };
  }

  // Not a file split: fall back to whatever the split itself advertises.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Split: " + split + " is not a FileSplit. Using default locations");
  }
  return split.getLocations();
}
@VisibleForTesting public static int determineLocation( List<String> locations, String path, long start, String desc) { byte[] bytes = getHashInputForSplit(path, start); long hash1 = hash1(bytes); int index = Hashing.consistentHash(hash1, locations.size()); String location = locations.get(index); if (LOG.isDebugEnabled()) { LOG.debug(desc + " mapped to index=" + index + ", location=" + location); } int iter = 1; long hash2 = 0; // Since our probing method is totally bogus, give up after some time. while (location == null && iter < locations.size() * 2) { if (iter == 1) { hash2 = hash2(bytes); } // Note that this is not real double hashing since we have consistent hash on top. index = Hashing.consistentHash(hash1 + iter * hash2, locations.size()); location = locations.get(index); if (LOG.isDebugEnabled()) { LOG.debug(desc + " remapped to index=" + index + ", location=" + location); } ++iter; } return index; }
splitLocationProvider = new HostAffinitySplitLocationProvider(locations); } else { splitLocationProvider = new SplitLocationProvider() {
splitLocationProvider = new HostAffinitySplitLocationProvider(locations); } else { splitLocationProvider = new SplitLocationProvider() {
@VisibleForTesting public static int determineLocation( List<String> locations, String path, long start, String desc) { byte[] bytes = getHashInputForSplit(path, start); long hash1 = hash1(bytes); int index = Hashing.consistentHash(hash1, locations.size()); String location = locations.get(index); if (LOG.isDebugEnabled()) { LOG.debug(desc + " mapped to index=" + index + ", location=" + location); } int iter = 1; long hash2 = 0; // Since our probing method is totally bogus, give up after some time. while (location == null && iter < locations.size() * 2) { if (iter == 1) { hash2 = hash2(bytes); } // Note that this is not real double hashing since we have consistent hash on top. index = Hashing.consistentHash(hash1 + iter * hash2, locations.size()); location = locations.get(index); if (LOG.isDebugEnabled()) { LOG.debug(desc + " remapped to index=" + index + ", location=" + location); } ++iter; } return index; }
/**
 * Each file split must be assigned exactly one location, and that location
 * must come from the executor locations rather than from the hosts the split
 * itself advertises.
 */
@Test (timeout = 5000)
public void testOrcSplitsBasic() throws IOException {
  HostAffinitySplitLocationProvider provider =
      new HostAffinitySplitLocationProvider(executorLocations);

  InputSplit[] orcSplits = {
      createMockFileSplit(true, "path1", 0, 1000,
          new String[] {locations.get(0), locations.get(1)}),
      createMockFileSplit(true, "path2", 0, 2000,
          new String[] {locations.get(2), locations.get(3)}),
      createMockFileSplit(true, "path3", 1000, 2000,
          new String[] {locations.get(0), locations.get(3)}),
  };

  // Resolve every split first, then verify, keeping the provider call order unchanged.
  String[][] assigned = new String[orcSplits.length][];
  for (int i = 0; i < orcSplits.length; ++i) {
    assigned[i] = provider.getLocations(orcSplits[i]);
  }

  for (String[] hosts : assigned) {
    assertEquals(1, hosts.length);
    assertFalse(locationsSet.contains(hosts[0]));
    assertTrue(executorLocationsSet.contains(hosts[0]));
  }
}
/**
 * Chooses the location(s) for a split.
 *
 * <p>A split that is not a {@code FileSplit} keeps the locations it reports
 * itself. For a {@code FileSplit}, a single entry of {@code locations} is
 * selected via {@link #determineLocation} from the split's path and start
 * offset.
 *
 * @param split the split to place
 * @return the split's own locations for non-file splits; otherwise a
 *         one-element array holding the selected location, or {@code null}
 *         when the selected entry of {@code locations} is {@code null}
 * @throws IOException if the split fails to report its own locations
 */
@Override
public String[] getLocations(InputSplit split) throws IOException {
  if (split instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) split;
    // Human-readable identification of the split, used only for debug logging downstream.
    String description = "Split at " + fileSplit.getPath()
        + " with offset= " + fileSplit.getStart()
        + ", length=" + fileSplit.getLength();
    int chosenIndex = determineLocation(
        locations, fileSplit.getPath().toString(), fileSplit.getStart(), description);
    String host = locations.get(chosenIndex);
    if (host == null) {
      return null;
    }
    return new String[] { host };
  }

  // Not a file split: fall back to whatever the split itself advertises.
  if (isDebugEnabled) {
    LOG.debug("Split: " + split + " is not a FileSplit. Using default locations");
  }
  return split.getLocations();
}
for (int locs = MIN_LOC_COUNT; locs <= locations.size(); ++locs) { List<String> partLoc = locations.subList(0, locs); HostAffinitySplitLocationProvider lp = new HostAffinitySplitLocationProvider(partLoc); int moved = 0, newLoc = 0; String newNode = partLoc.get(locs - 1); for (int splitIx = 0; splitIx < splits.length; ++splitIx) { String[] splitLocations = lp.getLocations(splits[splitIx]); assertEquals(1, splitLocations.length); String splitLocation = splitLocations[0];
for (int splitIx = 0; splitIx < splits.length; ++splitIx) { state.set(0); int index = HostAffinitySplitLocationProvider.determineLocation(partLocs, splits[splitIx].getPath().toString(), splits[splitIx].getStart(), null); ++hitCounts[index];
@Test (timeout = 5000) public void testOrcSplitsLocationAffinity() throws IOException { HostAffinitySplitLocationProvider locationProvider = new HostAffinitySplitLocationProvider(executorLocations); String[] retLoc11 = locationProvider.getLocations(os11); String[] retLoc12 = locationProvider.getLocations(os12); String[] retLoc13 = locationProvider.getLocations(os13); String[] retLoc112 = locationProvider.getLocations(os11); String[] retLoc122 = locationProvider.getLocations(os12); String[] retLoc132 = locationProvider.getLocations(os13); assertArrayEquals(retLoc11, retLoc112); assertArrayEquals(retLoc12, retLoc122);