LOG.trace(MessageFormat.format( "Fragment found (path={0}, offset={1}, size={2}, owners={3})", //$NON-NLS-1$ fragment.getPath(), fragment.getOffset(), fragment.getSize(), fragment.getOwnerNodeNames()));
/**
 * Builds the {@link DirectInputFragment} that covers the given byte range.
 * The owner hosts of the fragment are obtained from {@code computeHosts(start, end)}.
 * @param start the start offset (inclusive)
 * @param end the end offset (exclusive)
 * @return the fragment for the range, whose size is {@code end - start}
 */
public DirectInputFragment get(long start, long end) {
    final List<String> owners = computeHosts(start, end);
    final long length = end - start;
    return new DirectInputFragment(path, start, length, owners);
}
private boolean validFragments(BlockMap map, List<DirectInputFragment> results) { assert map != null; assert results != null; Collections.sort(results, (o1, o2) -> Long.compare(o1.getOffset(), o2.getOffset())); long expectedOffset = 0; for (DirectInputFragment fragment : results) { long offset = fragment.getOffset(); assert offset == expectedOffset : offset + " != " + expectedOffset; //$NON-NLS-1$ expectedOffset = offset + fragment.getSize(); } assert map.getFileSize() == expectedOffset : map.getFileSize() + " != " + expectedOffset; //$NON-NLS-1$ return true; } }
"Start opening input (id={0}, path={1}, offset={2}, size={3})", //$NON-NLS-1$ profile.getId(), fragment.getPath(), fragment.getOffset(), fragment.getSize())); dataType, profile.getFileSystem(), new Path(fragment.getPath()), fragment.getOffset(), fragment.getSize(), counter); if (LOG.isDebugEnabled()) { "Finish opening input (id={0}, path={1}, offset={2}, size={3})", //$NON-NLS-1$ profile.getId(), fragment.getPath(), fragment.getOffset(), fragment.getSize()));
/**
 * Returns the owner node names of the underlying fragment as an array.
 * @return a new array holding every owner node name, in list order
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> owners = fragment.getOwnerNodeNames();
    String[] names = new String[owners.size()];
    return owners.toArray(names);
}
/**
 * Splitting by preferred size without joining blocks:
 * four 100-byte blocks with distinct owners yield one 100-byte fragment per block.
 */
@Test
public void pref_size_without_join() {
    String[] owners = { "a", "b", "c", "d" };
    BlockBuilder builder = new BlockBuilder();
    for (String owner : owners) {
        builder.add(100, owner);
    }

    List<DirectInputFragment> results = builder.compute(10, 80, true, true);

    assertThat(results.size(), is(4));
    for (int index = 0; index < owners.length; index++) {
        assertThat(results.get(index).getOwnerNodeNames(), hasItem(owners[index]));
        assertThat(results.get(index).getSize(), is(100L));
    }
}
/**
 * Returns the size of the underlying fragment.
 * @return the value of {@code fragment.getSize()}
 */
@Override
public long getLength() throws IOException, InterruptedException {
    final long size = fragment.getSize();
    return size;
}
/**
 * Finds the first fragment whose range {@code [offset, offset + size)} contains the position.
 * @param results the fragments to search, in iteration order
 * @param position the byte position to look up
 * @return the first fragment containing the position
 * @throws AssertionError if no fragment contains the position
 */
private DirectInputFragment find(List<DirectInputFragment> results, long position) {
    for (DirectInputFragment candidate : results) {
        long begin = candidate.getOffset();
        long length = candidate.getSize();
        if (position < begin || position >= begin + length) {
            // position lies outside of this fragment
            continue;
        }
        return candidate;
    }
    throw new AssertionError(position);
}
/**
 * Creates a {@link FileSplit} for the byte range {@code [start, end)} of the path.
 * The split locations are the owner node names of the fragment that the block map
 * returns for the same range.
 * @param blockMap the block map used to resolve owner nodes
 * @param path the path of the target file
 * @param start the start offset (inclusive)
 * @param end the end offset (exclusive)
 * @return the created split, with length {@code end - start}
 */
private static FileSplit getSplit(BlockMap blockMap, Path path, long start, long end) {
    List<String> hosts = blockMap.get(start, end).getOwnerNodeNames();
    String[] locations = hosts.toArray(new String[hosts.size()]);
    return new FileSplit(path, start, end - start, locations);
}
/**
 * Creates a record reader for the fragment held by the split.
 * A buffer object of the definition's data class is instantiated via
 * {@code ReflectionUtils.newInstance}, the input is opened by {@code createInput}
 * with a fresh {@code Counter}, and both are handed to a {@code BridgeRecordReader}
 * together with the fragment size.
 * @param <T> the data type
 * @param definition the data definition
 * @param split the target split
 * @param context the current task attempt context
 * @return the created record reader
 * @throws IOException declared for failures while opening the input
 * @throws InterruptedException declared for interruption while opening the input
 */
private <T> RecordReader<NullWritable, Object> createRecordReader(
        DataDefinition<T> definition,
        BridgeInputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    assert definition != null;
    assert split != null;
    assert context != null;
    Configuration configuration = context.getConfiguration();
    T record = ReflectionUtils.newInstance(definition.getDataClass(), configuration);
    Counter sizeCounter = new Counter();
    ModelInput<T> source = createInput(
            context,
            split.group.containerPath,
            definition,
            sizeCounter,
            split.fragment);
    return new BridgeRecordReader<>(source, record, sizeCounter, split.fragment.getSize());
}
/**
 * Serializes this object into the output.
 * The fields are written in this order:
 * container path, data type name, format class name, a boolean flag telling whether
 * a filter class follows, the filter class name (only if the flag is {@code true}),
 * fragment path, fragment offset, fragment size, owner node names,
 * the number of attributes, and then each attribute key followed by its value.
 * @param out the destination
 * @throws IOException if writing to the output fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // --- group section ---
    DirectInputGroup currentGroup = group;
    WritableUtils.writeString(out, currentGroup.containerPath);
    WritableUtils.writeString(out, currentGroup.dataType.getName());
    WritableUtils.writeString(out, currentGroup.formatClass.getName());
    boolean hasFilter = currentGroup.filterClass != null;
    out.writeBoolean(hasFilter);
    if (hasFilter) {
        WritableUtils.writeString(out, currentGroup.filterClass.getName());
    }

    // --- fragment section ---
    DirectInputFragment currentFragment = fragment;
    WritableUtils.writeString(out, currentFragment.getPath());
    WritableUtils.writeVLong(out, currentFragment.getOffset());
    WritableUtils.writeVLong(out, currentFragment.getSize());

    List<String> owners = currentFragment.getOwnerNodeNames();
    String[] ownerArray = owners.toArray(new String[owners.size()]);
    WritableUtils.writeStringArray(out, ownerArray);

    // attribute count first, then the key/value pairs
    Map<String, String> attrs = currentFragment.getAttributes();
    WritableUtils.writeVInt(out, attrs.size());
    for (Map.Entry<String, String> pair : attrs.entrySet()) {
        WritableUtils.writeString(out, pair.getKey());
        WritableUtils.writeString(out, pair.getValue());
    }
}
/**
 * Asserts that the fragments are contiguous and returns them sorted by offset.
 * A copy of the list is sorted; the first fragment must start at offset 0, each
 * following fragment must start where the previous one ended, and the end of the
 * last fragment must equal the {@code offset} field.
 * @param fragments the fragments to check (left unmodified)
 * @return a new list holding the fragments in ascending offset order
 */
private List<DirectInputFragment> validate(List<DirectInputFragment> fragments) {
    List<DirectInputFragment> sorted = new ArrayList<>(fragments);
    sorted.sort((left, right) -> Long.compare(left.getOffset(), right.getOffset()));
    long cursor = 0;
    for (DirectInputFragment each : sorted) {
        assertThat(each.getOffset(), is(cursor));
        cursor = each.getOffset() + each.getSize();
    }
    assertThat(offset, is(cursor));
    return sorted;
}
}
/**
 * Splitting by preferred size:
 * a single 400-byte block owned by "a" yields five fragments, all owned by "a".
 */
@Test
public void pref_size() {
    BlockBuilder builder = new BlockBuilder();
    builder.add(400, "a");

    List<DirectInputFragment> results = builder.compute(10, 80, true, true);

    assertThat(results.size(), is(5));
    for (int index = 0; index < 5; index++) {
        assertThat(results.get(index).getOwnerNodeNames(), hasItem("a"));
    }
}
List<DirectInputFragment> fragments = getFragments(repo, group, path, definition, dataSource); for (DirectInputFragment fragment : fragments) { totalSize += fragment.getSize(); results.add(new BridgeInputSplit(group, fragment));
this.fragment = new DirectInputFragment(path, offset, length, Arrays.asList(locations), attributes);
LOG.debug(MessageFormat.format( "Detect ORCFile stripe: path={0}, rows={1}, range={2}+{3}, allocation={4}", //$NON-NLS-1$ fragment.getPath(), stripe.getNumberOfRows(), fragment.getOffset(), fragment.getSize(), fragment.getOwnerNodeNames()));
begin, end - begin, fragment.getOwnerNodeNames()));
/**
 * Input from a large file.
 * Puts {@code input/file.txt} with 20 times the preferred fragment size (1MiB) and
 * checks that at least 10 fragments are found, each between half and twice the
 * preferred size.
 * @throws Exception if failed
 */
@Test
public void input_large() throws Exception {
    final long preferred = 1 * 1024 * 1024;
    final int count = 20;
    put(new File(mapping, "input/file.txt"), preferred * count);

    profile.setMinimumFragmentSize(1);
    profile.setPreferredFragmentSize(preferred);
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);

    List<DirectInputFragment> fragments = core.findInputFragments(
            definition,
            "input",
            FilePattern.compile("**"));

    assertThat(fragments.size(), is(greaterThanOrEqualTo(count / 2)));
    for (DirectInputFragment each : fragments) {
        assertThat(each.getSize(), is(greaterThanOrEqualTo(preferred / 2)));
        assertThat(each.getSize(), is(lessThanOrEqualTo(preferred * 2)));
    }
}
/**
 * The tail block is too small:
 * a 100-byte block owned by "a" followed by a 1-byte block owned by "b", "c", "d"
 * yields a single fragment whose owners include "a" but not "b".
 */
@Test
public void ignore_little_locality() {
    BlockBuilder builder = new BlockBuilder();
    builder.add(100, "a");
    builder.add(1, "b", "c", "d");

    List<DirectInputFragment> results = builder.compute(50, 200, true, true);

    assertThat(results.size(), is(1));
    DirectInputFragment only = results.get(0);
    assertThat(only.getOwnerNodeNames(), hasItem("a"));
    assertThat(only.getOwnerNodeNames(), not(hasItem("b")));
}
/**
 * Splitting by preferred size with block join:
 * four 100-byte blocks that are all owned by "a" yield five fragments, all owned by "a".
 */
@Test
public void pref_size_with_join() {
    BlockBuilder builder = new BlockBuilder();
    for (int block = 0; block < 4; block++) {
        builder.add(100, "a");
    }

    List<DirectInputFragment> results = builder.compute(10, 80, true, true);

    assertThat(results.size(), is(5));
    for (int index = 0; index < 5; index++) {
        assertThat(results.get(index).getOwnerNodeNames(), hasItem("a"));
    }
}