/**
 * Returns the length of this split, taken from the size of the underlying fragment.
 * @return the size reported by {@code fragment}
 * @throws IOException declared by the overridden method; not raised by this body
 * @throws InterruptedException declared by the overridden method; not raised by this body
 */
@Override
public long getLength() throws IOException, InterruptedException {
    final long fragmentSize = fragment.getSize();
    return fragmentSize;
}
/**
 * Creates a record reader that reads the fragment held by the given split.
 * @param <T> the data model type described by {@code definition}
 * @param definition the data definition; supplies the data class and is passed on to {@code createInput}
 * @param split the target split; its group container path and fragment are used
 * @param context the current task attempt context; supplies the configuration
 * @return a reader wrapping the opened input, a freshly created buffer object, a counter,
 *     and the fragment size
 * @throws IOException if failed to open the input
 * @throws InterruptedException if interrupted while opening the input
 */
private <T> RecordReader<NullWritable, Object> createRecordReader(
        DataDefinition<T> definition,
        BridgeInputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    assert definition != null;
    assert split != null;
    assert context != null;

    // The buffer object is instantiated first, using the task configuration.
    T model = ReflectionUtils.newInstance(definition.getDataClass(), context.getConfiguration());

    // The same counter instance is shared by the input and the resulting reader.
    Counter sizeCounter = new Counter();
    ModelInput<T> source = createInput(
            context,
            split.group.containerPath,
            definition,
            sizeCounter,
            split.fragment);

    long fragmentSize = split.fragment.getSize();
    return new BridgeRecordReader<>(source, model, sizeCounter, fragmentSize);
}
/**
 * Finds the first fragment whose range {@code [offset, offset + size)} contains the position.
 * @param results the candidate fragments, scanned in iteration order
 * @param position the position to look up
 * @return the first fragment that covers {@code position}
 * @throws AssertionError if no fragment covers {@code position}
 */
private DirectInputFragment find(List<DirectInputFragment> results, long position) {
    for (DirectInputFragment candidate : results) {
        long begin = candidate.getOffset();
        long end = begin + candidate.getSize();
        if (position < begin || position >= end) {
            // position lies outside of this fragment
            continue;
        }
        return candidate;
    }
    throw new AssertionError(position);
}
List<DirectInputFragment> fragments = getFragments(repo, group, path, definition, dataSource); for (DirectInputFragment fragment : fragments) { totalSize += fragment.getSize(); results.add(new BridgeInputSplit(group, fragment));
private boolean validFragments(BlockMap map, List<DirectInputFragment> results) { assert map != null; assert results != null; Collections.sort(results, (o1, o2) -> Long.compare(o1.getOffset(), o2.getOffset())); long expectedOffset = 0; for (DirectInputFragment fragment : results) { long offset = fragment.getOffset(); assert offset == expectedOffset : offset + " != " + expectedOffset; //$NON-NLS-1$ expectedOffset = offset + fragment.getSize(); } assert map.getFileSize() == expectedOffset : map.getFileSize() + " != " + expectedOffset; //$NON-NLS-1$ return true; } }
fragment.getPath(), fragment.getOffset(), fragment.getSize(), fragment.getOwnerNodeNames()));
/**
 * Serializes this split into the given output.
 * The fields are written in the following order: the group's container path, data type name,
 * format class name, a flag telling whether a filter class follows, the filter class name (only
 * if present), and then the fragment's path, offset, size, owner node names, and attributes
 * (entry count followed by each key and value).
 * @param out the destination
 * @throws IOException if failed to write into the output
 */
@Override
public void write(DataOutput out) throws IOException {
    // group part
    DirectInputGroup currentGroup = group;
    WritableUtils.writeString(out, currentGroup.containerPath);
    WritableUtils.writeString(out, currentGroup.dataType.getName());
    WritableUtils.writeString(out, currentGroup.formatClass.getName());
    boolean hasFilter = currentGroup.filterClass != null;
    out.writeBoolean(hasFilter);
    if (hasFilter) {
        WritableUtils.writeString(out, currentGroup.filterClass.getName());
    }

    // fragment part
    DirectInputFragment currentFragment = fragment;
    WritableUtils.writeString(out, currentFragment.getPath());
    WritableUtils.writeVLong(out, currentFragment.getOffset());
    WritableUtils.writeVLong(out, currentFragment.getSize());

    List<String> owners = currentFragment.getOwnerNodeNames();
    String[] ownerArray = owners.toArray(new String[0]);
    WritableUtils.writeStringArray(out, ownerArray);

    Map<String, String> attrs = currentFragment.getAttributes();
    WritableUtils.writeVInt(out, attrs.size());
    for (Map.Entry<String, String> attr : attrs.entrySet()) {
        String key = attr.getKey();
        String value = attr.getValue();
        WritableUtils.writeString(out, key);
        WritableUtils.writeString(out, value);
    }
}
stripe.getNumberOfRows(), fragment.getOffset(), fragment.getSize(), fragment.getOwnerNodeNames()));
/**
 * Verifies that the fragments are contiguous, and returns them ordered by their offset.
 * The first fragment must start at {@code 0}, each following fragment must start where the
 * previous one ended, and the end of the last fragment must be equal to {@code offset}.
 * The given list itself is not modified.
 * @param fragments the fragments to verify
 * @return a sorted copy of the fragments
 */
private List<DirectInputFragment> validate(List<DirectInputFragment> fragments) {
    List<DirectInputFragment> sorted = new ArrayList<>(fragments);
    sorted.sort((left, right) -> Long.compare(left.getOffset(), right.getOffset()));
    long cursor = 0;
    for (DirectInputFragment current : sorted) {
        long begin = current.getOffset();
        assertThat(begin, is(cursor));
        cursor = begin + current.getSize();
    }
    assertThat(offset, is(cursor));
    return sorted;
}
}
/**
 * per preferred size without block join:
 * four blocks of size {@code 100} owned by {@code a}..{@code d} must result in four fragments,
 * each of which has size {@code 100} and is owned by the corresponding node.
 */
@Test
public void pref_size_without_join() {
    String[] owners = { "a", "b", "c", "d" };
    BlockBuilder builder = new BlockBuilder();
    for (String owner : owners) {
        builder.add(100, owner);
    }

    List<DirectInputFragment> results = builder.compute(10, 80, true, true);

    assertThat(results.size(), is(4));
    for (int index = 0; index < owners.length; index++) {
        DirectInputFragment fragment = results.get(index);
        assertThat(fragment.getOwnerNodeNames(), hasItem(owners[index]));
        assertThat(fragment.getSize(), is(100L));
    }
}
/**
 * input fragments for a large file:
 * a file of {@code 20} times the preferred fragment size must be split into at least {@code 10}
 * fragments, and the size of each fragment must be between half and twice the preferred size.
 * @throws Exception if failed
 */
@Test
public void input_large() throws Exception {
    final int fragmentCount = 20;
    final long fragmentSize = 1024L * 1024L;

    File file = new File(mapping, "input/file.txt");
    put(file, fragmentSize * fragmentCount);

    profile.setMinimumFragmentSize(1);
    profile.setPreferredFragmentSize(fragmentSize);
    HadoopDataSourceCore core = new HadoopDataSourceCore(profile);

    FilePattern everything = FilePattern.compile("**");
    List<DirectInputFragment> fragments = core.findInputFragments(definition, "input", everything);

    assertThat(fragments.size(), is(greaterThanOrEqualTo(fragmentCount / 2)));
    final long lowerBound = fragmentSize / 2;
    final long upperBound = fragmentSize * 2;
    for (DirectInputFragment fragment : fragments) {
        assertThat(fragment.getSize(), is(greaterThanOrEqualTo(lowerBound)));
        assertThat(fragment.getSize(), is(lessThanOrEqualTo(upperBound)));
    }
}